{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 2764, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00036181086337117274, "grad_norm": 56.303955078125, "learning_rate": 0.0, "loss": 0.71990966796875, "step": 1 }, { "epoch": 0.0007236217267423455, "grad_norm": 57.72604751586914, "learning_rate": 7.8125e-08, "loss": 0.72125244140625, "step": 2 }, { "epoch": 0.001085432590113518, "grad_norm": 57.74155807495117, "learning_rate": 1.5625e-07, "loss": 0.7230224609375, "step": 3 }, { "epoch": 0.001447243453484691, "grad_norm": 60.98445129394531, "learning_rate": 2.3437500000000003e-07, "loss": 0.72564697265625, "step": 4 }, { "epoch": 0.0018090543168558636, "grad_norm": 61.082489013671875, "learning_rate": 3.125e-07, "loss": 0.7237548828125, "step": 5 }, { "epoch": 0.002170865180227036, "grad_norm": 60.27531814575195, "learning_rate": 3.90625e-07, "loss": 0.72161865234375, "step": 6 }, { "epoch": 0.002532676043598209, "grad_norm": 59.99946975708008, "learning_rate": 4.6875000000000006e-07, "loss": 0.7218017578125, "step": 7 }, { "epoch": 0.002894486906969382, "grad_norm": 57.5752067565918, "learning_rate": 5.468750000000001e-07, "loss": 0.72076416015625, "step": 8 }, { "epoch": 0.0032562977703405544, "grad_norm": 57.596885681152344, "learning_rate": 6.25e-07, "loss": 0.7218017578125, "step": 9 }, { "epoch": 0.0036181086337117273, "grad_norm": 58.94000244140625, "learning_rate": 7.03125e-07, "loss": 0.72174072265625, "step": 10 }, { "epoch": 0.0039799194970829, "grad_norm": 56.81222915649414, "learning_rate": 7.8125e-07, "loss": 0.71942138671875, "step": 11 }, { "epoch": 0.004341730360454072, "grad_norm": 54.88041305541992, "learning_rate": 8.59375e-07, "loss": 0.7149658203125, "step": 12 }, { "epoch": 0.004703541223825245, "grad_norm": 59.76363754272461, "learning_rate": 9.375000000000001e-07, "loss": 0.718505859375, "step": 13 }, { "epoch": 0.005065352087196418, "grad_norm": 56.969024658203125, "learning_rate": 1.0156250000000001e-06, "loss": 0.71429443359375, "step": 14 }, { "epoch": 0.005427162950567591, "grad_norm": 59.61025619506836, "learning_rate": 1.0937500000000001e-06, "loss": 0.71661376953125, "step": 15 }, { "epoch": 0.005788973813938764, "grad_norm": 55.884925842285156, "learning_rate": 1.1718750000000001e-06, "loss": 0.70086669921875, "step": 16 }, { "epoch": 0.006150784677309936, "grad_norm": 54.968482971191406, "learning_rate": 1.25e-06, "loss": 0.702392578125, "step": 17 }, { "epoch": 0.006512595540681109, "grad_norm": 56.36518859863281, "learning_rate": 1.328125e-06, "loss": 0.7003173828125, "step": 18 }, { "epoch": 0.006874406404052282, "grad_norm": 57.77788543701172, "learning_rate": 1.40625e-06, "loss": 0.6982421875, "step": 19 }, { "epoch": 0.0072362172674234546, "grad_norm": 53.70185852050781, "learning_rate": 1.484375e-06, "loss": 0.69342041015625, "step": 20 }, { "epoch": 0.0075980281307946275, "grad_norm": 56.984458923339844, "learning_rate": 1.5625e-06, "loss": 0.6951904296875, "step": 21 }, { "epoch": 0.0079598389941658, "grad_norm": 52.101280212402344, "learning_rate": 1.640625e-06, "loss": 0.66278076171875, "step": 22 }, { "epoch": 0.008321649857536972, "grad_norm": 50.710208892822266, "learning_rate": 1.71875e-06, "loss": 0.66217041015625, "step": 23 }, { "epoch": 0.008683460720908144, "grad_norm": 51.51614761352539, "learning_rate": 1.796875e-06, "loss": 0.6573486328125, "step": 24 }, { "epoch": 0.009045271584279318, "grad_norm": 48.48180389404297, "learning_rate": 1.8750000000000003e-06, "loss": 0.65869140625, "step": 25 }, { "epoch": 0.00940708244765049, "grad_norm": 47.873741149902344, "learning_rate": 1.953125e-06, "loss": 0.64959716796875, "step": 26 }, { "epoch": 0.009768893311021664, "grad_norm": 48.56371307373047, "learning_rate": 2.0312500000000002e-06, "loss": 0.64666748046875, "step": 27 }, { "epoch": 0.010130704174392836, "grad_norm": 46.48714065551758, "learning_rate": 2.109375e-06, "loss": 0.64508056640625, "step": 28 }, { "epoch": 0.010492515037764008, "grad_norm": 45.06576919555664, "learning_rate": 2.1875000000000002e-06, "loss": 0.64208984375, "step": 29 }, { "epoch": 0.010854325901135182, "grad_norm": 42.81542205810547, "learning_rate": 2.265625e-06, "loss": 0.58819580078125, "step": 30 }, { "epoch": 0.011216136764506354, "grad_norm": 36.4161491394043, "learning_rate": 2.3437500000000002e-06, "loss": 0.59228515625, "step": 31 }, { "epoch": 0.011577947627877528, "grad_norm": 39.596195220947266, "learning_rate": 2.421875e-06, "loss": 0.5792236328125, "step": 32 }, { "epoch": 0.0119397584912487, "grad_norm": 37.4307861328125, "learning_rate": 2.5e-06, "loss": 0.57672119140625, "step": 33 }, { "epoch": 0.012301569354619872, "grad_norm": 38.476806640625, "learning_rate": 2.5781250000000004e-06, "loss": 0.57073974609375, "step": 34 }, { "epoch": 0.012663380217991045, "grad_norm": 39.424007415771484, "learning_rate": 2.65625e-06, "loss": 0.561279296875, "step": 35 }, { "epoch": 0.013025191081362218, "grad_norm": 37.29197311401367, "learning_rate": 2.7343750000000004e-06, "loss": 0.5511474609375, "step": 36 }, { "epoch": 0.013387001944733391, "grad_norm": 34.89186096191406, "learning_rate": 2.8125e-06, "loss": 0.54486083984375, "step": 37 }, { "epoch": 0.013748812808104563, "grad_norm": 34.53691864013672, "learning_rate": 2.8906250000000004e-06, "loss": 0.541046142578125, "step": 38 }, { "epoch": 0.014110623671475735, "grad_norm": 35.88742446899414, "learning_rate": 2.96875e-06, "loss": 0.51776123046875, "step": 39 }, { "epoch": 0.014472434534846909, "grad_norm": 39.61392593383789, "learning_rate": 3.0468750000000004e-06, "loss": 0.489715576171875, "step": 40 }, { "epoch": 0.014834245398218081, "grad_norm": 32.71275329589844, "learning_rate": 3.125e-06, "loss": 0.49127197265625, "step": 41 }, { "epoch": 0.015196056261589255, "grad_norm": 34.40239334106445, "learning_rate": 3.2031250000000004e-06, "loss": 0.45941162109375, "step": 42 }, { "epoch": 0.015557867124960427, "grad_norm": 27.877994537353516, "learning_rate": 3.28125e-06, "loss": 0.4722900390625, "step": 43 }, { "epoch": 0.0159196779883316, "grad_norm": 27.59468650817871, "learning_rate": 3.3593750000000003e-06, "loss": 0.462432861328125, "step": 44 }, { "epoch": 0.01628148885170277, "grad_norm": 29.329545974731445, "learning_rate": 3.4375e-06, "loss": 0.436065673828125, "step": 45 }, { "epoch": 0.016643299715073945, "grad_norm": 26.580181121826172, "learning_rate": 3.5156250000000003e-06, "loss": 0.431854248046875, "step": 46 }, { "epoch": 0.01700511057844512, "grad_norm": 25.13237762451172, "learning_rate": 3.59375e-06, "loss": 0.420684814453125, "step": 47 }, { "epoch": 0.01736692144181629, "grad_norm": 21.065155029296875, "learning_rate": 3.6718750000000003e-06, "loss": 0.430023193359375, "step": 48 }, { "epoch": 0.017728732305187463, "grad_norm": 22.901107788085938, "learning_rate": 3.7500000000000005e-06, "loss": 0.4000244140625, "step": 49 }, { "epoch": 0.018090543168558636, "grad_norm": 20.034738540649414, "learning_rate": 3.828125000000001e-06, "loss": 0.402618408203125, "step": 50 }, { "epoch": 0.01845235403192981, "grad_norm": 19.484838485717773, "learning_rate": 3.90625e-06, "loss": 0.387847900390625, "step": 51 }, { "epoch": 0.01881416489530098, "grad_norm": 18.220962524414062, "learning_rate": 3.984375e-06, "loss": 0.3779296875, "step": 52 }, { "epoch": 0.019175975758672154, "grad_norm": 15.05900764465332, "learning_rate": 4.0625000000000005e-06, "loss": 0.38287353515625, "step": 53 }, { "epoch": 0.019537786622043328, "grad_norm": 11.30367660522461, "learning_rate": 4.140625000000001e-06, "loss": 0.395538330078125, "step": 54 }, { "epoch": 0.0198995974854145, "grad_norm": 11.097336769104004, "learning_rate": 4.21875e-06, "loss": 0.3792572021484375, "step": 55 }, { "epoch": 0.020261408348785672, "grad_norm": 8.56688404083252, "learning_rate": 4.296875e-06, "loss": 0.3841400146484375, "step": 56 }, { "epoch": 0.020623219212156846, "grad_norm": 7.840727806091309, "learning_rate": 4.3750000000000005e-06, "loss": 0.3713531494140625, "step": 57 }, { "epoch": 0.020985030075528016, "grad_norm": 7.882927894592285, "learning_rate": 4.453125000000001e-06, "loss": 0.3556671142578125, "step": 58 }, { "epoch": 0.02134684093889919, "grad_norm": 4.1720685958862305, "learning_rate": 4.53125e-06, "loss": 0.3815460205078125, "step": 59 }, { "epoch": 0.021708651802270364, "grad_norm": 5.938807487487793, "learning_rate": 4.609375e-06, "loss": 0.3455657958984375, "step": 60 }, { "epoch": 0.022070462665641537, "grad_norm": 4.192846298217773, "learning_rate": 4.6875000000000004e-06, "loss": 0.35076904296875, "step": 61 }, { "epoch": 0.022432273529012708, "grad_norm": 3.2213594913482666, "learning_rate": 4.765625000000001e-06, "loss": 0.4026336669921875, "step": 62 }, { "epoch": 0.02279408439238388, "grad_norm": 2.3461005687713623, "learning_rate": 4.84375e-06, "loss": 0.3467559814453125, "step": 63 }, { "epoch": 0.023155895255755055, "grad_norm": 3.146305561065674, "learning_rate": 4.921875e-06, "loss": 0.39910888671875, "step": 64 }, { "epoch": 0.023517706119126226, "grad_norm": 2.48130202293396, "learning_rate": 5e-06, "loss": 0.34130859375, "step": 65 }, { "epoch": 0.0238795169824974, "grad_norm": 2.023998498916626, "learning_rate": 5.078125000000001e-06, "loss": 0.3471832275390625, "step": 66 }, { "epoch": 0.024241327845868573, "grad_norm": 2.5918710231781006, "learning_rate": 5.156250000000001e-06, "loss": 0.365936279296875, "step": 67 }, { "epoch": 0.024603138709239743, "grad_norm": 2.095276355743408, "learning_rate": 5.234375e-06, "loss": 0.337799072265625, "step": 68 }, { "epoch": 0.024964949572610917, "grad_norm": 2.4429900646209717, "learning_rate": 5.3125e-06, "loss": 0.33356475830078125, "step": 69 }, { "epoch": 0.02532676043598209, "grad_norm": 1.756453037261963, "learning_rate": 5.390625000000001e-06, "loss": 0.372528076171875, "step": 70 }, { "epoch": 0.025688571299353265, "grad_norm": 4.324148178100586, "learning_rate": 5.468750000000001e-06, "loss": 0.3233184814453125, "step": 71 }, { "epoch": 0.026050382162724435, "grad_norm": 5.255918979644775, "learning_rate": 5.546875e-06, "loss": 0.326171875, "step": 72 }, { "epoch": 0.02641219302609561, "grad_norm": 3.09546160697937, "learning_rate": 5.625e-06, "loss": 0.328948974609375, "step": 73 }, { "epoch": 0.026774003889466783, "grad_norm": 2.037350654602051, "learning_rate": 5.7031250000000006e-06, "loss": 0.3283538818359375, "step": 74 }, { "epoch": 0.027135814752837953, "grad_norm": 1.7104395627975464, "learning_rate": 5.781250000000001e-06, "loss": 0.343658447265625, "step": 75 }, { "epoch": 0.027497625616209127, "grad_norm": 2.904224157333374, "learning_rate": 5.859375e-06, "loss": 0.3206329345703125, "step": 76 }, { "epoch": 0.0278594364795803, "grad_norm": 3.7463557720184326, "learning_rate": 5.9375e-06, "loss": 0.3200531005859375, "step": 77 }, { "epoch": 0.02822124734295147, "grad_norm": 1.525284767150879, "learning_rate": 6.0156250000000005e-06, "loss": 0.34644317626953125, "step": 78 }, { "epoch": 0.028583058206322644, "grad_norm": 1.62236750125885, "learning_rate": 6.093750000000001e-06, "loss": 0.3220977783203125, "step": 79 }, { "epoch": 0.028944869069693818, "grad_norm": 2.5046024322509766, "learning_rate": 6.171875e-06, "loss": 0.32147216796875, "step": 80 }, { "epoch": 0.029306679933064992, "grad_norm": 7.334758758544922, "learning_rate": 6.25e-06, "loss": 0.40606689453125, "step": 81 }, { "epoch": 0.029668490796436162, "grad_norm": 1.6804760694503784, "learning_rate": 6.3281250000000005e-06, "loss": 0.3244171142578125, "step": 82 }, { "epoch": 0.030030301659807336, "grad_norm": 1.158706545829773, "learning_rate": 6.406250000000001e-06, "loss": 0.32047271728515625, "step": 83 }, { "epoch": 0.03039211252317851, "grad_norm": 1.990893840789795, "learning_rate": 6.484375000000001e-06, "loss": 0.34123992919921875, "step": 84 }, { "epoch": 0.03075392338654968, "grad_norm": 4.480165481567383, "learning_rate": 6.5625e-06, "loss": 0.34180450439453125, "step": 85 }, { "epoch": 0.031115734249920854, "grad_norm": 1.598747968673706, "learning_rate": 6.6406250000000005e-06, "loss": 0.344482421875, "step": 86 }, { "epoch": 0.031477545113292024, "grad_norm": 5.076443195343018, "learning_rate": 6.718750000000001e-06, "loss": 0.3570709228515625, "step": 87 }, { "epoch": 0.0318393559766632, "grad_norm": 3.723548650741577, "learning_rate": 6.796875000000001e-06, "loss": 0.3214569091796875, "step": 88 }, { "epoch": 0.03220116684003437, "grad_norm": 7.2032246589660645, "learning_rate": 6.875e-06, "loss": 0.3690605163574219, "step": 89 }, { "epoch": 0.03256297770340554, "grad_norm": 7.369372844696045, "learning_rate": 6.9531250000000004e-06, "loss": 0.388092041015625, "step": 90 }, { "epoch": 0.03292478856677672, "grad_norm": 1.4578914642333984, "learning_rate": 7.031250000000001e-06, "loss": 0.3430023193359375, "step": 91 }, { "epoch": 0.03328659943014789, "grad_norm": 7.015164852142334, "learning_rate": 7.109375000000001e-06, "loss": 0.3303985595703125, "step": 92 }, { "epoch": 0.03364841029351906, "grad_norm": 2.901564359664917, "learning_rate": 7.1875e-06, "loss": 0.3538970947265625, "step": 93 }, { "epoch": 0.03401022115689024, "grad_norm": 6.7856340408325195, "learning_rate": 7.265625e-06, "loss": 0.3305816650390625, "step": 94 }, { "epoch": 0.03437203202026141, "grad_norm": 7.778658866882324, "learning_rate": 7.343750000000001e-06, "loss": 0.32659149169921875, "step": 95 }, { "epoch": 0.03473384288363258, "grad_norm": 6.041079044342041, "learning_rate": 7.421875000000001e-06, "loss": 0.334991455078125, "step": 96 }, { "epoch": 0.035095653747003755, "grad_norm": 2.3788111209869385, "learning_rate": 7.500000000000001e-06, "loss": 0.307830810546875, "step": 97 }, { "epoch": 0.035457464610374925, "grad_norm": 8.597260475158691, "learning_rate": 7.578125e-06, "loss": 0.39483642578125, "step": 98 }, { "epoch": 0.0358192754737461, "grad_norm": 3.175433874130249, "learning_rate": 7.656250000000001e-06, "loss": 0.306793212890625, "step": 99 }, { "epoch": 0.03618108633711727, "grad_norm": 5.6385321617126465, "learning_rate": 7.734375e-06, "loss": 0.382415771484375, "step": 100 }, { "epoch": 0.03654289720048844, "grad_norm": 4.539242744445801, "learning_rate": 7.8125e-06, "loss": 0.3727264404296875, "step": 101 }, { "epoch": 0.03690470806385962, "grad_norm": 3.33024001121521, "learning_rate": 7.890625e-06, "loss": 0.3176422119140625, "step": 102 }, { "epoch": 0.03726651892723079, "grad_norm": 4.580371856689453, "learning_rate": 7.96875e-06, "loss": 0.3067626953125, "step": 103 }, { "epoch": 0.03762832979060196, "grad_norm": 0.9373947381973267, "learning_rate": 8.046875e-06, "loss": 0.34577178955078125, "step": 104 }, { "epoch": 0.03799014065397314, "grad_norm": 2.0265326499938965, "learning_rate": 8.125000000000001e-06, "loss": 0.3272552490234375, "step": 105 }, { "epoch": 0.03835195151734431, "grad_norm": 1.9656240940093994, "learning_rate": 8.203125000000001e-06, "loss": 0.3217926025390625, "step": 106 }, { "epoch": 0.03871376238071548, "grad_norm": 4.561341285705566, "learning_rate": 8.281250000000001e-06, "loss": 0.311614990234375, "step": 107 }, { "epoch": 0.039075573244086656, "grad_norm": 2.8528876304626465, "learning_rate": 8.359375e-06, "loss": 0.331939697265625, "step": 108 }, { "epoch": 0.039437384107457826, "grad_norm": 2.93536114692688, "learning_rate": 8.4375e-06, "loss": 0.3298797607421875, "step": 109 }, { "epoch": 0.039799194970829, "grad_norm": 8.50965690612793, "learning_rate": 8.515625e-06, "loss": 0.3150177001953125, "step": 110 }, { "epoch": 0.040161005834200174, "grad_norm": 9.330976486206055, "learning_rate": 8.59375e-06, "loss": 0.3586578369140625, "step": 111 }, { "epoch": 0.040522816697571344, "grad_norm": 7.738970756530762, "learning_rate": 8.671875e-06, "loss": 0.308563232421875, "step": 112 }, { "epoch": 0.040884627560942514, "grad_norm": 1.1263664960861206, "learning_rate": 8.750000000000001e-06, "loss": 0.335662841796875, "step": 113 }, { "epoch": 0.04124643842431369, "grad_norm": 3.4978387355804443, "learning_rate": 8.828125000000001e-06, "loss": 0.33463287353515625, "step": 114 }, { "epoch": 0.04160824928768486, "grad_norm": 6.418590545654297, "learning_rate": 8.906250000000001e-06, "loss": 0.34897613525390625, "step": 115 }, { "epoch": 0.04197006015105603, "grad_norm": 5.085871696472168, "learning_rate": 8.984375000000002e-06, "loss": 0.3512115478515625, "step": 116 }, { "epoch": 0.04233187101442721, "grad_norm": 6.91829776763916, "learning_rate": 9.0625e-06, "loss": 0.37976837158203125, "step": 117 }, { "epoch": 0.04269368187779838, "grad_norm": 2.873131513595581, "learning_rate": 9.140625e-06, "loss": 0.33803558349609375, "step": 118 }, { "epoch": 0.04305549274116956, "grad_norm": 1.8683881759643555, "learning_rate": 9.21875e-06, "loss": 0.34814453125, "step": 119 }, { "epoch": 0.04341730360454073, "grad_norm": 1.064295768737793, "learning_rate": 9.296875e-06, "loss": 0.3420562744140625, "step": 120 }, { "epoch": 0.0437791144679119, "grad_norm": 4.668102741241455, "learning_rate": 9.375000000000001e-06, "loss": 0.3234405517578125, "step": 121 }, { "epoch": 0.044140925331283075, "grad_norm": 1.9518241882324219, "learning_rate": 9.453125000000001e-06, "loss": 0.3412628173828125, "step": 122 }, { "epoch": 0.044502736194654245, "grad_norm": 3.2637321949005127, "learning_rate": 9.531250000000001e-06, "loss": 0.359039306640625, "step": 123 }, { "epoch": 0.044864547058025415, "grad_norm": 7.525784969329834, "learning_rate": 9.609375000000001e-06, "loss": 0.2971954345703125, "step": 124 }, { "epoch": 0.04522635792139659, "grad_norm": 4.177999973297119, "learning_rate": 9.6875e-06, "loss": 0.3059844970703125, "step": 125 }, { "epoch": 0.04558816878476776, "grad_norm": 1.8466285467147827, "learning_rate": 9.765625e-06, "loss": 0.3350830078125, "step": 126 }, { "epoch": 0.04594997964813893, "grad_norm": 4.295635223388672, "learning_rate": 9.84375e-06, "loss": 0.2991485595703125, "step": 127 }, { "epoch": 0.04631179051151011, "grad_norm": 1.2084883451461792, "learning_rate": 9.921875e-06, "loss": 0.31522369384765625, "step": 128 }, { "epoch": 0.04667360137488128, "grad_norm": 2.834717273712158, "learning_rate": 1e-05, "loss": 0.291412353515625, "step": 129 }, { "epoch": 0.04703541223825245, "grad_norm": 4.732810020446777, "learning_rate": 1.0078125000000001e-05, "loss": 0.3327178955078125, "step": 130 }, { "epoch": 0.04739722310162363, "grad_norm": 6.669642925262451, "learning_rate": 1.0156250000000001e-05, "loss": 0.3905448913574219, "step": 131 }, { "epoch": 0.0477590339649948, "grad_norm": 2.1808369159698486, "learning_rate": 1.0234375000000001e-05, "loss": 0.31104278564453125, "step": 132 }, { "epoch": 0.04812084482836597, "grad_norm": 5.455481052398682, "learning_rate": 1.0312500000000002e-05, "loss": 0.354400634765625, "step": 133 }, { "epoch": 0.048482655691737146, "grad_norm": 3.07791805267334, "learning_rate": 1.0390625e-05, "loss": 0.3325042724609375, "step": 134 }, { "epoch": 0.048844466555108317, "grad_norm": 5.874892234802246, "learning_rate": 1.046875e-05, "loss": 0.3571929931640625, "step": 135 }, { "epoch": 0.04920627741847949, "grad_norm": 4.0007805824279785, "learning_rate": 1.0546875e-05, "loss": 0.36090087890625, "step": 136 }, { "epoch": 0.049568088281850664, "grad_norm": 4.828671455383301, "learning_rate": 1.0625e-05, "loss": 0.3497161865234375, "step": 137 }, { "epoch": 0.049929899145221834, "grad_norm": 5.523787498474121, "learning_rate": 1.0703125000000001e-05, "loss": 0.3650054931640625, "step": 138 }, { "epoch": 0.050291710008593005, "grad_norm": 4.55745267868042, "learning_rate": 1.0781250000000001e-05, "loss": 0.33190155029296875, "step": 139 }, { "epoch": 0.05065352087196418, "grad_norm": 4.216424465179443, "learning_rate": 1.0859375000000001e-05, "loss": 0.3406715393066406, "step": 140 }, { "epoch": 0.05101533173533535, "grad_norm": 2.379257917404175, "learning_rate": 1.0937500000000002e-05, "loss": 0.335662841796875, "step": 141 }, { "epoch": 0.05137714259870653, "grad_norm": 3.4555275440216064, "learning_rate": 1.1015625e-05, "loss": 0.3092803955078125, "step": 142 }, { "epoch": 0.0517389534620777, "grad_norm": 4.636330604553223, "learning_rate": 1.109375e-05, "loss": 0.311767578125, "step": 143 }, { "epoch": 0.05210076432544887, "grad_norm": 3.170297861099243, "learning_rate": 1.1171875e-05, "loss": 0.31998443603515625, "step": 144 }, { "epoch": 0.05246257518882005, "grad_norm": 6.00010871887207, "learning_rate": 1.125e-05, "loss": 0.31041717529296875, "step": 145 }, { "epoch": 0.05282438605219122, "grad_norm": 2.52350115776062, "learning_rate": 1.1328125000000001e-05, "loss": 0.31308746337890625, "step": 146 }, { "epoch": 0.05318619691556239, "grad_norm": 1.5120203495025635, "learning_rate": 1.1406250000000001e-05, "loss": 0.30926513671875, "step": 147 }, { "epoch": 0.053548007778933565, "grad_norm": 2.03025221824646, "learning_rate": 1.1484375000000001e-05, "loss": 0.3170318603515625, "step": 148 }, { "epoch": 0.053909818642304735, "grad_norm": 2.1440768241882324, "learning_rate": 1.1562500000000002e-05, "loss": 0.2982940673828125, "step": 149 }, { "epoch": 0.054271629505675906, "grad_norm": 1.8045042753219604, "learning_rate": 1.1640625000000002e-05, "loss": 0.31031036376953125, "step": 150 }, { "epoch": 0.05463344036904708, "grad_norm": 2.1278035640716553, "learning_rate": 1.171875e-05, "loss": 0.3195075988769531, "step": 151 }, { "epoch": 0.05499525123241825, "grad_norm": 5.490327835083008, "learning_rate": 1.1796875e-05, "loss": 0.3505821228027344, "step": 152 }, { "epoch": 0.055357062095789424, "grad_norm": 4.229641914367676, "learning_rate": 1.1875e-05, "loss": 0.3521881103515625, "step": 153 }, { "epoch": 0.0557188729591606, "grad_norm": 4.079693794250488, "learning_rate": 1.1953125000000001e-05, "loss": 0.3107147216796875, "step": 154 }, { "epoch": 0.05608068382253177, "grad_norm": 5.585252285003662, "learning_rate": 1.2031250000000001e-05, "loss": 0.3002166748046875, "step": 155 }, { "epoch": 0.05644249468590294, "grad_norm": 2.1126458644866943, "learning_rate": 1.2109375000000001e-05, "loss": 0.336761474609375, "step": 156 }, { "epoch": 0.05680430554927412, "grad_norm": 3.168208122253418, "learning_rate": 1.2187500000000001e-05, "loss": 0.34649658203125, "step": 157 }, { "epoch": 0.05716611641264529, "grad_norm": 2.493741750717163, "learning_rate": 1.2265625000000002e-05, "loss": 0.30517578125, "step": 158 }, { "epoch": 0.05752792727601646, "grad_norm": 1.9899773597717285, "learning_rate": 1.234375e-05, "loss": 0.3533172607421875, "step": 159 }, { "epoch": 0.057889738139387636, "grad_norm": 1.360213279724121, "learning_rate": 1.2421875e-05, "loss": 0.355072021484375, "step": 160 }, { "epoch": 0.05825154900275881, "grad_norm": 1.1036359071731567, "learning_rate": 1.25e-05, "loss": 0.348052978515625, "step": 161 }, { "epoch": 0.058613359866129984, "grad_norm": 2.5144832134246826, "learning_rate": 1.2578125e-05, "loss": 0.3399200439453125, "step": 162 }, { "epoch": 0.058975170729501154, "grad_norm": 4.0345988273620605, "learning_rate": 1.2656250000000001e-05, "loss": 0.308868408203125, "step": 163 }, { "epoch": 0.059336981592872325, "grad_norm": 2.3414883613586426, "learning_rate": 1.2734375000000001e-05, "loss": 0.3107147216796875, "step": 164 }, { "epoch": 0.0596987924562435, "grad_norm": 1.958597183227539, "learning_rate": 1.2812500000000001e-05, "loss": 0.32305908203125, "step": 165 }, { "epoch": 0.06006060331961467, "grad_norm": 1.4433528184890747, "learning_rate": 1.2890625000000002e-05, "loss": 0.312774658203125, "step": 166 }, { "epoch": 0.06042241418298584, "grad_norm": 1.619063377380371, "learning_rate": 1.2968750000000002e-05, "loss": 0.3090972900390625, "step": 167 }, { "epoch": 0.06078422504635702, "grad_norm": 1.8342885971069336, "learning_rate": 1.3046875e-05, "loss": 0.3355865478515625, "step": 168 }, { "epoch": 0.06114603590972819, "grad_norm": 2.9005661010742188, "learning_rate": 1.3125e-05, "loss": 0.28627777099609375, "step": 169 }, { "epoch": 0.06150784677309936, "grad_norm": 2.280622720718384, "learning_rate": 1.3203125e-05, "loss": 0.32970428466796875, "step": 170 }, { "epoch": 0.06186965763647054, "grad_norm": 1.9930329322814941, "learning_rate": 1.3281250000000001e-05, "loss": 0.3200531005859375, "step": 171 }, { "epoch": 0.06223146849984171, "grad_norm": 1.0536694526672363, "learning_rate": 1.3359375000000001e-05, "loss": 0.30750274658203125, "step": 172 }, { "epoch": 0.06259327936321288, "grad_norm": 5.353970050811768, "learning_rate": 1.3437500000000001e-05, "loss": 0.34918212890625, "step": 173 }, { "epoch": 0.06295509022658405, "grad_norm": 2.5283145904541016, "learning_rate": 1.3515625000000002e-05, "loss": 0.3102569580078125, "step": 174 }, { "epoch": 0.06331690108995523, "grad_norm": 2.3658623695373535, "learning_rate": 1.3593750000000002e-05, "loss": 0.322784423828125, "step": 175 }, { "epoch": 0.0636787119533264, "grad_norm": 1.1554356813430786, "learning_rate": 1.3671875e-05, "loss": 0.3137359619140625, "step": 176 }, { "epoch": 0.06404052281669757, "grad_norm": 4.7419819831848145, "learning_rate": 1.375e-05, "loss": 0.30474853515625, "step": 177 }, { "epoch": 0.06440233368006874, "grad_norm": 2.1886496543884277, "learning_rate": 1.3828125e-05, "loss": 0.3108978271484375, "step": 178 }, { "epoch": 0.06476414454343991, "grad_norm": 1.273016095161438, "learning_rate": 1.3906250000000001e-05, "loss": 0.332489013671875, "step": 179 }, { "epoch": 0.06512595540681108, "grad_norm": 1.7701873779296875, "learning_rate": 1.3984375000000001e-05, "loss": 0.32848358154296875, "step": 180 }, { "epoch": 0.06548776627018227, "grad_norm": 1.8177587985992432, "learning_rate": 1.4062500000000001e-05, "loss": 0.330230712890625, "step": 181 }, { "epoch": 0.06584957713355344, "grad_norm": 1.2521125078201294, "learning_rate": 1.4140625000000002e-05, "loss": 0.323455810546875, "step": 182 }, { "epoch": 0.06621138799692461, "grad_norm": 2.289564371109009, "learning_rate": 1.4218750000000002e-05, "loss": 0.323577880859375, "step": 183 }, { "epoch": 0.06657319886029578, "grad_norm": 4.467409610748291, "learning_rate": 1.4296875000000002e-05, "loss": 0.330169677734375, "step": 184 }, { "epoch": 0.06693500972366695, "grad_norm": 1.3879272937774658, "learning_rate": 1.4375e-05, "loss": 0.3028717041015625, "step": 185 }, { "epoch": 0.06729682058703812, "grad_norm": 1.195576548576355, "learning_rate": 1.4453125e-05, "loss": 0.31476593017578125, "step": 186 }, { "epoch": 0.0676586314504093, "grad_norm": 5.389822959899902, "learning_rate": 1.453125e-05, "loss": 0.3974761962890625, "step": 187 }, { "epoch": 0.06802044231378047, "grad_norm": 1.7825981378555298, "learning_rate": 1.4609375000000001e-05, "loss": 0.32221221923828125, "step": 188 }, { "epoch": 0.06838225317715164, "grad_norm": 2.011655807495117, "learning_rate": 1.4687500000000001e-05, "loss": 0.29555511474609375, "step": 189 }, { "epoch": 0.06874406404052281, "grad_norm": 1.3893828392028809, "learning_rate": 1.4765625000000001e-05, "loss": 0.29267120361328125, "step": 190 }, { "epoch": 0.06910587490389399, "grad_norm": 1.4821046590805054, "learning_rate": 1.4843750000000002e-05, "loss": 0.3100738525390625, "step": 191 }, { "epoch": 0.06946768576726516, "grad_norm": 0.8407244682312012, "learning_rate": 1.4921875000000002e-05, "loss": 0.33875274658203125, "step": 192 }, { "epoch": 0.06982949663063634, "grad_norm": 4.9257121086120605, "learning_rate": 1.5000000000000002e-05, "loss": 0.32769775390625, "step": 193 }, { "epoch": 0.07019130749400751, "grad_norm": 3.7752749919891357, "learning_rate": 1.5078125e-05, "loss": 0.31103515625, "step": 194 }, { "epoch": 0.07055311835737868, "grad_norm": 5.382282257080078, "learning_rate": 1.515625e-05, "loss": 0.3194427490234375, "step": 195 }, { "epoch": 0.07091492922074985, "grad_norm": 2.842374086380005, "learning_rate": 1.5234375000000001e-05, "loss": 0.3406829833984375, "step": 196 }, { "epoch": 0.07127674008412102, "grad_norm": 3.203148603439331, "learning_rate": 1.5312500000000003e-05, "loss": 0.3641357421875, "step": 197 }, { "epoch": 0.0716385509474922, "grad_norm": 3.07952880859375, "learning_rate": 1.5390625e-05, "loss": 0.32904052734375, "step": 198 }, { "epoch": 0.07200036181086338, "grad_norm": 1.2521392107009888, "learning_rate": 1.546875e-05, "loss": 0.30065155029296875, "step": 199 }, { "epoch": 0.07236217267423455, "grad_norm": 5.428802013397217, "learning_rate": 1.5546875e-05, "loss": 0.3651847839355469, "step": 200 }, { "epoch": 0.07272398353760572, "grad_norm": 3.0751709938049316, "learning_rate": 1.5625e-05, "loss": 0.3267974853515625, "step": 201 }, { "epoch": 0.07308579440097689, "grad_norm": 2.9025797843933105, "learning_rate": 1.5703125e-05, "loss": 0.3340911865234375, "step": 202 }, { "epoch": 0.07344760526434806, "grad_norm": 2.9203438758850098, "learning_rate": 1.578125e-05, "loss": 0.3307685852050781, "step": 203 }, { "epoch": 0.07380941612771924, "grad_norm": 1.603403091430664, "learning_rate": 1.5859375e-05, "loss": 0.31781005859375, "step": 204 }, { "epoch": 0.07417122699109041, "grad_norm": 1.9248676300048828, "learning_rate": 1.59375e-05, "loss": 0.3105010986328125, "step": 205 }, { "epoch": 0.07453303785446158, "grad_norm": 1.5704975128173828, "learning_rate": 1.6015625e-05, "loss": 0.3154449462890625, "step": 206 }, { "epoch": 0.07489484871783275, "grad_norm": 3.452789068222046, "learning_rate": 1.609375e-05, "loss": 0.378509521484375, "step": 207 }, { "epoch": 0.07525665958120392, "grad_norm": 1.1874754428863525, "learning_rate": 1.6171875000000002e-05, "loss": 0.3017578125, "step": 208 }, { "epoch": 0.07561847044457509, "grad_norm": 1.7783361673355103, "learning_rate": 1.6250000000000002e-05, "loss": 0.3100433349609375, "step": 209 }, { "epoch": 0.07598028130794628, "grad_norm": 2.295149803161621, "learning_rate": 1.6328125000000002e-05, "loss": 0.30135345458984375, "step": 210 }, { "epoch": 0.07634209217131745, "grad_norm": 6.223602771759033, "learning_rate": 1.6406250000000002e-05, "loss": 0.29302978515625, "step": 211 }, { "epoch": 0.07670390303468862, "grad_norm": 2.966956377029419, "learning_rate": 1.6484375000000003e-05, "loss": 0.29399871826171875, "step": 212 }, { "epoch": 0.07706571389805979, "grad_norm": 3.207153558731079, "learning_rate": 1.6562500000000003e-05, "loss": 0.3072967529296875, "step": 213 }, { "epoch": 0.07742752476143096, "grad_norm": 1.459588885307312, "learning_rate": 1.6640625000000003e-05, "loss": 0.319671630859375, "step": 214 }, { "epoch": 0.07778933562480213, "grad_norm": 5.689385890960693, "learning_rate": 1.671875e-05, "loss": 0.32338714599609375, "step": 215 }, { "epoch": 0.07815114648817331, "grad_norm": 2.129652738571167, "learning_rate": 1.6796875e-05, "loss": 0.3062744140625, "step": 216 }, { "epoch": 0.07851295735154448, "grad_norm": 4.841994285583496, "learning_rate": 1.6875e-05, "loss": 0.33860015869140625, "step": 217 }, { "epoch": 0.07887476821491565, "grad_norm": 2.0220606327056885, "learning_rate": 1.6953125e-05, "loss": 0.316192626953125, "step": 218 }, { "epoch": 0.07923657907828682, "grad_norm": 2.3422164916992188, "learning_rate": 1.703125e-05, "loss": 0.28803253173828125, "step": 219 }, { "epoch": 0.079598389941658, "grad_norm": 1.5763109922409058, "learning_rate": 1.7109375e-05, "loss": 0.3206024169921875, "step": 220 }, { "epoch": 0.07996020080502918, "grad_norm": 2.3780252933502197, "learning_rate": 1.71875e-05, "loss": 0.3138771057128906, "step": 221 }, { "epoch": 0.08032201166840035, "grad_norm": 3.5715153217315674, "learning_rate": 1.7265625e-05, "loss": 0.33107757568359375, "step": 222 }, { "epoch": 0.08068382253177152, "grad_norm": 4.55086088180542, "learning_rate": 1.734375e-05, "loss": 0.3180389404296875, "step": 223 }, { "epoch": 0.08104563339514269, "grad_norm": 4.1225266456604, "learning_rate": 1.7421875e-05, "loss": 0.3148193359375, "step": 224 }, { "epoch": 0.08140744425851386, "grad_norm": 4.1191725730896, "learning_rate": 1.7500000000000002e-05, "loss": 0.3319244384765625, "step": 225 }, { "epoch": 0.08176925512188503, "grad_norm": 1.9202286005020142, "learning_rate": 1.7578125000000002e-05, "loss": 0.32205963134765625, "step": 226 }, { "epoch": 0.08213106598525621, "grad_norm": 4.751047134399414, "learning_rate": 1.7656250000000002e-05, "loss": 0.3274383544921875, "step": 227 }, { "epoch": 0.08249287684862738, "grad_norm": 2.9117162227630615, "learning_rate": 1.7734375000000002e-05, "loss": 0.30306243896484375, "step": 228 }, { "epoch": 0.08285468771199855, "grad_norm": 4.351421356201172, "learning_rate": 1.7812500000000003e-05, "loss": 0.32228851318359375, "step": 229 }, { "epoch": 0.08321649857536972, "grad_norm": 3.4643638134002686, "learning_rate": 1.7890625000000003e-05, "loss": 0.3359375, "step": 230 }, { "epoch": 0.0835783094387409, "grad_norm": 4.837928295135498, "learning_rate": 1.7968750000000003e-05, "loss": 0.3184051513671875, "step": 231 }, { "epoch": 0.08394012030211206, "grad_norm": 1.009413242340088, "learning_rate": 1.8046875e-05, "loss": 0.328094482421875, "step": 232 }, { "epoch": 0.08430193116548325, "grad_norm": 5.936887264251709, "learning_rate": 1.8125e-05, "loss": 0.3255767822265625, "step": 233 }, { "epoch": 0.08466374202885442, "grad_norm": 8.036457061767578, "learning_rate": 1.8203125e-05, "loss": 0.3245086669921875, "step": 234 }, { "epoch": 0.08502555289222559, "grad_norm": 2.1027021408081055, "learning_rate": 1.828125e-05, "loss": 0.3460540771484375, "step": 235 }, { "epoch": 0.08538736375559676, "grad_norm": 5.130549907684326, "learning_rate": 1.8359375e-05, "loss": 0.34600830078125, "step": 236 }, { "epoch": 0.08574917461896793, "grad_norm": 2.227900743484497, "learning_rate": 1.84375e-05, "loss": 0.3111419677734375, "step": 237 }, { "epoch": 0.08611098548233911, "grad_norm": 1.5847405195236206, "learning_rate": 1.8515625e-05, "loss": 0.29412841796875, "step": 238 }, { "epoch": 0.08647279634571028, "grad_norm": 4.638050079345703, "learning_rate": 1.859375e-05, "loss": 0.3343505859375, "step": 239 }, { "epoch": 0.08683460720908145, "grad_norm": 2.2645208835601807, "learning_rate": 1.8671875e-05, "loss": 0.31114959716796875, "step": 240 }, { "epoch": 0.08719641807245262, "grad_norm": 5.251259803771973, "learning_rate": 1.8750000000000002e-05, "loss": 0.28427886962890625, "step": 241 }, { "epoch": 0.0875582289358238, "grad_norm": 1.677249550819397, "learning_rate": 1.8828125000000002e-05, "loss": 0.30852508544921875, "step": 242 }, { "epoch": 0.08792003979919497, "grad_norm": 1.2353934049606323, "learning_rate": 1.8906250000000002e-05, "loss": 0.28969573974609375, "step": 243 }, { "epoch": 0.08828185066256615, "grad_norm": 3.3290600776672363, "learning_rate": 1.8984375000000002e-05, "loss": 0.36603546142578125, "step": 244 }, { "epoch": 0.08864366152593732, "grad_norm": 3.5500576496124268, "learning_rate": 1.9062500000000003e-05, "loss": 0.31586456298828125, "step": 245 }, { "epoch": 0.08900547238930849, "grad_norm": 2.1192173957824707, "learning_rate": 1.9140625000000003e-05, "loss": 0.3157501220703125, "step": 246 }, { "epoch": 0.08936728325267966, "grad_norm": 4.1376051902771, "learning_rate": 1.9218750000000003e-05, "loss": 0.33746337890625, "step": 247 }, { "epoch": 0.08972909411605083, "grad_norm": 2.448640823364258, "learning_rate": 1.9296875000000003e-05, "loss": 0.30957794189453125, "step": 248 }, { "epoch": 0.090090904979422, "grad_norm": 5.85255765914917, "learning_rate": 1.9375e-05, "loss": 0.3074798583984375, "step": 249 }, { "epoch": 0.09045271584279319, "grad_norm": 3.7473175525665283, "learning_rate": 1.9453125e-05, "loss": 0.30821990966796875, "step": 250 }, { "epoch": 0.09081452670616436, "grad_norm": 5.901209831237793, "learning_rate": 1.953125e-05, "loss": 0.3608436584472656, "step": 251 }, { "epoch": 0.09117633756953553, "grad_norm": 4.2665605545043945, "learning_rate": 1.9609375e-05, "loss": 0.3525390625, "step": 252 }, { "epoch": 0.0915381484329067, "grad_norm": 2.050827741622925, "learning_rate": 1.96875e-05, "loss": 0.291259765625, "step": 253 }, { "epoch": 0.09189995929627787, "grad_norm": 1.089281678199768, "learning_rate": 1.9765625e-05, "loss": 0.3170166015625, "step": 254 }, { "epoch": 0.09226177015964904, "grad_norm": 2.272850751876831, "learning_rate": 1.984375e-05, "loss": 0.329437255859375, "step": 255 }, { "epoch": 0.09262358102302022, "grad_norm": 4.748713970184326, "learning_rate": 1.9921875e-05, "loss": 0.286590576171875, "step": 256 }, { "epoch": 0.09298539188639139, "grad_norm": 1.9400951862335205, "learning_rate": 2e-05, "loss": 0.2982940673828125, "step": 257 }, { "epoch": 0.09334720274976256, "grad_norm": 3.6007423400878906, "learning_rate": 2.0078125000000002e-05, "loss": 0.3014678955078125, "step": 258 }, { "epoch": 0.09370901361313373, "grad_norm": 3.67166805267334, "learning_rate": 2.0156250000000002e-05, "loss": 0.29170989990234375, "step": 259 }, { "epoch": 0.0940708244765049, "grad_norm": 2.2516767978668213, "learning_rate": 2.0234375000000002e-05, "loss": 0.35052490234375, "step": 260 }, { "epoch": 0.09443263533987609, "grad_norm": 4.660161972045898, "learning_rate": 2.0312500000000002e-05, "loss": 0.31668853759765625, "step": 261 }, { "epoch": 0.09479444620324726, "grad_norm": 3.4785687923431396, "learning_rate": 2.0390625000000003e-05, "loss": 0.2838592529296875, "step": 262 }, { "epoch": 0.09515625706661843, "grad_norm": 2.6602399349212646, "learning_rate": 2.0468750000000003e-05, "loss": 0.30577850341796875, "step": 263 }, { "epoch": 0.0955180679299896, "grad_norm": 1.7962664365768433, "learning_rate": 2.0546875000000003e-05, "loss": 0.300018310546875, "step": 264 }, { "epoch": 0.09587987879336077, "grad_norm": 1.269987940788269, "learning_rate": 2.0625000000000003e-05, "loss": 0.3271942138671875, "step": 265 }, { "epoch": 0.09624168965673194, "grad_norm": 1.6133774518966675, "learning_rate": 2.0703125e-05, "loss": 0.27989959716796875, "step": 266 }, { "epoch": 0.09660350052010312, "grad_norm": 1.4550375938415527, "learning_rate": 2.078125e-05, "loss": 0.32929229736328125, "step": 267 }, { "epoch": 0.09696531138347429, "grad_norm": 2.112224578857422, "learning_rate": 2.0859375e-05, "loss": 0.33214569091796875, "step": 268 }, { "epoch": 0.09732712224684546, "grad_norm": 3.6100404262542725, "learning_rate": 2.09375e-05, "loss": 0.352264404296875, "step": 269 }, { "epoch": 0.09768893311021663, "grad_norm": 1.82188081741333, "learning_rate": 2.1015625e-05, "loss": 0.301849365234375, "step": 270 }, { "epoch": 0.0980507439735878, "grad_norm": 2.109396457672119, "learning_rate": 2.109375e-05, "loss": 0.306304931640625, "step": 271 }, { "epoch": 0.09841255483695897, "grad_norm": 2.010523557662964, "learning_rate": 2.1171875e-05, "loss": 0.285064697265625, "step": 272 }, { "epoch": 0.09877436570033016, "grad_norm": 0.8527321815490723, "learning_rate": 2.125e-05, "loss": 0.2846527099609375, "step": 273 }, { "epoch": 0.09913617656370133, "grad_norm": 2.6287238597869873, "learning_rate": 2.1328125000000002e-05, "loss": 0.27728271484375, "step": 274 }, { "epoch": 0.0994979874270725, "grad_norm": 1.1901217699050903, "learning_rate": 2.1406250000000002e-05, "loss": 0.3282012939453125, "step": 275 }, { "epoch": 0.09985979829044367, "grad_norm": 2.0190930366516113, "learning_rate": 2.1484375000000002e-05, "loss": 0.293304443359375, "step": 276 }, { "epoch": 0.10022160915381484, "grad_norm": 2.342837333679199, "learning_rate": 2.1562500000000002e-05, "loss": 0.3344535827636719, "step": 277 }, { "epoch": 0.10058342001718601, "grad_norm": 2.719825029373169, "learning_rate": 2.1640625000000003e-05, "loss": 0.3028106689453125, "step": 278 }, { "epoch": 0.1009452308805572, "grad_norm": 7.847158908843994, "learning_rate": 2.1718750000000003e-05, "loss": 0.3625030517578125, "step": 279 }, { "epoch": 0.10130704174392836, "grad_norm": 0.9973350167274475, "learning_rate": 2.1796875000000003e-05, "loss": 0.3081512451171875, "step": 280 }, { "epoch": 0.10166885260729953, "grad_norm": 1.6078464984893799, "learning_rate": 2.1875000000000003e-05, "loss": 0.3137474060058594, "step": 281 }, { "epoch": 0.1020306634706707, "grad_norm": 3.498126268386841, "learning_rate": 2.1953125000000003e-05, "loss": 0.3376007080078125, "step": 282 }, { "epoch": 0.10239247433404187, "grad_norm": 2.322739839553833, "learning_rate": 2.203125e-05, "loss": 0.331573486328125, "step": 283 }, { "epoch": 0.10275428519741306, "grad_norm": 3.1832423210144043, "learning_rate": 2.2109375e-05, "loss": 0.341766357421875, "step": 284 }, { "epoch": 0.10311609606078423, "grad_norm": 0.7393573522567749, "learning_rate": 2.21875e-05, "loss": 0.33373260498046875, "step": 285 }, { "epoch": 0.1034779069241554, "grad_norm": 2.874314785003662, "learning_rate": 2.2265625e-05, "loss": 0.316680908203125, "step": 286 }, { "epoch": 0.10383971778752657, "grad_norm": 2.3433563709259033, "learning_rate": 2.234375e-05, "loss": 0.32037353515625, "step": 287 }, { "epoch": 0.10420152865089774, "grad_norm": 2.8142337799072266, "learning_rate": 2.2421875e-05, "loss": 0.333740234375, "step": 288 }, { "epoch": 0.10456333951426891, "grad_norm": 1.2302968502044678, "learning_rate": 2.25e-05, "loss": 0.3038482666015625, "step": 289 }, { "epoch": 0.1049251503776401, "grad_norm": 1.928783655166626, "learning_rate": 2.2578125e-05, "loss": 0.3385772705078125, "step": 290 }, { "epoch": 0.10528696124101126, "grad_norm": 3.6893582344055176, "learning_rate": 2.2656250000000002e-05, "loss": 0.34409332275390625, "step": 291 }, { "epoch": 0.10564877210438244, "grad_norm": 6.042638778686523, "learning_rate": 2.2734375000000002e-05, "loss": 0.318817138671875, "step": 292 }, { "epoch": 0.1060105829677536, "grad_norm": 5.242068290710449, "learning_rate": 2.2812500000000002e-05, "loss": 0.33315277099609375, "step": 293 }, { "epoch": 0.10637239383112478, "grad_norm": 7.499780654907227, "learning_rate": 2.2890625000000002e-05, "loss": 0.359039306640625, "step": 294 }, { "epoch": 0.10673420469449595, "grad_norm": 1.7392460107803345, "learning_rate": 2.2968750000000003e-05, "loss": 0.32785797119140625, "step": 295 }, { "epoch": 0.10709601555786713, "grad_norm": 2.8874671459198, "learning_rate": 2.3046875000000003e-05, "loss": 0.2796478271484375, "step": 296 }, { "epoch": 0.1074578264212383, "grad_norm": 3.725468635559082, "learning_rate": 2.3125000000000003e-05, "loss": 0.34285736083984375, "step": 297 }, { "epoch": 0.10781963728460947, "grad_norm": 3.725623369216919, "learning_rate": 2.3203125000000003e-05, "loss": 0.329376220703125, "step": 298 }, { "epoch": 0.10818144814798064, "grad_norm": 5.444820880889893, "learning_rate": 2.3281250000000003e-05, "loss": 0.32103729248046875, "step": 299 }, { "epoch": 0.10854325901135181, "grad_norm": 3.7980754375457764, "learning_rate": 2.3359375e-05, "loss": 0.34047698974609375, "step": 300 }, { "epoch": 0.10890506987472298, "grad_norm": 3.2579891681671143, "learning_rate": 2.34375e-05, "loss": 0.2889556884765625, "step": 301 }, { "epoch": 0.10926688073809417, "grad_norm": 4.646206378936768, "learning_rate": 2.3515625e-05, "loss": 0.30115509033203125, "step": 302 }, { "epoch": 0.10962869160146534, "grad_norm": 4.768317699432373, "learning_rate": 2.359375e-05, "loss": 0.321502685546875, "step": 303 }, { "epoch": 0.1099905024648365, "grad_norm": 3.0190274715423584, "learning_rate": 2.3671875e-05, "loss": 0.3036651611328125, "step": 304 }, { "epoch": 0.11035231332820768, "grad_norm": 1.8319185972213745, "learning_rate": 2.375e-05, "loss": 0.28607940673828125, "step": 305 }, { "epoch": 0.11071412419157885, "grad_norm": 1.1666005849838257, "learning_rate": 2.3828125e-05, "loss": 0.28412628173828125, "step": 306 }, { "epoch": 0.11107593505495003, "grad_norm": 1.983309030532837, "learning_rate": 2.3906250000000002e-05, "loss": 0.27523040771484375, "step": 307 }, { "epoch": 0.1114377459183212, "grad_norm": 6.838927268981934, "learning_rate": 2.3984375000000002e-05, "loss": 0.33509063720703125, "step": 308 }, { "epoch": 0.11179955678169237, "grad_norm": 2.8152148723602295, "learning_rate": 2.4062500000000002e-05, "loss": 0.29035186767578125, "step": 309 }, { "epoch": 0.11216136764506354, "grad_norm": 7.031606674194336, "learning_rate": 2.4140625000000002e-05, "loss": 0.371734619140625, "step": 310 }, { "epoch": 0.11252317850843471, "grad_norm": 1.685935378074646, "learning_rate": 2.4218750000000003e-05, "loss": 0.3379669189453125, "step": 311 }, { "epoch": 0.11288498937180588, "grad_norm": 7.242028713226318, "learning_rate": 2.4296875000000003e-05, "loss": 0.3100128173828125, "step": 312 }, { "epoch": 0.11324680023517707, "grad_norm": 7.296047210693359, "learning_rate": 2.4375000000000003e-05, "loss": 0.2711181640625, "step": 313 }, { "epoch": 0.11360861109854824, "grad_norm": 5.524932861328125, "learning_rate": 2.4453125000000003e-05, "loss": 0.36468505859375, "step": 314 }, { "epoch": 0.11397042196191941, "grad_norm": 2.786062479019165, "learning_rate": 2.4531250000000003e-05, "loss": 0.33075714111328125, "step": 315 }, { "epoch": 0.11433223282529058, "grad_norm": 1.6782023906707764, "learning_rate": 2.4609375000000004e-05, "loss": 0.31040191650390625, "step": 316 }, { "epoch": 0.11469404368866175, "grad_norm": 3.146627902984619, "learning_rate": 2.46875e-05, "loss": 0.3161811828613281, "step": 317 }, { "epoch": 0.11505585455203292, "grad_norm": 4.231575012207031, "learning_rate": 2.4765625e-05, "loss": 0.32793426513671875, "step": 318 }, { "epoch": 0.1154176654154041, "grad_norm": 4.71300745010376, "learning_rate": 2.484375e-05, "loss": 0.31235504150390625, "step": 319 }, { "epoch": 0.11577947627877527, "grad_norm": 7.831963539123535, "learning_rate": 2.4921875e-05, "loss": 0.340484619140625, "step": 320 }, { "epoch": 0.11614128714214644, "grad_norm": 3.474580764770508, "learning_rate": 2.5e-05, "loss": 0.3449249267578125, "step": 321 }, { "epoch": 0.11650309800551761, "grad_norm": 1.098471760749817, "learning_rate": 2.5078125e-05, "loss": 0.32343292236328125, "step": 322 }, { "epoch": 0.11686490886888878, "grad_norm": 3.524101734161377, "learning_rate": 2.515625e-05, "loss": 0.312164306640625, "step": 323 }, { "epoch": 0.11722671973225997, "grad_norm": 3.272657632827759, "learning_rate": 2.5234375000000002e-05, "loss": 0.280914306640625, "step": 324 }, { "epoch": 0.11758853059563114, "grad_norm": 1.2878810167312622, "learning_rate": 2.5312500000000002e-05, "loss": 0.28539276123046875, "step": 325 }, { "epoch": 0.11795034145900231, "grad_norm": 1.2524614334106445, "learning_rate": 2.5390625000000002e-05, "loss": 0.29010009765625, "step": 326 }, { "epoch": 0.11831215232237348, "grad_norm": 1.5170360803604126, "learning_rate": 2.5468750000000002e-05, "loss": 0.304412841796875, "step": 327 }, { "epoch": 0.11867396318574465, "grad_norm": 1.2196840047836304, "learning_rate": 2.5546875000000003e-05, "loss": 0.30873870849609375, "step": 328 }, { "epoch": 0.11903577404911582, "grad_norm": 1.512597918510437, "learning_rate": 2.5625000000000003e-05, "loss": 0.326263427734375, "step": 329 }, { "epoch": 0.119397584912487, "grad_norm": 2.331676483154297, "learning_rate": 2.5703125000000003e-05, "loss": 0.30437469482421875, "step": 330 }, { "epoch": 0.11975939577585817, "grad_norm": 3.0507397651672363, "learning_rate": 2.5781250000000003e-05, "loss": 0.298370361328125, "step": 331 }, { "epoch": 0.12012120663922934, "grad_norm": 1.7281666994094849, "learning_rate": 2.5859375000000003e-05, "loss": 0.29723358154296875, "step": 332 }, { "epoch": 0.12048301750260051, "grad_norm": 5.4625020027160645, "learning_rate": 2.5937500000000004e-05, "loss": 0.3333892822265625, "step": 333 }, { "epoch": 0.12084482836597168, "grad_norm": 6.92440938949585, "learning_rate": 2.6015625e-05, "loss": 0.31586456298828125, "step": 334 }, { "epoch": 0.12120663922934286, "grad_norm": 7.761816501617432, "learning_rate": 2.609375e-05, "loss": 0.3202972412109375, "step": 335 }, { "epoch": 0.12156845009271404, "grad_norm": 8.077901840209961, "learning_rate": 2.6171875e-05, "loss": 0.3304290771484375, "step": 336 }, { "epoch": 0.12193026095608521, "grad_norm": 4.067325115203857, "learning_rate": 2.625e-05, "loss": 0.31606292724609375, "step": 337 }, { "epoch": 0.12229207181945638, "grad_norm": 3.6509623527526855, "learning_rate": 2.6328125e-05, "loss": 0.2985687255859375, "step": 338 }, { "epoch": 0.12265388268282755, "grad_norm": 2.847510814666748, "learning_rate": 2.640625e-05, "loss": 0.28467559814453125, "step": 339 }, { "epoch": 0.12301569354619872, "grad_norm": 4.775704383850098, "learning_rate": 2.6484375000000002e-05, "loss": 0.29157257080078125, "step": 340 }, { "epoch": 0.12337750440956989, "grad_norm": 5.419490814208984, "learning_rate": 2.6562500000000002e-05, "loss": 0.34055328369140625, "step": 341 }, { "epoch": 0.12373931527294108, "grad_norm": 4.1291422843933105, "learning_rate": 2.6640625000000002e-05, "loss": 0.3487815856933594, "step": 342 }, { "epoch": 0.12410112613631225, "grad_norm": 6.697582721710205, "learning_rate": 2.6718750000000002e-05, "loss": 0.3296546936035156, "step": 343 }, { "epoch": 0.12446293699968342, "grad_norm": 4.982509613037109, "learning_rate": 2.6796875000000003e-05, "loss": 0.30584716796875, "step": 344 }, { "epoch": 0.12482474786305459, "grad_norm": 2.103585720062256, "learning_rate": 2.6875000000000003e-05, "loss": 0.3460235595703125, "step": 345 }, { "epoch": 0.12518655872642576, "grad_norm": 2.8998796939849854, "learning_rate": 2.6953125000000003e-05, "loss": 0.33306884765625, "step": 346 }, { "epoch": 0.12554836958979693, "grad_norm": 3.62669038772583, "learning_rate": 2.7031250000000003e-05, "loss": 0.31287384033203125, "step": 347 }, { "epoch": 0.1259101804531681, "grad_norm": 5.079300880432129, "learning_rate": 2.7109375000000003e-05, "loss": 0.28563690185546875, "step": 348 }, { "epoch": 0.12627199131653927, "grad_norm": 2.3782761096954346, "learning_rate": 2.7187500000000004e-05, "loss": 0.31391143798828125, "step": 349 }, { "epoch": 0.12663380217991047, "grad_norm": 2.738563060760498, "learning_rate": 2.7265625000000004e-05, "loss": 0.290313720703125, "step": 350 }, { "epoch": 0.12699561304328164, "grad_norm": 3.0749142169952393, "learning_rate": 2.734375e-05, "loss": 0.329132080078125, "step": 351 }, { "epoch": 0.1273574239066528, "grad_norm": 3.123577356338501, "learning_rate": 2.7421875e-05, "loss": 0.3314361572265625, "step": 352 }, { "epoch": 0.12771923477002398, "grad_norm": 1.9788901805877686, "learning_rate": 2.75e-05, "loss": 0.30500030517578125, "step": 353 }, { "epoch": 0.12808104563339515, "grad_norm": 1.668421745300293, "learning_rate": 2.7578125e-05, "loss": 0.3156890869140625, "step": 354 }, { "epoch": 0.12844285649676632, "grad_norm": 2.0005695819854736, "learning_rate": 2.765625e-05, "loss": 0.33843994140625, "step": 355 }, { "epoch": 0.1288046673601375, "grad_norm": 4.346624851226807, "learning_rate": 2.7734375e-05, "loss": 0.29817962646484375, "step": 356 }, { "epoch": 0.12916647822350866, "grad_norm": 2.0684895515441895, "learning_rate": 2.7812500000000002e-05, "loss": 0.31284332275390625, "step": 357 }, { "epoch": 0.12952828908687983, "grad_norm": 4.148639678955078, "learning_rate": 2.7890625000000002e-05, "loss": 0.3140716552734375, "step": 358 }, { "epoch": 0.129890099950251, "grad_norm": 5.7633538246154785, "learning_rate": 2.7968750000000002e-05, "loss": 0.3405914306640625, "step": 359 }, { "epoch": 0.13025191081362217, "grad_norm": 6.098840236663818, "learning_rate": 2.8046875000000002e-05, "loss": 0.3507843017578125, "step": 360 }, { "epoch": 0.13061372167699334, "grad_norm": 7.699246883392334, "learning_rate": 2.8125000000000003e-05, "loss": 0.33826446533203125, "step": 361 }, { "epoch": 0.13097553254036454, "grad_norm": 6.971790313720703, "learning_rate": 2.8203125000000003e-05, "loss": 0.33699798583984375, "step": 362 }, { "epoch": 0.1313373434037357, "grad_norm": 2.7890195846557617, "learning_rate": 2.8281250000000003e-05, "loss": 0.3085479736328125, "step": 363 }, { "epoch": 0.13169915426710688, "grad_norm": 1.5155807733535767, "learning_rate": 2.8359375000000003e-05, "loss": 0.320648193359375, "step": 364 }, { "epoch": 0.13206096513047805, "grad_norm": 3.65187406539917, "learning_rate": 2.8437500000000003e-05, "loss": 0.354949951171875, "step": 365 }, { "epoch": 0.13242277599384922, "grad_norm": 9.23796558380127, "learning_rate": 2.8515625000000004e-05, "loss": 0.3038482666015625, "step": 366 }, { "epoch": 0.1327845868572204, "grad_norm": 7.671686172485352, "learning_rate": 2.8593750000000004e-05, "loss": 0.32257080078125, "step": 367 }, { "epoch": 0.13314639772059156, "grad_norm": 10.057748794555664, "learning_rate": 2.8671875e-05, "loss": 0.324432373046875, "step": 368 }, { "epoch": 0.13350820858396273, "grad_norm": 6.299663066864014, "learning_rate": 2.875e-05, "loss": 0.33548736572265625, "step": 369 }, { "epoch": 0.1338700194473339, "grad_norm": 8.23047161102295, "learning_rate": 2.8828125e-05, "loss": 0.29601287841796875, "step": 370 }, { "epoch": 0.13423183031070507, "grad_norm": 2.212462902069092, "learning_rate": 2.890625e-05, "loss": 0.294097900390625, "step": 371 }, { "epoch": 0.13459364117407624, "grad_norm": 1.1359989643096924, "learning_rate": 2.8984375e-05, "loss": 0.31423187255859375, "step": 372 }, { "epoch": 0.13495545203744744, "grad_norm": 5.48944091796875, "learning_rate": 2.90625e-05, "loss": 0.347503662109375, "step": 373 }, { "epoch": 0.1353172629008186, "grad_norm": 3.6425797939300537, "learning_rate": 2.9140625000000002e-05, "loss": 0.3266448974609375, "step": 374 }, { "epoch": 0.13567907376418978, "grad_norm": 4.553709506988525, "learning_rate": 2.9218750000000002e-05, "loss": 0.30748748779296875, "step": 375 }, { "epoch": 0.13604088462756095, "grad_norm": 2.946887493133545, "learning_rate": 2.9296875000000002e-05, "loss": 0.26982879638671875, "step": 376 }, { "epoch": 0.13640269549093212, "grad_norm": 1.3977760076522827, "learning_rate": 2.9375000000000003e-05, "loss": 0.283111572265625, "step": 377 }, { "epoch": 0.1367645063543033, "grad_norm": 2.5351505279541016, "learning_rate": 2.9453125000000003e-05, "loss": 0.3004608154296875, "step": 378 }, { "epoch": 0.13712631721767446, "grad_norm": 4.68310022354126, "learning_rate": 2.9531250000000003e-05, "loss": 0.29323577880859375, "step": 379 }, { "epoch": 0.13748812808104563, "grad_norm": 6.961161136627197, "learning_rate": 2.9609375000000003e-05, "loss": 0.298065185546875, "step": 380 }, { "epoch": 0.1378499389444168, "grad_norm": 4.851480960845947, "learning_rate": 2.9687500000000003e-05, "loss": 0.311737060546875, "step": 381 }, { "epoch": 0.13821174980778797, "grad_norm": 6.703673362731934, "learning_rate": 2.9765625000000004e-05, "loss": 0.3278045654296875, "step": 382 }, { "epoch": 0.13857356067115914, "grad_norm": 2.058117389678955, "learning_rate": 2.9843750000000004e-05, "loss": 0.3104248046875, "step": 383 }, { "epoch": 0.1389353715345303, "grad_norm": 6.151540756225586, "learning_rate": 2.9921875000000004e-05, "loss": 0.3582038879394531, "step": 384 }, { "epoch": 0.1392971823979015, "grad_norm": 7.717421531677246, "learning_rate": 3.0000000000000004e-05, "loss": 0.3355865478515625, "step": 385 }, { "epoch": 0.13965899326127268, "grad_norm": 5.3153462409973145, "learning_rate": 3.0078125e-05, "loss": 0.3018646240234375, "step": 386 }, { "epoch": 0.14002080412464385, "grad_norm": 6.361465930938721, "learning_rate": 3.015625e-05, "loss": 0.3482818603515625, "step": 387 }, { "epoch": 0.14038261498801502, "grad_norm": 3.7824294567108154, "learning_rate": 3.0234375e-05, "loss": 0.30912017822265625, "step": 388 }, { "epoch": 0.1407444258513862, "grad_norm": 3.81579852104187, "learning_rate": 3.03125e-05, "loss": 0.35794830322265625, "step": 389 }, { "epoch": 0.14110623671475736, "grad_norm": 0.9523757100105286, "learning_rate": 3.0390625000000002e-05, "loss": 0.3433685302734375, "step": 390 }, { "epoch": 0.14146804757812853, "grad_norm": 5.825382709503174, "learning_rate": 3.0468750000000002e-05, "loss": 0.3246612548828125, "step": 391 }, { "epoch": 0.1418298584414997, "grad_norm": 6.91917085647583, "learning_rate": 3.0546875e-05, "loss": 0.33649444580078125, "step": 392 }, { "epoch": 0.14219166930487087, "grad_norm": 5.924407482147217, "learning_rate": 3.0625000000000006e-05, "loss": 0.3476715087890625, "step": 393 }, { "epoch": 0.14255348016824204, "grad_norm": 10.407352447509766, "learning_rate": 3.0703125e-05, "loss": 0.30495452880859375, "step": 394 }, { "epoch": 0.1429152910316132, "grad_norm": 6.8216047286987305, "learning_rate": 3.078125e-05, "loss": 0.324951171875, "step": 395 }, { "epoch": 0.1432771018949844, "grad_norm": 4.285946369171143, "learning_rate": 3.0859375e-05, "loss": 0.33050537109375, "step": 396 }, { "epoch": 0.14363891275835558, "grad_norm": 3.2587685585021973, "learning_rate": 3.09375e-05, "loss": 0.29846954345703125, "step": 397 }, { "epoch": 0.14400072362172675, "grad_norm": 2.924387216567993, "learning_rate": 3.1015625000000003e-05, "loss": 0.3307342529296875, "step": 398 }, { "epoch": 0.14436253448509792, "grad_norm": 1.885193109512329, "learning_rate": 3.109375e-05, "loss": 0.2931671142578125, "step": 399 }, { "epoch": 0.1447243453484691, "grad_norm": 4.909581184387207, "learning_rate": 3.1171875000000004e-05, "loss": 0.3358917236328125, "step": 400 }, { "epoch": 0.14508615621184026, "grad_norm": 5.4233832359313965, "learning_rate": 3.125e-05, "loss": 0.33780670166015625, "step": 401 }, { "epoch": 0.14544796707521143, "grad_norm": 4.595810890197754, "learning_rate": 3.1328125000000004e-05, "loss": 0.307220458984375, "step": 402 }, { "epoch": 0.1458097779385826, "grad_norm": 0.8423001766204834, "learning_rate": 3.140625e-05, "loss": 0.30844879150390625, "step": 403 }, { "epoch": 0.14617158880195377, "grad_norm": 0.9439176917076111, "learning_rate": 3.1484375000000005e-05, "loss": 0.28096771240234375, "step": 404 }, { "epoch": 0.14653339966532494, "grad_norm": 4.9955291748046875, "learning_rate": 3.15625e-05, "loss": 0.319091796875, "step": 405 }, { "epoch": 0.1468952105286961, "grad_norm": 2.5458128452301025, "learning_rate": 3.1640625000000005e-05, "loss": 0.3313140869140625, "step": 406 }, { "epoch": 0.14725702139206728, "grad_norm": 4.3272624015808105, "learning_rate": 3.171875e-05, "loss": 0.28955078125, "step": 407 }, { "epoch": 0.14761883225543848, "grad_norm": 2.989856004714966, "learning_rate": 3.1796875000000005e-05, "loss": 0.34417724609375, "step": 408 }, { "epoch": 0.14798064311880965, "grad_norm": 1.0945425033569336, "learning_rate": 3.1875e-05, "loss": 0.29155731201171875, "step": 409 }, { "epoch": 0.14834245398218082, "grad_norm": 2.193042516708374, "learning_rate": 3.1953125000000006e-05, "loss": 0.28211212158203125, "step": 410 }, { "epoch": 0.148704264845552, "grad_norm": 1.7022372484207153, "learning_rate": 3.203125e-05, "loss": 0.2731475830078125, "step": 411 }, { "epoch": 0.14906607570892316, "grad_norm": 2.708834409713745, "learning_rate": 3.2109375e-05, "loss": 0.305389404296875, "step": 412 }, { "epoch": 0.14942788657229433, "grad_norm": 0.7522433996200562, "learning_rate": 3.21875e-05, "loss": 0.28791046142578125, "step": 413 }, { "epoch": 0.1497896974356655, "grad_norm": 1.810685634613037, "learning_rate": 3.2265625e-05, "loss": 0.30857086181640625, "step": 414 }, { "epoch": 0.15015150829903667, "grad_norm": 3.8224098682403564, "learning_rate": 3.2343750000000004e-05, "loss": 0.30765533447265625, "step": 415 }, { "epoch": 0.15051331916240784, "grad_norm": 1.0994304418563843, "learning_rate": 3.2421875e-05, "loss": 0.31158447265625, "step": 416 }, { "epoch": 0.15087513002577901, "grad_norm": 8.616171836853027, "learning_rate": 3.2500000000000004e-05, "loss": 0.312469482421875, "step": 417 }, { "epoch": 0.15123694088915018, "grad_norm": 7.550068378448486, "learning_rate": 3.2578125e-05, "loss": 0.2958831787109375, "step": 418 }, { "epoch": 0.15159875175252138, "grad_norm": 0.8666940927505493, "learning_rate": 3.2656250000000004e-05, "loss": 0.3199119567871094, "step": 419 }, { "epoch": 0.15196056261589255, "grad_norm": 2.4095072746276855, "learning_rate": 3.2734375e-05, "loss": 0.3163909912109375, "step": 420 }, { "epoch": 0.15232237347926372, "grad_norm": 2.432054042816162, "learning_rate": 3.2812500000000005e-05, "loss": 0.28929901123046875, "step": 421 }, { "epoch": 0.1526841843426349, "grad_norm": 1.0143142938613892, "learning_rate": 3.2890625e-05, "loss": 0.3165283203125, "step": 422 }, { "epoch": 0.15304599520600606, "grad_norm": 1.114471673965454, "learning_rate": 3.2968750000000005e-05, "loss": 0.3124237060546875, "step": 423 }, { "epoch": 0.15340780606937723, "grad_norm": 3.362055540084839, "learning_rate": 3.3046875e-05, "loss": 0.31536102294921875, "step": 424 }, { "epoch": 0.1537696169327484, "grad_norm": 1.8428089618682861, "learning_rate": 3.3125000000000006e-05, "loss": 0.35330963134765625, "step": 425 }, { "epoch": 0.15413142779611957, "grad_norm": 5.359971523284912, "learning_rate": 3.3203125e-05, "loss": 0.3000335693359375, "step": 426 }, { "epoch": 0.15449323865949074, "grad_norm": 3.742872953414917, "learning_rate": 3.3281250000000006e-05, "loss": 0.33306884765625, "step": 427 }, { "epoch": 0.15485504952286192, "grad_norm": 1.4889291524887085, "learning_rate": 3.3359375e-05, "loss": 0.3353729248046875, "step": 428 }, { "epoch": 0.15521686038623309, "grad_norm": 1.0208121538162231, "learning_rate": 3.34375e-05, "loss": 0.30695343017578125, "step": 429 }, { "epoch": 0.15557867124960426, "grad_norm": 1.4697812795639038, "learning_rate": 3.3515625e-05, "loss": 0.333404541015625, "step": 430 }, { "epoch": 0.15594048211297545, "grad_norm": 3.389782428741455, "learning_rate": 3.359375e-05, "loss": 0.3051605224609375, "step": 431 }, { "epoch": 0.15630229297634662, "grad_norm": 6.684220314025879, "learning_rate": 3.3671875000000004e-05, "loss": 0.3944549560546875, "step": 432 }, { "epoch": 0.1566641038397178, "grad_norm": 2.4113924503326416, "learning_rate": 3.375e-05, "loss": 0.3187751770019531, "step": 433 }, { "epoch": 0.15702591470308896, "grad_norm": 1.5389349460601807, "learning_rate": 3.3828125000000004e-05, "loss": 0.3234405517578125, "step": 434 }, { "epoch": 0.15738772556646013, "grad_norm": 2.597938299179077, "learning_rate": 3.390625e-05, "loss": 0.3236846923828125, "step": 435 }, { "epoch": 0.1577495364298313, "grad_norm": 3.3355796337127686, "learning_rate": 3.3984375000000004e-05, "loss": 0.3336029052734375, "step": 436 }, { "epoch": 0.15811134729320248, "grad_norm": 3.7167725563049316, "learning_rate": 3.40625e-05, "loss": 0.3076934814453125, "step": 437 }, { "epoch": 0.15847315815657365, "grad_norm": 1.9837671518325806, "learning_rate": 3.4140625000000005e-05, "loss": 0.30333709716796875, "step": 438 }, { "epoch": 0.15883496901994482, "grad_norm": 1.656579852104187, "learning_rate": 3.421875e-05, "loss": 0.30545806884765625, "step": 439 }, { "epoch": 0.159196779883316, "grad_norm": 1.7044553756713867, "learning_rate": 3.4296875000000005e-05, "loss": 0.30788421630859375, "step": 440 }, { "epoch": 0.15955859074668716, "grad_norm": 1.6231391429901123, "learning_rate": 3.4375e-05, "loss": 0.33286285400390625, "step": 441 }, { "epoch": 0.15992040161005835, "grad_norm": 5.460050106048584, "learning_rate": 3.4453125000000006e-05, "loss": 0.34198760986328125, "step": 442 }, { "epoch": 0.16028221247342953, "grad_norm": 4.151707649230957, "learning_rate": 3.453125e-05, "loss": 0.29595947265625, "step": 443 }, { "epoch": 0.1606440233368007, "grad_norm": 5.832501411437988, "learning_rate": 3.4609375000000006e-05, "loss": 0.297698974609375, "step": 444 }, { "epoch": 0.16100583420017187, "grad_norm": 2.065251350402832, "learning_rate": 3.46875e-05, "loss": 0.3409271240234375, "step": 445 }, { "epoch": 0.16136764506354304, "grad_norm": 3.547245979309082, "learning_rate": 3.4765625e-05, "loss": 0.3026275634765625, "step": 446 }, { "epoch": 0.1617294559269142, "grad_norm": 1.2834694385528564, "learning_rate": 3.484375e-05, "loss": 0.28936004638671875, "step": 447 }, { "epoch": 0.16209126679028538, "grad_norm": 3.806171417236328, "learning_rate": 3.4921875e-05, "loss": 0.333038330078125, "step": 448 }, { "epoch": 0.16245307765365655, "grad_norm": 7.033389091491699, "learning_rate": 3.5000000000000004e-05, "loss": 0.357574462890625, "step": 449 }, { "epoch": 0.16281488851702772, "grad_norm": 4.790836334228516, "learning_rate": 3.5078125e-05, "loss": 0.3080291748046875, "step": 450 }, { "epoch": 0.1631766993803989, "grad_norm": 1.2065296173095703, "learning_rate": 3.5156250000000004e-05, "loss": 0.28949737548828125, "step": 451 }, { "epoch": 0.16353851024377006, "grad_norm": 1.9047245979309082, "learning_rate": 3.5234375e-05, "loss": 0.28765869140625, "step": 452 }, { "epoch": 0.16390032110714123, "grad_norm": 6.151847839355469, "learning_rate": 3.5312500000000005e-05, "loss": 0.291961669921875, "step": 453 }, { "epoch": 0.16426213197051243, "grad_norm": 4.470073223114014, "learning_rate": 3.5390625e-05, "loss": 0.32281494140625, "step": 454 }, { "epoch": 0.1646239428338836, "grad_norm": 1.735475778579712, "learning_rate": 3.5468750000000005e-05, "loss": 0.34223175048828125, "step": 455 }, { "epoch": 0.16498575369725477, "grad_norm": 3.9318671226501465, "learning_rate": 3.5546875e-05, "loss": 0.29571533203125, "step": 456 }, { "epoch": 0.16534756456062594, "grad_norm": 2.0243101119995117, "learning_rate": 3.5625000000000005e-05, "loss": 0.3200225830078125, "step": 457 }, { "epoch": 0.1657093754239971, "grad_norm": 2.5498709678649902, "learning_rate": 3.5703125e-05, "loss": 0.30051422119140625, "step": 458 }, { "epoch": 0.16607118628736828, "grad_norm": 3.988349437713623, "learning_rate": 3.5781250000000006e-05, "loss": 0.26194000244140625, "step": 459 }, { "epoch": 0.16643299715073945, "grad_norm": 3.072321653366089, "learning_rate": 3.5859375e-05, "loss": 0.3116455078125, "step": 460 }, { "epoch": 0.16679480801411062, "grad_norm": 1.914525032043457, "learning_rate": 3.5937500000000006e-05, "loss": 0.2991485595703125, "step": 461 }, { "epoch": 0.1671566188774818, "grad_norm": 2.8440489768981934, "learning_rate": 3.6015625e-05, "loss": 0.2767333984375, "step": 462 }, { "epoch": 0.16751842974085296, "grad_norm": 4.473794937133789, "learning_rate": 3.609375e-05, "loss": 0.29400634765625, "step": 463 }, { "epoch": 0.16788024060422413, "grad_norm": 0.9242031574249268, "learning_rate": 3.6171875000000003e-05, "loss": 0.2922821044921875, "step": 464 }, { "epoch": 0.16824205146759533, "grad_norm": 1.460456371307373, "learning_rate": 3.625e-05, "loss": 0.2898712158203125, "step": 465 }, { "epoch": 0.1686038623309665, "grad_norm": 1.3791265487670898, "learning_rate": 3.6328125000000004e-05, "loss": 0.3405609130859375, "step": 466 }, { "epoch": 0.16896567319433767, "grad_norm": 1.6507954597473145, "learning_rate": 3.640625e-05, "loss": 0.355712890625, "step": 467 }, { "epoch": 0.16932748405770884, "grad_norm": 2.0999255180358887, "learning_rate": 3.6484375000000004e-05, "loss": 0.2963104248046875, "step": 468 }, { "epoch": 0.16968929492108, "grad_norm": 2.3169867992401123, "learning_rate": 3.65625e-05, "loss": 0.317352294921875, "step": 469 }, { "epoch": 0.17005110578445118, "grad_norm": 2.459679126739502, "learning_rate": 3.6640625000000005e-05, "loss": 0.28569793701171875, "step": 470 }, { "epoch": 0.17041291664782235, "grad_norm": 1.2383174896240234, "learning_rate": 3.671875e-05, "loss": 0.305908203125, "step": 471 }, { "epoch": 0.17077472751119352, "grad_norm": 5.076645851135254, "learning_rate": 3.6796875000000005e-05, "loss": 0.27552032470703125, "step": 472 }, { "epoch": 0.1711365383745647, "grad_norm": 4.0052385330200195, "learning_rate": 3.6875e-05, "loss": 0.35291290283203125, "step": 473 }, { "epoch": 0.17149834923793586, "grad_norm": 1.0960533618927002, "learning_rate": 3.6953125000000005e-05, "loss": 0.28191375732421875, "step": 474 }, { "epoch": 0.17186016010130703, "grad_norm": 0.7132631540298462, "learning_rate": 3.703125e-05, "loss": 0.28321075439453125, "step": 475 }, { "epoch": 0.17222197096467823, "grad_norm": 1.251947045326233, "learning_rate": 3.7109375000000006e-05, "loss": 0.27135467529296875, "step": 476 }, { "epoch": 0.1725837818280494, "grad_norm": 3.058656692504883, "learning_rate": 3.71875e-05, "loss": 0.2953033447265625, "step": 477 }, { "epoch": 0.17294559269142057, "grad_norm": 4.702386379241943, "learning_rate": 3.7265625000000006e-05, "loss": 0.33394622802734375, "step": 478 }, { "epoch": 0.17330740355479174, "grad_norm": 11.198600769042969, "learning_rate": 3.734375e-05, "loss": 0.3454437255859375, "step": 479 }, { "epoch": 0.1736692144181629, "grad_norm": 2.590474843978882, "learning_rate": 3.7421875e-05, "loss": 0.3292236328125, "step": 480 }, { "epoch": 0.17403102528153408, "grad_norm": 5.959778785705566, "learning_rate": 3.7500000000000003e-05, "loss": 0.3318023681640625, "step": 481 }, { "epoch": 0.17439283614490525, "grad_norm": 5.693119525909424, "learning_rate": 3.7578125e-05, "loss": 0.30229949951171875, "step": 482 }, { "epoch": 0.17475464700827642, "grad_norm": 1.940354347229004, "learning_rate": 3.7656250000000004e-05, "loss": 0.29250335693359375, "step": 483 }, { "epoch": 0.1751164578716476, "grad_norm": 2.072295904159546, "learning_rate": 3.7734375e-05, "loss": 0.336822509765625, "step": 484 }, { "epoch": 0.17547826873501876, "grad_norm": 2.3678579330444336, "learning_rate": 3.7812500000000004e-05, "loss": 0.30550384521484375, "step": 485 }, { "epoch": 0.17584007959838993, "grad_norm": 2.2440881729125977, "learning_rate": 3.7890625e-05, "loss": 0.3224334716796875, "step": 486 }, { "epoch": 0.1762018904617611, "grad_norm": 1.1627780199050903, "learning_rate": 3.7968750000000005e-05, "loss": 0.3274078369140625, "step": 487 }, { "epoch": 0.1765637013251323, "grad_norm": 2.392596960067749, "learning_rate": 3.8046875e-05, "loss": 0.319488525390625, "step": 488 }, { "epoch": 0.17692551218850347, "grad_norm": 2.654536485671997, "learning_rate": 3.8125000000000005e-05, "loss": 0.327606201171875, "step": 489 }, { "epoch": 0.17728732305187464, "grad_norm": 4.383605003356934, "learning_rate": 3.8203125e-05, "loss": 0.3080291748046875, "step": 490 }, { "epoch": 0.1776491339152458, "grad_norm": 2.0640547275543213, "learning_rate": 3.8281250000000006e-05, "loss": 0.327392578125, "step": 491 }, { "epoch": 0.17801094477861698, "grad_norm": 4.508538722991943, "learning_rate": 3.8359375e-05, "loss": 0.28391265869140625, "step": 492 }, { "epoch": 0.17837275564198815, "grad_norm": 3.5407426357269287, "learning_rate": 3.8437500000000006e-05, "loss": 0.32318878173828125, "step": 493 }, { "epoch": 0.17873456650535932, "grad_norm": 6.6172661781311035, "learning_rate": 3.8515625e-05, "loss": 0.326446533203125, "step": 494 }, { "epoch": 0.1790963773687305, "grad_norm": 4.0937886238098145, "learning_rate": 3.8593750000000006e-05, "loss": 0.2720184326171875, "step": 495 }, { "epoch": 0.17945818823210166, "grad_norm": 4.880975246429443, "learning_rate": 3.8671875e-05, "loss": 0.310699462890625, "step": 496 }, { "epoch": 0.17981999909547283, "grad_norm": 0.9348528385162354, "learning_rate": 3.875e-05, "loss": 0.3045234680175781, "step": 497 }, { "epoch": 0.180181809958844, "grad_norm": 2.5962352752685547, "learning_rate": 3.8828125000000004e-05, "loss": 0.3031005859375, "step": 498 }, { "epoch": 0.1805436208222152, "grad_norm": 1.289233684539795, "learning_rate": 3.890625e-05, "loss": 0.2947235107421875, "step": 499 }, { "epoch": 0.18090543168558637, "grad_norm": 1.8522255420684814, "learning_rate": 3.8984375000000004e-05, "loss": 0.32891845703125, "step": 500 }, { "epoch": 0.18126724254895754, "grad_norm": 2.257736921310425, "learning_rate": 3.90625e-05, "loss": 0.2926788330078125, "step": 501 }, { "epoch": 0.1816290534123287, "grad_norm": 1.910180926322937, "learning_rate": 3.9140625000000004e-05, "loss": 0.3141937255859375, "step": 502 }, { "epoch": 0.18199086427569988, "grad_norm": 1.1680269241333008, "learning_rate": 3.921875e-05, "loss": 0.33272552490234375, "step": 503 }, { "epoch": 0.18235267513907105, "grad_norm": 8.599577903747559, "learning_rate": 3.9296875000000005e-05, "loss": 0.2837066650390625, "step": 504 }, { "epoch": 0.18271448600244222, "grad_norm": 8.343697547912598, "learning_rate": 3.9375e-05, "loss": 0.3059844970703125, "step": 505 }, { "epoch": 0.1830762968658134, "grad_norm": 8.760024070739746, "learning_rate": 3.9453125000000005e-05, "loss": 0.308258056640625, "step": 506 }, { "epoch": 0.18343810772918456, "grad_norm": 6.354249477386475, "learning_rate": 3.953125e-05, "loss": 0.2949371337890625, "step": 507 }, { "epoch": 0.18379991859255573, "grad_norm": 2.317493200302124, "learning_rate": 3.9609375000000006e-05, "loss": 0.32471466064453125, "step": 508 }, { "epoch": 0.1841617294559269, "grad_norm": 2.412139892578125, "learning_rate": 3.96875e-05, "loss": 0.30413818359375, "step": 509 }, { "epoch": 0.18452354031929807, "grad_norm": 1.3731802701950073, "learning_rate": 3.9765625000000006e-05, "loss": 0.28839874267578125, "step": 510 }, { "epoch": 0.18488535118266927, "grad_norm": 2.8529276847839355, "learning_rate": 3.984375e-05, "loss": 0.31296539306640625, "step": 511 }, { "epoch": 0.18524716204604044, "grad_norm": 2.5576894283294678, "learning_rate": 3.9921875000000006e-05, "loss": 0.27385711669921875, "step": 512 }, { "epoch": 0.1856089729094116, "grad_norm": 1.67961585521698, "learning_rate": 4e-05, "loss": 0.31165313720703125, "step": 513 }, { "epoch": 0.18597078377278278, "grad_norm": 2.583343505859375, "learning_rate": 3.999998053910083e-05, "loss": 0.3049888610839844, "step": 514 }, { "epoch": 0.18633259463615395, "grad_norm": 2.3308157920837402, "learning_rate": 3.9999922156441175e-05, "loss": 0.3388519287109375, "step": 515 }, { "epoch": 0.18669440549952512, "grad_norm": 8.325671195983887, "learning_rate": 3.999982485213467e-05, "loss": 0.2945404052734375, "step": 516 }, { "epoch": 0.1870562163628963, "grad_norm": 9.504853248596191, "learning_rate": 3.999968862637067e-05, "loss": 0.31490325927734375, "step": 517 }, { "epoch": 0.18741802722626746, "grad_norm": 3.7766225337982178, "learning_rate": 3.999951347941428e-05, "loss": 0.31915283203125, "step": 518 }, { "epoch": 0.18777983808963863, "grad_norm": 2.61600923538208, "learning_rate": 3.9999299411606356e-05, "loss": 0.3373870849609375, "step": 519 }, { "epoch": 0.1881416489530098, "grad_norm": 3.5270979404449463, "learning_rate": 3.999904642336349e-05, "loss": 0.26848602294921875, "step": 520 }, { "epoch": 0.18850345981638097, "grad_norm": 4.6719465255737305, "learning_rate": 3.9998754515178024e-05, "loss": 0.34850311279296875, "step": 521 }, { "epoch": 0.18886527067975217, "grad_norm": 4.002064228057861, "learning_rate": 3.9998423687618036e-05, "loss": 0.2899322509765625, "step": 522 }, { "epoch": 0.18922708154312334, "grad_norm": 5.820341110229492, "learning_rate": 3.999805394132734e-05, "loss": 0.322235107421875, "step": 523 }, { "epoch": 0.1895888924064945, "grad_norm": 4.461676120758057, "learning_rate": 3.9997645277025515e-05, "loss": 0.30857086181640625, "step": 524 }, { "epoch": 0.18995070326986568, "grad_norm": 4.874349117279053, "learning_rate": 3.9997197695507836e-05, "loss": 0.32271575927734375, "step": 525 }, { "epoch": 0.19031251413323685, "grad_norm": 2.22426700592041, "learning_rate": 3.999671119764534e-05, "loss": 0.30115509033203125, "step": 526 }, { "epoch": 0.19067432499660802, "grad_norm": 1.0885236263275146, "learning_rate": 3.9996185784384805e-05, "loss": 0.323883056640625, "step": 527 }, { "epoch": 0.1910361358599792, "grad_norm": 1.7384871244430542, "learning_rate": 3.9995621456748725e-05, "loss": 0.3038177490234375, "step": 528 }, { "epoch": 0.19139794672335037, "grad_norm": 3.5983059406280518, "learning_rate": 3.999501821583534e-05, "loss": 0.337188720703125, "step": 529 }, { "epoch": 0.19175975758672154, "grad_norm": 2.5384304523468018, "learning_rate": 3.9994376062818605e-05, "loss": 0.3170166015625, "step": 530 }, { "epoch": 0.1921215684500927, "grad_norm": 1.6566380262374878, "learning_rate": 3.9993694998948205e-05, "loss": 0.3285064697265625, "step": 531 }, { "epoch": 0.19248337931346388, "grad_norm": 1.3954503536224365, "learning_rate": 3.999297502554956e-05, "loss": 0.2941131591796875, "step": 532 }, { "epoch": 0.19284519017683505, "grad_norm": 1.568354606628418, "learning_rate": 3.99922161440238e-05, "loss": 0.3225250244140625, "step": 533 }, { "epoch": 0.19320700104020624, "grad_norm": 1.981322169303894, "learning_rate": 3.999141835584778e-05, "loss": 0.31328582763671875, "step": 534 }, { "epoch": 0.19356881190357741, "grad_norm": 1.2675772905349731, "learning_rate": 3.9990581662574065e-05, "loss": 0.3331146240234375, "step": 535 }, { "epoch": 0.19393062276694858, "grad_norm": 2.9738361835479736, "learning_rate": 3.998970606583092e-05, "loss": 0.2941131591796875, "step": 536 }, { "epoch": 0.19429243363031976, "grad_norm": 5.546016693115234, "learning_rate": 3.9988791567322354e-05, "loss": 0.28385162353515625, "step": 537 }, { "epoch": 0.19465424449369093, "grad_norm": 3.7226343154907227, "learning_rate": 3.998783816882806e-05, "loss": 0.322967529296875, "step": 538 }, { "epoch": 0.1950160553570621, "grad_norm": 7.198750972747803, "learning_rate": 3.998684587220343e-05, "loss": 0.2930908203125, "step": 539 }, { "epoch": 0.19537786622043327, "grad_norm": 0.7103931903839111, "learning_rate": 3.998581467937957e-05, "loss": 0.277374267578125, "step": 540 }, { "epoch": 0.19573967708380444, "grad_norm": 4.5579752922058105, "learning_rate": 3.998474459236327e-05, "loss": 0.35872650146484375, "step": 541 }, { "epoch": 0.1961014879471756, "grad_norm": 1.9076733589172363, "learning_rate": 3.9983635613237024e-05, "loss": 0.3291664123535156, "step": 542 }, { "epoch": 0.19646329881054678, "grad_norm": 1.1490517854690552, "learning_rate": 3.998248774415899e-05, "loss": 0.3167877197265625, "step": 543 }, { "epoch": 0.19682510967391795, "grad_norm": 2.2985925674438477, "learning_rate": 3.998130098736303e-05, "loss": 0.324066162109375, "step": 544 }, { "epoch": 0.19718692053728915, "grad_norm": 3.4733409881591797, "learning_rate": 3.9980075345158694e-05, "loss": 0.3142242431640625, "step": 545 }, { "epoch": 0.19754873140066032, "grad_norm": 5.589982032775879, "learning_rate": 3.997881081993117e-05, "loss": 0.36358642578125, "step": 546 }, { "epoch": 0.19791054226403149, "grad_norm": 1.358647346496582, "learning_rate": 3.997750741414135e-05, "loss": 0.29808807373046875, "step": 547 }, { "epoch": 0.19827235312740266, "grad_norm": 1.81339693069458, "learning_rate": 3.997616513032578e-05, "loss": 0.31235504150390625, "step": 548 }, { "epoch": 0.19863416399077383, "grad_norm": 3.5983357429504395, "learning_rate": 3.997478397109665e-05, "loss": 0.3089599609375, "step": 549 }, { "epoch": 0.198995974854145, "grad_norm": 2.55422306060791, "learning_rate": 3.997336393914185e-05, "loss": 0.3412017822265625, "step": 550 }, { "epoch": 0.19935778571751617, "grad_norm": 1.8626264333724976, "learning_rate": 3.997190503722486e-05, "loss": 0.3748016357421875, "step": 551 }, { "epoch": 0.19971959658088734, "grad_norm": 9.150833129882812, "learning_rate": 3.997040726818484e-05, "loss": 0.33880615234375, "step": 552 }, { "epoch": 0.2000814074442585, "grad_norm": 2.4318296909332275, "learning_rate": 3.9968870634936596e-05, "loss": 0.35247802734375, "step": 553 }, { "epoch": 0.20044321830762968, "grad_norm": 2.868720531463623, "learning_rate": 3.996729514047055e-05, "loss": 0.30841064453125, "step": 554 }, { "epoch": 0.20080502917100085, "grad_norm": 1.217453122138977, "learning_rate": 3.996568078785275e-05, "loss": 0.29718017578125, "step": 555 }, { "epoch": 0.20116684003437202, "grad_norm": 5.086585521697998, "learning_rate": 3.996402758022487e-05, "loss": 0.32057952880859375, "step": 556 }, { "epoch": 0.20152865089774322, "grad_norm": 4.876135349273682, "learning_rate": 3.996233552080421e-05, "loss": 0.30417633056640625, "step": 557 }, { "epoch": 0.2018904617611144, "grad_norm": 4.950448513031006, "learning_rate": 3.996060461288367e-05, "loss": 0.31951141357421875, "step": 558 }, { "epoch": 0.20225227262448556, "grad_norm": 3.139052152633667, "learning_rate": 3.995883485983174e-05, "loss": 0.2889404296875, "step": 559 }, { "epoch": 0.20261408348785673, "grad_norm": 4.599117755889893, "learning_rate": 3.995702626509252e-05, "loss": 0.3553466796875, "step": 560 }, { "epoch": 0.2029758943512279, "grad_norm": 3.346595287322998, "learning_rate": 3.995517883218572e-05, "loss": 0.276092529296875, "step": 561 }, { "epoch": 0.20333770521459907, "grad_norm": 3.1192545890808105, "learning_rate": 3.995329256470658e-05, "loss": 0.33498382568359375, "step": 562 }, { "epoch": 0.20369951607797024, "grad_norm": 3.2076728343963623, "learning_rate": 3.9951367466325974e-05, "loss": 0.3036346435546875, "step": 563 }, { "epoch": 0.2040613269413414, "grad_norm": 0.8389816284179688, "learning_rate": 3.9949403540790295e-05, "loss": 0.3063812255859375, "step": 564 }, { "epoch": 0.20442313780471258, "grad_norm": 2.330794334411621, "learning_rate": 3.994740079192153e-05, "loss": 0.32720184326171875, "step": 565 }, { "epoch": 0.20478494866808375, "grad_norm": 1.0425561666488647, "learning_rate": 3.9945359223617206e-05, "loss": 0.3083953857421875, "step": 566 }, { "epoch": 0.20514675953145492, "grad_norm": 2.3938653469085693, "learning_rate": 3.994327883985039e-05, "loss": 0.3063201904296875, "step": 567 }, { "epoch": 0.20550857039482612, "grad_norm": 3.480863332748413, "learning_rate": 3.994115964466971e-05, "loss": 0.35376739501953125, "step": 568 }, { "epoch": 0.2058703812581973, "grad_norm": 2.3469128608703613, "learning_rate": 3.993900164219931e-05, "loss": 0.349761962890625, "step": 569 }, { "epoch": 0.20623219212156846, "grad_norm": 0.9834126234054565, "learning_rate": 3.993680483663884e-05, "loss": 0.3047332763671875, "step": 570 }, { "epoch": 0.20659400298493963, "grad_norm": 2.3997011184692383, "learning_rate": 3.99345692322635e-05, "loss": 0.33548736572265625, "step": 571 }, { "epoch": 0.2069558138483108, "grad_norm": 5.433207035064697, "learning_rate": 3.993229483342396e-05, "loss": 0.2866058349609375, "step": 572 }, { "epoch": 0.20731762471168197, "grad_norm": 1.0014246702194214, "learning_rate": 3.992998164454642e-05, "loss": 0.3238372802734375, "step": 573 }, { "epoch": 0.20767943557505314, "grad_norm": 2.6612181663513184, "learning_rate": 3.992762967013255e-05, "loss": 0.3123779296875, "step": 574 }, { "epoch": 0.2080412464384243, "grad_norm": 2.236093282699585, "learning_rate": 3.99252389147595e-05, "loss": 0.29000091552734375, "step": 575 }, { "epoch": 0.20840305730179548, "grad_norm": 1.7546403408050537, "learning_rate": 3.9922809383079895e-05, "loss": 0.28913116455078125, "step": 576 }, { "epoch": 0.20876486816516665, "grad_norm": 2.307420253753662, "learning_rate": 3.992034107982182e-05, "loss": 0.3116302490234375, "step": 577 }, { "epoch": 0.20912667902853782, "grad_norm": 6.783812522888184, "learning_rate": 3.991783400978882e-05, "loss": 0.37435150146484375, "step": 578 }, { "epoch": 0.209488489891909, "grad_norm": 1.4529393911361694, "learning_rate": 3.991528817785988e-05, "loss": 0.31787109375, "step": 579 }, { "epoch": 0.2098503007552802, "grad_norm": 2.577481269836426, "learning_rate": 3.991270358898941e-05, "loss": 0.30814361572265625, "step": 580 }, { "epoch": 0.21021211161865136, "grad_norm": 4.1034955978393555, "learning_rate": 3.991008024820726e-05, "loss": 0.3002777099609375, "step": 581 }, { "epoch": 0.21057392248202253, "grad_norm": 10.472536087036133, "learning_rate": 3.990741816061868e-05, "loss": 0.28351593017578125, "step": 582 }, { "epoch": 0.2109357333453937, "grad_norm": 3.423326015472412, "learning_rate": 3.990471733140434e-05, "loss": 0.2955474853515625, "step": 583 }, { "epoch": 0.21129754420876487, "grad_norm": 0.9806787371635437, "learning_rate": 3.99019777658203e-05, "loss": 0.32117462158203125, "step": 584 }, { "epoch": 0.21165935507213604, "grad_norm": 1.8854475021362305, "learning_rate": 3.989919946919799e-05, "loss": 0.34069061279296875, "step": 585 }, { "epoch": 0.2120211659355072, "grad_norm": 1.9766969680786133, "learning_rate": 3.9896382446944216e-05, "loss": 0.3194122314453125, "step": 586 }, { "epoch": 0.21238297679887838, "grad_norm": 1.7778327465057373, "learning_rate": 3.989352670454118e-05, "loss": 0.28601837158203125, "step": 587 }, { "epoch": 0.21274478766224955, "grad_norm": 5.133213043212891, "learning_rate": 3.98906322475464e-05, "loss": 0.36785888671875, "step": 588 }, { "epoch": 0.21310659852562072, "grad_norm": 1.8287158012390137, "learning_rate": 3.988769908159275e-05, "loss": 0.350738525390625, "step": 589 }, { "epoch": 0.2134684093889919, "grad_norm": 2.272655963897705, "learning_rate": 3.988472721238844e-05, "loss": 0.330078125, "step": 590 }, { "epoch": 0.2138302202523631, "grad_norm": 3.694236993789673, "learning_rate": 3.9881716645716985e-05, "loss": 0.3230743408203125, "step": 591 }, { "epoch": 0.21419203111573426, "grad_norm": 6.369925022125244, "learning_rate": 3.987866738743724e-05, "loss": 0.3099517822265625, "step": 592 }, { "epoch": 0.21455384197910543, "grad_norm": 2.8181042671203613, "learning_rate": 3.987557944348331e-05, "loss": 0.3043212890625, "step": 593 }, { "epoch": 0.2149156528424766, "grad_norm": 1.464660882949829, "learning_rate": 3.987245281986462e-05, "loss": 0.3492279052734375, "step": 594 }, { "epoch": 0.21527746370584777, "grad_norm": 0.874129593372345, "learning_rate": 3.986928752266587e-05, "loss": 0.3152008056640625, "step": 595 }, { "epoch": 0.21563927456921894, "grad_norm": 1.2474035024642944, "learning_rate": 3.9866083558047004e-05, "loss": 0.3164215087890625, "step": 596 }, { "epoch": 0.2160010854325901, "grad_norm": 0.7503196001052856, "learning_rate": 3.9862840932243225e-05, "loss": 0.30213165283203125, "step": 597 }, { "epoch": 0.21636289629596128, "grad_norm": 0.9995457530021667, "learning_rate": 3.985955965156498e-05, "loss": 0.3008880615234375, "step": 598 }, { "epoch": 0.21672470715933245, "grad_norm": 1.5829278230667114, "learning_rate": 3.9856239722397924e-05, "loss": 0.295196533203125, "step": 599 }, { "epoch": 0.21708651802270362, "grad_norm": 2.5539438724517822, "learning_rate": 3.985288115120296e-05, "loss": 0.27813720703125, "step": 600 }, { "epoch": 0.2174483288860748, "grad_norm": 0.7882852554321289, "learning_rate": 3.984948394451615e-05, "loss": 0.2942352294921875, "step": 601 }, { "epoch": 0.21781013974944596, "grad_norm": 2.2552151679992676, "learning_rate": 3.984604810894877e-05, "loss": 0.31122589111328125, "step": 602 }, { "epoch": 0.21817195061281716, "grad_norm": 1.845597743988037, "learning_rate": 3.9842573651187264e-05, "loss": 0.2527923583984375, "step": 603 }, { "epoch": 0.21853376147618833, "grad_norm": 4.883584976196289, "learning_rate": 3.983906057799323e-05, "loss": 0.29369354248046875, "step": 604 }, { "epoch": 0.2188955723395595, "grad_norm": 1.0242358446121216, "learning_rate": 3.983550889620345e-05, "loss": 0.29949951171875, "step": 605 }, { "epoch": 0.21925738320293067, "grad_norm": 3.1970841884613037, "learning_rate": 3.983191861272979e-05, "loss": 0.3288764953613281, "step": 606 }, { "epoch": 0.21961919406630184, "grad_norm": 1.0991249084472656, "learning_rate": 3.982828973455929e-05, "loss": 0.32346343994140625, "step": 607 }, { "epoch": 0.219981004929673, "grad_norm": 2.0455868244171143, "learning_rate": 3.9824622268754045e-05, "loss": 0.3343658447265625, "step": 608 }, { "epoch": 0.22034281579304418, "grad_norm": 3.8508236408233643, "learning_rate": 3.982091622245129e-05, "loss": 0.29582977294921875, "step": 609 }, { "epoch": 0.22070462665641535, "grad_norm": 4.326939582824707, "learning_rate": 3.9817171602863316e-05, "loss": 0.33013916015625, "step": 610 }, { "epoch": 0.22106643751978652, "grad_norm": 1.458966851234436, "learning_rate": 3.98133884172775e-05, "loss": 0.3201446533203125, "step": 611 }, { "epoch": 0.2214282483831577, "grad_norm": 2.4854586124420166, "learning_rate": 3.9809566673056255e-05, "loss": 0.3321533203125, "step": 612 }, { "epoch": 0.22179005924652886, "grad_norm": 0.877308189868927, "learning_rate": 3.9805706377637036e-05, "loss": 0.27506256103515625, "step": 613 }, { "epoch": 0.22215187010990006, "grad_norm": 2.449988842010498, "learning_rate": 3.9801807538532335e-05, "loss": 0.3009796142578125, "step": 614 }, { "epoch": 0.22251368097327123, "grad_norm": 2.098395586013794, "learning_rate": 3.979787016332963e-05, "loss": 0.297393798828125, "step": 615 }, { "epoch": 0.2228754918366424, "grad_norm": 1.1747655868530273, "learning_rate": 3.9793894259691413e-05, "loss": 0.270721435546875, "step": 616 }, { "epoch": 0.22323730270001357, "grad_norm": 1.1307977437973022, "learning_rate": 3.9789879835355154e-05, "loss": 0.28974151611328125, "step": 617 }, { "epoch": 0.22359911356338474, "grad_norm": 1.7742372751235962, "learning_rate": 3.9785826898133285e-05, "loss": 0.279571533203125, "step": 618 }, { "epoch": 0.22396092442675591, "grad_norm": 4.676658630371094, "learning_rate": 3.9781735455913175e-05, "loss": 0.33834075927734375, "step": 619 }, { "epoch": 0.22432273529012708, "grad_norm": 2.5623345375061035, "learning_rate": 3.977760551665715e-05, "loss": 0.343414306640625, "step": 620 }, { "epoch": 0.22468454615349825, "grad_norm": 1.1071882247924805, "learning_rate": 3.977343708840244e-05, "loss": 0.30218505859375, "step": 621 }, { "epoch": 0.22504635701686942, "grad_norm": 1.424065351486206, "learning_rate": 3.976923017926117e-05, "loss": 0.31513214111328125, "step": 622 }, { "epoch": 0.2254081678802406, "grad_norm": 3.069551944732666, "learning_rate": 3.9764984797420384e-05, "loss": 0.2870330810546875, "step": 623 }, { "epoch": 0.22576997874361177, "grad_norm": 5.723482608795166, "learning_rate": 3.976070095114196e-05, "loss": 0.293670654296875, "step": 624 }, { "epoch": 0.22613178960698294, "grad_norm": 1.04414963722229, "learning_rate": 3.975637864876266e-05, "loss": 0.32166290283203125, "step": 625 }, { "epoch": 0.22649360047035413, "grad_norm": 0.7728726267814636, "learning_rate": 3.975201789869406e-05, "loss": 0.29676055908203125, "step": 626 }, { "epoch": 0.2268554113337253, "grad_norm": 3.482280731201172, "learning_rate": 3.974761870942258e-05, "loss": 0.283935546875, "step": 627 }, { "epoch": 0.22721722219709647, "grad_norm": 1.3038164377212524, "learning_rate": 3.974318108950944e-05, "loss": 0.31699371337890625, "step": 628 }, { "epoch": 0.22757903306046764, "grad_norm": 1.5605974197387695, "learning_rate": 3.973870504759064e-05, "loss": 0.3052177429199219, "step": 629 }, { "epoch": 0.22794084392383882, "grad_norm": 0.9818633198738098, "learning_rate": 3.9734190592376967e-05, "loss": 0.2892608642578125, "step": 630 }, { "epoch": 0.22830265478720999, "grad_norm": 1.2112611532211304, "learning_rate": 3.9729637732653956e-05, "loss": 0.23697662353515625, "step": 631 }, { "epoch": 0.22866446565058116, "grad_norm": 3.07555890083313, "learning_rate": 3.972504647728188e-05, "loss": 0.29047393798828125, "step": 632 }, { "epoch": 0.22902627651395233, "grad_norm": 4.758583068847656, "learning_rate": 3.972041683519573e-05, "loss": 0.30206298828125, "step": 633 }, { "epoch": 0.2293880873773235, "grad_norm": 3.154860019683838, "learning_rate": 3.971574881540521e-05, "loss": 0.29473114013671875, "step": 634 }, { "epoch": 0.22974989824069467, "grad_norm": 3.5332753658294678, "learning_rate": 3.971104242699471e-05, "loss": 0.33106231689453125, "step": 635 }, { "epoch": 0.23011170910406584, "grad_norm": 1.3140453100204468, "learning_rate": 3.9706297679123277e-05, "loss": 0.29322052001953125, "step": 636 }, { "epoch": 0.23047351996743703, "grad_norm": 3.7752418518066406, "learning_rate": 3.9701514581024626e-05, "loss": 0.302947998046875, "step": 637 }, { "epoch": 0.2308353308308082, "grad_norm": 5.070938587188721, "learning_rate": 3.969669314200708e-05, "loss": 0.30956268310546875, "step": 638 }, { "epoch": 0.23119714169417938, "grad_norm": 4.770543575286865, "learning_rate": 3.9691833371453616e-05, "loss": 0.3473052978515625, "step": 639 }, { "epoch": 0.23155895255755055, "grad_norm": 7.536214351654053, "learning_rate": 3.968693527882177e-05, "loss": 0.27176666259765625, "step": 640 }, { "epoch": 0.23192076342092172, "grad_norm": 0.5982236266136169, "learning_rate": 3.968199887364367e-05, "loss": 0.320037841796875, "step": 641 }, { "epoch": 0.2322825742842929, "grad_norm": 1.1033058166503906, "learning_rate": 3.9677024165526005e-05, "loss": 0.337493896484375, "step": 642 }, { "epoch": 0.23264438514766406, "grad_norm": 0.6832305192947388, "learning_rate": 3.967201116415002e-05, "loss": 0.3018798828125, "step": 643 }, { "epoch": 0.23300619601103523, "grad_norm": 2.327439069747925, "learning_rate": 3.966695987927145e-05, "loss": 0.2654266357421875, "step": 644 }, { "epoch": 0.2333680068744064, "grad_norm": 2.079411745071411, "learning_rate": 3.966187032072054e-05, "loss": 0.3027801513671875, "step": 645 }, { "epoch": 0.23372981773777757, "grad_norm": 3.0928056240081787, "learning_rate": 3.965674249840205e-05, "loss": 0.30135345458984375, "step": 646 }, { "epoch": 0.23409162860114874, "grad_norm": 1.4758027791976929, "learning_rate": 3.9651576422295173e-05, "loss": 0.30332183837890625, "step": 647 }, { "epoch": 0.23445343946451994, "grad_norm": 1.751461386680603, "learning_rate": 3.9646372102453565e-05, "loss": 0.3053436279296875, "step": 648 }, { "epoch": 0.2348152503278911, "grad_norm": 6.579773902893066, "learning_rate": 3.964112954900529e-05, "loss": 0.38916015625, "step": 649 }, { "epoch": 0.23517706119126228, "grad_norm": 4.549195289611816, "learning_rate": 3.963584877215283e-05, "loss": 0.2991790771484375, "step": 650 }, { "epoch": 0.23553887205463345, "grad_norm": 10.992432594299316, "learning_rate": 3.963052978217306e-05, "loss": 0.31890869140625, "step": 651 }, { "epoch": 0.23590068291800462, "grad_norm": 6.123596668243408, "learning_rate": 3.96251725894172e-05, "loss": 0.29496002197265625, "step": 652 }, { "epoch": 0.2362624937813758, "grad_norm": 3.8793106079101562, "learning_rate": 3.961977720431084e-05, "loss": 0.25994873046875, "step": 653 }, { "epoch": 0.23662430464474696, "grad_norm": 3.9979145526885986, "learning_rate": 3.961434363735389e-05, "loss": 0.29931640625, "step": 654 }, { "epoch": 0.23698611550811813, "grad_norm": 1.1406888961791992, "learning_rate": 3.960887189912054e-05, "loss": 0.2855262756347656, "step": 655 }, { "epoch": 0.2373479263714893, "grad_norm": 3.5460870265960693, "learning_rate": 3.9603362000259304e-05, "loss": 0.29488372802734375, "step": 656 }, { "epoch": 0.23770973723486047, "grad_norm": 4.2551398277282715, "learning_rate": 3.9597813951492924e-05, "loss": 0.29807281494140625, "step": 657 }, { "epoch": 0.23807154809823164, "grad_norm": 4.865359783172607, "learning_rate": 3.9592227763618414e-05, "loss": 0.3118782043457031, "step": 658 }, { "epoch": 0.2384333589616028, "grad_norm": 6.749338626861572, "learning_rate": 3.958660344750699e-05, "loss": 0.334320068359375, "step": 659 }, { "epoch": 0.238795169824974, "grad_norm": 5.776943683624268, "learning_rate": 3.9580941014104086e-05, "loss": 0.32787322998046875, "step": 660 }, { "epoch": 0.23915698068834518, "grad_norm": 2.59554386138916, "learning_rate": 3.9575240474429296e-05, "loss": 0.3081779479980469, "step": 661 }, { "epoch": 0.23951879155171635, "grad_norm": 3.028926372528076, "learning_rate": 3.9569501839576385e-05, "loss": 0.291229248046875, "step": 662 }, { "epoch": 0.23988060241508752, "grad_norm": 1.136025309562683, "learning_rate": 3.9563725120713264e-05, "loss": 0.28079986572265625, "step": 663 }, { "epoch": 0.2402424132784587, "grad_norm": 0.9777661561965942, "learning_rate": 3.955791032908193e-05, "loss": 0.3205413818359375, "step": 664 }, { "epoch": 0.24060422414182986, "grad_norm": 1.0066372156143188, "learning_rate": 3.955205747599851e-05, "loss": 0.31195068359375, "step": 665 }, { "epoch": 0.24096603500520103, "grad_norm": 1.9088504314422607, "learning_rate": 3.954616657285316e-05, "loss": 0.297027587890625, "step": 666 }, { "epoch": 0.2413278458685722, "grad_norm": 0.639315128326416, "learning_rate": 3.9540237631110126e-05, "loss": 0.2813873291015625, "step": 667 }, { "epoch": 0.24168965673194337, "grad_norm": 0.7303865551948547, "learning_rate": 3.9534270662307655e-05, "loss": 0.30243682861328125, "step": 668 }, { "epoch": 0.24205146759531454, "grad_norm": 0.7588809132575989, "learning_rate": 3.9528265678058e-05, "loss": 0.3089447021484375, "step": 669 }, { "epoch": 0.2424132784586857, "grad_norm": 3.1815524101257324, "learning_rate": 3.9522222690047404e-05, "loss": 0.3312835693359375, "step": 670 }, { "epoch": 0.2427750893220569, "grad_norm": 1.0982428789138794, "learning_rate": 3.9516141710036066e-05, "loss": 0.318511962890625, "step": 671 }, { "epoch": 0.24313690018542808, "grad_norm": 2.8511674404144287, "learning_rate": 3.951002274985812e-05, "loss": 0.322052001953125, "step": 672 }, { "epoch": 0.24349871104879925, "grad_norm": 0.7695657014846802, "learning_rate": 3.9503865821421616e-05, "loss": 0.3236541748046875, "step": 673 }, { "epoch": 0.24386052191217042, "grad_norm": 2.084360122680664, "learning_rate": 3.9497670936708486e-05, "loss": 0.3253021240234375, "step": 674 }, { "epoch": 0.2442223327755416, "grad_norm": 2.009584426879883, "learning_rate": 3.949143810777453e-05, "loss": 0.32149505615234375, "step": 675 }, { "epoch": 0.24458414363891276, "grad_norm": 1.3577736616134644, "learning_rate": 3.94851673467494e-05, "loss": 0.295989990234375, "step": 676 }, { "epoch": 0.24494595450228393, "grad_norm": 0.5689055323600769, "learning_rate": 3.947885866583655e-05, "loss": 0.317718505859375, "step": 677 }, { "epoch": 0.2453077653656551, "grad_norm": 0.6469590067863464, "learning_rate": 3.947251207731325e-05, "loss": 0.32950592041015625, "step": 678 }, { "epoch": 0.24566957622902627, "grad_norm": 3.0523006916046143, "learning_rate": 3.9466127593530533e-05, "loss": 0.34310150146484375, "step": 679 }, { "epoch": 0.24603138709239744, "grad_norm": 0.9020960927009583, "learning_rate": 3.945970522691317e-05, "loss": 0.31552886962890625, "step": 680 }, { "epoch": 0.2463931979557686, "grad_norm": 1.2101303339004517, "learning_rate": 3.945324498995968e-05, "loss": 0.3113555908203125, "step": 681 }, { "epoch": 0.24675500881913978, "grad_norm": 1.1988967657089233, "learning_rate": 3.944674689524225e-05, "loss": 0.33557891845703125, "step": 682 }, { "epoch": 0.24711681968251098, "grad_norm": 2.8067080974578857, "learning_rate": 3.944021095540676e-05, "loss": 0.307403564453125, "step": 683 }, { "epoch": 0.24747863054588215, "grad_norm": 0.8730652332305908, "learning_rate": 3.9433637183172734e-05, "loss": 0.2962799072265625, "step": 684 }, { "epoch": 0.24784044140925332, "grad_norm": 1.094242811203003, "learning_rate": 3.942702559133333e-05, "loss": 0.2953643798828125, "step": 685 }, { "epoch": 0.2482022522726245, "grad_norm": 3.243309736251831, "learning_rate": 3.942037619275529e-05, "loss": 0.270050048828125, "step": 686 }, { "epoch": 0.24856406313599566, "grad_norm": 2.8082773685455322, "learning_rate": 3.941368900037896e-05, "loss": 0.3308563232421875, "step": 687 }, { "epoch": 0.24892587399936683, "grad_norm": 2.478132963180542, "learning_rate": 3.9406964027218196e-05, "loss": 0.3335151672363281, "step": 688 }, { "epoch": 0.249287684862738, "grad_norm": 2.055680751800537, "learning_rate": 3.9400201286360425e-05, "loss": 0.31558990478515625, "step": 689 }, { "epoch": 0.24964949572610917, "grad_norm": 3.3810629844665527, "learning_rate": 3.939340079096652e-05, "loss": 0.3100128173828125, "step": 690 }, { "epoch": 0.25001130658948034, "grad_norm": 0.8430792093276978, "learning_rate": 3.938656255427088e-05, "loss": 0.29974365234375, "step": 691 }, { "epoch": 0.2503731174528515, "grad_norm": 3.3444671630859375, "learning_rate": 3.937968658958132e-05, "loss": 0.26529693603515625, "step": 692 }, { "epoch": 0.2507349283162227, "grad_norm": 4.84736442565918, "learning_rate": 3.937277291027909e-05, "loss": 0.2901763916015625, "step": 693 }, { "epoch": 0.25109673917959385, "grad_norm": 0.7884296178817749, "learning_rate": 3.936582152981881e-05, "loss": 0.34461212158203125, "step": 694 }, { "epoch": 0.251458550042965, "grad_norm": 3.3545594215393066, "learning_rate": 3.935883246172853e-05, "loss": 0.28351593017578125, "step": 695 }, { "epoch": 0.2518203609063362, "grad_norm": 4.445746421813965, "learning_rate": 3.935180571960957e-05, "loss": 0.32497406005859375, "step": 696 }, { "epoch": 0.25218217176970736, "grad_norm": 3.377713918685913, "learning_rate": 3.934474131713661e-05, "loss": 0.32146453857421875, "step": 697 }, { "epoch": 0.25254398263307853, "grad_norm": 2.838954210281372, "learning_rate": 3.933763926805763e-05, "loss": 0.31755828857421875, "step": 698 }, { "epoch": 0.2529057934964497, "grad_norm": 1.2740541696548462, "learning_rate": 3.933049958619384e-05, "loss": 0.2998199462890625, "step": 699 }, { "epoch": 0.25326760435982093, "grad_norm": 0.8307880759239197, "learning_rate": 3.932332228543971e-05, "loss": 0.306610107421875, "step": 700 }, { "epoch": 0.2536294152231921, "grad_norm": 2.3816239833831787, "learning_rate": 3.9316107379762906e-05, "loss": 0.28650665283203125, "step": 701 }, { "epoch": 0.25399122608656327, "grad_norm": 2.259713888168335, "learning_rate": 3.930885488320429e-05, "loss": 0.3012542724609375, "step": 702 }, { "epoch": 0.25435303694993444, "grad_norm": 1.4557541608810425, "learning_rate": 3.9301564809877865e-05, "loss": 0.3283538818359375, "step": 703 }, { "epoch": 0.2547148478133056, "grad_norm": 4.5338826179504395, "learning_rate": 3.929423717397078e-05, "loss": 0.2921142578125, "step": 704 }, { "epoch": 0.2550766586766768, "grad_norm": 0.8838272094726562, "learning_rate": 3.928687198974325e-05, "loss": 0.31119537353515625, "step": 705 }, { "epoch": 0.25543846954004795, "grad_norm": 1.8268072605133057, "learning_rate": 3.927946927152862e-05, "loss": 0.30513763427734375, "step": 706 }, { "epoch": 0.2558002804034191, "grad_norm": 3.2063992023468018, "learning_rate": 3.927202903373322e-05, "loss": 0.28461456298828125, "step": 707 }, { "epoch": 0.2561620912667903, "grad_norm": 2.714996576309204, "learning_rate": 3.9264551290836436e-05, "loss": 0.3240509033203125, "step": 708 }, { "epoch": 0.25652390213016146, "grad_norm": 0.7533718347549438, "learning_rate": 3.9257036057390615e-05, "loss": 0.27840423583984375, "step": 709 }, { "epoch": 0.25688571299353263, "grad_norm": 0.9561322331428528, "learning_rate": 3.924948334802109e-05, "loss": 0.2693939208984375, "step": 710 }, { "epoch": 0.2572475238569038, "grad_norm": 3.922743082046509, "learning_rate": 3.9241893177426096e-05, "loss": 0.32977294921875, "step": 711 }, { "epoch": 0.257609334720275, "grad_norm": 0.7462506890296936, "learning_rate": 3.92342655603768e-05, "loss": 0.27240753173828125, "step": 712 }, { "epoch": 0.25797114558364614, "grad_norm": 3.026108741760254, "learning_rate": 3.922660051171723e-05, "loss": 0.2998504638671875, "step": 713 }, { "epoch": 0.2583329564470173, "grad_norm": 3.6099987030029297, "learning_rate": 3.9218898046364265e-05, "loss": 0.31945037841796875, "step": 714 }, { "epoch": 0.2586947673103885, "grad_norm": 2.7191855907440186, "learning_rate": 3.921115817930758e-05, "loss": 0.329681396484375, "step": 715 }, { "epoch": 0.25905657817375966, "grad_norm": 1.143491506576538, "learning_rate": 3.920338092560966e-05, "loss": 0.2695159912109375, "step": 716 }, { "epoch": 0.2594183890371308, "grad_norm": 0.5449677109718323, "learning_rate": 3.9195566300405745e-05, "loss": 0.27788543701171875, "step": 717 }, { "epoch": 0.259780199900502, "grad_norm": 1.4126399755477905, "learning_rate": 3.9187714318903794e-05, "loss": 0.2897796630859375, "step": 718 }, { "epoch": 0.26014201076387317, "grad_norm": 0.6715978384017944, "learning_rate": 3.9179824996384466e-05, "loss": 0.3074188232421875, "step": 719 }, { "epoch": 0.26050382162724434, "grad_norm": 2.7223033905029297, "learning_rate": 3.91718983482011e-05, "loss": 0.3237152099609375, "step": 720 }, { "epoch": 0.2608656324906155, "grad_norm": 4.1190266609191895, "learning_rate": 3.916393438977965e-05, "loss": 0.27909088134765625, "step": 721 }, { "epoch": 0.2612274433539867, "grad_norm": 2.2585134506225586, "learning_rate": 3.9155933136618716e-05, "loss": 0.3351593017578125, "step": 722 }, { "epoch": 0.2615892542173579, "grad_norm": 1.2023251056671143, "learning_rate": 3.914789460428944e-05, "loss": 0.32470703125, "step": 723 }, { "epoch": 0.2619510650807291, "grad_norm": 0.8283178210258484, "learning_rate": 3.913981880843554e-05, "loss": 0.2910614013671875, "step": 724 }, { "epoch": 0.26231287594410024, "grad_norm": 1.3752673864364624, "learning_rate": 3.913170576477324e-05, "loss": 0.3119964599609375, "step": 725 }, { "epoch": 0.2626746868074714, "grad_norm": 0.714100182056427, "learning_rate": 3.912355548909123e-05, "loss": 0.2928924560546875, "step": 726 }, { "epoch": 0.2630364976708426, "grad_norm": 3.2939980030059814, "learning_rate": 3.911536799725072e-05, "loss": 0.291290283203125, "step": 727 }, { "epoch": 0.26339830853421375, "grad_norm": 1.3623833656311035, "learning_rate": 3.9107143305185274e-05, "loss": 0.33502197265625, "step": 728 }, { "epoch": 0.2637601193975849, "grad_norm": 0.9083791971206665, "learning_rate": 3.909888142890089e-05, "loss": 0.31917572021484375, "step": 729 }, { "epoch": 0.2641219302609561, "grad_norm": 1.1305270195007324, "learning_rate": 3.9090582384475924e-05, "loss": 0.297821044921875, "step": 730 }, { "epoch": 0.26448374112432727, "grad_norm": 0.7980315685272217, "learning_rate": 3.9082246188061056e-05, "loss": 0.3152923583984375, "step": 731 }, { "epoch": 0.26484555198769844, "grad_norm": 1.2555317878723145, "learning_rate": 3.9073872855879294e-05, "loss": 0.31934356689453125, "step": 732 }, { "epoch": 0.2652073628510696, "grad_norm": 0.9389625191688538, "learning_rate": 3.906546240422587e-05, "loss": 0.30432891845703125, "step": 733 }, { "epoch": 0.2655691737144408, "grad_norm": 2.0330140590667725, "learning_rate": 3.9057014849468296e-05, "loss": 0.27489471435546875, "step": 734 }, { "epoch": 0.26593098457781195, "grad_norm": 1.6321793794631958, "learning_rate": 3.904853020804627e-05, "loss": 0.33074188232421875, "step": 735 }, { "epoch": 0.2662927954411831, "grad_norm": 1.3125239610671997, "learning_rate": 3.904000849647165e-05, "loss": 0.29138946533203125, "step": 736 }, { "epoch": 0.2666546063045543, "grad_norm": 3.990304946899414, "learning_rate": 3.9031449731328485e-05, "loss": 0.30022621154785156, "step": 737 }, { "epoch": 0.26701641716792546, "grad_norm": 4.150036811828613, "learning_rate": 3.9022853929272874e-05, "loss": 0.2978630065917969, "step": 738 }, { "epoch": 0.2673782280312966, "grad_norm": 5.977537155151367, "learning_rate": 3.9014221107033036e-05, "loss": 0.2983856201171875, "step": 739 }, { "epoch": 0.2677400388946678, "grad_norm": 4.522609233856201, "learning_rate": 3.900555128140921e-05, "loss": 0.29288482666015625, "step": 740 }, { "epoch": 0.26810184975803897, "grad_norm": 7.219893455505371, "learning_rate": 3.899684446927366e-05, "loss": 0.34867095947265625, "step": 741 }, { "epoch": 0.26846366062141014, "grad_norm": 2.876025915145874, "learning_rate": 3.898810068757063e-05, "loss": 0.3301544189453125, "step": 742 }, { "epoch": 0.2688254714847813, "grad_norm": 1.503074288368225, "learning_rate": 3.8979319953316304e-05, "loss": 0.28057098388671875, "step": 743 }, { "epoch": 0.2691872823481525, "grad_norm": 0.9496503472328186, "learning_rate": 3.8970502283598775e-05, "loss": 0.2948188781738281, "step": 744 }, { "epoch": 0.26954909321152365, "grad_norm": 9.690984725952148, "learning_rate": 3.896164769557801e-05, "loss": 0.312225341796875, "step": 745 }, { "epoch": 0.2699109040748949, "grad_norm": 6.881040573120117, "learning_rate": 3.895275620648587e-05, "loss": 0.308837890625, "step": 746 }, { "epoch": 0.27027271493826605, "grad_norm": 2.391634225845337, "learning_rate": 3.894382783362596e-05, "loss": 0.336334228515625, "step": 747 }, { "epoch": 0.2706345258016372, "grad_norm": 3.5359199047088623, "learning_rate": 3.89348625943737e-05, "loss": 0.30593109130859375, "step": 748 }, { "epoch": 0.2709963366650084, "grad_norm": 1.131230354309082, "learning_rate": 3.892586050617626e-05, "loss": 0.3131103515625, "step": 749 }, { "epoch": 0.27135814752837956, "grad_norm": 2.7110085487365723, "learning_rate": 3.891682158655251e-05, "loss": 0.275848388671875, "step": 750 }, { "epoch": 0.2717199583917507, "grad_norm": 3.7294297218322754, "learning_rate": 3.890774585309301e-05, "loss": 0.30788421630859375, "step": 751 }, { "epoch": 0.2720817692551219, "grad_norm": 7.240702152252197, "learning_rate": 3.889863332345994e-05, "loss": 0.34287261962890625, "step": 752 }, { "epoch": 0.27244358011849307, "grad_norm": 4.601446151733398, "learning_rate": 3.88894840153871e-05, "loss": 0.313873291015625, "step": 753 }, { "epoch": 0.27280539098186424, "grad_norm": 8.349262237548828, "learning_rate": 3.8880297946679875e-05, "loss": 0.37300872802734375, "step": 754 }, { "epoch": 0.2731672018452354, "grad_norm": 3.0367937088012695, "learning_rate": 3.887107513521518e-05, "loss": 0.279693603515625, "step": 755 }, { "epoch": 0.2735290127086066, "grad_norm": 3.8166415691375732, "learning_rate": 3.886181559894144e-05, "loss": 0.3050537109375, "step": 756 }, { "epoch": 0.27389082357197775, "grad_norm": 0.8591893911361694, "learning_rate": 3.885251935587853e-05, "loss": 0.30512237548828125, "step": 757 }, { "epoch": 0.2742526344353489, "grad_norm": 0.6971132159233093, "learning_rate": 3.884318642411779e-05, "loss": 0.300262451171875, "step": 758 }, { "epoch": 0.2746144452987201, "grad_norm": 2.052218437194824, "learning_rate": 3.883381682182193e-05, "loss": 0.2919158935546875, "step": 759 }, { "epoch": 0.27497625616209126, "grad_norm": 1.9440864324569702, "learning_rate": 3.8824410567225054e-05, "loss": 0.34552764892578125, "step": 760 }, { "epoch": 0.27533806702546243, "grad_norm": 2.6703200340270996, "learning_rate": 3.881496767863257e-05, "loss": 0.272308349609375, "step": 761 }, { "epoch": 0.2756998778888336, "grad_norm": 1.2469053268432617, "learning_rate": 3.880548817442119e-05, "loss": 0.3184661865234375, "step": 762 }, { "epoch": 0.27606168875220477, "grad_norm": 2.474146604537964, "learning_rate": 3.879597207303888e-05, "loss": 0.290618896484375, "step": 763 }, { "epoch": 0.27642349961557594, "grad_norm": 1.014906644821167, "learning_rate": 3.878641939300483e-05, "loss": 0.29634857177734375, "step": 764 }, { "epoch": 0.2767853104789471, "grad_norm": 1.042439341545105, "learning_rate": 3.877683015290942e-05, "loss": 0.289520263671875, "step": 765 }, { "epoch": 0.2771471213423183, "grad_norm": 1.708919882774353, "learning_rate": 3.8767204371414164e-05, "loss": 0.3260650634765625, "step": 766 }, { "epoch": 0.27750893220568945, "grad_norm": 1.4102545976638794, "learning_rate": 3.875754206725171e-05, "loss": 0.29563140869140625, "step": 767 }, { "epoch": 0.2778707430690606, "grad_norm": 2.3736138343811035, "learning_rate": 3.874784325922575e-05, "loss": 0.34003448486328125, "step": 768 }, { "epoch": 0.27823255393243185, "grad_norm": 1.3323041200637817, "learning_rate": 3.8738107966211066e-05, "loss": 0.32111358642578125, "step": 769 }, { "epoch": 0.278594364795803, "grad_norm": 2.5836949348449707, "learning_rate": 3.872833620715339e-05, "loss": 0.298248291015625, "step": 770 }, { "epoch": 0.2789561756591742, "grad_norm": 1.6293621063232422, "learning_rate": 3.871852800106945e-05, "loss": 0.26708984375, "step": 771 }, { "epoch": 0.27931798652254536, "grad_norm": 4.615795135498047, "learning_rate": 3.87086833670469e-05, "loss": 0.35367584228515625, "step": 772 }, { "epoch": 0.27967979738591653, "grad_norm": 0.9024555087089539, "learning_rate": 3.869880232424428e-05, "loss": 0.26998138427734375, "step": 773 }, { "epoch": 0.2800416082492877, "grad_norm": 3.0832858085632324, "learning_rate": 3.8688884891891e-05, "loss": 0.29651641845703125, "step": 774 }, { "epoch": 0.28040341911265887, "grad_norm": 3.0090038776397705, "learning_rate": 3.8678931089287245e-05, "loss": 0.30370330810546875, "step": 775 }, { "epoch": 0.28076522997603004, "grad_norm": 1.9161865711212158, "learning_rate": 3.866894093580405e-05, "loss": 0.32390594482421875, "step": 776 }, { "epoch": 0.2811270408394012, "grad_norm": 3.799424648284912, "learning_rate": 3.865891445088311e-05, "loss": 0.289581298828125, "step": 777 }, { "epoch": 0.2814888517027724, "grad_norm": 4.0138421058654785, "learning_rate": 3.8648851654036905e-05, "loss": 0.2880706787109375, "step": 778 }, { "epoch": 0.28185066256614355, "grad_norm": 2.1437571048736572, "learning_rate": 3.863875256484851e-05, "loss": 0.3480224609375, "step": 779 }, { "epoch": 0.2822124734295147, "grad_norm": 1.0057824850082397, "learning_rate": 3.8628617202971684e-05, "loss": 0.32213592529296875, "step": 780 }, { "epoch": 0.2825742842928859, "grad_norm": 1.4987822771072388, "learning_rate": 3.8618445588130746e-05, "loss": 0.28006744384765625, "step": 781 }, { "epoch": 0.28293609515625706, "grad_norm": 0.7310562133789062, "learning_rate": 3.8608237740120566e-05, "loss": 0.31363677978515625, "step": 782 }, { "epoch": 0.28329790601962823, "grad_norm": 1.9678982496261597, "learning_rate": 3.859799367880654e-05, "loss": 0.29437255859375, "step": 783 }, { "epoch": 0.2836597168829994, "grad_norm": 1.854134202003479, "learning_rate": 3.8587713424124534e-05, "loss": 0.30108642578125, "step": 784 }, { "epoch": 0.2840215277463706, "grad_norm": 1.468666911125183, "learning_rate": 3.8577396996080846e-05, "loss": 0.2723236083984375, "step": 785 }, { "epoch": 0.28438333860974174, "grad_norm": 1.53202223777771, "learning_rate": 3.8567044414752175e-05, "loss": 0.3056793212890625, "step": 786 }, { "epoch": 0.2847451494731129, "grad_norm": 0.8726559281349182, "learning_rate": 3.8556655700285566e-05, "loss": 0.31085205078125, "step": 787 }, { "epoch": 0.2851069603364841, "grad_norm": 4.2807488441467285, "learning_rate": 3.8546230872898395e-05, "loss": 0.27419281005859375, "step": 788 }, { "epoch": 0.28546877119985525, "grad_norm": 1.7207067012786865, "learning_rate": 3.8535769952878326e-05, "loss": 0.3299713134765625, "step": 789 }, { "epoch": 0.2858305820632264, "grad_norm": 3.363920211791992, "learning_rate": 3.8525272960583235e-05, "loss": 0.3178558349609375, "step": 790 }, { "epoch": 0.2861923929265976, "grad_norm": 1.228042483329773, "learning_rate": 3.8514739916441225e-05, "loss": 0.334197998046875, "step": 791 }, { "epoch": 0.2865542037899688, "grad_norm": 2.5815138816833496, "learning_rate": 3.850417084095053e-05, "loss": 0.31188201904296875, "step": 792 }, { "epoch": 0.28691601465334, "grad_norm": 0.8115054965019226, "learning_rate": 3.849356575467953e-05, "loss": 0.3017730712890625, "step": 793 }, { "epoch": 0.28727782551671116, "grad_norm": 0.5301522612571716, "learning_rate": 3.8482924678266684e-05, "loss": 0.27976226806640625, "step": 794 }, { "epoch": 0.28763963638008233, "grad_norm": 3.596527576446533, "learning_rate": 3.847224763242048e-05, "loss": 0.31915283203125, "step": 795 }, { "epoch": 0.2880014472434535, "grad_norm": 2.1309754848480225, "learning_rate": 3.84615346379194e-05, "loss": 0.27642822265625, "step": 796 }, { "epoch": 0.28836325810682467, "grad_norm": 2.0398762226104736, "learning_rate": 3.845078571561191e-05, "loss": 0.26761627197265625, "step": 797 }, { "epoch": 0.28872506897019584, "grad_norm": 3.5312530994415283, "learning_rate": 3.8440000886416365e-05, "loss": 0.29538726806640625, "step": 798 }, { "epoch": 0.289086879833567, "grad_norm": 4.042747497558594, "learning_rate": 3.842918017132102e-05, "loss": 0.312286376953125, "step": 799 }, { "epoch": 0.2894486906969382, "grad_norm": 2.2581958770751953, "learning_rate": 3.841832359138395e-05, "loss": 0.341644287109375, "step": 800 }, { "epoch": 0.28981050156030935, "grad_norm": 1.8728244304656982, "learning_rate": 3.8407431167733055e-05, "loss": 0.3229522705078125, "step": 801 }, { "epoch": 0.2901723124236805, "grad_norm": 2.1750648021698, "learning_rate": 3.839650292156595e-05, "loss": 0.30876922607421875, "step": 802 }, { "epoch": 0.2905341232870517, "grad_norm": 0.6316021084785461, "learning_rate": 3.838553887415e-05, "loss": 0.3145904541015625, "step": 803 }, { "epoch": 0.29089593415042286, "grad_norm": 4.73663854598999, "learning_rate": 3.8374539046822216e-05, "loss": 0.30133819580078125, "step": 804 }, { "epoch": 0.29125774501379403, "grad_norm": 3.752488851547241, "learning_rate": 3.836350346098926e-05, "loss": 0.2901763916015625, "step": 805 }, { "epoch": 0.2916195558771652, "grad_norm": 2.461852550506592, "learning_rate": 3.835243213812737e-05, "loss": 0.30138397216796875, "step": 806 }, { "epoch": 0.2919813667405364, "grad_norm": 0.929441511631012, "learning_rate": 3.8341325099782324e-05, "loss": 0.29911041259765625, "step": 807 }, { "epoch": 0.29234317760390754, "grad_norm": 3.5348031520843506, "learning_rate": 3.8330182367569444e-05, "loss": 0.31011962890625, "step": 808 }, { "epoch": 0.2927049884672787, "grad_norm": 3.9471359252929688, "learning_rate": 3.8319003963173475e-05, "loss": 0.30908203125, "step": 809 }, { "epoch": 0.2930667993306499, "grad_norm": 0.6376729011535645, "learning_rate": 3.830778990834859e-05, "loss": 0.289337158203125, "step": 810 }, { "epoch": 0.29342861019402106, "grad_norm": 2.5377373695373535, "learning_rate": 3.8296540224918355e-05, "loss": 0.28876495361328125, "step": 811 }, { "epoch": 0.2937904210573922, "grad_norm": 1.4066625833511353, "learning_rate": 3.8285254934775666e-05, "loss": 0.27848052978515625, "step": 812 }, { "epoch": 0.2941522319207634, "grad_norm": 2.3494327068328857, "learning_rate": 3.827393405988271e-05, "loss": 0.2826576232910156, "step": 813 }, { "epoch": 0.29451404278413457, "grad_norm": 1.355328917503357, "learning_rate": 3.826257762227093e-05, "loss": 0.324127197265625, "step": 814 }, { "epoch": 0.2948758536475058, "grad_norm": 3.0240318775177, "learning_rate": 3.825118564404097e-05, "loss": 0.2983856201171875, "step": 815 }, { "epoch": 0.29523766451087696, "grad_norm": 1.375542163848877, "learning_rate": 3.8239758147362656e-05, "loss": 0.298797607421875, "step": 816 }, { "epoch": 0.29559947537424813, "grad_norm": 1.9652663469314575, "learning_rate": 3.8228295154474904e-05, "loss": 0.3115081787109375, "step": 817 }, { "epoch": 0.2959612862376193, "grad_norm": 0.9980189204216003, "learning_rate": 3.821679668768575e-05, "loss": 0.32204437255859375, "step": 818 }, { "epoch": 0.2963230971009905, "grad_norm": 0.9760767817497253, "learning_rate": 3.820526276937223e-05, "loss": 0.29579925537109375, "step": 819 }, { "epoch": 0.29668490796436164, "grad_norm": 1.8587474822998047, "learning_rate": 3.8193693421980384e-05, "loss": 0.31268310546875, "step": 820 }, { "epoch": 0.2970467188277328, "grad_norm": 2.2737314701080322, "learning_rate": 3.818208866802522e-05, "loss": 0.27347564697265625, "step": 821 }, { "epoch": 0.297408529691104, "grad_norm": 0.9190908670425415, "learning_rate": 3.817044853009062e-05, "loss": 0.269073486328125, "step": 822 }, { "epoch": 0.29777034055447515, "grad_norm": 3.9192843437194824, "learning_rate": 3.8158773030829334e-05, "loss": 0.3292999267578125, "step": 823 }, { "epoch": 0.2981321514178463, "grad_norm": 1.4318889379501343, "learning_rate": 3.814706219296295e-05, "loss": 0.29134368896484375, "step": 824 }, { "epoch": 0.2984939622812175, "grad_norm": 0.49898937344551086, "learning_rate": 3.8135316039281794e-05, "loss": 0.2774200439453125, "step": 825 }, { "epoch": 0.29885577314458867, "grad_norm": 1.4995325803756714, "learning_rate": 3.812353459264495e-05, "loss": 0.296844482421875, "step": 826 }, { "epoch": 0.29921758400795984, "grad_norm": 1.3465394973754883, "learning_rate": 3.8111717875980166e-05, "loss": 0.29347991943359375, "step": 827 }, { "epoch": 0.299579394871331, "grad_norm": 0.7100794911384583, "learning_rate": 3.8099865912283834e-05, "loss": 0.2893791198730469, "step": 828 }, { "epoch": 0.2999412057347022, "grad_norm": 2.4809365272521973, "learning_rate": 3.8087978724620943e-05, "loss": 0.3302001953125, "step": 829 }, { "epoch": 0.30030301659807335, "grad_norm": 0.6920439600944519, "learning_rate": 3.8076056336125034e-05, "loss": 0.28882598876953125, "step": 830 }, { "epoch": 0.3006648274614445, "grad_norm": 2.6202359199523926, "learning_rate": 3.806409876999814e-05, "loss": 0.3285064697265625, "step": 831 }, { "epoch": 0.3010266383248157, "grad_norm": 2.9088892936706543, "learning_rate": 3.805210604951077e-05, "loss": 0.2962493896484375, "step": 832 }, { "epoch": 0.30138844918818686, "grad_norm": 0.9902125000953674, "learning_rate": 3.804007819800182e-05, "loss": 0.30279541015625, "step": 833 }, { "epoch": 0.30175026005155803, "grad_norm": 3.1217665672302246, "learning_rate": 3.802801523887858e-05, "loss": 0.30142974853515625, "step": 834 }, { "epoch": 0.3021120709149292, "grad_norm": 0.8037191033363342, "learning_rate": 3.801591719561665e-05, "loss": 0.32843780517578125, "step": 835 }, { "epoch": 0.30247388177830037, "grad_norm": 1.906739354133606, "learning_rate": 3.8003784091759926e-05, "loss": 0.3055458068847656, "step": 836 }, { "epoch": 0.30283569264167154, "grad_norm": 1.8847544193267822, "learning_rate": 3.799161595092049e-05, "loss": 0.32070159912109375, "step": 837 }, { "epoch": 0.30319750350504276, "grad_norm": 1.4314225912094116, "learning_rate": 3.797941279677867e-05, "loss": 0.32763671875, "step": 838 }, { "epoch": 0.30355931436841394, "grad_norm": 1.6395933628082275, "learning_rate": 3.7967174653082874e-05, "loss": 0.29969024658203125, "step": 839 }, { "epoch": 0.3039211252317851, "grad_norm": 1.9525961875915527, "learning_rate": 3.795490154364964e-05, "loss": 0.3097076416015625, "step": 840 }, { "epoch": 0.3042829360951563, "grad_norm": 0.7116675972938538, "learning_rate": 3.794259349236355e-05, "loss": 0.312225341796875, "step": 841 }, { "epoch": 0.30464474695852745, "grad_norm": 0.9759135842323303, "learning_rate": 3.793025052317717e-05, "loss": 0.3054351806640625, "step": 842 }, { "epoch": 0.3050065578218986, "grad_norm": 0.7763060331344604, "learning_rate": 3.791787266011103e-05, "loss": 0.285736083984375, "step": 843 }, { "epoch": 0.3053683686852698, "grad_norm": 0.6582294702529907, "learning_rate": 3.790545992725357e-05, "loss": 0.29034423828125, "step": 844 }, { "epoch": 0.30573017954864096, "grad_norm": 1.0410913228988647, "learning_rate": 3.789301234876108e-05, "loss": 0.29610443115234375, "step": 845 }, { "epoch": 0.3060919904120121, "grad_norm": 0.6927215456962585, "learning_rate": 3.7880529948857655e-05, "loss": 0.307098388671875, "step": 846 }, { "epoch": 0.3064538012753833, "grad_norm": 1.6102485656738281, "learning_rate": 3.786801275183518e-05, "loss": 0.2926483154296875, "step": 847 }, { "epoch": 0.30681561213875447, "grad_norm": 1.1393144130706787, "learning_rate": 3.785546078205325e-05, "loss": 0.34368133544921875, "step": 848 }, { "epoch": 0.30717742300212564, "grad_norm": 1.8671590089797974, "learning_rate": 3.7842874063939114e-05, "loss": 0.29506683349609375, "step": 849 }, { "epoch": 0.3075392338654968, "grad_norm": 0.4968050420284271, "learning_rate": 3.7830252621987675e-05, "loss": 0.29654693603515625, "step": 850 }, { "epoch": 0.307901044728868, "grad_norm": 0.7793421745300293, "learning_rate": 3.7817596480761384e-05, "loss": 0.2934722900390625, "step": 851 }, { "epoch": 0.30826285559223915, "grad_norm": 4.494914531707764, "learning_rate": 3.780490566489022e-05, "loss": 0.2767486572265625, "step": 852 }, { "epoch": 0.3086246664556103, "grad_norm": 0.6000627875328064, "learning_rate": 3.779218019907167e-05, "loss": 0.3053436279296875, "step": 853 }, { "epoch": 0.3089864773189815, "grad_norm": 0.7339837551116943, "learning_rate": 3.777942010807062e-05, "loss": 0.287750244140625, "step": 854 }, { "epoch": 0.30934828818235266, "grad_norm": 1.3407981395721436, "learning_rate": 3.776662541671936e-05, "loss": 0.2830810546875, "step": 855 }, { "epoch": 0.30971009904572383, "grad_norm": 1.877882480621338, "learning_rate": 3.775379614991751e-05, "loss": 0.30844879150390625, "step": 856 }, { "epoch": 0.310071909909095, "grad_norm": 1.5733872652053833, "learning_rate": 3.7740932332631984e-05, "loss": 0.301239013671875, "step": 857 }, { "epoch": 0.31043372077246617, "grad_norm": 1.4109644889831543, "learning_rate": 3.772803398989691e-05, "loss": 0.29582977294921875, "step": 858 }, { "epoch": 0.31079553163583734, "grad_norm": 0.9435315728187561, "learning_rate": 3.771510114681364e-05, "loss": 0.3329620361328125, "step": 859 }, { "epoch": 0.3111573424992085, "grad_norm": 2.262355089187622, "learning_rate": 3.770213382855064e-05, "loss": 0.28643035888671875, "step": 860 }, { "epoch": 0.31151915336257974, "grad_norm": 2.3553783893585205, "learning_rate": 3.768913206034347e-05, "loss": 0.28948974609375, "step": 861 }, { "epoch": 0.3118809642259509, "grad_norm": 1.6980204582214355, "learning_rate": 3.767609586749477e-05, "loss": 0.341339111328125, "step": 862 }, { "epoch": 0.3122427750893221, "grad_norm": 1.204485535621643, "learning_rate": 3.766302527537411e-05, "loss": 0.2968902587890625, "step": 863 }, { "epoch": 0.31260458595269325, "grad_norm": 1.0089871883392334, "learning_rate": 3.7649920309418054e-05, "loss": 0.3132896423339844, "step": 864 }, { "epoch": 0.3129663968160644, "grad_norm": 0.494862824678421, "learning_rate": 3.763678099513004e-05, "loss": 0.2797088623046875, "step": 865 }, { "epoch": 0.3133282076794356, "grad_norm": 3.4925570487976074, "learning_rate": 3.7623607358080355e-05, "loss": 0.32694244384765625, "step": 866 }, { "epoch": 0.31369001854280676, "grad_norm": 3.3210484981536865, "learning_rate": 3.761039942390608e-05, "loss": 0.31668853759765625, "step": 867 }, { "epoch": 0.31405182940617793, "grad_norm": 1.854534387588501, "learning_rate": 3.7597157218311055e-05, "loss": 0.291015625, "step": 868 }, { "epoch": 0.3144136402695491, "grad_norm": 0.4651390016078949, "learning_rate": 3.758388076706578e-05, "loss": 0.2769012451171875, "step": 869 }, { "epoch": 0.31477545113292027, "grad_norm": 1.0684144496917725, "learning_rate": 3.7570570096007444e-05, "loss": 0.2693023681640625, "step": 870 }, { "epoch": 0.31513726199629144, "grad_norm": 0.9082270860671997, "learning_rate": 3.75572252310398e-05, "loss": 0.2812347412109375, "step": 871 }, { "epoch": 0.3154990728596626, "grad_norm": 0.9559834003448486, "learning_rate": 3.7543846198133146e-05, "loss": 0.275634765625, "step": 872 }, { "epoch": 0.3158608837230338, "grad_norm": 0.7735543251037598, "learning_rate": 3.7530433023324305e-05, "loss": 0.289794921875, "step": 873 }, { "epoch": 0.31622269458640495, "grad_norm": 3.8966758251190186, "learning_rate": 3.75169857327165e-05, "loss": 0.325775146484375, "step": 874 }, { "epoch": 0.3165845054497761, "grad_norm": 3.6413300037384033, "learning_rate": 3.750350435247939e-05, "loss": 0.3197174072265625, "step": 875 }, { "epoch": 0.3169463163131473, "grad_norm": 1.8478413820266724, "learning_rate": 3.7489988908848926e-05, "loss": 0.31258392333984375, "step": 876 }, { "epoch": 0.31730812717651846, "grad_norm": 1.1395543813705444, "learning_rate": 3.74764394281274e-05, "loss": 0.3005828857421875, "step": 877 }, { "epoch": 0.31766993803988963, "grad_norm": 3.628523349761963, "learning_rate": 3.746285593668331e-05, "loss": 0.292266845703125, "step": 878 }, { "epoch": 0.3180317489032608, "grad_norm": 2.1868958473205566, "learning_rate": 3.744923846095135e-05, "loss": 0.31341552734375, "step": 879 }, { "epoch": 0.318393559766632, "grad_norm": 2.6483407020568848, "learning_rate": 3.743558702743236e-05, "loss": 0.29265594482421875, "step": 880 }, { "epoch": 0.31875537063000314, "grad_norm": 2.9110782146453857, "learning_rate": 3.7421901662693246e-05, "loss": 0.305572509765625, "step": 881 }, { "epoch": 0.3191171814933743, "grad_norm": 0.6528190970420837, "learning_rate": 3.740818239336696e-05, "loss": 0.279266357421875, "step": 882 }, { "epoch": 0.3194789923567455, "grad_norm": 1.5177690982818604, "learning_rate": 3.7394429246152445e-05, "loss": 0.28476715087890625, "step": 883 }, { "epoch": 0.3198408032201167, "grad_norm": 0.7111299633979797, "learning_rate": 3.738064224781455e-05, "loss": 0.26122283935546875, "step": 884 }, { "epoch": 0.3202026140834879, "grad_norm": 3.122220993041992, "learning_rate": 3.736682142518403e-05, "loss": 0.2860107421875, "step": 885 }, { "epoch": 0.32056442494685905, "grad_norm": 1.3907917737960815, "learning_rate": 3.735296680515743e-05, "loss": 0.27341461181640625, "step": 886 }, { "epoch": 0.3209262358102302, "grad_norm": 3.017122983932495, "learning_rate": 3.733907841469709e-05, "loss": 0.29982757568359375, "step": 887 }, { "epoch": 0.3212880466736014, "grad_norm": 1.343322992324829, "learning_rate": 3.732515628083107e-05, "loss": 0.2970123291015625, "step": 888 }, { "epoch": 0.32164985753697256, "grad_norm": 1.4175777435302734, "learning_rate": 3.7311200430653095e-05, "loss": 0.30849456787109375, "step": 889 }, { "epoch": 0.32201166840034373, "grad_norm": 0.7500147819519043, "learning_rate": 3.729721089132251e-05, "loss": 0.31878662109375, "step": 890 }, { "epoch": 0.3223734792637149, "grad_norm": 3.313047170639038, "learning_rate": 3.7283187690064197e-05, "loss": 0.29327392578125, "step": 891 }, { "epoch": 0.32273529012708607, "grad_norm": 5.473238945007324, "learning_rate": 3.726913085416859e-05, "loss": 0.27971649169921875, "step": 892 }, { "epoch": 0.32309710099045724, "grad_norm": 4.50167989730835, "learning_rate": 3.7255040410991544e-05, "loss": 0.29021453857421875, "step": 893 }, { "epoch": 0.3234589118538284, "grad_norm": 1.5677417516708374, "learning_rate": 3.724091638795433e-05, "loss": 0.3188934326171875, "step": 894 }, { "epoch": 0.3238207227171996, "grad_norm": 5.38286018371582, "learning_rate": 3.722675881254357e-05, "loss": 0.2901611328125, "step": 895 }, { "epoch": 0.32418253358057075, "grad_norm": 4.861820697784424, "learning_rate": 3.721256771231117e-05, "loss": 0.29638671875, "step": 896 }, { "epoch": 0.3245443444439419, "grad_norm": 1.3400627374649048, "learning_rate": 3.7198343114874293e-05, "loss": 0.3293609619140625, "step": 897 }, { "epoch": 0.3249061553073131, "grad_norm": 1.958787202835083, "learning_rate": 3.718408504791528e-05, "loss": 0.315765380859375, "step": 898 }, { "epoch": 0.32526796617068426, "grad_norm": 0.3871093988418579, "learning_rate": 3.716979353918162e-05, "loss": 0.286468505859375, "step": 899 }, { "epoch": 0.32562977703405543, "grad_norm": 1.4140691757202148, "learning_rate": 3.715546861648587e-05, "loss": 0.31681060791015625, "step": 900 }, { "epoch": 0.3259915878974266, "grad_norm": 2.3110239505767822, "learning_rate": 3.714111030770561e-05, "loss": 0.3142852783203125, "step": 901 }, { "epoch": 0.3263533987607978, "grad_norm": 0.6431041359901428, "learning_rate": 3.712671864078342e-05, "loss": 0.2922210693359375, "step": 902 }, { "epoch": 0.32671520962416895, "grad_norm": 0.7195540070533752, "learning_rate": 3.711229364372676e-05, "loss": 0.3240966796875, "step": 903 }, { "epoch": 0.3270770204875401, "grad_norm": 2.7283074855804443, "learning_rate": 3.7097835344607973e-05, "loss": 0.26590728759765625, "step": 904 }, { "epoch": 0.3274388313509113, "grad_norm": 1.31633460521698, "learning_rate": 3.708334377156421e-05, "loss": 0.2795257568359375, "step": 905 }, { "epoch": 0.32780064221428246, "grad_norm": 1.005703330039978, "learning_rate": 3.706881895279739e-05, "loss": 0.29461669921875, "step": 906 }, { "epoch": 0.3281624530776537, "grad_norm": 1.1034278869628906, "learning_rate": 3.7054260916574094e-05, "loss": 0.3009185791015625, "step": 907 }, { "epoch": 0.32852426394102485, "grad_norm": 0.6670051217079163, "learning_rate": 3.703966969122558e-05, "loss": 0.2798919677734375, "step": 908 }, { "epoch": 0.328886074804396, "grad_norm": 2.3078885078430176, "learning_rate": 3.702504530514768e-05, "loss": 0.29016876220703125, "step": 909 }, { "epoch": 0.3292478856677672, "grad_norm": 0.66514652967453, "learning_rate": 3.701038778680078e-05, "loss": 0.26479339599609375, "step": 910 }, { "epoch": 0.32960969653113836, "grad_norm": 2.2104344367980957, "learning_rate": 3.6995697164709704e-05, "loss": 0.279541015625, "step": 911 }, { "epoch": 0.32997150739450953, "grad_norm": 1.7039453983306885, "learning_rate": 3.698097346746374e-05, "loss": 0.2736015319824219, "step": 912 }, { "epoch": 0.3303333182578807, "grad_norm": 2.5456292629241943, "learning_rate": 3.6966216723716526e-05, "loss": 0.3278961181640625, "step": 913 }, { "epoch": 0.3306951291212519, "grad_norm": 2.5226871967315674, "learning_rate": 3.695142696218601e-05, "loss": 0.31027984619140625, "step": 914 }, { "epoch": 0.33105693998462304, "grad_norm": 3.057473659515381, "learning_rate": 3.693660421165439e-05, "loss": 0.314788818359375, "step": 915 }, { "epoch": 0.3314187508479942, "grad_norm": 2.8090834617614746, "learning_rate": 3.692174850096809e-05, "loss": 0.29571533203125, "step": 916 }, { "epoch": 0.3317805617113654, "grad_norm": 1.0512312650680542, "learning_rate": 3.690685985903764e-05, "loss": 0.286376953125, "step": 917 }, { "epoch": 0.33214237257473656, "grad_norm": 1.5433757305145264, "learning_rate": 3.689193831483769e-05, "loss": 0.3090667724609375, "step": 918 }, { "epoch": 0.3325041834381077, "grad_norm": 1.9701045751571655, "learning_rate": 3.687698389740689e-05, "loss": 0.265472412109375, "step": 919 }, { "epoch": 0.3328659943014789, "grad_norm": 0.8291819095611572, "learning_rate": 3.68619966358479e-05, "loss": 0.2917938232421875, "step": 920 }, { "epoch": 0.33322780516485007, "grad_norm": 1.9437626600265503, "learning_rate": 3.684697655932726e-05, "loss": 0.27208709716796875, "step": 921 }, { "epoch": 0.33358961602822124, "grad_norm": 0.887065052986145, "learning_rate": 3.6831923697075405e-05, "loss": 0.30002593994140625, "step": 922 }, { "epoch": 0.3339514268915924, "grad_norm": 0.5961707234382629, "learning_rate": 3.681683807838656e-05, "loss": 0.2956695556640625, "step": 923 }, { "epoch": 0.3343132377549636, "grad_norm": 2.0563223361968994, "learning_rate": 3.680171973261867e-05, "loss": 0.3023834228515625, "step": 924 }, { "epoch": 0.33467504861833475, "grad_norm": 1.4294114112854004, "learning_rate": 3.678656868919343e-05, "loss": 0.329193115234375, "step": 925 }, { "epoch": 0.3350368594817059, "grad_norm": 3.0059046745300293, "learning_rate": 3.677138497759611e-05, "loss": 0.33972930908203125, "step": 926 }, { "epoch": 0.3353986703450771, "grad_norm": 1.451389193534851, "learning_rate": 3.675616862737558e-05, "loss": 0.276580810546875, "step": 927 }, { "epoch": 0.33576048120844826, "grad_norm": 1.9790090322494507, "learning_rate": 3.6740919668144235e-05, "loss": 0.2917633056640625, "step": 928 }, { "epoch": 0.3361222920718195, "grad_norm": 1.7404868602752686, "learning_rate": 3.672563812957791e-05, "loss": 0.32579803466796875, "step": 929 }, { "epoch": 0.33648410293519065, "grad_norm": 0.9997764825820923, "learning_rate": 3.6710324041415865e-05, "loss": 0.28923797607421875, "step": 930 }, { "epoch": 0.3368459137985618, "grad_norm": 0.7913169860839844, "learning_rate": 3.6694977433460685e-05, "loss": 0.30617523193359375, "step": 931 }, { "epoch": 0.337207724661933, "grad_norm": 0.48933473229408264, "learning_rate": 3.667959833557825e-05, "loss": 0.295257568359375, "step": 932 }, { "epoch": 0.33756953552530417, "grad_norm": 0.8282255530357361, "learning_rate": 3.666418677769767e-05, "loss": 0.26529693603515625, "step": 933 }, { "epoch": 0.33793134638867534, "grad_norm": 4.836225509643555, "learning_rate": 3.664874278981121e-05, "loss": 0.37109375, "step": 934 }, { "epoch": 0.3382931572520465, "grad_norm": 0.7932726144790649, "learning_rate": 3.6633266401974285e-05, "loss": 0.29978179931640625, "step": 935 }, { "epoch": 0.3386549681154177, "grad_norm": 1.9349439144134521, "learning_rate": 3.661775764430531e-05, "loss": 0.287261962890625, "step": 936 }, { "epoch": 0.33901677897878885, "grad_norm": 3.0766446590423584, "learning_rate": 3.660221654698574e-05, "loss": 0.321990966796875, "step": 937 }, { "epoch": 0.33937858984216, "grad_norm": 1.9131392240524292, "learning_rate": 3.658664314025994e-05, "loss": 0.3084564208984375, "step": 938 }, { "epoch": 0.3397404007055312, "grad_norm": 2.7436861991882324, "learning_rate": 3.6571037454435155e-05, "loss": 0.302093505859375, "step": 939 }, { "epoch": 0.34010221156890236, "grad_norm": 0.7588583827018738, "learning_rate": 3.6555399519881463e-05, "loss": 0.330535888671875, "step": 940 }, { "epoch": 0.3404640224322735, "grad_norm": 4.952359676361084, "learning_rate": 3.653972936703169e-05, "loss": 0.29168701171875, "step": 941 }, { "epoch": 0.3408258332956447, "grad_norm": 2.775674343109131, "learning_rate": 3.6524027026381355e-05, "loss": 0.285980224609375, "step": 942 }, { "epoch": 0.34118764415901587, "grad_norm": 1.6003223657608032, "learning_rate": 3.650829252848863e-05, "loss": 0.29718017578125, "step": 943 }, { "epoch": 0.34154945502238704, "grad_norm": 3.546053409576416, "learning_rate": 3.649252590397426e-05, "loss": 0.2363739013671875, "step": 944 }, { "epoch": 0.3419112658857582, "grad_norm": 1.4707825183868408, "learning_rate": 3.6476727183521516e-05, "loss": 0.25917816162109375, "step": 945 }, { "epoch": 0.3422730767491294, "grad_norm": 2.9068939685821533, "learning_rate": 3.646089639787613e-05, "loss": 0.29602813720703125, "step": 946 }, { "epoch": 0.34263488761250055, "grad_norm": 1.3602372407913208, "learning_rate": 3.644503357784624e-05, "loss": 0.31140899658203125, "step": 947 }, { "epoch": 0.3429966984758717, "grad_norm": 5.684846878051758, "learning_rate": 3.64291387543023e-05, "loss": 0.34278106689453125, "step": 948 }, { "epoch": 0.3433585093392429, "grad_norm": 6.501363754272461, "learning_rate": 3.641321195817709e-05, "loss": 0.3567352294921875, "step": 949 }, { "epoch": 0.34372032020261406, "grad_norm": 2.017427682876587, "learning_rate": 3.639725322046556e-05, "loss": 0.28943634033203125, "step": 950 }, { "epoch": 0.34408213106598523, "grad_norm": 1.2754764556884766, "learning_rate": 3.638126257222488e-05, "loss": 0.29856109619140625, "step": 951 }, { "epoch": 0.34444394192935646, "grad_norm": 1.8227031230926514, "learning_rate": 3.636524004457427e-05, "loss": 0.3075714111328125, "step": 952 }, { "epoch": 0.3448057527927276, "grad_norm": 1.6195993423461914, "learning_rate": 3.6349185668695005e-05, "loss": 0.328765869140625, "step": 953 }, { "epoch": 0.3451675636560988, "grad_norm": 4.13592004776001, "learning_rate": 3.633309947583036e-05, "loss": 0.2841644287109375, "step": 954 }, { "epoch": 0.34552937451946997, "grad_norm": 3.3197622299194336, "learning_rate": 3.63169814972855e-05, "loss": 0.28128814697265625, "step": 955 }, { "epoch": 0.34589118538284114, "grad_norm": 3.300501823425293, "learning_rate": 3.630083176442747e-05, "loss": 0.28033447265625, "step": 956 }, { "epoch": 0.3462529962462123, "grad_norm": 0.8423886299133301, "learning_rate": 3.62846503086851e-05, "loss": 0.305023193359375, "step": 957 }, { "epoch": 0.3466148071095835, "grad_norm": 0.48895418643951416, "learning_rate": 3.626843716154895e-05, "loss": 0.287750244140625, "step": 958 }, { "epoch": 0.34697661797295465, "grad_norm": 0.6301549673080444, "learning_rate": 3.6252192354571275e-05, "loss": 0.2766075134277344, "step": 959 }, { "epoch": 0.3473384288363258, "grad_norm": 1.3560065031051636, "learning_rate": 3.623591591936592e-05, "loss": 0.2896537780761719, "step": 960 }, { "epoch": 0.347700239699697, "grad_norm": 0.6615847945213318, "learning_rate": 3.621960788760829e-05, "loss": 0.29683685302734375, "step": 961 }, { "epoch": 0.34806205056306816, "grad_norm": 5.362089157104492, "learning_rate": 3.620326829103529e-05, "loss": 0.335113525390625, "step": 962 }, { "epoch": 0.34842386142643933, "grad_norm": 3.727457284927368, "learning_rate": 3.6186897161445235e-05, "loss": 0.265472412109375, "step": 963 }, { "epoch": 0.3487856722898105, "grad_norm": 1.7269208431243896, "learning_rate": 3.6170494530697826e-05, "loss": 0.29840087890625, "step": 964 }, { "epoch": 0.34914748315318167, "grad_norm": 2.0954794883728027, "learning_rate": 3.615406043071405e-05, "loss": 0.2858123779296875, "step": 965 }, { "epoch": 0.34950929401655284, "grad_norm": 2.2998909950256348, "learning_rate": 3.6137594893476145e-05, "loss": 0.33678436279296875, "step": 966 }, { "epoch": 0.349871104879924, "grad_norm": 1.7855744361877441, "learning_rate": 3.612109795102752e-05, "loss": 0.2930450439453125, "step": 967 }, { "epoch": 0.3502329157432952, "grad_norm": 1.5002706050872803, "learning_rate": 3.610456963547272e-05, "loss": 0.3166351318359375, "step": 968 }, { "epoch": 0.35059472660666635, "grad_norm": 2.813845157623291, "learning_rate": 3.6088009978977325e-05, "loss": 0.28607940673828125, "step": 969 }, { "epoch": 0.3509565374700375, "grad_norm": 0.9841387271881104, "learning_rate": 3.6071419013767924e-05, "loss": 0.271881103515625, "step": 970 }, { "epoch": 0.3513183483334087, "grad_norm": 0.7235221266746521, "learning_rate": 3.605479677213202e-05, "loss": 0.30153656005859375, "step": 971 }, { "epoch": 0.35168015919677986, "grad_norm": 0.5711708068847656, "learning_rate": 3.603814328641799e-05, "loss": 0.31097412109375, "step": 972 }, { "epoch": 0.35204197006015103, "grad_norm": 1.2486423254013062, "learning_rate": 3.602145858903502e-05, "loss": 0.31336212158203125, "step": 973 }, { "epoch": 0.3524037809235222, "grad_norm": 0.7004234790802002, "learning_rate": 3.6004742712453025e-05, "loss": 0.33351898193359375, "step": 974 }, { "epoch": 0.35276559178689343, "grad_norm": 1.741961121559143, "learning_rate": 3.598799568920261e-05, "loss": 0.3009185791015625, "step": 975 }, { "epoch": 0.3531274026502646, "grad_norm": 1.3755271434783936, "learning_rate": 3.5971217551874984e-05, "loss": 0.30950927734375, "step": 976 }, { "epoch": 0.35348921351363577, "grad_norm": 0.9454299807548523, "learning_rate": 3.595440833312191e-05, "loss": 0.30469512939453125, "step": 977 }, { "epoch": 0.35385102437700694, "grad_norm": 0.7628579139709473, "learning_rate": 3.593756806565564e-05, "loss": 0.32379913330078125, "step": 978 }, { "epoch": 0.3542128352403781, "grad_norm": 2.6918070316314697, "learning_rate": 3.5920696782248844e-05, "loss": 0.33769989013671875, "step": 979 }, { "epoch": 0.3545746461037493, "grad_norm": 1.6533551216125488, "learning_rate": 3.590379451573457e-05, "loss": 0.29175567626953125, "step": 980 }, { "epoch": 0.35493645696712045, "grad_norm": 2.083111524581909, "learning_rate": 3.588686129900613e-05, "loss": 0.32904815673828125, "step": 981 }, { "epoch": 0.3552982678304916, "grad_norm": 0.5541350841522217, "learning_rate": 3.586989716501711e-05, "loss": 0.2956695556640625, "step": 982 }, { "epoch": 0.3556600786938628, "grad_norm": 3.199348211288452, "learning_rate": 3.5852902146781224e-05, "loss": 0.2750244140625, "step": 983 }, { "epoch": 0.35602188955723396, "grad_norm": 4.292212963104248, "learning_rate": 3.5835876277372304e-05, "loss": 0.33408355712890625, "step": 984 }, { "epoch": 0.35638370042060513, "grad_norm": 1.1300591230392456, "learning_rate": 3.581881958992423e-05, "loss": 0.31175994873046875, "step": 985 }, { "epoch": 0.3567455112839763, "grad_norm": 1.2519391775131226, "learning_rate": 3.580173211763085e-05, "loss": 0.3220367431640625, "step": 986 }, { "epoch": 0.3571073221473475, "grad_norm": 1.7726850509643555, "learning_rate": 3.5784613893745905e-05, "loss": 0.31731414794921875, "step": 987 }, { "epoch": 0.35746913301071864, "grad_norm": 1.5037575960159302, "learning_rate": 3.5767464951583024e-05, "loss": 0.253814697265625, "step": 988 }, { "epoch": 0.3578309438740898, "grad_norm": 2.6933743953704834, "learning_rate": 3.5750285324515576e-05, "loss": 0.31107330322265625, "step": 989 }, { "epoch": 0.358192754737461, "grad_norm": 2.230949640274048, "learning_rate": 3.573307504597666e-05, "loss": 0.33551025390625, "step": 990 }, { "epoch": 0.35855456560083215, "grad_norm": 0.8718732595443726, "learning_rate": 3.5715834149459025e-05, "loss": 0.276092529296875, "step": 991 }, { "epoch": 0.3589163764642033, "grad_norm": 0.5651978850364685, "learning_rate": 3.569856266851501e-05, "loss": 0.295166015625, "step": 992 }, { "epoch": 0.3592781873275745, "grad_norm": 1.6592488288879395, "learning_rate": 3.5681260636756466e-05, "loss": 0.3021087646484375, "step": 993 }, { "epoch": 0.35963999819094566, "grad_norm": 0.6461229920387268, "learning_rate": 3.566392808785471e-05, "loss": 0.313995361328125, "step": 994 }, { "epoch": 0.36000180905431683, "grad_norm": 1.1513818502426147, "learning_rate": 3.564656505554043e-05, "loss": 0.32294464111328125, "step": 995 }, { "epoch": 0.360363619917688, "grad_norm": 2.926407814025879, "learning_rate": 3.562917157360365e-05, "loss": 0.298614501953125, "step": 996 }, { "epoch": 0.3607254307810592, "grad_norm": 2.675126075744629, "learning_rate": 3.5611747675893666e-05, "loss": 0.271575927734375, "step": 997 }, { "epoch": 0.3610872416444304, "grad_norm": 1.8451306819915771, "learning_rate": 3.559429339631892e-05, "loss": 0.35589599609375, "step": 998 }, { "epoch": 0.36144905250780157, "grad_norm": 2.2784342765808105, "learning_rate": 3.557680876884704e-05, "loss": 0.29584503173828125, "step": 999 }, { "epoch": 0.36181086337117274, "grad_norm": 3.2382113933563232, "learning_rate": 3.555929382750467e-05, "loss": 0.3030548095703125, "step": 1000 }, { "epoch": 0.3621726742345439, "grad_norm": 1.7801439762115479, "learning_rate": 3.554174860637745e-05, "loss": 0.284088134765625, "step": 1001 }, { "epoch": 0.3625344850979151, "grad_norm": 1.5638760328292847, "learning_rate": 3.552417313960998e-05, "loss": 0.3115386962890625, "step": 1002 }, { "epoch": 0.36289629596128625, "grad_norm": 1.5611469745635986, "learning_rate": 3.550656746140567e-05, "loss": 0.27831268310546875, "step": 1003 }, { "epoch": 0.3632581068246574, "grad_norm": 1.6722251176834106, "learning_rate": 3.548893160602679e-05, "loss": 0.3018951416015625, "step": 1004 }, { "epoch": 0.3636199176880286, "grad_norm": 0.8530523777008057, "learning_rate": 3.547126560779427e-05, "loss": 0.3272552490234375, "step": 1005 }, { "epoch": 0.36398172855139976, "grad_norm": 2.3161070346832275, "learning_rate": 3.5453569501087735e-05, "loss": 0.304901123046875, "step": 1006 }, { "epoch": 0.36434353941477093, "grad_norm": 0.5198149681091309, "learning_rate": 3.5435843320345416e-05, "loss": 0.32574462890625, "step": 1007 }, { "epoch": 0.3647053502781421, "grad_norm": 1.4167041778564453, "learning_rate": 3.541808710006404e-05, "loss": 0.31136322021484375, "step": 1008 }, { "epoch": 0.3650671611415133, "grad_norm": 0.6282628178596497, "learning_rate": 3.540030087479881e-05, "loss": 0.30155181884765625, "step": 1009 }, { "epoch": 0.36542897200488444, "grad_norm": 1.2710930109024048, "learning_rate": 3.538248467916333e-05, "loss": 0.30150604248046875, "step": 1010 }, { "epoch": 0.3657907828682556, "grad_norm": 1.2112982273101807, "learning_rate": 3.536463854782951e-05, "loss": 0.28265380859375, "step": 1011 }, { "epoch": 0.3661525937316268, "grad_norm": 2.0862488746643066, "learning_rate": 3.534676251552752e-05, "loss": 0.28612518310546875, "step": 1012 }, { "epoch": 0.36651440459499796, "grad_norm": 0.5249060988426208, "learning_rate": 3.532885661704574e-05, "loss": 0.306976318359375, "step": 1013 }, { "epoch": 0.3668762154583691, "grad_norm": 1.4672083854675293, "learning_rate": 3.531092088723065e-05, "loss": 0.28411865234375, "step": 1014 }, { "epoch": 0.3672380263217403, "grad_norm": 2.6627819538116455, "learning_rate": 3.52929553609868e-05, "loss": 0.326934814453125, "step": 1015 }, { "epoch": 0.36759983718511147, "grad_norm": 1.0537512302398682, "learning_rate": 3.527496007327671e-05, "loss": 0.2833824157714844, "step": 1016 }, { "epoch": 0.36796164804848264, "grad_norm": 1.8928536176681519, "learning_rate": 3.525693505912083e-05, "loss": 0.27400970458984375, "step": 1017 }, { "epoch": 0.3683234589118538, "grad_norm": 1.793614387512207, "learning_rate": 3.5238880353597466e-05, "loss": 0.30680084228515625, "step": 1018 }, { "epoch": 0.368685269775225, "grad_norm": 0.9494420886039734, "learning_rate": 3.5220795991842696e-05, "loss": 0.2949676513671875, "step": 1019 }, { "epoch": 0.36904708063859615, "grad_norm": 2.598203420639038, "learning_rate": 3.52026820090503e-05, "loss": 0.31681060791015625, "step": 1020 }, { "epoch": 0.3694088915019674, "grad_norm": 0.7793492674827576, "learning_rate": 3.518453844047174e-05, "loss": 0.26103973388671875, "step": 1021 }, { "epoch": 0.36977070236533854, "grad_norm": 1.6026625633239746, "learning_rate": 3.516636532141602e-05, "loss": 0.26752471923828125, "step": 1022 }, { "epoch": 0.3701325132287097, "grad_norm": 2.2348198890686035, "learning_rate": 3.5148162687249654e-05, "loss": 0.31360626220703125, "step": 1023 }, { "epoch": 0.3704943240920809, "grad_norm": 2.9125938415527344, "learning_rate": 3.512993057339662e-05, "loss": 0.29735565185546875, "step": 1024 }, { "epoch": 0.37085613495545205, "grad_norm": 4.0807671546936035, "learning_rate": 3.511166901533825e-05, "loss": 0.30416107177734375, "step": 1025 }, { "epoch": 0.3712179458188232, "grad_norm": 0.8070108294487, "learning_rate": 3.509337804861317e-05, "loss": 0.29486846923828125, "step": 1026 }, { "epoch": 0.3715797566821944, "grad_norm": 1.2037549018859863, "learning_rate": 3.507505770881725e-05, "loss": 0.27710723876953125, "step": 1027 }, { "epoch": 0.37194156754556557, "grad_norm": 2.7757906913757324, "learning_rate": 3.505670803160352e-05, "loss": 0.2768745422363281, "step": 1028 }, { "epoch": 0.37230337840893674, "grad_norm": 4.128597736358643, "learning_rate": 3.50383290526821e-05, "loss": 0.322265625, "step": 1029 }, { "epoch": 0.3726651892723079, "grad_norm": 1.5166376829147339, "learning_rate": 3.501992080782014e-05, "loss": 0.283843994140625, "step": 1030 }, { "epoch": 0.3730270001356791, "grad_norm": 1.4669361114501953, "learning_rate": 3.5001483332841726e-05, "loss": 0.28142547607421875, "step": 1031 }, { "epoch": 0.37338881099905025, "grad_norm": 1.7446937561035156, "learning_rate": 3.4983016663627855e-05, "loss": 0.3317413330078125, "step": 1032 }, { "epoch": 0.3737506218624214, "grad_norm": 0.6678557395935059, "learning_rate": 3.4964520836116334e-05, "loss": 0.29430389404296875, "step": 1033 }, { "epoch": 0.3741124327257926, "grad_norm": 2.587698459625244, "learning_rate": 3.494599588630168e-05, "loss": 0.297515869140625, "step": 1034 }, { "epoch": 0.37447424358916376, "grad_norm": 0.7046387195587158, "learning_rate": 3.492744185023513e-05, "loss": 0.31011962890625, "step": 1035 }, { "epoch": 0.37483605445253493, "grad_norm": 3.228752374649048, "learning_rate": 3.490885876402451e-05, "loss": 0.256256103515625, "step": 1036 }, { "epoch": 0.3751978653159061, "grad_norm": 3.250152587890625, "learning_rate": 3.489024666383416e-05, "loss": 0.290802001953125, "step": 1037 }, { "epoch": 0.37555967617927727, "grad_norm": 0.6195076704025269, "learning_rate": 3.487160558588492e-05, "loss": 0.3094940185546875, "step": 1038 }, { "epoch": 0.37592148704264844, "grad_norm": 0.7523525357246399, "learning_rate": 3.4852935566453984e-05, "loss": 0.294219970703125, "step": 1039 }, { "epoch": 0.3762832979060196, "grad_norm": 1.8547297716140747, "learning_rate": 3.4834236641874904e-05, "loss": 0.3027801513671875, "step": 1040 }, { "epoch": 0.3766451087693908, "grad_norm": 5.450414180755615, "learning_rate": 3.4815508848537455e-05, "loss": 0.344268798828125, "step": 1041 }, { "epoch": 0.37700691963276195, "grad_norm": 2.9073293209075928, "learning_rate": 3.479675222288762e-05, "loss": 0.3046417236328125, "step": 1042 }, { "epoch": 0.3773687304961331, "grad_norm": 0.8162760734558105, "learning_rate": 3.477796680142747e-05, "loss": 0.3203277587890625, "step": 1043 }, { "epoch": 0.37773054135950435, "grad_norm": 0.5518684983253479, "learning_rate": 3.4759152620715135e-05, "loss": 0.29888153076171875, "step": 1044 }, { "epoch": 0.3780923522228755, "grad_norm": 0.9050284028053284, "learning_rate": 3.474030971736469e-05, "loss": 0.34572601318359375, "step": 1045 }, { "epoch": 0.3784541630862467, "grad_norm": 3.969403028488159, "learning_rate": 3.472143812804613e-05, "loss": 0.3552093505859375, "step": 1046 }, { "epoch": 0.37881597394961786, "grad_norm": 2.4990129470825195, "learning_rate": 3.470253788948525e-05, "loss": 0.300445556640625, "step": 1047 }, { "epoch": 0.379177784812989, "grad_norm": 1.968505620956421, "learning_rate": 3.468360903846363e-05, "loss": 0.28815460205078125, "step": 1048 }, { "epoch": 0.3795395956763602, "grad_norm": 1.126013159751892, "learning_rate": 3.466465161181851e-05, "loss": 0.31751251220703125, "step": 1049 }, { "epoch": 0.37990140653973137, "grad_norm": 1.1478023529052734, "learning_rate": 3.464566564644274e-05, "loss": 0.30987548828125, "step": 1050 }, { "epoch": 0.38026321740310254, "grad_norm": 1.2865787744522095, "learning_rate": 3.4626651179284726e-05, "loss": 0.3197784423828125, "step": 1051 }, { "epoch": 0.3806250282664737, "grad_norm": 0.8576163053512573, "learning_rate": 3.460760824734833e-05, "loss": 0.32280731201171875, "step": 1052 }, { "epoch": 0.3809868391298449, "grad_norm": 1.5807363986968994, "learning_rate": 3.458853688769281e-05, "loss": 0.3252716064453125, "step": 1053 }, { "epoch": 0.38134864999321605, "grad_norm": 3.008596420288086, "learning_rate": 3.4569437137432725e-05, "loss": 0.3401336669921875, "step": 1054 }, { "epoch": 0.3817104608565872, "grad_norm": 1.708917498588562, "learning_rate": 3.455030903373794e-05, "loss": 0.298736572265625, "step": 1055 }, { "epoch": 0.3820722717199584, "grad_norm": 0.821308970451355, "learning_rate": 3.453115261383344e-05, "loss": 0.34004974365234375, "step": 1056 }, { "epoch": 0.38243408258332956, "grad_norm": 1.2773860692977905, "learning_rate": 3.451196791499935e-05, "loss": 0.28066253662109375, "step": 1057 }, { "epoch": 0.38279589344670073, "grad_norm": 0.8942212462425232, "learning_rate": 3.449275497457083e-05, "loss": 0.3067474365234375, "step": 1058 }, { "epoch": 0.3831577043100719, "grad_norm": 1.2564599514007568, "learning_rate": 3.447351382993797e-05, "loss": 0.30103302001953125, "step": 1059 }, { "epoch": 0.38351951517344307, "grad_norm": 0.5800821185112, "learning_rate": 3.445424451854578e-05, "loss": 0.2954864501953125, "step": 1060 }, { "epoch": 0.38388132603681424, "grad_norm": 0.8049518465995789, "learning_rate": 3.4434947077894064e-05, "loss": 0.298492431640625, "step": 1061 }, { "epoch": 0.3842431369001854, "grad_norm": 1.2389129400253296, "learning_rate": 3.4415621545537384e-05, "loss": 0.3021240234375, "step": 1062 }, { "epoch": 0.3846049477635566, "grad_norm": 0.8863505721092224, "learning_rate": 3.439626795908496e-05, "loss": 0.3577423095703125, "step": 1063 }, { "epoch": 0.38496675862692775, "grad_norm": 1.4133096933364868, "learning_rate": 3.437688635620061e-05, "loss": 0.2968902587890625, "step": 1064 }, { "epoch": 0.3853285694902989, "grad_norm": 2.242852210998535, "learning_rate": 3.4357476774602684e-05, "loss": 0.3076934814453125, "step": 1065 }, { "epoch": 0.3856903803536701, "grad_norm": 0.6606834530830383, "learning_rate": 3.433803925206397e-05, "loss": 0.3098793029785156, "step": 1066 }, { "epoch": 0.3860521912170413, "grad_norm": 1.003861904144287, "learning_rate": 3.4318573826411625e-05, "loss": 0.3299102783203125, "step": 1067 }, { "epoch": 0.3864140020804125, "grad_norm": 0.9216336607933044, "learning_rate": 3.429908053552713e-05, "loss": 0.293701171875, "step": 1068 }, { "epoch": 0.38677581294378366, "grad_norm": 0.9743899703025818, "learning_rate": 3.427955941734617e-05, "loss": 0.2745208740234375, "step": 1069 }, { "epoch": 0.38713762380715483, "grad_norm": 3.8779215812683105, "learning_rate": 3.42600105098586e-05, "loss": 0.2694854736328125, "step": 1070 }, { "epoch": 0.387499434670526, "grad_norm": 1.777786374092102, "learning_rate": 3.424043385110837e-05, "loss": 0.2883758544921875, "step": 1071 }, { "epoch": 0.38786124553389717, "grad_norm": 3.5149121284484863, "learning_rate": 3.4220829479193395e-05, "loss": 0.3086204528808594, "step": 1072 }, { "epoch": 0.38822305639726834, "grad_norm": 3.0327610969543457, "learning_rate": 3.420119743226555e-05, "loss": 0.2853889465332031, "step": 1073 }, { "epoch": 0.3885848672606395, "grad_norm": 0.8803814649581909, "learning_rate": 3.418153774853057e-05, "loss": 0.2622528076171875, "step": 1074 }, { "epoch": 0.3889466781240107, "grad_norm": 3.2352614402770996, "learning_rate": 3.416185046624796e-05, "loss": 0.28076171875, "step": 1075 }, { "epoch": 0.38930848898738185, "grad_norm": 0.9232485890388489, "learning_rate": 3.4142135623730954e-05, "loss": 0.29148101806640625, "step": 1076 }, { "epoch": 0.389670299850753, "grad_norm": 0.7539937496185303, "learning_rate": 3.4122393259346396e-05, "loss": 0.3028717041015625, "step": 1077 }, { "epoch": 0.3900321107141242, "grad_norm": 1.0042561292648315, "learning_rate": 3.4102623411514705e-05, "loss": 0.30852508544921875, "step": 1078 }, { "epoch": 0.39039392157749536, "grad_norm": 1.5306121110916138, "learning_rate": 3.408282611870979e-05, "loss": 0.2894287109375, "step": 1079 }, { "epoch": 0.39075573244086653, "grad_norm": 0.9797554612159729, "learning_rate": 3.406300141945894e-05, "loss": 0.30771636962890625, "step": 1080 }, { "epoch": 0.3911175433042377, "grad_norm": 0.5364055633544922, "learning_rate": 3.4043149352342836e-05, "loss": 0.31261444091796875, "step": 1081 }, { "epoch": 0.3914793541676089, "grad_norm": 3.9867615699768066, "learning_rate": 3.4023269955995366e-05, "loss": 0.2823944091796875, "step": 1082 }, { "epoch": 0.39184116503098004, "grad_norm": 0.7765496969223022, "learning_rate": 3.400336326910363e-05, "loss": 0.286407470703125, "step": 1083 }, { "epoch": 0.3922029758943512, "grad_norm": 4.513707160949707, "learning_rate": 3.398342933040781e-05, "loss": 0.360992431640625, "step": 1084 }, { "epoch": 0.3925647867577224, "grad_norm": 0.6163355708122253, "learning_rate": 3.396346817870117e-05, "loss": 0.29288482666015625, "step": 1085 }, { "epoch": 0.39292659762109355, "grad_norm": 1.8934627771377563, "learning_rate": 3.394347985282989e-05, "loss": 0.3224754333496094, "step": 1086 }, { "epoch": 0.3932884084844647, "grad_norm": 1.2023169994354248, "learning_rate": 3.392346439169306e-05, "loss": 0.27509307861328125, "step": 1087 }, { "epoch": 0.3936502193478359, "grad_norm": 0.7281607389450073, "learning_rate": 3.390342183424256e-05, "loss": 0.28249359130859375, "step": 1088 }, { "epoch": 0.39401203021120706, "grad_norm": 0.8936858773231506, "learning_rate": 3.388335221948301e-05, "loss": 0.29640960693359375, "step": 1089 }, { "epoch": 0.3943738410745783, "grad_norm": 1.293529748916626, "learning_rate": 3.386325558647169e-05, "loss": 0.3184394836425781, "step": 1090 }, { "epoch": 0.39473565193794946, "grad_norm": 0.8540430068969727, "learning_rate": 3.384313197431845e-05, "loss": 0.3054656982421875, "step": 1091 }, { "epoch": 0.39509746280132063, "grad_norm": 6.2916741371154785, "learning_rate": 3.382298142218565e-05, "loss": 0.374664306640625, "step": 1092 }, { "epoch": 0.3954592736646918, "grad_norm": 3.25297212600708, "learning_rate": 3.380280396928807e-05, "loss": 0.34255218505859375, "step": 1093 }, { "epoch": 0.39582108452806297, "grad_norm": 0.7894220352172852, "learning_rate": 3.3782599654892855e-05, "loss": 0.33626556396484375, "step": 1094 }, { "epoch": 0.39618289539143414, "grad_norm": 1.020959734916687, "learning_rate": 3.3762368518319416e-05, "loss": 0.31462860107421875, "step": 1095 }, { "epoch": 0.3965447062548053, "grad_norm": 3.326040506362915, "learning_rate": 3.374211059893937e-05, "loss": 0.3034515380859375, "step": 1096 }, { "epoch": 0.3969065171181765, "grad_norm": 2.081482410430908, "learning_rate": 3.372182593617644e-05, "loss": 0.34369659423828125, "step": 1097 }, { "epoch": 0.39726832798154765, "grad_norm": 2.269800901412964, "learning_rate": 3.3701514569506404e-05, "loss": 0.27875518798828125, "step": 1098 }, { "epoch": 0.3976301388449188, "grad_norm": 1.4186244010925293, "learning_rate": 3.3681176538457015e-05, "loss": 0.299102783203125, "step": 1099 }, { "epoch": 0.39799194970829, "grad_norm": 0.5686178803443909, "learning_rate": 3.3660811882607895e-05, "loss": 0.277618408203125, "step": 1100 }, { "epoch": 0.39835376057166116, "grad_norm": 1.4450984001159668, "learning_rate": 3.364042064159052e-05, "loss": 0.2711448669433594, "step": 1101 }, { "epoch": 0.39871557143503233, "grad_norm": 3.255375385284424, "learning_rate": 3.362000285508806e-05, "loss": 0.3262786865234375, "step": 1102 }, { "epoch": 0.3990773822984035, "grad_norm": 0.6600077748298645, "learning_rate": 3.359955856283537e-05, "loss": 0.28521728515625, "step": 1103 }, { "epoch": 0.3994391931617747, "grad_norm": 1.4676554203033447, "learning_rate": 3.357908780461889e-05, "loss": 0.3206329345703125, "step": 1104 }, { "epoch": 0.39980100402514585, "grad_norm": 3.2509469985961914, "learning_rate": 3.355859062027653e-05, "loss": 0.31988525390625, "step": 1105 }, { "epoch": 0.400162814888517, "grad_norm": 0.6019963026046753, "learning_rate": 3.3538067049697666e-05, "loss": 0.31389617919921875, "step": 1106 }, { "epoch": 0.4005246257518882, "grad_norm": 0.7904555797576904, "learning_rate": 3.351751713282303e-05, "loss": 0.31175994873046875, "step": 1107 }, { "epoch": 0.40088643661525936, "grad_norm": 0.7872330546379089, "learning_rate": 3.349694090964459e-05, "loss": 0.32532501220703125, "step": 1108 }, { "epoch": 0.4012482474786305, "grad_norm": 3.086806297302246, "learning_rate": 3.347633842020553e-05, "loss": 0.2997894287109375, "step": 1109 }, { "epoch": 0.4016100583420017, "grad_norm": 4.51802396774292, "learning_rate": 3.3455709704600146e-05, "loss": 0.3074493408203125, "step": 1110 }, { "epoch": 0.40197186920537287, "grad_norm": 4.226115703582764, "learning_rate": 3.3435054802973765e-05, "loss": 0.3249664306640625, "step": 1111 }, { "epoch": 0.40233368006874404, "grad_norm": 5.311826229095459, "learning_rate": 3.34143737555227e-05, "loss": 0.2863616943359375, "step": 1112 }, { "epoch": 0.40269549093211526, "grad_norm": 5.062017440795898, "learning_rate": 3.339366660249412e-05, "loss": 0.2747650146484375, "step": 1113 }, { "epoch": 0.40305730179548643, "grad_norm": 2.716054677963257, "learning_rate": 3.3372933384186014e-05, "loss": 0.2891693115234375, "step": 1114 }, { "epoch": 0.4034191126588576, "grad_norm": 2.082224130630493, "learning_rate": 3.335217414094708e-05, "loss": 0.287689208984375, "step": 1115 }, { "epoch": 0.4037809235222288, "grad_norm": 1.9391794204711914, "learning_rate": 3.333138891317667e-05, "loss": 0.31795501708984375, "step": 1116 }, { "epoch": 0.40414273438559994, "grad_norm": 2.028102397918701, "learning_rate": 3.331057774132471e-05, "loss": 0.3117256164550781, "step": 1117 }, { "epoch": 0.4045045452489711, "grad_norm": 2.0489602088928223, "learning_rate": 3.3289740665891614e-05, "loss": 0.317474365234375, "step": 1118 }, { "epoch": 0.4048663561123423, "grad_norm": 3.512280225753784, "learning_rate": 3.3268877727428215e-05, "loss": 0.30155181884765625, "step": 1119 }, { "epoch": 0.40522816697571346, "grad_norm": 1.8672940731048584, "learning_rate": 3.324798896653565e-05, "loss": 0.28733253479003906, "step": 1120 }, { "epoch": 0.4055899778390846, "grad_norm": 1.3153176307678223, "learning_rate": 3.322707442386532e-05, "loss": 0.29864501953125, "step": 1121 }, { "epoch": 0.4059517887024558, "grad_norm": 1.0020976066589355, "learning_rate": 3.3206134140118826e-05, "loss": 0.29811859130859375, "step": 1122 }, { "epoch": 0.40631359956582697, "grad_norm": 0.4835487902164459, "learning_rate": 3.318516815604783e-05, "loss": 0.299896240234375, "step": 1123 }, { "epoch": 0.40667541042919814, "grad_norm": 4.767118453979492, "learning_rate": 3.316417651245402e-05, "loss": 0.28759765625, "step": 1124 }, { "epoch": 0.4070372212925693, "grad_norm": 3.048344373703003, "learning_rate": 3.314315925018904e-05, "loss": 0.2830810546875, "step": 1125 }, { "epoch": 0.4073990321559405, "grad_norm": 4.521396160125732, "learning_rate": 3.312211641015436e-05, "loss": 0.283172607421875, "step": 1126 }, { "epoch": 0.40776084301931165, "grad_norm": 2.7197234630584717, "learning_rate": 3.3101048033301234e-05, "loss": 0.28433990478515625, "step": 1127 }, { "epoch": 0.4081226538826828, "grad_norm": 0.8490431308746338, "learning_rate": 3.3079954160630626e-05, "loss": 0.28310394287109375, "step": 1128 }, { "epoch": 0.408484464746054, "grad_norm": 1.1848763227462769, "learning_rate": 3.30588348331931e-05, "loss": 0.26262664794921875, "step": 1129 }, { "epoch": 0.40884627560942516, "grad_norm": 0.5923740863800049, "learning_rate": 3.303769009208878e-05, "loss": 0.26088714599609375, "step": 1130 }, { "epoch": 0.40920808647279633, "grad_norm": 3.795348882675171, "learning_rate": 3.301651997846722e-05, "loss": 0.32501220703125, "step": 1131 }, { "epoch": 0.4095698973361675, "grad_norm": 4.791662693023682, "learning_rate": 3.299532453352737e-05, "loss": 0.333831787109375, "step": 1132 }, { "epoch": 0.40993170819953867, "grad_norm": 1.1126867532730103, "learning_rate": 3.2974103798517477e-05, "loss": 0.28155517578125, "step": 1133 }, { "epoch": 0.41029351906290984, "grad_norm": 6.6197075843811035, "learning_rate": 3.2952857814734995e-05, "loss": 0.3254547119140625, "step": 1134 }, { "epoch": 0.410655329926281, "grad_norm": 3.154195785522461, "learning_rate": 3.293158662352651e-05, "loss": 0.30306243896484375, "step": 1135 }, { "epoch": 0.41101714078965224, "grad_norm": 1.0469216108322144, "learning_rate": 3.2910290266287685e-05, "loss": 0.27288055419921875, "step": 1136 }, { "epoch": 0.4113789516530234, "grad_norm": 4.123516082763672, "learning_rate": 3.288896878446315e-05, "loss": 0.316314697265625, "step": 1137 }, { "epoch": 0.4117407625163946, "grad_norm": 1.2229353189468384, "learning_rate": 3.2867622219546404e-05, "loss": 0.31162261962890625, "step": 1138 }, { "epoch": 0.41210257337976575, "grad_norm": 1.9194694757461548, "learning_rate": 3.2846250613079805e-05, "loss": 0.3325843811035156, "step": 1139 }, { "epoch": 0.4124643842431369, "grad_norm": 1.6658730506896973, "learning_rate": 3.28248540066544e-05, "loss": 0.32073974609375, "step": 1140 }, { "epoch": 0.4128261951065081, "grad_norm": 5.8070387840271, "learning_rate": 3.2803432441909936e-05, "loss": 0.28399658203125, "step": 1141 }, { "epoch": 0.41318800596987926, "grad_norm": 4.052425861358643, "learning_rate": 3.278198596053468e-05, "loss": 0.32769012451171875, "step": 1142 }, { "epoch": 0.4135498168332504, "grad_norm": 5.424179553985596, "learning_rate": 3.276051460426542e-05, "loss": 0.309173583984375, "step": 1143 }, { "epoch": 0.4139116276966216, "grad_norm": 6.274005889892578, "learning_rate": 3.273901841488735e-05, "loss": 0.30391693115234375, "step": 1144 }, { "epoch": 0.41427343855999277, "grad_norm": 1.6764289140701294, "learning_rate": 3.2717497434233986e-05, "loss": 0.33390045166015625, "step": 1145 }, { "epoch": 0.41463524942336394, "grad_norm": 6.0407395362854, "learning_rate": 3.269595170418709e-05, "loss": 0.27503204345703125, "step": 1146 }, { "epoch": 0.4149970602867351, "grad_norm": 3.022706985473633, "learning_rate": 3.267438126667659e-05, "loss": 0.2878265380859375, "step": 1147 }, { "epoch": 0.4153588711501063, "grad_norm": 2.252051591873169, "learning_rate": 3.26527861636805e-05, "loss": 0.31426239013671875, "step": 1148 }, { "epoch": 0.41572068201347745, "grad_norm": 0.9914146661758423, "learning_rate": 3.263116643722482e-05, "loss": 0.3019866943359375, "step": 1149 }, { "epoch": 0.4160824928768486, "grad_norm": 1.22725510597229, "learning_rate": 3.2609522129383504e-05, "loss": 0.2840118408203125, "step": 1150 }, { "epoch": 0.4164443037402198, "grad_norm": 2.7446463108062744, "learning_rate": 3.258785328227831e-05, "loss": 0.29642486572265625, "step": 1151 }, { "epoch": 0.41680611460359096, "grad_norm": 3.4175751209259033, "learning_rate": 3.256615993807875e-05, "loss": 0.34952545166015625, "step": 1152 }, { "epoch": 0.41716792546696213, "grad_norm": 3.011971950531006, "learning_rate": 3.254444213900204e-05, "loss": 0.26395416259765625, "step": 1153 }, { "epoch": 0.4175297363303333, "grad_norm": 1.3666471242904663, "learning_rate": 3.2522699927312965e-05, "loss": 0.31707000732421875, "step": 1154 }, { "epoch": 0.41789154719370447, "grad_norm": 0.6917974948883057, "learning_rate": 3.250093334532382e-05, "loss": 0.3066558837890625, "step": 1155 }, { "epoch": 0.41825335805707564, "grad_norm": 1.201331377029419, "learning_rate": 3.247914243539433e-05, "loss": 0.26486968994140625, "step": 1156 }, { "epoch": 0.4186151689204468, "grad_norm": 0.8970139026641846, "learning_rate": 3.2457327239931575e-05, "loss": 0.33187103271484375, "step": 1157 }, { "epoch": 0.418976979783818, "grad_norm": 0.6835305690765381, "learning_rate": 3.243548780138988e-05, "loss": 0.28743743896484375, "step": 1158 }, { "epoch": 0.4193387906471892, "grad_norm": 2.9609551429748535, "learning_rate": 3.241362416227076e-05, "loss": 0.2500457763671875, "step": 1159 }, { "epoch": 0.4197006015105604, "grad_norm": 1.710502028465271, "learning_rate": 3.239173636512282e-05, "loss": 0.28006744384765625, "step": 1160 }, { "epoch": 0.42006241237393155, "grad_norm": 1.710971474647522, "learning_rate": 3.2369824452541666e-05, "loss": 0.26177978515625, "step": 1161 }, { "epoch": 0.4204242232373027, "grad_norm": 1.9761396646499634, "learning_rate": 3.234788846716987e-05, "loss": 0.2978019714355469, "step": 1162 }, { "epoch": 0.4207860341006739, "grad_norm": 2.046311616897583, "learning_rate": 3.232592845169683e-05, "loss": 0.2981414794921875, "step": 1163 }, { "epoch": 0.42114784496404506, "grad_norm": 2.0033488273620605, "learning_rate": 3.230394444885871e-05, "loss": 0.323028564453125, "step": 1164 }, { "epoch": 0.42150965582741623, "grad_norm": 2.109624147415161, "learning_rate": 3.228193650143835e-05, "loss": 0.302215576171875, "step": 1165 }, { "epoch": 0.4218714666907874, "grad_norm": 0.4845781624317169, "learning_rate": 3.2259904652265206e-05, "loss": 0.2947540283203125, "step": 1166 }, { "epoch": 0.42223327755415857, "grad_norm": 0.6289676427841187, "learning_rate": 3.223784894421522e-05, "loss": 0.299530029296875, "step": 1167 }, { "epoch": 0.42259508841752974, "grad_norm": 2.28533673286438, "learning_rate": 3.22157694202108e-05, "loss": 0.3284454345703125, "step": 1168 }, { "epoch": 0.4229568992809009, "grad_norm": 1.8992462158203125, "learning_rate": 3.219366612322068e-05, "loss": 0.28614044189453125, "step": 1169 }, { "epoch": 0.4233187101442721, "grad_norm": 2.9120981693267822, "learning_rate": 3.2171539096259855e-05, "loss": 0.2829742431640625, "step": 1170 }, { "epoch": 0.42368052100764325, "grad_norm": 0.49560728669166565, "learning_rate": 3.2149388382389515e-05, "loss": 0.3139495849609375, "step": 1171 }, { "epoch": 0.4240423318710144, "grad_norm": 2.122056007385254, "learning_rate": 3.212721402471695e-05, "loss": 0.2743263244628906, "step": 1172 }, { "epoch": 0.4244041427343856, "grad_norm": 0.3835802972316742, "learning_rate": 3.2105016066395425e-05, "loss": 0.2975616455078125, "step": 1173 }, { "epoch": 0.42476595359775676, "grad_norm": 1.8536453247070312, "learning_rate": 3.2082794550624204e-05, "loss": 0.30825042724609375, "step": 1174 }, { "epoch": 0.42512776446112793, "grad_norm": 1.9409990310668945, "learning_rate": 3.206054952064833e-05, "loss": 0.313629150390625, "step": 1175 }, { "epoch": 0.4254895753244991, "grad_norm": 0.4846198260784149, "learning_rate": 3.2038281019758625e-05, "loss": 0.27813720703125, "step": 1176 }, { "epoch": 0.4258513861878703, "grad_norm": 1.7132266759872437, "learning_rate": 3.2015989091291615e-05, "loss": 0.3350830078125, "step": 1177 }, { "epoch": 0.42621319705124144, "grad_norm": 1.0693695545196533, "learning_rate": 3.199367377862938e-05, "loss": 0.31170654296875, "step": 1178 }, { "epoch": 0.4265750079146126, "grad_norm": 0.532211422920227, "learning_rate": 3.197133512519954e-05, "loss": 0.31280517578125, "step": 1179 }, { "epoch": 0.4269368187779838, "grad_norm": 1.1320455074310303, "learning_rate": 3.194897317447511e-05, "loss": 0.30023956298828125, "step": 1180 }, { "epoch": 0.42729862964135495, "grad_norm": 1.2557941675186157, "learning_rate": 3.192658796997446e-05, "loss": 0.29038238525390625, "step": 1181 }, { "epoch": 0.4276604405047262, "grad_norm": 1.6399967670440674, "learning_rate": 3.190417955526121e-05, "loss": 0.31717681884765625, "step": 1182 }, { "epoch": 0.42802225136809735, "grad_norm": 1.3058273792266846, "learning_rate": 3.1881747973944157e-05, "loss": 0.28322601318359375, "step": 1183 }, { "epoch": 0.4283840622314685, "grad_norm": 0.6860548853874207, "learning_rate": 3.1859293269677167e-05, "loss": 0.31586456298828125, "step": 1184 }, { "epoch": 0.4287458730948397, "grad_norm": 4.41334342956543, "learning_rate": 3.183681548615913e-05, "loss": 0.3470115661621094, "step": 1185 }, { "epoch": 0.42910768395821086, "grad_norm": 0.6128289103507996, "learning_rate": 3.18143146671338e-05, "loss": 0.302734375, "step": 1186 }, { "epoch": 0.42946949482158203, "grad_norm": 1.1808887720108032, "learning_rate": 3.179179085638983e-05, "loss": 0.3258209228515625, "step": 1187 }, { "epoch": 0.4298313056849532, "grad_norm": 2.423178195953369, "learning_rate": 3.1769244097760566e-05, "loss": 0.2866973876953125, "step": 1188 }, { "epoch": 0.4301931165483244, "grad_norm": 1.735784649848938, "learning_rate": 3.1746674435124024e-05, "loss": 0.278594970703125, "step": 1189 }, { "epoch": 0.43055492741169554, "grad_norm": 1.2493592500686646, "learning_rate": 3.17240819124028e-05, "loss": 0.338592529296875, "step": 1190 }, { "epoch": 0.4309167382750667, "grad_norm": 1.1622284650802612, "learning_rate": 3.170146657356398e-05, "loss": 0.3153228759765625, "step": 1191 }, { "epoch": 0.4312785491384379, "grad_norm": 0.9449843168258667, "learning_rate": 3.167882846261904e-05, "loss": 0.29036712646484375, "step": 1192 }, { "epoch": 0.43164036000180905, "grad_norm": 2.2695868015289307, "learning_rate": 3.165616762362378e-05, "loss": 0.325653076171875, "step": 1193 }, { "epoch": 0.4320021708651802, "grad_norm": 0.6478063464164734, "learning_rate": 3.163348410067824e-05, "loss": 0.2987060546875, "step": 1194 }, { "epoch": 0.4323639817285514, "grad_norm": 1.7627894878387451, "learning_rate": 3.161077793792657e-05, "loss": 0.3005828857421875, "step": 1195 }, { "epoch": 0.43272579259192256, "grad_norm": 0.461010217666626, "learning_rate": 3.1588049179557036e-05, "loss": 0.2872467041015625, "step": 1196 }, { "epoch": 0.43308760345529373, "grad_norm": 1.4032667875289917, "learning_rate": 3.156529786980183e-05, "loss": 0.31441497802734375, "step": 1197 }, { "epoch": 0.4334494143186649, "grad_norm": 0.9939262270927429, "learning_rate": 3.154252405293704e-05, "loss": 0.28484344482421875, "step": 1198 }, { "epoch": 0.4338112251820361, "grad_norm": 2.067434072494507, "learning_rate": 3.151972777328258e-05, "loss": 0.26922607421875, "step": 1199 }, { "epoch": 0.43417303604540725, "grad_norm": 3.838878870010376, "learning_rate": 3.149690907520204e-05, "loss": 0.29674530029296875, "step": 1200 }, { "epoch": 0.4345348469087784, "grad_norm": 1.94950270652771, "learning_rate": 3.147406800310267e-05, "loss": 0.29473876953125, "step": 1201 }, { "epoch": 0.4348966577721496, "grad_norm": 3.18241810798645, "learning_rate": 3.1451204601435255e-05, "loss": 0.3055419921875, "step": 1202 }, { "epoch": 0.43525846863552076, "grad_norm": 2.1443867683410645, "learning_rate": 3.1428318914694015e-05, "loss": 0.2972831726074219, "step": 1203 }, { "epoch": 0.4356202794988919, "grad_norm": 1.2070133686065674, "learning_rate": 3.1405410987416566e-05, "loss": 0.26802825927734375, "step": 1204 }, { "epoch": 0.43598209036226315, "grad_norm": 2.558840751647949, "learning_rate": 3.1382480864183784e-05, "loss": 0.29358673095703125, "step": 1205 }, { "epoch": 0.4363439012256343, "grad_norm": 0.6149109601974487, "learning_rate": 3.1359528589619763e-05, "loss": 0.2708892822265625, "step": 1206 }, { "epoch": 0.4367057120890055, "grad_norm": 2.0042383670806885, "learning_rate": 3.133655420839168e-05, "loss": 0.32395172119140625, "step": 1207 }, { "epoch": 0.43706752295237666, "grad_norm": 0.7158917784690857, "learning_rate": 3.131355776520976e-05, "loss": 0.278961181640625, "step": 1208 }, { "epoch": 0.43742933381574783, "grad_norm": 0.9157243371009827, "learning_rate": 3.129053930482715e-05, "loss": 0.30692291259765625, "step": 1209 }, { "epoch": 0.437791144679119, "grad_norm": 0.7626466751098633, "learning_rate": 3.1267498872039826e-05, "loss": 0.3075714111328125, "step": 1210 }, { "epoch": 0.4381529555424902, "grad_norm": 1.5207412242889404, "learning_rate": 3.124443651168655e-05, "loss": 0.3267974853515625, "step": 1211 }, { "epoch": 0.43851476640586134, "grad_norm": 2.433471202850342, "learning_rate": 3.122135226864876e-05, "loss": 0.28649139404296875, "step": 1212 }, { "epoch": 0.4388765772692325, "grad_norm": 2.0354809761047363, "learning_rate": 3.119824618785046e-05, "loss": 0.28462982177734375, "step": 1213 }, { "epoch": 0.4392383881326037, "grad_norm": 1.8775181770324707, "learning_rate": 3.1175118314258156e-05, "loss": 0.28987884521484375, "step": 1214 }, { "epoch": 0.43960019899597486, "grad_norm": 2.7510194778442383, "learning_rate": 3.1151968692880786e-05, "loss": 0.315643310546875, "step": 1215 }, { "epoch": 0.439962009859346, "grad_norm": 0.6949251890182495, "learning_rate": 3.1128797368769574e-05, "loss": 0.27581787109375, "step": 1216 }, { "epoch": 0.4403238207227172, "grad_norm": 2.0072731971740723, "learning_rate": 3.110560438701801e-05, "loss": 0.31109619140625, "step": 1217 }, { "epoch": 0.44068563158608837, "grad_norm": 2.160753011703491, "learning_rate": 3.1082389792761726e-05, "loss": 0.30322265625, "step": 1218 }, { "epoch": 0.44104744244945954, "grad_norm": 3.7582154273986816, "learning_rate": 3.10591536311784e-05, "loss": 0.3113250732421875, "step": 1219 }, { "epoch": 0.4414092533128307, "grad_norm": 3.884761333465576, "learning_rate": 3.10358959474877e-05, "loss": 0.26970672607421875, "step": 1220 }, { "epoch": 0.4417710641762019, "grad_norm": 3.4577748775482178, "learning_rate": 3.1012616786951165e-05, "loss": 0.284210205078125, "step": 1221 }, { "epoch": 0.44213287503957305, "grad_norm": 3.301922082901001, "learning_rate": 3.098931619487214e-05, "loss": 0.2815093994140625, "step": 1222 }, { "epoch": 0.4424946859029442, "grad_norm": 4.064789295196533, "learning_rate": 3.096599421659567e-05, "loss": 0.32044219970703125, "step": 1223 }, { "epoch": 0.4428564967663154, "grad_norm": 0.6924370527267456, "learning_rate": 3.0942650897508414e-05, "loss": 0.29880523681640625, "step": 1224 }, { "epoch": 0.44321830762968656, "grad_norm": 2.2019355297088623, "learning_rate": 3.091928628303858e-05, "loss": 0.32239532470703125, "step": 1225 }, { "epoch": 0.44358011849305773, "grad_norm": 1.3994522094726562, "learning_rate": 3.089590041865581e-05, "loss": 0.292816162109375, "step": 1226 }, { "epoch": 0.4439419293564289, "grad_norm": 1.8536916971206665, "learning_rate": 3.0872493349871086e-05, "loss": 0.28310394287109375, "step": 1227 }, { "epoch": 0.4443037402198001, "grad_norm": 1.474736213684082, "learning_rate": 3.084906512223668e-05, "loss": 0.2963104248046875, "step": 1228 }, { "epoch": 0.4446655510831713, "grad_norm": 3.012213706970215, "learning_rate": 3.082561578134603e-05, "loss": 0.28205108642578125, "step": 1229 }, { "epoch": 0.44502736194654247, "grad_norm": 0.9099676609039307, "learning_rate": 3.080214537283365e-05, "loss": 0.28858184814453125, "step": 1230 }, { "epoch": 0.44538917280991364, "grad_norm": 1.4070706367492676, "learning_rate": 3.0778653942375086e-05, "loss": 0.26915740966796875, "step": 1231 }, { "epoch": 0.4457509836732848, "grad_norm": 1.2163704633712769, "learning_rate": 3.0755141535686755e-05, "loss": 0.292144775390625, "step": 1232 }, { "epoch": 0.446112794536656, "grad_norm": 0.567142903804779, "learning_rate": 3.073160819852592e-05, "loss": 0.3023681640625, "step": 1233 }, { "epoch": 0.44647460540002715, "grad_norm": 3.47658109664917, "learning_rate": 3.070805397669057e-05, "loss": 0.3268547058105469, "step": 1234 }, { "epoch": 0.4468364162633983, "grad_norm": 1.5958739519119263, "learning_rate": 3.068447891601935e-05, "loss": 0.3087158203125, "step": 1235 }, { "epoch": 0.4471982271267695, "grad_norm": 1.5646499395370483, "learning_rate": 3.066088306239143e-05, "loss": 0.3012886047363281, "step": 1236 }, { "epoch": 0.44756003799014066, "grad_norm": 0.7628026604652405, "learning_rate": 3.0637266461726475e-05, "loss": 0.3102264404296875, "step": 1237 }, { "epoch": 0.44792184885351183, "grad_norm": 1.7285056114196777, "learning_rate": 3.061362915998451e-05, "loss": 0.3148956298828125, "step": 1238 }, { "epoch": 0.448283659716883, "grad_norm": 0.5630816221237183, "learning_rate": 3.0589971203165844e-05, "loss": 0.316436767578125, "step": 1239 }, { "epoch": 0.44864547058025417, "grad_norm": 1.5751805305480957, "learning_rate": 3.0566292637311e-05, "loss": 0.31402587890625, "step": 1240 }, { "epoch": 0.44900728144362534, "grad_norm": 0.5029732584953308, "learning_rate": 3.054259350850059e-05, "loss": 0.30267333984375, "step": 1241 }, { "epoch": 0.4493690923069965, "grad_norm": 3.0143303871154785, "learning_rate": 3.051887386285525e-05, "loss": 0.3030548095703125, "step": 1242 }, { "epoch": 0.4497309031703677, "grad_norm": 0.8513891696929932, "learning_rate": 3.049513374653554e-05, "loss": 0.3072357177734375, "step": 1243 }, { "epoch": 0.45009271403373885, "grad_norm": 0.5407665967941284, "learning_rate": 3.0471373205741864e-05, "loss": 0.29473876953125, "step": 1244 }, { "epoch": 0.45045452489711, "grad_norm": 3.8150463104248047, "learning_rate": 3.0447592286714376e-05, "loss": 0.36505126953125, "step": 1245 }, { "epoch": 0.4508163357604812, "grad_norm": 1.8207588195800781, "learning_rate": 3.0423791035732875e-05, "loss": 0.3234405517578125, "step": 1246 }, { "epoch": 0.45117814662385236, "grad_norm": 0.9993175864219666, "learning_rate": 3.0399969499116735e-05, "loss": 0.3077239990234375, "step": 1247 }, { "epoch": 0.45153995748722353, "grad_norm": 0.6946387887001038, "learning_rate": 3.037612772322481e-05, "loss": 0.3080291748046875, "step": 1248 }, { "epoch": 0.4519017683505947, "grad_norm": 2.2298312187194824, "learning_rate": 3.0352265754455348e-05, "loss": 0.29254913330078125, "step": 1249 }, { "epoch": 0.45226357921396587, "grad_norm": 0.9877124428749084, "learning_rate": 3.0328383639245875e-05, "loss": 0.3129425048828125, "step": 1250 }, { "epoch": 0.4526253900773371, "grad_norm": 2.7843337059020996, "learning_rate": 3.0304481424073146e-05, "loss": 0.3077392578125, "step": 1251 }, { "epoch": 0.45298720094070827, "grad_norm": 3.200676918029785, "learning_rate": 3.0280559155453e-05, "loss": 0.2607421875, "step": 1252 }, { "epoch": 0.45334901180407944, "grad_norm": 1.877767562866211, "learning_rate": 3.0256616879940348e-05, "loss": 0.290435791015625, "step": 1253 }, { "epoch": 0.4537108226674506, "grad_norm": 1.8832727670669556, "learning_rate": 3.0232654644128993e-05, "loss": 0.3262481689453125, "step": 1254 }, { "epoch": 0.4540726335308218, "grad_norm": 0.7740796804428101, "learning_rate": 3.0208672494651606e-05, "loss": 0.31040191650390625, "step": 1255 }, { "epoch": 0.45443444439419295, "grad_norm": 0.5980003476142883, "learning_rate": 3.0184670478179606e-05, "loss": 0.29457855224609375, "step": 1256 }, { "epoch": 0.4547962552575641, "grad_norm": 2.734537363052368, "learning_rate": 3.0160648641423084e-05, "loss": 0.3168792724609375, "step": 1257 }, { "epoch": 0.4551580661209353, "grad_norm": 0.6968463063240051, "learning_rate": 3.013660703113068e-05, "loss": 0.2878265380859375, "step": 1258 }, { "epoch": 0.45551987698430646, "grad_norm": 1.2191978693008423, "learning_rate": 3.011254569408954e-05, "loss": 0.30428314208984375, "step": 1259 }, { "epoch": 0.45588168784767763, "grad_norm": 1.1687091588974, "learning_rate": 3.008846467712518e-05, "loss": 0.2816162109375, "step": 1260 }, { "epoch": 0.4562434987110488, "grad_norm": 0.7454785704612732, "learning_rate": 3.0064364027101437e-05, "loss": 0.2980384826660156, "step": 1261 }, { "epoch": 0.45660530957441997, "grad_norm": 1.61627197265625, "learning_rate": 3.004024379092033e-05, "loss": 0.2758941650390625, "step": 1262 }, { "epoch": 0.45696712043779114, "grad_norm": 0.6045423746109009, "learning_rate": 3.0016104015522017e-05, "loss": 0.27481842041015625, "step": 1263 }, { "epoch": 0.4573289313011623, "grad_norm": 0.651521623134613, "learning_rate": 2.9991944747884668e-05, "loss": 0.27710723876953125, "step": 1264 }, { "epoch": 0.4576907421645335, "grad_norm": 1.6651748418807983, "learning_rate": 2.996776603502438e-05, "loss": 0.280120849609375, "step": 1265 }, { "epoch": 0.45805255302790465, "grad_norm": 2.5629336833953857, "learning_rate": 2.9943567923995122e-05, "loss": 0.28649139404296875, "step": 1266 }, { "epoch": 0.4584143638912758, "grad_norm": 0.6586755514144897, "learning_rate": 2.991935046188858e-05, "loss": 0.3221931457519531, "step": 1267 }, { "epoch": 0.458776174754647, "grad_norm": 1.0984325408935547, "learning_rate": 2.9895113695834124e-05, "loss": 0.303375244140625, "step": 1268 }, { "epoch": 0.45913798561801816, "grad_norm": 2.350125312805176, "learning_rate": 2.9870857672998666e-05, "loss": 0.3087615966796875, "step": 1269 }, { "epoch": 0.45949979648138933, "grad_norm": 1.932478427886963, "learning_rate": 2.9846582440586614e-05, "loss": 0.27460479736328125, "step": 1270 }, { "epoch": 0.4598616073447605, "grad_norm": 1.6088624000549316, "learning_rate": 2.9822288045839757e-05, "loss": 0.3011322021484375, "step": 1271 }, { "epoch": 0.4602234182081317, "grad_norm": 0.9049437642097473, "learning_rate": 2.979797453603717e-05, "loss": 0.3089447021484375, "step": 1272 }, { "epoch": 0.4605852290715029, "grad_norm": 0.5911900401115417, "learning_rate": 2.9773641958495122e-05, "loss": 0.3314361572265625, "step": 1273 }, { "epoch": 0.46094703993487407, "grad_norm": 1.0140268802642822, "learning_rate": 2.9749290360567e-05, "loss": 0.27519989013671875, "step": 1274 }, { "epoch": 0.46130885079824524, "grad_norm": 2.903024435043335, "learning_rate": 2.9724919789643212e-05, "loss": 0.2822113037109375, "step": 1275 }, { "epoch": 0.4616706616616164, "grad_norm": 1.1710935831069946, "learning_rate": 2.970053029315107e-05, "loss": 0.34857177734375, "step": 1276 }, { "epoch": 0.4620324725249876, "grad_norm": 0.6740934252738953, "learning_rate": 2.967612191855474e-05, "loss": 0.29959869384765625, "step": 1277 }, { "epoch": 0.46239428338835875, "grad_norm": 1.031872272491455, "learning_rate": 2.965169471335509e-05, "loss": 0.31859588623046875, "step": 1278 }, { "epoch": 0.4627560942517299, "grad_norm": 0.5915992856025696, "learning_rate": 2.962724872508968e-05, "loss": 0.3002471923828125, "step": 1279 }, { "epoch": 0.4631179051151011, "grad_norm": 1.4674768447875977, "learning_rate": 2.9602784001332593e-05, "loss": 0.285675048828125, "step": 1280 }, { "epoch": 0.46347971597847226, "grad_norm": 0.787613034248352, "learning_rate": 2.9578300589694385e-05, "loss": 0.27115631103515625, "step": 1281 }, { "epoch": 0.46384152684184343, "grad_norm": 1.6597458124160767, "learning_rate": 2.955379853782197e-05, "loss": 0.2979698181152344, "step": 1282 }, { "epoch": 0.4642033377052146, "grad_norm": 0.6068805456161499, "learning_rate": 2.952927789339855e-05, "loss": 0.27025604248046875, "step": 1283 }, { "epoch": 0.4645651485685858, "grad_norm": 0.7945891618728638, "learning_rate": 2.95047387041435e-05, "loss": 0.2910919189453125, "step": 1284 }, { "epoch": 0.46492695943195694, "grad_norm": 1.1862224340438843, "learning_rate": 2.9480181017812294e-05, "loss": 0.28603363037109375, "step": 1285 }, { "epoch": 0.4652887702953281, "grad_norm": 0.9209780097007751, "learning_rate": 2.9455604882196386e-05, "loss": 0.2498626708984375, "step": 1286 }, { "epoch": 0.4656505811586993, "grad_norm": 0.5900594592094421, "learning_rate": 2.9431010345123155e-05, "loss": 0.29119873046875, "step": 1287 }, { "epoch": 0.46601239202207045, "grad_norm": 1.5511188507080078, "learning_rate": 2.9406397454455784e-05, "loss": 0.266021728515625, "step": 1288 }, { "epoch": 0.4663742028854416, "grad_norm": 3.550568103790283, "learning_rate": 2.938176625809317e-05, "loss": 0.33294677734375, "step": 1289 }, { "epoch": 0.4667360137488128, "grad_norm": 1.3977396488189697, "learning_rate": 2.935711680396983e-05, "loss": 0.2944755554199219, "step": 1290 }, { "epoch": 0.46709782461218396, "grad_norm": 0.7101364731788635, "learning_rate": 2.933244914005582e-05, "loss": 0.2936248779296875, "step": 1291 }, { "epoch": 0.46745963547555514, "grad_norm": 2.4462175369262695, "learning_rate": 2.9307763314356634e-05, "loss": 0.2815399169921875, "step": 1292 }, { "epoch": 0.4678214463389263, "grad_norm": 4.0454301834106445, "learning_rate": 2.9283059374913115e-05, "loss": 0.3246002197265625, "step": 1293 }, { "epoch": 0.4681832572022975, "grad_norm": 0.6505366563796997, "learning_rate": 2.925833736980134e-05, "loss": 0.256866455078125, "step": 1294 }, { "epoch": 0.46854506806566865, "grad_norm": 1.2600330114364624, "learning_rate": 2.9233597347132553e-05, "loss": 0.31386566162109375, "step": 1295 }, { "epoch": 0.46890687892903987, "grad_norm": 3.063969135284424, "learning_rate": 2.920883935505307e-05, "loss": 0.3372306823730469, "step": 1296 }, { "epoch": 0.46926868979241104, "grad_norm": 2.841845750808716, "learning_rate": 2.918406344174417e-05, "loss": 0.3353729248046875, "step": 1297 }, { "epoch": 0.4696305006557822, "grad_norm": 1.6180471181869507, "learning_rate": 2.9159269655422008e-05, "loss": 0.3010711669921875, "step": 1298 }, { "epoch": 0.4699923115191534, "grad_norm": 1.840827465057373, "learning_rate": 2.913445804433751e-05, "loss": 0.3175086975097656, "step": 1299 }, { "epoch": 0.47035412238252455, "grad_norm": 0.7326303124427795, "learning_rate": 2.9109628656776316e-05, "loss": 0.28924560546875, "step": 1300 }, { "epoch": 0.4707159332458957, "grad_norm": 2.43509840965271, "learning_rate": 2.908478154105864e-05, "loss": 0.3045501708984375, "step": 1301 }, { "epoch": 0.4710777441092669, "grad_norm": 1.9629064798355103, "learning_rate": 2.9059916745539206e-05, "loss": 0.27982330322265625, "step": 1302 }, { "epoch": 0.47143955497263806, "grad_norm": 1.752809762954712, "learning_rate": 2.9035034318607146e-05, "loss": 0.28607177734375, "step": 1303 }, { "epoch": 0.47180136583600923, "grad_norm": 1.7602018117904663, "learning_rate": 2.901013430868589e-05, "loss": 0.28807830810546875, "step": 1304 }, { "epoch": 0.4721631766993804, "grad_norm": 4.194561958312988, "learning_rate": 2.8985216764233097e-05, "loss": 0.3157691955566406, "step": 1305 }, { "epoch": 0.4725249875627516, "grad_norm": 3.5109198093414307, "learning_rate": 2.8960281733740562e-05, "loss": 0.30329132080078125, "step": 1306 }, { "epoch": 0.47288679842612275, "grad_norm": 0.9346293807029724, "learning_rate": 2.8935329265734083e-05, "loss": 0.2703094482421875, "step": 1307 }, { "epoch": 0.4732486092894939, "grad_norm": 4.35321044921875, "learning_rate": 2.8910359408773413e-05, "loss": 0.32569122314453125, "step": 1308 }, { "epoch": 0.4736104201528651, "grad_norm": 1.3886229991912842, "learning_rate": 2.8885372211452132e-05, "loss": 0.30358123779296875, "step": 1309 }, { "epoch": 0.47397223101623626, "grad_norm": 1.8932194709777832, "learning_rate": 2.8860367722397584e-05, "loss": 0.284454345703125, "step": 1310 }, { "epoch": 0.4743340418796074, "grad_norm": 2.135352849960327, "learning_rate": 2.8835345990270743e-05, "loss": 0.32562255859375, "step": 1311 }, { "epoch": 0.4746958527429786, "grad_norm": 0.556854248046875, "learning_rate": 2.881030706376616e-05, "loss": 0.27481842041015625, "step": 1312 }, { "epoch": 0.47505766360634977, "grad_norm": 2.4706459045410156, "learning_rate": 2.8785250991611826e-05, "loss": 0.27996826171875, "step": 1313 }, { "epoch": 0.47541947446972094, "grad_norm": 2.0223288536071777, "learning_rate": 2.8760177822569114e-05, "loss": 0.32665252685546875, "step": 1314 }, { "epoch": 0.4757812853330921, "grad_norm": 0.49354398250579834, "learning_rate": 2.873508760543267e-05, "loss": 0.29576873779296875, "step": 1315 }, { "epoch": 0.4761430961964633, "grad_norm": 1.3934407234191895, "learning_rate": 2.870998038903031e-05, "loss": 0.30462646484375, "step": 1316 }, { "epoch": 0.47650490705983445, "grad_norm": 0.4372749626636505, "learning_rate": 2.8684856222222922e-05, "loss": 0.2903175354003906, "step": 1317 }, { "epoch": 0.4768667179232056, "grad_norm": 2.8886454105377197, "learning_rate": 2.8659715153904423e-05, "loss": 0.3673858642578125, "step": 1318 }, { "epoch": 0.47722852878657684, "grad_norm": 1.1281393766403198, "learning_rate": 2.863455723300157e-05, "loss": 0.2915802001953125, "step": 1319 }, { "epoch": 0.477590339649948, "grad_norm": 0.6278925538063049, "learning_rate": 2.860938250847394e-05, "loss": 0.3049468994140625, "step": 1320 }, { "epoch": 0.4779521505133192, "grad_norm": 0.4735713601112366, "learning_rate": 2.8584191029313813e-05, "loss": 0.28232574462890625, "step": 1321 }, { "epoch": 0.47831396137669036, "grad_norm": 0.4527692198753357, "learning_rate": 2.8558982844546085e-05, "loss": 0.28180694580078125, "step": 1322 }, { "epoch": 0.4786757722400615, "grad_norm": 3.007319211959839, "learning_rate": 2.8533758003228134e-05, "loss": 0.3443145751953125, "step": 1323 }, { "epoch": 0.4790375831034327, "grad_norm": 0.6738729476928711, "learning_rate": 2.8508516554449777e-05, "loss": 0.28955841064453125, "step": 1324 }, { "epoch": 0.47939939396680387, "grad_norm": 0.4807127118110657, "learning_rate": 2.848325854733314e-05, "loss": 0.3031158447265625, "step": 1325 }, { "epoch": 0.47976120483017504, "grad_norm": 1.2126775979995728, "learning_rate": 2.8457984031032574e-05, "loss": 0.282928466796875, "step": 1326 }, { "epoch": 0.4801230156935462, "grad_norm": 1.2954347133636475, "learning_rate": 2.8432693054734568e-05, "loss": 0.28968048095703125, "step": 1327 }, { "epoch": 0.4804848265569174, "grad_norm": 2.001676321029663, "learning_rate": 2.8407385667657635e-05, "loss": 0.2651824951171875, "step": 1328 }, { "epoch": 0.48084663742028855, "grad_norm": 0.5045491456985474, "learning_rate": 2.8382061919052212e-05, "loss": 0.27097320556640625, "step": 1329 }, { "epoch": 0.4812084482836597, "grad_norm": 0.823172926902771, "learning_rate": 2.8356721858200606e-05, "loss": 0.2744598388671875, "step": 1330 }, { "epoch": 0.4815702591470309, "grad_norm": 0.6380828619003296, "learning_rate": 2.8331365534416853e-05, "loss": 0.302001953125, "step": 1331 }, { "epoch": 0.48193207001040206, "grad_norm": 4.572793006896973, "learning_rate": 2.8305992997046634e-05, "loss": 0.32378387451171875, "step": 1332 }, { "epoch": 0.48229388087377323, "grad_norm": 3.0457537174224854, "learning_rate": 2.8280604295467192e-05, "loss": 0.27813720703125, "step": 1333 }, { "epoch": 0.4826556917371444, "grad_norm": 2.201169729232788, "learning_rate": 2.8255199479087213e-05, "loss": 0.307159423828125, "step": 1334 }, { "epoch": 0.48301750260051557, "grad_norm": 2.834618091583252, "learning_rate": 2.8229778597346766e-05, "loss": 0.3057975769042969, "step": 1335 }, { "epoch": 0.48337931346388674, "grad_norm": 1.9127813577651978, "learning_rate": 2.8204341699717166e-05, "loss": 0.3020477294921875, "step": 1336 }, { "epoch": 0.4837411243272579, "grad_norm": 0.9948824644088745, "learning_rate": 2.817888883570091e-05, "loss": 0.288970947265625, "step": 1337 }, { "epoch": 0.4841029351906291, "grad_norm": 0.698628842830658, "learning_rate": 2.815342005483155e-05, "loss": 0.2972259521484375, "step": 1338 }, { "epoch": 0.48446474605400025, "grad_norm": 1.0415908098220825, "learning_rate": 2.8127935406673628e-05, "loss": 0.304229736328125, "step": 1339 }, { "epoch": 0.4848265569173714, "grad_norm": 0.7675443291664124, "learning_rate": 2.8102434940822568e-05, "loss": 0.315643310546875, "step": 1340 }, { "epoch": 0.4851883677807426, "grad_norm": 3.7947163581848145, "learning_rate": 2.807691870690456e-05, "loss": 0.2848052978515625, "step": 1341 }, { "epoch": 0.4855501786441138, "grad_norm": 0.7148364782333374, "learning_rate": 2.805138675457649e-05, "loss": 0.335693359375, "step": 1342 }, { "epoch": 0.485911989507485, "grad_norm": 4.458090782165527, "learning_rate": 2.802583913352584e-05, "loss": 0.306671142578125, "step": 1343 }, { "epoch": 0.48627380037085616, "grad_norm": 2.969378709793091, "learning_rate": 2.8000275893470568e-05, "loss": 0.3021240234375, "step": 1344 }, { "epoch": 0.4866356112342273, "grad_norm": 4.139227867126465, "learning_rate": 2.7974697084159052e-05, "loss": 0.2593994140625, "step": 1345 }, { "epoch": 0.4869974220975985, "grad_norm": 0.4963805675506592, "learning_rate": 2.794910275536994e-05, "loss": 0.2728271484375, "step": 1346 }, { "epoch": 0.48735923296096967, "grad_norm": 1.6088125705718994, "learning_rate": 2.79234929569121e-05, "loss": 0.29456329345703125, "step": 1347 }, { "epoch": 0.48772104382434084, "grad_norm": 2.945629835128784, "learning_rate": 2.7897867738624516e-05, "loss": 0.3170623779296875, "step": 1348 }, { "epoch": 0.488082854687712, "grad_norm": 1.830263614654541, "learning_rate": 2.787222715037615e-05, "loss": 0.296844482421875, "step": 1349 }, { "epoch": 0.4884446655510832, "grad_norm": 1.3290427923202515, "learning_rate": 2.78465712420659e-05, "loss": 0.30522918701171875, "step": 1350 }, { "epoch": 0.48880647641445435, "grad_norm": 0.5674301981925964, "learning_rate": 2.782090006362247e-05, "loss": 0.30036163330078125, "step": 1351 }, { "epoch": 0.4891682872778255, "grad_norm": 1.8748470544815063, "learning_rate": 2.779521366500428e-05, "loss": 0.2938690185546875, "step": 1352 }, { "epoch": 0.4895300981411967, "grad_norm": 0.6203882694244385, "learning_rate": 2.7769512096199377e-05, "loss": 0.33502197265625, "step": 1353 }, { "epoch": 0.48989190900456786, "grad_norm": 1.443452000617981, "learning_rate": 2.7743795407225316e-05, "loss": 0.27211761474609375, "step": 1354 }, { "epoch": 0.49025371986793903, "grad_norm": 0.8207395076751709, "learning_rate": 2.7718063648129086e-05, "loss": 0.28156280517578125, "step": 1355 }, { "epoch": 0.4906155307313102, "grad_norm": 0.8108890652656555, "learning_rate": 2.7692316868987014e-05, "loss": 0.288360595703125, "step": 1356 }, { "epoch": 0.49097734159468137, "grad_norm": 2.038196563720703, "learning_rate": 2.7666555119904646e-05, "loss": 0.2893829345703125, "step": 1357 }, { "epoch": 0.49133915245805254, "grad_norm": 1.0535776615142822, "learning_rate": 2.7640778451016653e-05, "loss": 0.2793426513671875, "step": 1358 }, { "epoch": 0.4917009633214237, "grad_norm": 1.5475332736968994, "learning_rate": 2.761498691248676e-05, "loss": 0.2929840087890625, "step": 1359 }, { "epoch": 0.4920627741847949, "grad_norm": 1.4374988079071045, "learning_rate": 2.7589180554507607e-05, "loss": 0.26824188232421875, "step": 1360 }, { "epoch": 0.49242458504816605, "grad_norm": 2.2241249084472656, "learning_rate": 2.7563359427300697e-05, "loss": 0.26686859130859375, "step": 1361 }, { "epoch": 0.4927863959115372, "grad_norm": 4.600025653839111, "learning_rate": 2.753752358111627e-05, "loss": 0.31951904296875, "step": 1362 }, { "epoch": 0.4931482067749084, "grad_norm": 4.000960350036621, "learning_rate": 2.7511673066233195e-05, "loss": 0.309234619140625, "step": 1363 }, { "epoch": 0.49351001763827956, "grad_norm": 6.298569679260254, "learning_rate": 2.74858079329589e-05, "loss": 0.322601318359375, "step": 1364 }, { "epoch": 0.4938718285016508, "grad_norm": 2.3033273220062256, "learning_rate": 2.745992823162927e-05, "loss": 0.2955055236816406, "step": 1365 }, { "epoch": 0.49423363936502196, "grad_norm": 2.6787471771240234, "learning_rate": 2.743403401260852e-05, "loss": 0.3110198974609375, "step": 1366 }, { "epoch": 0.49459545022839313, "grad_norm": 3.905235528945923, "learning_rate": 2.7408125326289137e-05, "loss": 0.312835693359375, "step": 1367 }, { "epoch": 0.4949572610917643, "grad_norm": 1.3572801351547241, "learning_rate": 2.7382202223091747e-05, "loss": 0.2987823486328125, "step": 1368 }, { "epoch": 0.49531907195513547, "grad_norm": 1.4372944831848145, "learning_rate": 2.7356264753465035e-05, "loss": 0.26641082763671875, "step": 1369 }, { "epoch": 0.49568088281850664, "grad_norm": 0.8664306402206421, "learning_rate": 2.7330312967885668e-05, "loss": 0.299224853515625, "step": 1370 }, { "epoch": 0.4960426936818778, "grad_norm": 1.705117106437683, "learning_rate": 2.7304346916858137e-05, "loss": 0.314605712890625, "step": 1371 }, { "epoch": 0.496404504545249, "grad_norm": 1.3103809356689453, "learning_rate": 2.727836665091472e-05, "loss": 0.2772369384765625, "step": 1372 }, { "epoch": 0.49676631540862015, "grad_norm": 0.9079526662826538, "learning_rate": 2.7252372220615345e-05, "loss": 0.2809333801269531, "step": 1373 }, { "epoch": 0.4971281262719913, "grad_norm": 0.39566150307655334, "learning_rate": 2.7226363676547515e-05, "loss": 0.2819366455078125, "step": 1374 }, { "epoch": 0.4974899371353625, "grad_norm": 0.4088035225868225, "learning_rate": 2.7200341069326194e-05, "loss": 0.27581787109375, "step": 1375 }, { "epoch": 0.49785174799873366, "grad_norm": 0.5093325972557068, "learning_rate": 2.717430444959372e-05, "loss": 0.3116912841796875, "step": 1376 }, { "epoch": 0.49821355886210483, "grad_norm": 1.686600923538208, "learning_rate": 2.7148253868019686e-05, "loss": 0.30408477783203125, "step": 1377 }, { "epoch": 0.498575369725476, "grad_norm": 1.7600536346435547, "learning_rate": 2.712218937530088e-05, "loss": 0.30883026123046875, "step": 1378 }, { "epoch": 0.4989371805888472, "grad_norm": 0.5142523050308228, "learning_rate": 2.7096111022161136e-05, "loss": 0.288604736328125, "step": 1379 }, { "epoch": 0.49929899145221834, "grad_norm": 0.9357963800430298, "learning_rate": 2.7070018859351282e-05, "loss": 0.2735748291015625, "step": 1380 }, { "epoch": 0.4996608023155895, "grad_norm": 2.2212891578674316, "learning_rate": 2.704391293764901e-05, "loss": 0.28369903564453125, "step": 1381 }, { "epoch": 0.5000226131789607, "grad_norm": 0.4773656129837036, "learning_rate": 2.7017793307858785e-05, "loss": 0.28923797607421875, "step": 1382 }, { "epoch": 0.5003844240423319, "grad_norm": 1.1430991888046265, "learning_rate": 2.6991660020811767e-05, "loss": 0.278961181640625, "step": 1383 }, { "epoch": 0.500746234905703, "grad_norm": 0.518297016620636, "learning_rate": 2.6965513127365675e-05, "loss": 0.30263519287109375, "step": 1384 }, { "epoch": 0.5011080457690742, "grad_norm": 2.4311389923095703, "learning_rate": 2.693935267840472e-05, "loss": 0.32861328125, "step": 1385 }, { "epoch": 0.5014698566324454, "grad_norm": 0.4654926061630249, "learning_rate": 2.6913178724839478e-05, "loss": 0.2842559814453125, "step": 1386 }, { "epoch": 0.5018316674958165, "grad_norm": 1.4687186479568481, "learning_rate": 2.688699131760682e-05, "loss": 0.29157257080078125, "step": 1387 }, { "epoch": 0.5021934783591877, "grad_norm": 0.6874675154685974, "learning_rate": 2.6860790507669805e-05, "loss": 0.29718017578125, "step": 1388 }, { "epoch": 0.5025552892225589, "grad_norm": 1.5363187789916992, "learning_rate": 2.6834576346017557e-05, "loss": 0.30675506591796875, "step": 1389 }, { "epoch": 0.50291710008593, "grad_norm": 0.7536321878433228, "learning_rate": 2.6808348883665184e-05, "loss": 0.30878448486328125, "step": 1390 }, { "epoch": 0.5032789109493012, "grad_norm": 0.635823667049408, "learning_rate": 2.67821081716537e-05, "loss": 0.27951812744140625, "step": 1391 }, { "epoch": 0.5036407218126724, "grad_norm": 1.1985141038894653, "learning_rate": 2.6755854261049886e-05, "loss": 0.30571746826171875, "step": 1392 }, { "epoch": 0.5040025326760436, "grad_norm": 0.8313190340995789, "learning_rate": 2.672958720294621e-05, "loss": 0.30049896240234375, "step": 1393 }, { "epoch": 0.5043643435394147, "grad_norm": 3.941141128540039, "learning_rate": 2.670330704846073e-05, "loss": 0.277618408203125, "step": 1394 }, { "epoch": 0.5047261544027859, "grad_norm": 1.768505573272705, "learning_rate": 2.667701384873699e-05, "loss": 0.2766571044921875, "step": 1395 }, { "epoch": 0.5050879652661571, "grad_norm": 0.7047168612480164, "learning_rate": 2.665070765494392e-05, "loss": 0.2938690185546875, "step": 1396 }, { "epoch": 0.5054497761295282, "grad_norm": 0.8252072334289551, "learning_rate": 2.662438851827574e-05, "loss": 0.2954864501953125, "step": 1397 }, { "epoch": 0.5058115869928994, "grad_norm": 2.2364394664764404, "learning_rate": 2.6598056489951858e-05, "loss": 0.289764404296875, "step": 1398 }, { "epoch": 0.5061733978562706, "grad_norm": 1.1547777652740479, "learning_rate": 2.6571711621216754e-05, "loss": 0.31790924072265625, "step": 1399 }, { "epoch": 0.5065352087196419, "grad_norm": 1.7227221727371216, "learning_rate": 2.6545353963339935e-05, "loss": 0.265533447265625, "step": 1400 }, { "epoch": 0.506897019583013, "grad_norm": 4.257976055145264, "learning_rate": 2.651898356761576e-05, "loss": 0.3329315185546875, "step": 1401 }, { "epoch": 0.5072588304463842, "grad_norm": 0.9843891859054565, "learning_rate": 2.6492600485363395e-05, "loss": 0.30348968505859375, "step": 1402 }, { "epoch": 0.5076206413097554, "grad_norm": 1.4294525384902954, "learning_rate": 2.6466204767926682e-05, "loss": 0.2899017333984375, "step": 1403 }, { "epoch": 0.5079824521731265, "grad_norm": 0.9021797180175781, "learning_rate": 2.6439796466674072e-05, "loss": 0.2614898681640625, "step": 1404 }, { "epoch": 0.5083442630364977, "grad_norm": 3.5849854946136475, "learning_rate": 2.6413375632998484e-05, "loss": 0.33551788330078125, "step": 1405 }, { "epoch": 0.5087060738998689, "grad_norm": 1.9343832731246948, "learning_rate": 2.6386942318317243e-05, "loss": 0.3017425537109375, "step": 1406 }, { "epoch": 0.50906788476324, "grad_norm": 1.4187672138214111, "learning_rate": 2.6360496574071954e-05, "loss": 0.2749176025390625, "step": 1407 }, { "epoch": 0.5094296956266112, "grad_norm": 1.0258021354675293, "learning_rate": 2.6334038451728406e-05, "loss": 0.2896270751953125, "step": 1408 }, { "epoch": 0.5097915064899824, "grad_norm": 1.7299269437789917, "learning_rate": 2.6307568002776497e-05, "loss": 0.28377532958984375, "step": 1409 }, { "epoch": 0.5101533173533536, "grad_norm": 1.0693410634994507, "learning_rate": 2.6281085278730093e-05, "loss": 0.29354095458984375, "step": 1410 }, { "epoch": 0.5105151282167247, "grad_norm": 0.6101860404014587, "learning_rate": 2.625459033112695e-05, "loss": 0.2867889404296875, "step": 1411 }, { "epoch": 0.5108769390800959, "grad_norm": 1.5811485052108765, "learning_rate": 2.6228083211528637e-05, "loss": 0.2550392150878906, "step": 1412 }, { "epoch": 0.5112387499434671, "grad_norm": 0.9576666355133057, "learning_rate": 2.6201563971520375e-05, "loss": 0.28917694091796875, "step": 1413 }, { "epoch": 0.5116005608068382, "grad_norm": 0.5754833221435547, "learning_rate": 2.6175032662711002e-05, "loss": 0.2591514587402344, "step": 1414 }, { "epoch": 0.5119623716702094, "grad_norm": 2.9105026721954346, "learning_rate": 2.6148489336732824e-05, "loss": 0.3245391845703125, "step": 1415 }, { "epoch": 0.5123241825335806, "grad_norm": 1.0816924571990967, "learning_rate": 2.6121934045241532e-05, "loss": 0.27741241455078125, "step": 1416 }, { "epoch": 0.5126859933969518, "grad_norm": 0.6371520161628723, "learning_rate": 2.6095366839916123e-05, "loss": 0.31201934814453125, "step": 1417 }, { "epoch": 0.5130478042603229, "grad_norm": 0.6866495013237, "learning_rate": 2.6068787772458762e-05, "loss": 0.29431915283203125, "step": 1418 }, { "epoch": 0.5134096151236941, "grad_norm": 0.887755811214447, "learning_rate": 2.6042196894594716e-05, "loss": 0.315521240234375, "step": 1419 }, { "epoch": 0.5137714259870653, "grad_norm": 1.086958408355713, "learning_rate": 2.601559425807221e-05, "loss": 0.31591796875, "step": 1420 }, { "epoch": 0.5141332368504364, "grad_norm": 2.1142075061798096, "learning_rate": 2.5988979914662366e-05, "loss": 0.2504615783691406, "step": 1421 }, { "epoch": 0.5144950477138076, "grad_norm": 0.6224052906036377, "learning_rate": 2.5962353916159098e-05, "loss": 0.28290557861328125, "step": 1422 }, { "epoch": 0.5148568585771788, "grad_norm": 1.6732370853424072, "learning_rate": 2.593571631437899e-05, "loss": 0.28395843505859375, "step": 1423 }, { "epoch": 0.51521866944055, "grad_norm": 2.2968294620513916, "learning_rate": 2.5909067161161207e-05, "loss": 0.29227447509765625, "step": 1424 }, { "epoch": 0.5155804803039211, "grad_norm": 0.6085972785949707, "learning_rate": 2.58824065083674e-05, "loss": 0.28649139404296875, "step": 1425 }, { "epoch": 0.5159422911672923, "grad_norm": 0.7332760691642761, "learning_rate": 2.58557344078816e-05, "loss": 0.301361083984375, "step": 1426 }, { "epoch": 0.5163041020306635, "grad_norm": 1.7178574800491333, "learning_rate": 2.5829050911610112e-05, "loss": 0.27237701416015625, "step": 1427 }, { "epoch": 0.5166659128940346, "grad_norm": 0.9061028361320496, "learning_rate": 2.5802356071481417e-05, "loss": 0.307647705078125, "step": 1428 }, { "epoch": 0.5170277237574058, "grad_norm": 1.808909296989441, "learning_rate": 2.577564993944607e-05, "loss": 0.2741127014160156, "step": 1429 }, { "epoch": 0.517389534620777, "grad_norm": 0.9245051741600037, "learning_rate": 2.5748932567476605e-05, "loss": 0.2659721374511719, "step": 1430 }, { "epoch": 0.5177513454841481, "grad_norm": 0.7961279153823853, "learning_rate": 2.5722204007567443e-05, "loss": 0.28448486328125, "step": 1431 }, { "epoch": 0.5181131563475193, "grad_norm": 0.6582099795341492, "learning_rate": 2.569546431173476e-05, "loss": 0.28000640869140625, "step": 1432 }, { "epoch": 0.5184749672108905, "grad_norm": 2.387915849685669, "learning_rate": 2.5668713532016396e-05, "loss": 0.3082122802734375, "step": 1433 }, { "epoch": 0.5188367780742617, "grad_norm": 0.5763548612594604, "learning_rate": 2.5641951720471783e-05, "loss": 0.2864341735839844, "step": 1434 }, { "epoch": 0.5191985889376328, "grad_norm": 1.5977274179458618, "learning_rate": 2.5615178929181816e-05, "loss": 0.309326171875, "step": 1435 }, { "epoch": 0.519560399801004, "grad_norm": 1.8025479316711426, "learning_rate": 2.558839521024875e-05, "loss": 0.33078765869140625, "step": 1436 }, { "epoch": 0.5199222106643752, "grad_norm": 0.7884765267372131, "learning_rate": 2.5561600615796113e-05, "loss": 0.31429290771484375, "step": 1437 }, { "epoch": 0.5202840215277463, "grad_norm": 2.28118634223938, "learning_rate": 2.5534795197968586e-05, "loss": 0.2569732666015625, "step": 1438 }, { "epoch": 0.5206458323911175, "grad_norm": 0.5170276165008545, "learning_rate": 2.5507979008931935e-05, "loss": 0.2784271240234375, "step": 1439 }, { "epoch": 0.5210076432544887, "grad_norm": 1.5520920753479004, "learning_rate": 2.5481152100872867e-05, "loss": 0.313323974609375, "step": 1440 }, { "epoch": 0.5213694541178598, "grad_norm": 2.1222381591796875, "learning_rate": 2.5454314525998962e-05, "loss": 0.284088134765625, "step": 1441 }, { "epoch": 0.521731264981231, "grad_norm": 2.1066417694091797, "learning_rate": 2.542746633653855e-05, "loss": 0.31748199462890625, "step": 1442 }, { "epoch": 0.5220930758446022, "grad_norm": 1.1564958095550537, "learning_rate": 2.540060758474062e-05, "loss": 0.30005645751953125, "step": 1443 }, { "epoch": 0.5224548867079734, "grad_norm": 3.25028920173645, "learning_rate": 2.5373738322874727e-05, "loss": 0.2722206115722656, "step": 1444 }, { "epoch": 0.5228166975713445, "grad_norm": 2.5811269283294678, "learning_rate": 2.5346858603230862e-05, "loss": 0.3062896728515625, "step": 1445 }, { "epoch": 0.5231785084347158, "grad_norm": 1.203660011291504, "learning_rate": 2.5319968478119386e-05, "loss": 0.2902069091796875, "step": 1446 }, { "epoch": 0.523540319298087, "grad_norm": 1.845399260520935, "learning_rate": 2.5293067999870884e-05, "loss": 0.27935791015625, "step": 1447 }, { "epoch": 0.5239021301614581, "grad_norm": 2.067983388900757, "learning_rate": 2.5266157220836124e-05, "loss": 0.2906951904296875, "step": 1448 }, { "epoch": 0.5242639410248293, "grad_norm": 0.5389843583106995, "learning_rate": 2.5239236193385884e-05, "loss": 0.29692840576171875, "step": 1449 }, { "epoch": 0.5246257518882005, "grad_norm": 1.6273231506347656, "learning_rate": 2.5212304969910916e-05, "loss": 0.28243255615234375, "step": 1450 }, { "epoch": 0.5249875627515717, "grad_norm": 1.28592848777771, "learning_rate": 2.51853636028218e-05, "loss": 0.29410552978515625, "step": 1451 }, { "epoch": 0.5253493736149428, "grad_norm": 1.092799186706543, "learning_rate": 2.515841214454886e-05, "loss": 0.296844482421875, "step": 1452 }, { "epoch": 0.525711184478314, "grad_norm": 3.3307552337646484, "learning_rate": 2.513145064754206e-05, "loss": 0.296417236328125, "step": 1453 }, { "epoch": 0.5260729953416852, "grad_norm": 2.827425479888916, "learning_rate": 2.5104479164270892e-05, "loss": 0.30757904052734375, "step": 1454 }, { "epoch": 0.5264348062050563, "grad_norm": 1.8583168983459473, "learning_rate": 2.507749774722428e-05, "loss": 0.29691314697265625, "step": 1455 }, { "epoch": 0.5267966170684275, "grad_norm": 0.6089897751808167, "learning_rate": 2.5050506448910498e-05, "loss": 0.2942657470703125, "step": 1456 }, { "epoch": 0.5271584279317987, "grad_norm": 1.978299617767334, "learning_rate": 2.5023505321857045e-05, "loss": 0.27596282958984375, "step": 1457 }, { "epoch": 0.5275202387951698, "grad_norm": 2.387354850769043, "learning_rate": 2.4996494418610534e-05, "loss": 0.2832756042480469, "step": 1458 }, { "epoch": 0.527882049658541, "grad_norm": 1.5280263423919678, "learning_rate": 2.4969473791736612e-05, "loss": 0.2744140625, "step": 1459 }, { "epoch": 0.5282438605219122, "grad_norm": 0.8344547748565674, "learning_rate": 2.4942443493819847e-05, "loss": 0.2869834899902344, "step": 1460 }, { "epoch": 0.5286056713852834, "grad_norm": 0.753171980381012, "learning_rate": 2.4915403577463624e-05, "loss": 0.3235015869140625, "step": 1461 }, { "epoch": 0.5289674822486545, "grad_norm": 0.472496896982193, "learning_rate": 2.4888354095290073e-05, "loss": 0.2977752685546875, "step": 1462 }, { "epoch": 0.5293292931120257, "grad_norm": 0.6867305040359497, "learning_rate": 2.4861295099939898e-05, "loss": 0.30400848388671875, "step": 1463 }, { "epoch": 0.5296911039753969, "grad_norm": 0.5071917176246643, "learning_rate": 2.4834226644072345e-05, "loss": 0.2746238708496094, "step": 1464 }, { "epoch": 0.530052914838768, "grad_norm": 0.5944591164588928, "learning_rate": 2.4807148780365057e-05, "loss": 0.28070068359375, "step": 1465 }, { "epoch": 0.5304147257021392, "grad_norm": 1.951818823814392, "learning_rate": 2.4780061561514e-05, "loss": 0.297821044921875, "step": 1466 }, { "epoch": 0.5307765365655104, "grad_norm": 0.5841723084449768, "learning_rate": 2.4752965040233332e-05, "loss": 0.28790283203125, "step": 1467 }, { "epoch": 0.5311383474288816, "grad_norm": 1.7216284275054932, "learning_rate": 2.4725859269255326e-05, "loss": 0.3142852783203125, "step": 1468 }, { "epoch": 0.5315001582922527, "grad_norm": 1.6771754026412964, "learning_rate": 2.4698744301330234e-05, "loss": 0.3202667236328125, "step": 1469 }, { "epoch": 0.5318619691556239, "grad_norm": 1.3196898698806763, "learning_rate": 2.4671620189226246e-05, "loss": 0.2787017822265625, "step": 1470 }, { "epoch": 0.5322237800189951, "grad_norm": 1.9509081840515137, "learning_rate": 2.4644486985729303e-05, "loss": 0.32956695556640625, "step": 1471 }, { "epoch": 0.5325855908823662, "grad_norm": 0.9008991718292236, "learning_rate": 2.4617344743643065e-05, "loss": 0.2806549072265625, "step": 1472 }, { "epoch": 0.5329474017457374, "grad_norm": 0.5316152572631836, "learning_rate": 2.4590193515788778e-05, "loss": 0.309783935546875, "step": 1473 }, { "epoch": 0.5333092126091086, "grad_norm": 3.3339526653289795, "learning_rate": 2.456303335500517e-05, "loss": 0.27622222900390625, "step": 1474 }, { "epoch": 0.5336710234724797, "grad_norm": 5.583130359649658, "learning_rate": 2.4535864314148357e-05, "loss": 0.3857269287109375, "step": 1475 }, { "epoch": 0.5340328343358509, "grad_norm": 0.9003186821937561, "learning_rate": 2.450868644609174e-05, "loss": 0.30611419677734375, "step": 1476 }, { "epoch": 0.5343946451992221, "grad_norm": 1.523910403251648, "learning_rate": 2.448149980372588e-05, "loss": 0.27764129638671875, "step": 1477 }, { "epoch": 0.5347564560625933, "grad_norm": 2.5182535648345947, "learning_rate": 2.445430443995845e-05, "loss": 0.3390960693359375, "step": 1478 }, { "epoch": 0.5351182669259644, "grad_norm": 1.0046602487564087, "learning_rate": 2.442710040771405e-05, "loss": 0.2815208435058594, "step": 1479 }, { "epoch": 0.5354800777893356, "grad_norm": 0.3948712646961212, "learning_rate": 2.4399887759934183e-05, "loss": 0.2884063720703125, "step": 1480 }, { "epoch": 0.5358418886527068, "grad_norm": 1.489838719367981, "learning_rate": 2.4372666549577116e-05, "loss": 0.2693023681640625, "step": 1481 }, { "epoch": 0.5362036995160779, "grad_norm": 2.062307834625244, "learning_rate": 2.4345436829617754e-05, "loss": 0.26287841796875, "step": 1482 }, { "epoch": 0.5365655103794491, "grad_norm": 1.264256477355957, "learning_rate": 2.43181986530476e-05, "loss": 0.318359375, "step": 1483 }, { "epoch": 0.5369273212428203, "grad_norm": 1.792694330215454, "learning_rate": 2.429095207287459e-05, "loss": 0.25157928466796875, "step": 1484 }, { "epoch": 0.5372891321061914, "grad_norm": 1.2608373165130615, "learning_rate": 2.4263697142123002e-05, "loss": 0.2915496826171875, "step": 1485 }, { "epoch": 0.5376509429695626, "grad_norm": 0.6433435082435608, "learning_rate": 2.42364339138334e-05, "loss": 0.2883148193359375, "step": 1486 }, { "epoch": 0.5380127538329338, "grad_norm": 0.966623067855835, "learning_rate": 2.4209162441062474e-05, "loss": 0.3124580383300781, "step": 1487 }, { "epoch": 0.538374564696305, "grad_norm": 0.5655458569526672, "learning_rate": 2.4181882776882963e-05, "loss": 0.28253936767578125, "step": 1488 }, { "epoch": 0.5387363755596761, "grad_norm": 1.8976682424545288, "learning_rate": 2.4154594974383542e-05, "loss": 0.29107666015625, "step": 1489 }, { "epoch": 0.5390981864230473, "grad_norm": 4.574169158935547, "learning_rate": 2.412729908666873e-05, "loss": 0.2799530029296875, "step": 1490 }, { "epoch": 0.5394599972864186, "grad_norm": 1.2623317241668701, "learning_rate": 2.4099995166858778e-05, "loss": 0.31681060791015625, "step": 1491 }, { "epoch": 0.5398218081497898, "grad_norm": 0.8378945589065552, "learning_rate": 2.407268326808957e-05, "loss": 0.290618896484375, "step": 1492 }, { "epoch": 0.5401836190131609, "grad_norm": 0.5783753991127014, "learning_rate": 2.404536344351252e-05, "loss": 0.28081512451171875, "step": 1493 }, { "epoch": 0.5405454298765321, "grad_norm": 1.6296522617340088, "learning_rate": 2.401803574629446e-05, "loss": 0.30938720703125, "step": 1494 }, { "epoch": 0.5409072407399033, "grad_norm": 1.034767746925354, "learning_rate": 2.3990700229617544e-05, "loss": 0.30035400390625, "step": 1495 }, { "epoch": 0.5412690516032744, "grad_norm": 1.2695690393447876, "learning_rate": 2.396335694667915e-05, "loss": 0.2968597412109375, "step": 1496 }, { "epoch": 0.5416308624666456, "grad_norm": 3.198944091796875, "learning_rate": 2.3936005950691765e-05, "loss": 0.29718017578125, "step": 1497 }, { "epoch": 0.5419926733300168, "grad_norm": 1.3164055347442627, "learning_rate": 2.3908647294882878e-05, "loss": 0.297576904296875, "step": 1498 }, { "epoch": 0.5423544841933879, "grad_norm": 4.224132061004639, "learning_rate": 2.38812810324949e-05, "loss": 0.298919677734375, "step": 1499 }, { "epoch": 0.5427162950567591, "grad_norm": 5.25214958190918, "learning_rate": 2.3853907216785043e-05, "loss": 0.28713226318359375, "step": 1500 }, { "epoch": 0.5430781059201303, "grad_norm": 2.549924850463867, "learning_rate": 2.382652590102521e-05, "loss": 0.291351318359375, "step": 1501 }, { "epoch": 0.5434399167835015, "grad_norm": 1.968400001525879, "learning_rate": 2.3799137138501903e-05, "loss": 0.31563568115234375, "step": 1502 }, { "epoch": 0.5438017276468726, "grad_norm": 0.8794369697570801, "learning_rate": 2.377174098251611e-05, "loss": 0.3192138671875, "step": 1503 }, { "epoch": 0.5441635385102438, "grad_norm": 1.5525373220443726, "learning_rate": 2.3744337486383225e-05, "loss": 0.294921875, "step": 1504 }, { "epoch": 0.544525349373615, "grad_norm": 2.8675193786621094, "learning_rate": 2.37169267034329e-05, "loss": 0.29975128173828125, "step": 1505 }, { "epoch": 0.5448871602369861, "grad_norm": 2.0355706214904785, "learning_rate": 2.3689508687009005e-05, "loss": 0.2826995849609375, "step": 1506 }, { "epoch": 0.5452489711003573, "grad_norm": 0.7797069549560547, "learning_rate": 2.3662083490469456e-05, "loss": 0.27956390380859375, "step": 1507 }, { "epoch": 0.5456107819637285, "grad_norm": 3.196265459060669, "learning_rate": 2.363465116718614e-05, "loss": 0.28469085693359375, "step": 1508 }, { "epoch": 0.5459725928270996, "grad_norm": 1.974998116493225, "learning_rate": 2.360721177054484e-05, "loss": 0.28417205810546875, "step": 1509 }, { "epoch": 0.5463344036904708, "grad_norm": 1.041357159614563, "learning_rate": 2.3579765353945082e-05, "loss": 0.26770782470703125, "step": 1510 }, { "epoch": 0.546696214553842, "grad_norm": 2.3026323318481445, "learning_rate": 2.355231197080006e-05, "loss": 0.31067657470703125, "step": 1511 }, { "epoch": 0.5470580254172132, "grad_norm": 0.6024349927902222, "learning_rate": 2.3524851674536526e-05, "loss": 0.27822113037109375, "step": 1512 }, { "epoch": 0.5474198362805843, "grad_norm": 1.283624291419983, "learning_rate": 2.3497384518594695e-05, "loss": 0.31976318359375, "step": 1513 }, { "epoch": 0.5477816471439555, "grad_norm": 1.4147448539733887, "learning_rate": 2.3469910556428114e-05, "loss": 0.2589263916015625, "step": 1514 }, { "epoch": 0.5481434580073267, "grad_norm": 2.0677969455718994, "learning_rate": 2.344242984150358e-05, "loss": 0.2776641845703125, "step": 1515 }, { "epoch": 0.5485052688706978, "grad_norm": 2.9903528690338135, "learning_rate": 2.341494242730104e-05, "loss": 0.29021453857421875, "step": 1516 }, { "epoch": 0.548867079734069, "grad_norm": 2.0389723777770996, "learning_rate": 2.338744836731348e-05, "loss": 0.27080535888671875, "step": 1517 }, { "epoch": 0.5492288905974402, "grad_norm": 1.0288947820663452, "learning_rate": 2.33599477150468e-05, "loss": 0.29974365234375, "step": 1518 }, { "epoch": 0.5495907014608113, "grad_norm": 1.8351942300796509, "learning_rate": 2.3332440524019755e-05, "loss": 0.26116943359375, "step": 1519 }, { "epoch": 0.5499525123241825, "grad_norm": 1.1446465253829956, "learning_rate": 2.3304926847763797e-05, "loss": 0.305206298828125, "step": 1520 }, { "epoch": 0.5503143231875537, "grad_norm": 0.919645369052887, "learning_rate": 2.3277406739823033e-05, "loss": 0.27320098876953125, "step": 1521 }, { "epoch": 0.5506761340509249, "grad_norm": 0.4115041494369507, "learning_rate": 2.3249880253754053e-05, "loss": 0.2722015380859375, "step": 1522 }, { "epoch": 0.551037944914296, "grad_norm": 3.209439277648926, "learning_rate": 2.322234744312588e-05, "loss": 0.3229522705078125, "step": 1523 }, { "epoch": 0.5513997557776672, "grad_norm": 0.49899211525917053, "learning_rate": 2.3194808361519834e-05, "loss": 0.24729156494140625, "step": 1524 }, { "epoch": 0.5517615666410384, "grad_norm": 1.2267048358917236, "learning_rate": 2.3167263062529446e-05, "loss": 0.2647552490234375, "step": 1525 }, { "epoch": 0.5521233775044095, "grad_norm": 1.3027575016021729, "learning_rate": 2.313971159976035e-05, "loss": 0.291351318359375, "step": 1526 }, { "epoch": 0.5524851883677807, "grad_norm": 2.3374621868133545, "learning_rate": 2.3112154026830168e-05, "loss": 0.28340911865234375, "step": 1527 }, { "epoch": 0.5528469992311519, "grad_norm": 2.5753068923950195, "learning_rate": 2.3084590397368413e-05, "loss": 0.26757049560546875, "step": 1528 }, { "epoch": 0.553208810094523, "grad_norm": 1.540489673614502, "learning_rate": 2.305702076501638e-05, "loss": 0.28774261474609375, "step": 1529 }, { "epoch": 0.5535706209578942, "grad_norm": 0.977096676826477, "learning_rate": 2.3029445183427058e-05, "loss": 0.2791900634765625, "step": 1530 }, { "epoch": 0.5539324318212654, "grad_norm": 0.5574640035629272, "learning_rate": 2.300186370626502e-05, "loss": 0.28849029541015625, "step": 1531 }, { "epoch": 0.5542942426846366, "grad_norm": 1.422987937927246, "learning_rate": 2.2974276387206284e-05, "loss": 0.3147125244140625, "step": 1532 }, { "epoch": 0.5546560535480077, "grad_norm": 1.4566947221755981, "learning_rate": 2.294668327993826e-05, "loss": 0.2750396728515625, "step": 1533 }, { "epoch": 0.5550178644113789, "grad_norm": 0.4985528290271759, "learning_rate": 2.2919084438159615e-05, "loss": 0.3032073974609375, "step": 1534 }, { "epoch": 0.5553796752747501, "grad_norm": 1.3332492113113403, "learning_rate": 2.2891479915580175e-05, "loss": 0.2878265380859375, "step": 1535 }, { "epoch": 0.5557414861381212, "grad_norm": 0.5877955555915833, "learning_rate": 2.2863869765920823e-05, "loss": 0.3296356201171875, "step": 1536 }, { "epoch": 0.5561032970014925, "grad_norm": 0.8044095635414124, "learning_rate": 2.28362540429134e-05, "loss": 0.3193511962890625, "step": 1537 }, { "epoch": 0.5564651078648637, "grad_norm": 2.6918838024139404, "learning_rate": 2.2808632800300576e-05, "loss": 0.28228759765625, "step": 1538 }, { "epoch": 0.5568269187282349, "grad_norm": 3.102980852127075, "learning_rate": 2.2781006091835784e-05, "loss": 0.3025054931640625, "step": 1539 }, { "epoch": 0.557188729591606, "grad_norm": 2.60640025138855, "learning_rate": 2.2753373971283073e-05, "loss": 0.2698822021484375, "step": 1540 }, { "epoch": 0.5575505404549772, "grad_norm": 3.396245002746582, "learning_rate": 2.2725736492417037e-05, "loss": 0.284210205078125, "step": 1541 }, { "epoch": 0.5579123513183484, "grad_norm": 3.1154556274414062, "learning_rate": 2.2698093709022694e-05, "loss": 0.28322601318359375, "step": 1542 }, { "epoch": 0.5582741621817195, "grad_norm": 2.2759487628936768, "learning_rate": 2.2670445674895383e-05, "loss": 0.3000907897949219, "step": 1543 }, { "epoch": 0.5586359730450907, "grad_norm": 2.6120834350585938, "learning_rate": 2.264279244384068e-05, "loss": 0.32916259765625, "step": 1544 }, { "epoch": 0.5589977839084619, "grad_norm": 1.773138165473938, "learning_rate": 2.2615134069674246e-05, "loss": 0.29111480712890625, "step": 1545 }, { "epoch": 0.5593595947718331, "grad_norm": 2.3437564373016357, "learning_rate": 2.258747060622176e-05, "loss": 0.28020477294921875, "step": 1546 }, { "epoch": 0.5597214056352042, "grad_norm": 0.7528247237205505, "learning_rate": 2.255980210731882e-05, "loss": 0.27875518798828125, "step": 1547 }, { "epoch": 0.5600832164985754, "grad_norm": 0.6096819043159485, "learning_rate": 2.253212862681081e-05, "loss": 0.2877044677734375, "step": 1548 }, { "epoch": 0.5604450273619466, "grad_norm": 5.768158435821533, "learning_rate": 2.2504450218552807e-05, "loss": 0.38605499267578125, "step": 1549 }, { "epoch": 0.5608068382253177, "grad_norm": 0.6463198661804199, "learning_rate": 2.2476766936409487e-05, "loss": 0.2837371826171875, "step": 1550 }, { "epoch": 0.5611686490886889, "grad_norm": 1.3337795734405518, "learning_rate": 2.2449078834255e-05, "loss": 0.2576751708984375, "step": 1551 }, { "epoch": 0.5615304599520601, "grad_norm": 1.0040441751480103, "learning_rate": 2.2421385965972894e-05, "loss": 0.3370513916015625, "step": 1552 }, { "epoch": 0.5618922708154312, "grad_norm": 1.2390204668045044, "learning_rate": 2.239368838545597e-05, "loss": 0.270172119140625, "step": 1553 }, { "epoch": 0.5622540816788024, "grad_norm": 1.1536835432052612, "learning_rate": 2.2365986146606215e-05, "loss": 0.30133056640625, "step": 1554 }, { "epoch": 0.5626158925421736, "grad_norm": 0.704155683517456, "learning_rate": 2.2338279303334667e-05, "loss": 0.26320648193359375, "step": 1555 }, { "epoch": 0.5629777034055448, "grad_norm": 1.9395917654037476, "learning_rate": 2.2310567909561352e-05, "loss": 0.3163604736328125, "step": 1556 }, { "epoch": 0.5633395142689159, "grad_norm": 1.2427339553833008, "learning_rate": 2.2282852019215124e-05, "loss": 0.2879486083984375, "step": 1557 }, { "epoch": 0.5637013251322871, "grad_norm": 2.0451653003692627, "learning_rate": 2.2255131686233596e-05, "loss": 0.27983856201171875, "step": 1558 }, { "epoch": 0.5640631359956583, "grad_norm": 2.0573880672454834, "learning_rate": 2.222740696456303e-05, "loss": 0.347442626953125, "step": 1559 }, { "epoch": 0.5644249468590294, "grad_norm": 1.703316330909729, "learning_rate": 2.2199677908158234e-05, "loss": 0.2521820068359375, "step": 1560 }, { "epoch": 0.5647867577224006, "grad_norm": 2.17191219329834, "learning_rate": 2.2171944570982427e-05, "loss": 0.2554473876953125, "step": 1561 }, { "epoch": 0.5651485685857718, "grad_norm": 1.1670048236846924, "learning_rate": 2.2144207007007198e-05, "loss": 0.28553009033203125, "step": 1562 }, { "epoch": 0.565510379449143, "grad_norm": 0.9832708835601807, "learning_rate": 2.2116465270212335e-05, "loss": 0.26828765869140625, "step": 1563 }, { "epoch": 0.5658721903125141, "grad_norm": 2.4788248538970947, "learning_rate": 2.208871941458574e-05, "loss": 0.3010406494140625, "step": 1564 }, { "epoch": 0.5662340011758853, "grad_norm": 2.3830885887145996, "learning_rate": 2.206096949412336e-05, "loss": 0.3110847473144531, "step": 1565 }, { "epoch": 0.5665958120392565, "grad_norm": 2.5231778621673584, "learning_rate": 2.2033215562829028e-05, "loss": 0.31191253662109375, "step": 1566 }, { "epoch": 0.5669576229026276, "grad_norm": 0.8461129069328308, "learning_rate": 2.2005457674714384e-05, "loss": 0.28000640869140625, "step": 1567 }, { "epoch": 0.5673194337659988, "grad_norm": 0.6341442465782166, "learning_rate": 2.197769588379878e-05, "loss": 0.3094329833984375, "step": 1568 }, { "epoch": 0.56768124462937, "grad_norm": 0.5631226301193237, "learning_rate": 2.194993024410916e-05, "loss": 0.3262176513671875, "step": 1569 }, { "epoch": 0.5680430554927411, "grad_norm": 3.532379627227783, "learning_rate": 2.192216080967995e-05, "loss": 0.2884979248046875, "step": 1570 }, { "epoch": 0.5684048663561123, "grad_norm": 2.7309634685516357, "learning_rate": 2.189438763455297e-05, "loss": 0.2943267822265625, "step": 1571 }, { "epoch": 0.5687666772194835, "grad_norm": 2.3283653259277344, "learning_rate": 2.186661077277731e-05, "loss": 0.3087005615234375, "step": 1572 }, { "epoch": 0.5691284880828547, "grad_norm": 5.064445972442627, "learning_rate": 2.183883027840925e-05, "loss": 0.3212738037109375, "step": 1573 }, { "epoch": 0.5694902989462258, "grad_norm": 4.0373101234436035, "learning_rate": 2.1811046205512122e-05, "loss": 0.306182861328125, "step": 1574 }, { "epoch": 0.569852109809597, "grad_norm": 3.739072799682617, "learning_rate": 2.178325860815624e-05, "loss": 0.2712860107421875, "step": 1575 }, { "epoch": 0.5702139206729682, "grad_norm": 1.761242151260376, "learning_rate": 2.175546754041876e-05, "loss": 0.31504058837890625, "step": 1576 }, { "epoch": 0.5705757315363393, "grad_norm": 0.646484911441803, "learning_rate": 2.172767305638359e-05, "loss": 0.297271728515625, "step": 1577 }, { "epoch": 0.5709375423997105, "grad_norm": 3.427306890487671, "learning_rate": 2.1699875210141316e-05, "loss": 0.3449249267578125, "step": 1578 }, { "epoch": 0.5712993532630817, "grad_norm": 1.0797642469406128, "learning_rate": 2.167207405578903e-05, "loss": 0.28845977783203125, "step": 1579 }, { "epoch": 0.5716611641264528, "grad_norm": 3.298560380935669, "learning_rate": 2.1644269647430278e-05, "loss": 0.32000732421875, "step": 1580 }, { "epoch": 0.572022974989824, "grad_norm": 1.6212284564971924, "learning_rate": 2.1616462039174943e-05, "loss": 0.26458740234375, "step": 1581 }, { "epoch": 0.5723847858531952, "grad_norm": 0.5536385178565979, "learning_rate": 2.158865128513914e-05, "loss": 0.3081703186035156, "step": 1582 }, { "epoch": 0.5727465967165665, "grad_norm": 1.5940872430801392, "learning_rate": 2.1560837439445087e-05, "loss": 0.30304718017578125, "step": 1583 }, { "epoch": 0.5731084075799376, "grad_norm": 1.0183876752853394, "learning_rate": 2.1533020556221027e-05, "loss": 0.2959747314453125, "step": 1584 }, { "epoch": 0.5734702184433088, "grad_norm": 0.694981575012207, "learning_rate": 2.150520068960112e-05, "loss": 0.26615142822265625, "step": 1585 }, { "epoch": 0.57383202930668, "grad_norm": 0.6998457908630371, "learning_rate": 2.1477377893725323e-05, "loss": 0.28159332275390625, "step": 1586 }, { "epoch": 0.5741938401700512, "grad_norm": 1.8801151514053345, "learning_rate": 2.1449552222739308e-05, "loss": 0.2895355224609375, "step": 1587 }, { "epoch": 0.5745556510334223, "grad_norm": 0.607376754283905, "learning_rate": 2.1421723730794333e-05, "loss": 0.30818939208984375, "step": 1588 }, { "epoch": 0.5749174618967935, "grad_norm": 2.1465935707092285, "learning_rate": 2.139389247204713e-05, "loss": 0.2671356201171875, "step": 1589 }, { "epoch": 0.5752792727601647, "grad_norm": 0.4939686357975006, "learning_rate": 2.1366058500659847e-05, "loss": 0.2737274169921875, "step": 1590 }, { "epoch": 0.5756410836235358, "grad_norm": 2.1871869564056396, "learning_rate": 2.1338221870799888e-05, "loss": 0.303375244140625, "step": 1591 }, { "epoch": 0.576002894486907, "grad_norm": 0.7850298285484314, "learning_rate": 2.1310382636639842e-05, "loss": 0.27862548828125, "step": 1592 }, { "epoch": 0.5763647053502782, "grad_norm": 1.943921446800232, "learning_rate": 2.1282540852357353e-05, "loss": 0.33237457275390625, "step": 1593 }, { "epoch": 0.5767265162136493, "grad_norm": 1.473929524421692, "learning_rate": 2.1254696572135042e-05, "loss": 0.30367279052734375, "step": 1594 }, { "epoch": 0.5770883270770205, "grad_norm": 0.8525585532188416, "learning_rate": 2.1226849850160388e-05, "loss": 0.29587554931640625, "step": 1595 }, { "epoch": 0.5774501379403917, "grad_norm": 2.315154552459717, "learning_rate": 2.119900074062561e-05, "loss": 0.2908935546875, "step": 1596 }, { "epoch": 0.5778119488037629, "grad_norm": 2.9363853931427, "learning_rate": 2.117114929772758e-05, "loss": 0.28745269775390625, "step": 1597 }, { "epoch": 0.578173759667134, "grad_norm": 0.6365917921066284, "learning_rate": 2.1143295575667705e-05, "loss": 0.2749443054199219, "step": 1598 }, { "epoch": 0.5785355705305052, "grad_norm": 0.6327043771743774, "learning_rate": 2.111543962865184e-05, "loss": 0.28997802734375, "step": 1599 }, { "epoch": 0.5788973813938764, "grad_norm": 1.2254751920700073, "learning_rate": 2.108758151089016e-05, "loss": 0.2777252197265625, "step": 1600 }, { "epoch": 0.5792591922572475, "grad_norm": 0.723250687122345, "learning_rate": 2.105972127659707e-05, "loss": 0.31200408935546875, "step": 1601 }, { "epoch": 0.5796210031206187, "grad_norm": 2.673541784286499, "learning_rate": 2.103185897999109e-05, "loss": 0.25980377197265625, "step": 1602 }, { "epoch": 0.5799828139839899, "grad_norm": 3.0232584476470947, "learning_rate": 2.1003994675294745e-05, "loss": 0.30120849609375, "step": 1603 }, { "epoch": 0.580344624847361, "grad_norm": 1.4589781761169434, "learning_rate": 2.097612841673449e-05, "loss": 0.3054351806640625, "step": 1604 }, { "epoch": 0.5807064357107322, "grad_norm": 1.5131436586380005, "learning_rate": 2.0948260258540562e-05, "loss": 0.28155517578125, "step": 1605 }, { "epoch": 0.5810682465741034, "grad_norm": 1.5505859851837158, "learning_rate": 2.0920390254946912e-05, "loss": 0.273284912109375, "step": 1606 }, { "epoch": 0.5814300574374746, "grad_norm": 2.7725775241851807, "learning_rate": 2.0892518460191062e-05, "loss": 0.2812957763671875, "step": 1607 }, { "epoch": 0.5817918683008457, "grad_norm": 0.8858634233474731, "learning_rate": 2.0864644928514038e-05, "loss": 0.273773193359375, "step": 1608 }, { "epoch": 0.5821536791642169, "grad_norm": 0.48767781257629395, "learning_rate": 2.0836769714160236e-05, "loss": 0.27898406982421875, "step": 1609 }, { "epoch": 0.5825154900275881, "grad_norm": 0.9953821301460266, "learning_rate": 2.080889287137733e-05, "loss": 0.30022430419921875, "step": 1610 }, { "epoch": 0.5828773008909592, "grad_norm": 1.9495247602462769, "learning_rate": 2.078101445441616e-05, "loss": 0.24037933349609375, "step": 1611 }, { "epoch": 0.5832391117543304, "grad_norm": 3.032747745513916, "learning_rate": 2.075313451753063e-05, "loss": 0.313568115234375, "step": 1612 }, { "epoch": 0.5836009226177016, "grad_norm": 0.47599518299102783, "learning_rate": 2.0725253114977622e-05, "loss": 0.2700347900390625, "step": 1613 }, { "epoch": 0.5839627334810727, "grad_norm": 2.1333248615264893, "learning_rate": 2.0697370301016835e-05, "loss": 0.2977752685546875, "step": 1614 }, { "epoch": 0.5843245443444439, "grad_norm": 0.5299839377403259, "learning_rate": 2.0669486129910735e-05, "loss": 0.28025054931640625, "step": 1615 }, { "epoch": 0.5846863552078151, "grad_norm": 3.5133957862854004, "learning_rate": 2.0641600655924424e-05, "loss": 0.3343963623046875, "step": 1616 }, { "epoch": 0.5850481660711863, "grad_norm": 0.6475329995155334, "learning_rate": 2.061371393332555e-05, "loss": 0.29502105712890625, "step": 1617 }, { "epoch": 0.5854099769345574, "grad_norm": 0.5292240977287292, "learning_rate": 2.0585826016384175e-05, "loss": 0.3054046630859375, "step": 1618 }, { "epoch": 0.5857717877979286, "grad_norm": 1.1286377906799316, "learning_rate": 2.05579369593727e-05, "loss": 0.3055419921875, "step": 1619 }, { "epoch": 0.5861335986612998, "grad_norm": 2.4512436389923096, "learning_rate": 2.0530046816565732e-05, "loss": 0.2746429443359375, "step": 1620 }, { "epoch": 0.5864954095246709, "grad_norm": 1.2330721616744995, "learning_rate": 2.0502155642239996e-05, "loss": 0.3081512451171875, "step": 1621 }, { "epoch": 0.5868572203880421, "grad_norm": 3.0214600563049316, "learning_rate": 2.0474263490674233e-05, "loss": 0.3096923828125, "step": 1622 }, { "epoch": 0.5872190312514133, "grad_norm": 1.9294486045837402, "learning_rate": 2.0446370416149075e-05, "loss": 0.278656005859375, "step": 1623 }, { "epoch": 0.5875808421147845, "grad_norm": 0.4592364430427551, "learning_rate": 2.0418476472946943e-05, "loss": 0.29471588134765625, "step": 1624 }, { "epoch": 0.5879426529781556, "grad_norm": 1.9690245389938354, "learning_rate": 2.0390581715351964e-05, "loss": 0.333282470703125, "step": 1625 }, { "epoch": 0.5883044638415268, "grad_norm": 1.9051790237426758, "learning_rate": 2.0362686197649855e-05, "loss": 0.276123046875, "step": 1626 }, { "epoch": 0.588666274704898, "grad_norm": 1.3021254539489746, "learning_rate": 2.033478997412779e-05, "loss": 0.29042816162109375, "step": 1627 }, { "epoch": 0.5890280855682691, "grad_norm": 0.6444965600967407, "learning_rate": 2.0306893099074328e-05, "loss": 0.282623291015625, "step": 1628 }, { "epoch": 0.5893898964316404, "grad_norm": 1.1537623405456543, "learning_rate": 2.027899562677931e-05, "loss": 0.3079071044921875, "step": 1629 }, { "epoch": 0.5897517072950116, "grad_norm": 0.5368306636810303, "learning_rate": 2.0251097611533703e-05, "loss": 0.292022705078125, "step": 1630 }, { "epoch": 0.5901135181583828, "grad_norm": 1.235844612121582, "learning_rate": 2.0223199107629577e-05, "loss": 0.2762947082519531, "step": 1631 }, { "epoch": 0.5904753290217539, "grad_norm": 2.3223488330841064, "learning_rate": 2.0195300169359914e-05, "loss": 0.29376983642578125, "step": 1632 }, { "epoch": 0.5908371398851251, "grad_norm": 2.785372257232666, "learning_rate": 2.016740085101856e-05, "loss": 0.293304443359375, "step": 1633 }, { "epoch": 0.5911989507484963, "grad_norm": 1.7777533531188965, "learning_rate": 2.01395012069001e-05, "loss": 0.27678680419921875, "step": 1634 }, { "epoch": 0.5915607616118674, "grad_norm": 3.98687481880188, "learning_rate": 2.011160129129975e-05, "loss": 0.34525299072265625, "step": 1635 }, { "epoch": 0.5919225724752386, "grad_norm": 0.6579886078834534, "learning_rate": 2.0083701158513246e-05, "loss": 0.28888702392578125, "step": 1636 }, { "epoch": 0.5922843833386098, "grad_norm": 0.4886251389980316, "learning_rate": 2.005580086283676e-05, "loss": 0.3147125244140625, "step": 1637 }, { "epoch": 0.592646194201981, "grad_norm": 1.4349327087402344, "learning_rate": 2.0027900458566784e-05, "loss": 0.28133392333984375, "step": 1638 }, { "epoch": 0.5930080050653521, "grad_norm": 3.624547004699707, "learning_rate": 2e-05, "loss": 0.27850341796875, "step": 1639 }, { "epoch": 0.5933698159287233, "grad_norm": 0.7291319966316223, "learning_rate": 1.9972099541433227e-05, "loss": 0.2857818603515625, "step": 1640 }, { "epoch": 0.5937316267920945, "grad_norm": 0.6952764987945557, "learning_rate": 1.9944199137163242e-05, "loss": 0.29937744140625, "step": 1641 }, { "epoch": 0.5940934376554656, "grad_norm": 1.1511214971542358, "learning_rate": 1.9916298841486764e-05, "loss": 0.2838134765625, "step": 1642 }, { "epoch": 0.5944552485188368, "grad_norm": 2.0026943683624268, "learning_rate": 1.988839870870026e-05, "loss": 0.295745849609375, "step": 1643 }, { "epoch": 0.594817059382208, "grad_norm": 1.5312420129776, "learning_rate": 1.986049879309991e-05, "loss": 0.29425811767578125, "step": 1644 }, { "epoch": 0.5951788702455791, "grad_norm": 1.0783854722976685, "learning_rate": 1.9832599148981445e-05, "loss": 0.3054656982421875, "step": 1645 }, { "epoch": 0.5955406811089503, "grad_norm": 0.9255203604698181, "learning_rate": 1.980469983064009e-05, "loss": 0.2811279296875, "step": 1646 }, { "epoch": 0.5959024919723215, "grad_norm": 2.580531358718872, "learning_rate": 1.9776800892370423e-05, "loss": 0.31993865966796875, "step": 1647 }, { "epoch": 0.5962643028356927, "grad_norm": 0.5884448289871216, "learning_rate": 1.9748902388466296e-05, "loss": 0.3209075927734375, "step": 1648 }, { "epoch": 0.5966261136990638, "grad_norm": 1.8625268936157227, "learning_rate": 1.9721004373220698e-05, "loss": 0.30927276611328125, "step": 1649 }, { "epoch": 0.596987924562435, "grad_norm": 1.6007553339004517, "learning_rate": 1.9693106900925675e-05, "loss": 0.2872161865234375, "step": 1650 }, { "epoch": 0.5973497354258062, "grad_norm": 0.5362300872802734, "learning_rate": 1.9665210025872215e-05, "loss": 0.2935638427734375, "step": 1651 }, { "epoch": 0.5977115462891773, "grad_norm": 1.0519458055496216, "learning_rate": 1.963731380235015e-05, "loss": 0.2572174072265625, "step": 1652 }, { "epoch": 0.5980733571525485, "grad_norm": 0.6917871236801147, "learning_rate": 1.960941828464804e-05, "loss": 0.303955078125, "step": 1653 }, { "epoch": 0.5984351680159197, "grad_norm": 0.6841064095497131, "learning_rate": 1.9581523527053067e-05, "loss": 0.28704833984375, "step": 1654 }, { "epoch": 0.5987969788792908, "grad_norm": 0.42214643955230713, "learning_rate": 1.955362958385094e-05, "loss": 0.2940826416015625, "step": 1655 }, { "epoch": 0.599158789742662, "grad_norm": 1.4472627639770508, "learning_rate": 1.9525736509325777e-05, "loss": 0.2861328125, "step": 1656 }, { "epoch": 0.5995206006060332, "grad_norm": 0.6417531371116638, "learning_rate": 1.949784435776001e-05, "loss": 0.306365966796875, "step": 1657 }, { "epoch": 0.5998824114694044, "grad_norm": 1.8180652856826782, "learning_rate": 1.946995318343427e-05, "loss": 0.28270721435546875, "step": 1658 }, { "epoch": 0.6002442223327755, "grad_norm": 1.6778978109359741, "learning_rate": 1.9442063040627305e-05, "loss": 0.2554931640625, "step": 1659 }, { "epoch": 0.6006060331961467, "grad_norm": 0.9862295389175415, "learning_rate": 1.9414173983615825e-05, "loss": 0.28745269775390625, "step": 1660 }, { "epoch": 0.6009678440595179, "grad_norm": 1.114543080329895, "learning_rate": 1.9386286066674455e-05, "loss": 0.2940406799316406, "step": 1661 }, { "epoch": 0.601329654922889, "grad_norm": 2.0052695274353027, "learning_rate": 1.9358399344075583e-05, "loss": 0.316864013671875, "step": 1662 }, { "epoch": 0.6016914657862602, "grad_norm": 0.7951840162277222, "learning_rate": 1.9330513870089272e-05, "loss": 0.28592681884765625, "step": 1663 }, { "epoch": 0.6020532766496314, "grad_norm": 0.5905314087867737, "learning_rate": 1.9302629698983172e-05, "loss": 0.27239227294921875, "step": 1664 }, { "epoch": 0.6024150875130025, "grad_norm": 1.0643343925476074, "learning_rate": 1.9274746885022385e-05, "loss": 0.3199310302734375, "step": 1665 }, { "epoch": 0.6027768983763737, "grad_norm": 0.7729546427726746, "learning_rate": 1.9246865482469372e-05, "loss": 0.28333282470703125, "step": 1666 }, { "epoch": 0.6031387092397449, "grad_norm": 0.9181944727897644, "learning_rate": 1.9218985545583848e-05, "loss": 0.2974700927734375, "step": 1667 }, { "epoch": 0.6035005201031161, "grad_norm": 1.549612283706665, "learning_rate": 1.919110712862268e-05, "loss": 0.32270050048828125, "step": 1668 }, { "epoch": 0.6038623309664872, "grad_norm": 1.2579493522644043, "learning_rate": 1.9163230285839774e-05, "loss": 0.32767486572265625, "step": 1669 }, { "epoch": 0.6042241418298584, "grad_norm": 0.8719000220298767, "learning_rate": 1.9135355071485976e-05, "loss": 0.3231964111328125, "step": 1670 }, { "epoch": 0.6045859526932296, "grad_norm": 1.7615532875061035, "learning_rate": 1.910748153980894e-05, "loss": 0.2932891845703125, "step": 1671 }, { "epoch": 0.6049477635566007, "grad_norm": 1.9341877698898315, "learning_rate": 1.9079609745053095e-05, "loss": 0.3159332275390625, "step": 1672 }, { "epoch": 0.6053095744199719, "grad_norm": 2.681870698928833, "learning_rate": 1.9051739741459438e-05, "loss": 0.2642974853515625, "step": 1673 }, { "epoch": 0.6056713852833431, "grad_norm": 0.46746718883514404, "learning_rate": 1.9023871583265515e-05, "loss": 0.30724334716796875, "step": 1674 }, { "epoch": 0.6060331961467144, "grad_norm": 1.1458462476730347, "learning_rate": 1.899600532470526e-05, "loss": 0.27806854248046875, "step": 1675 }, { "epoch": 0.6063950070100855, "grad_norm": 0.4503445029258728, "learning_rate": 1.8968141020008918e-05, "loss": 0.2901611328125, "step": 1676 }, { "epoch": 0.6067568178734567, "grad_norm": 1.7371394634246826, "learning_rate": 1.8940278723402938e-05, "loss": 0.29419708251953125, "step": 1677 }, { "epoch": 0.6071186287368279, "grad_norm": 0.47052180767059326, "learning_rate": 1.8912418489109846e-05, "loss": 0.29862213134765625, "step": 1678 }, { "epoch": 0.607480439600199, "grad_norm": 0.5562170743942261, "learning_rate": 1.8884560371348168e-05, "loss": 0.2994575500488281, "step": 1679 }, { "epoch": 0.6078422504635702, "grad_norm": 2.541355848312378, "learning_rate": 1.8856704424332298e-05, "loss": 0.313568115234375, "step": 1680 }, { "epoch": 0.6082040613269414, "grad_norm": 0.7542663812637329, "learning_rate": 1.882885070227243e-05, "loss": 0.30338287353515625, "step": 1681 }, { "epoch": 0.6085658721903126, "grad_norm": 1.3321003913879395, "learning_rate": 1.88009992593744e-05, "loss": 0.27375030517578125, "step": 1682 }, { "epoch": 0.6089276830536837, "grad_norm": 1.1610981225967407, "learning_rate": 1.877315014983961e-05, "loss": 0.29494476318359375, "step": 1683 }, { "epoch": 0.6092894939170549, "grad_norm": 0.5325490832328796, "learning_rate": 1.8745303427864958e-05, "loss": 0.2778587341308594, "step": 1684 }, { "epoch": 0.6096513047804261, "grad_norm": 0.7614805102348328, "learning_rate": 1.871745914764265e-05, "loss": 0.2814178466796875, "step": 1685 }, { "epoch": 0.6100131156437972, "grad_norm": 1.9364620447158813, "learning_rate": 1.8689617363360164e-05, "loss": 0.31191253662109375, "step": 1686 }, { "epoch": 0.6103749265071684, "grad_norm": 0.583784818649292, "learning_rate": 1.8661778129200115e-05, "loss": 0.28548431396484375, "step": 1687 }, { "epoch": 0.6107367373705396, "grad_norm": 1.0113433599472046, "learning_rate": 1.863394149934016e-05, "loss": 0.2916412353515625, "step": 1688 }, { "epoch": 0.6110985482339107, "grad_norm": 0.8521387577056885, "learning_rate": 1.8606107527952872e-05, "loss": 0.301177978515625, "step": 1689 }, { "epoch": 0.6114603590972819, "grad_norm": 0.7798730731010437, "learning_rate": 1.8578276269205674e-05, "loss": 0.2809600830078125, "step": 1690 }, { "epoch": 0.6118221699606531, "grad_norm": 1.6385362148284912, "learning_rate": 1.8550447777260695e-05, "loss": 0.33174896240234375, "step": 1691 }, { "epoch": 0.6121839808240243, "grad_norm": 2.8516931533813477, "learning_rate": 1.852262210627468e-05, "loss": 0.2508697509765625, "step": 1692 }, { "epoch": 0.6125457916873954, "grad_norm": 0.6596441864967346, "learning_rate": 1.8494799310398888e-05, "loss": 0.30088043212890625, "step": 1693 }, { "epoch": 0.6129076025507666, "grad_norm": 1.416139841079712, "learning_rate": 1.8466979443778983e-05, "loss": 0.3059234619140625, "step": 1694 }, { "epoch": 0.6132694134141378, "grad_norm": 2.433805465698242, "learning_rate": 1.8439162560554926e-05, "loss": 0.3084869384765625, "step": 1695 }, { "epoch": 0.6136312242775089, "grad_norm": 2.30465030670166, "learning_rate": 1.8411348714860865e-05, "loss": 0.31183624267578125, "step": 1696 }, { "epoch": 0.6139930351408801, "grad_norm": 0.7521658539772034, "learning_rate": 1.8383537960825057e-05, "loss": 0.27982330322265625, "step": 1697 }, { "epoch": 0.6143548460042513, "grad_norm": 0.5746760368347168, "learning_rate": 1.8355730352569726e-05, "loss": 0.2872772216796875, "step": 1698 }, { "epoch": 0.6147166568676224, "grad_norm": 1.5850472450256348, "learning_rate": 1.8327925944210977e-05, "loss": 0.2924957275390625, "step": 1699 }, { "epoch": 0.6150784677309936, "grad_norm": 1.44949471950531, "learning_rate": 1.8300124789858694e-05, "loss": 0.277069091796875, "step": 1700 }, { "epoch": 0.6154402785943648, "grad_norm": 2.4832584857940674, "learning_rate": 1.8272326943616413e-05, "loss": 0.2727203369140625, "step": 1701 }, { "epoch": 0.615802089457736, "grad_norm": 1.4597474336624146, "learning_rate": 1.8244532459581248e-05, "loss": 0.30197906494140625, "step": 1702 }, { "epoch": 0.6161639003211071, "grad_norm": 0.6523033976554871, "learning_rate": 1.8216741391843767e-05, "loss": 0.28762054443359375, "step": 1703 }, { "epoch": 0.6165257111844783, "grad_norm": 1.2747018337249756, "learning_rate": 1.818895379448788e-05, "loss": 0.31078338623046875, "step": 1704 }, { "epoch": 0.6168875220478495, "grad_norm": 1.3035825490951538, "learning_rate": 1.8161169721590756e-05, "loss": 0.323333740234375, "step": 1705 }, { "epoch": 0.6172493329112206, "grad_norm": 0.79984450340271, "learning_rate": 1.813338922722269e-05, "loss": 0.298095703125, "step": 1706 }, { "epoch": 0.6176111437745918, "grad_norm": 0.5364368557929993, "learning_rate": 1.810561236544704e-05, "loss": 0.2940177917480469, "step": 1707 }, { "epoch": 0.617972954637963, "grad_norm": 1.016381025314331, "learning_rate": 1.807783919032005e-05, "loss": 0.2967376708984375, "step": 1708 }, { "epoch": 0.6183347655013341, "grad_norm": 0.5313143134117126, "learning_rate": 1.8050069755890844e-05, "loss": 0.28432464599609375, "step": 1709 }, { "epoch": 0.6186965763647053, "grad_norm": 0.6403621435165405, "learning_rate": 1.8022304116201224e-05, "loss": 0.28399658203125, "step": 1710 }, { "epoch": 0.6190583872280765, "grad_norm": 2.562915563583374, "learning_rate": 1.799454232528562e-05, "loss": 0.279754638671875, "step": 1711 }, { "epoch": 0.6194201980914477, "grad_norm": 0.6166876554489136, "learning_rate": 1.796678443717098e-05, "loss": 0.2800750732421875, "step": 1712 }, { "epoch": 0.6197820089548188, "grad_norm": 1.5968003273010254, "learning_rate": 1.7939030505876646e-05, "loss": 0.332855224609375, "step": 1713 }, { "epoch": 0.62014381981819, "grad_norm": 1.1030566692352295, "learning_rate": 1.7911280585414263e-05, "loss": 0.308135986328125, "step": 1714 }, { "epoch": 0.6205056306815612, "grad_norm": 0.639352560043335, "learning_rate": 1.7883534729787672e-05, "loss": 0.27713775634765625, "step": 1715 }, { "epoch": 0.6208674415449323, "grad_norm": 0.6138225793838501, "learning_rate": 1.7855792992992805e-05, "loss": 0.3118743896484375, "step": 1716 }, { "epoch": 0.6212292524083035, "grad_norm": 0.7522735595703125, "learning_rate": 1.7828055429017576e-05, "loss": 0.28348541259765625, "step": 1717 }, { "epoch": 0.6215910632716747, "grad_norm": 1.1559950113296509, "learning_rate": 1.780032209184178e-05, "loss": 0.3050994873046875, "step": 1718 }, { "epoch": 0.6219528741350459, "grad_norm": 0.7229307293891907, "learning_rate": 1.777259303543698e-05, "loss": 0.27330780029296875, "step": 1719 }, { "epoch": 0.622314684998417, "grad_norm": 1.335661768913269, "learning_rate": 1.7744868313766414e-05, "loss": 0.31746673583984375, "step": 1720 }, { "epoch": 0.6226764958617883, "grad_norm": 0.47771549224853516, "learning_rate": 1.771714798078488e-05, "loss": 0.287078857421875, "step": 1721 }, { "epoch": 0.6230383067251595, "grad_norm": 0.8776565194129944, "learning_rate": 1.768943209043865e-05, "loss": 0.261749267578125, "step": 1722 }, { "epoch": 0.6234001175885306, "grad_norm": 1.5103930234909058, "learning_rate": 1.7661720696665333e-05, "loss": 0.30078887939453125, "step": 1723 }, { "epoch": 0.6237619284519018, "grad_norm": 1.0027780532836914, "learning_rate": 1.763401385339379e-05, "loss": 0.3242530822753906, "step": 1724 }, { "epoch": 0.624123739315273, "grad_norm": 0.5429537892341614, "learning_rate": 1.7606311614544038e-05, "loss": 0.27014923095703125, "step": 1725 }, { "epoch": 0.6244855501786442, "grad_norm": 0.8839976191520691, "learning_rate": 1.7578614034027112e-05, "loss": 0.305206298828125, "step": 1726 }, { "epoch": 0.6248473610420153, "grad_norm": 0.5204808115959167, "learning_rate": 1.7550921165745004e-05, "loss": 0.2791290283203125, "step": 1727 }, { "epoch": 0.6252091719053865, "grad_norm": 2.9155430793762207, "learning_rate": 1.7523233063590516e-05, "loss": 0.36301422119140625, "step": 1728 }, { "epoch": 0.6255709827687577, "grad_norm": 2.4553287029266357, "learning_rate": 1.74955497814472e-05, "loss": 0.2821502685546875, "step": 1729 }, { "epoch": 0.6259327936321288, "grad_norm": 2.6886844635009766, "learning_rate": 1.7467871373189198e-05, "loss": 0.2760009765625, "step": 1730 }, { "epoch": 0.6262946044955, "grad_norm": 1.0086872577667236, "learning_rate": 1.7440197892681187e-05, "loss": 0.31943511962890625, "step": 1731 }, { "epoch": 0.6266564153588712, "grad_norm": 0.48901402950286865, "learning_rate": 1.7412529393778244e-05, "loss": 0.31893157958984375, "step": 1732 }, { "epoch": 0.6270182262222423, "grad_norm": 2.666562557220459, "learning_rate": 1.738486593032576e-05, "loss": 0.26399993896484375, "step": 1733 }, { "epoch": 0.6273800370856135, "grad_norm": 0.6745082139968872, "learning_rate": 1.7357207556159322e-05, "loss": 0.283721923828125, "step": 1734 }, { "epoch": 0.6277418479489847, "grad_norm": 1.8189009428024292, "learning_rate": 1.7329554325104616e-05, "loss": 0.3115234375, "step": 1735 }, { "epoch": 0.6281036588123559, "grad_norm": 1.0712531805038452, "learning_rate": 1.7301906290977313e-05, "loss": 0.2841033935546875, "step": 1736 }, { "epoch": 0.628465469675727, "grad_norm": 1.7433065176010132, "learning_rate": 1.727426350758297e-05, "loss": 0.33025360107421875, "step": 1737 }, { "epoch": 0.6288272805390982, "grad_norm": 1.4492321014404297, "learning_rate": 1.7246626028716934e-05, "loss": 0.3223419189453125, "step": 1738 }, { "epoch": 0.6291890914024694, "grad_norm": 0.8705816864967346, "learning_rate": 1.7218993908164226e-05, "loss": 0.29061126708984375, "step": 1739 }, { "epoch": 0.6295509022658405, "grad_norm": 1.5682997703552246, "learning_rate": 1.719136719969943e-05, "loss": 0.2840118408203125, "step": 1740 }, { "epoch": 0.6299127131292117, "grad_norm": 0.4751293361186981, "learning_rate": 1.716374595708661e-05, "loss": 0.3012237548828125, "step": 1741 }, { "epoch": 0.6302745239925829, "grad_norm": 1.7206605672836304, "learning_rate": 1.7136130234079184e-05, "loss": 0.2759246826171875, "step": 1742 }, { "epoch": 0.630636334855954, "grad_norm": 2.1135993003845215, "learning_rate": 1.7108520084419835e-05, "loss": 0.31571197509765625, "step": 1743 }, { "epoch": 0.6309981457193252, "grad_norm": 0.6021486520767212, "learning_rate": 1.70809155618404e-05, "loss": 0.283599853515625, "step": 1744 }, { "epoch": 0.6313599565826964, "grad_norm": 0.6375644207000732, "learning_rate": 1.705331672006175e-05, "loss": 0.28527069091796875, "step": 1745 }, { "epoch": 0.6317217674460676, "grad_norm": 1.3254534006118774, "learning_rate": 1.702572361279372e-05, "loss": 0.28968048095703125, "step": 1746 }, { "epoch": 0.6320835783094387, "grad_norm": 0.6480424404144287, "learning_rate": 1.6998136293734984e-05, "loss": 0.277679443359375, "step": 1747 }, { "epoch": 0.6324453891728099, "grad_norm": 2.060051202774048, "learning_rate": 1.6970554816572942e-05, "loss": 0.32651519775390625, "step": 1748 }, { "epoch": 0.6328072000361811, "grad_norm": 0.5339307188987732, "learning_rate": 1.6942979234983626e-05, "loss": 0.2934722900390625, "step": 1749 }, { "epoch": 0.6331690108995522, "grad_norm": 0.6229574680328369, "learning_rate": 1.6915409602631594e-05, "loss": 0.3009834289550781, "step": 1750 }, { "epoch": 0.6335308217629234, "grad_norm": 1.263800859451294, "learning_rate": 1.688784597316984e-05, "loss": 0.30572509765625, "step": 1751 }, { "epoch": 0.6338926326262946, "grad_norm": 1.2821372747421265, "learning_rate": 1.6860288400239655e-05, "loss": 0.2820892333984375, "step": 1752 }, { "epoch": 0.6342544434896658, "grad_norm": 2.8511483669281006, "learning_rate": 1.683273693747056e-05, "loss": 0.2800140380859375, "step": 1753 }, { "epoch": 0.6346162543530369, "grad_norm": 1.3479783535003662, "learning_rate": 1.6805191638480173e-05, "loss": 0.28668212890625, "step": 1754 }, { "epoch": 0.6349780652164081, "grad_norm": 2.6316421031951904, "learning_rate": 1.6777652556874133e-05, "loss": 0.29994964599609375, "step": 1755 }, { "epoch": 0.6353398760797793, "grad_norm": 0.9091010689735413, "learning_rate": 1.6750119746245957e-05, "loss": 0.2829437255859375, "step": 1756 }, { "epoch": 0.6357016869431504, "grad_norm": 4.320092678070068, "learning_rate": 1.6722593260176977e-05, "loss": 0.362548828125, "step": 1757 }, { "epoch": 0.6360634978065216, "grad_norm": 0.45152148604393005, "learning_rate": 1.6695073152236203e-05, "loss": 0.286834716796875, "step": 1758 }, { "epoch": 0.6364253086698928, "grad_norm": 2.275797128677368, "learning_rate": 1.6667559475980248e-05, "loss": 0.3243370056152344, "step": 1759 }, { "epoch": 0.636787119533264, "grad_norm": 0.9941002130508423, "learning_rate": 1.6640052284953202e-05, "loss": 0.2522125244140625, "step": 1760 }, { "epoch": 0.6371489303966351, "grad_norm": 1.6988002061843872, "learning_rate": 1.6612551632686523e-05, "loss": 0.3370819091796875, "step": 1761 }, { "epoch": 0.6375107412600063, "grad_norm": 1.7838466167449951, "learning_rate": 1.6585057572698963e-05, "loss": 0.3155975341796875, "step": 1762 }, { "epoch": 0.6378725521233775, "grad_norm": 0.4950980246067047, "learning_rate": 1.6557570158496422e-05, "loss": 0.27667999267578125, "step": 1763 }, { "epoch": 0.6382343629867486, "grad_norm": 0.7547111511230469, "learning_rate": 1.6530089443571892e-05, "loss": 0.3177337646484375, "step": 1764 }, { "epoch": 0.6385961738501198, "grad_norm": 0.5979122519493103, "learning_rate": 1.6502615481405312e-05, "loss": 0.287017822265625, "step": 1765 }, { "epoch": 0.638957984713491, "grad_norm": 1.0422890186309814, "learning_rate": 1.6475148325463478e-05, "loss": 0.27728271484375, "step": 1766 }, { "epoch": 0.6393197955768622, "grad_norm": 2.685791492462158, "learning_rate": 1.6447688029199945e-05, "loss": 0.32180023193359375, "step": 1767 }, { "epoch": 0.6396816064402334, "grad_norm": 0.7245455980300903, "learning_rate": 1.6420234646054928e-05, "loss": 0.304107666015625, "step": 1768 }, { "epoch": 0.6400434173036046, "grad_norm": 1.343702793121338, "learning_rate": 1.639278822945517e-05, "loss": 0.265838623046875, "step": 1769 }, { "epoch": 0.6404052281669758, "grad_norm": 1.0249629020690918, "learning_rate": 1.636534883281387e-05, "loss": 0.3211402893066406, "step": 1770 }, { "epoch": 0.6407670390303469, "grad_norm": 2.4570698738098145, "learning_rate": 1.633791650953055e-05, "loss": 0.2886199951171875, "step": 1771 }, { "epoch": 0.6411288498937181, "grad_norm": 1.8756471872329712, "learning_rate": 1.6310491312990994e-05, "loss": 0.27797698974609375, "step": 1772 }, { "epoch": 0.6414906607570893, "grad_norm": 1.5042024850845337, "learning_rate": 1.62830732965671e-05, "loss": 0.27515411376953125, "step": 1773 }, { "epoch": 0.6418524716204604, "grad_norm": 1.1155556440353394, "learning_rate": 1.625566251361678e-05, "loss": 0.26132965087890625, "step": 1774 }, { "epoch": 0.6422142824838316, "grad_norm": 1.0185623168945312, "learning_rate": 1.6228259017483895e-05, "loss": 0.29615020751953125, "step": 1775 }, { "epoch": 0.6425760933472028, "grad_norm": 1.3759535551071167, "learning_rate": 1.6200862861498104e-05, "loss": 0.287353515625, "step": 1776 }, { "epoch": 0.642937904210574, "grad_norm": 1.717901587486267, "learning_rate": 1.6173474098974796e-05, "loss": 0.28031158447265625, "step": 1777 }, { "epoch": 0.6432997150739451, "grad_norm": 0.8353276252746582, "learning_rate": 1.614609278321496e-05, "loss": 0.28267669677734375, "step": 1778 }, { "epoch": 0.6436615259373163, "grad_norm": 3.5144083499908447, "learning_rate": 1.6118718967505103e-05, "loss": 0.33135223388671875, "step": 1779 }, { "epoch": 0.6440233368006875, "grad_norm": 0.6789864897727966, "learning_rate": 1.6091352705117126e-05, "loss": 0.3018035888671875, "step": 1780 }, { "epoch": 0.6443851476640586, "grad_norm": 0.6875971555709839, "learning_rate": 1.6063994049308245e-05, "loss": 0.281341552734375, "step": 1781 }, { "epoch": 0.6447469585274298, "grad_norm": 0.9292905330657959, "learning_rate": 1.603664305332086e-05, "loss": 0.3194122314453125, "step": 1782 }, { "epoch": 0.645108769390801, "grad_norm": 0.7378065586090088, "learning_rate": 1.600929977038246e-05, "loss": 0.2757415771484375, "step": 1783 }, { "epoch": 0.6454705802541721, "grad_norm": 1.2755624055862427, "learning_rate": 1.5981964253705543e-05, "loss": 0.29192352294921875, "step": 1784 }, { "epoch": 0.6458323911175433, "grad_norm": 0.8967157602310181, "learning_rate": 1.5954636556487483e-05, "loss": 0.2950897216796875, "step": 1785 }, { "epoch": 0.6461942019809145, "grad_norm": 0.753318727016449, "learning_rate": 1.5927316731910432e-05, "loss": 0.303924560546875, "step": 1786 }, { "epoch": 0.6465560128442857, "grad_norm": 0.8727132678031921, "learning_rate": 1.590000483314123e-05, "loss": 0.27217864990234375, "step": 1787 }, { "epoch": 0.6469178237076568, "grad_norm": 0.6492746472358704, "learning_rate": 1.5872700913331278e-05, "loss": 0.3028564453125, "step": 1788 }, { "epoch": 0.647279634571028, "grad_norm": 0.4472762942314148, "learning_rate": 1.584540502561646e-05, "loss": 0.2998504638671875, "step": 1789 }, { "epoch": 0.6476414454343992, "grad_norm": 2.1910009384155273, "learning_rate": 1.5818117223117044e-05, "loss": 0.34935760498046875, "step": 1790 }, { "epoch": 0.6480032562977703, "grad_norm": 1.5292940139770508, "learning_rate": 1.5790837558937532e-05, "loss": 0.2895355224609375, "step": 1791 }, { "epoch": 0.6483650671611415, "grad_norm": 1.6034893989562988, "learning_rate": 1.5763566086166608e-05, "loss": 0.3179779052734375, "step": 1792 }, { "epoch": 0.6487268780245127, "grad_norm": 0.6216861605644226, "learning_rate": 1.5736302857877004e-05, "loss": 0.300201416015625, "step": 1793 }, { "epoch": 0.6490886888878838, "grad_norm": 1.9740725755691528, "learning_rate": 1.5709047927125425e-05, "loss": 0.3049774169921875, "step": 1794 }, { "epoch": 0.649450499751255, "grad_norm": 0.9930004477500916, "learning_rate": 1.568180134695241e-05, "loss": 0.302490234375, "step": 1795 }, { "epoch": 0.6498123106146262, "grad_norm": 0.4280160963535309, "learning_rate": 1.5654563170382246e-05, "loss": 0.29485321044921875, "step": 1796 }, { "epoch": 0.6501741214779974, "grad_norm": 0.573508083820343, "learning_rate": 1.562733345042289e-05, "loss": 0.27584075927734375, "step": 1797 }, { "epoch": 0.6505359323413685, "grad_norm": 2.428520679473877, "learning_rate": 1.5600112240065816e-05, "loss": 0.3310546875, "step": 1798 }, { "epoch": 0.6508977432047397, "grad_norm": 0.589165985584259, "learning_rate": 1.5572899592285957e-05, "loss": 0.29914093017578125, "step": 1799 }, { "epoch": 0.6512595540681109, "grad_norm": 3.104466199874878, "learning_rate": 1.554569556004156e-05, "loss": 0.3442497253417969, "step": 1800 }, { "epoch": 0.651621364931482, "grad_norm": 1.0047636032104492, "learning_rate": 1.5518500196274123e-05, "loss": 0.3262481689453125, "step": 1801 }, { "epoch": 0.6519831757948532, "grad_norm": 0.4986249804496765, "learning_rate": 1.5491313553908266e-05, "loss": 0.302093505859375, "step": 1802 }, { "epoch": 0.6523449866582244, "grad_norm": 1.4546124935150146, "learning_rate": 1.5464135685851646e-05, "loss": 0.299468994140625, "step": 1803 }, { "epoch": 0.6527067975215955, "grad_norm": 1.1268067359924316, "learning_rate": 1.543696664499484e-05, "loss": 0.2786102294921875, "step": 1804 }, { "epoch": 0.6530686083849667, "grad_norm": 1.348120927810669, "learning_rate": 1.5409806484211232e-05, "loss": 0.27486419677734375, "step": 1805 }, { "epoch": 0.6534304192483379, "grad_norm": 1.071722388267517, "learning_rate": 1.5382655256356942e-05, "loss": 0.32419586181640625, "step": 1806 }, { "epoch": 0.6537922301117091, "grad_norm": 0.7507110834121704, "learning_rate": 1.5355513014270707e-05, "loss": 0.31636810302734375, "step": 1807 }, { "epoch": 0.6541540409750802, "grad_norm": 0.5411331057548523, "learning_rate": 1.5328379810773757e-05, "loss": 0.31056976318359375, "step": 1808 }, { "epoch": 0.6545158518384514, "grad_norm": 1.5874671936035156, "learning_rate": 1.5301255698669762e-05, "loss": 0.26433563232421875, "step": 1809 }, { "epoch": 0.6548776627018226, "grad_norm": 3.1045305728912354, "learning_rate": 1.527414073074468e-05, "loss": 0.3381614685058594, "step": 1810 }, { "epoch": 0.6552394735651937, "grad_norm": 0.8007964491844177, "learning_rate": 1.524703495976667e-05, "loss": 0.27423858642578125, "step": 1811 }, { "epoch": 0.6556012844285649, "grad_norm": 0.6092385649681091, "learning_rate": 1.5219938438486004e-05, "loss": 0.2785072326660156, "step": 1812 }, { "epoch": 0.6559630952919362, "grad_norm": 1.3292436599731445, "learning_rate": 1.5192851219634948e-05, "loss": 0.30352020263671875, "step": 1813 }, { "epoch": 0.6563249061553074, "grad_norm": 0.7102413773536682, "learning_rate": 1.5165773355927665e-05, "loss": 0.284698486328125, "step": 1814 }, { "epoch": 0.6566867170186785, "grad_norm": 2.8399014472961426, "learning_rate": 1.5138704900060109e-05, "loss": 0.351470947265625, "step": 1815 }, { "epoch": 0.6570485278820497, "grad_norm": 0.607560396194458, "learning_rate": 1.5111645904709936e-05, "loss": 0.325225830078125, "step": 1816 }, { "epoch": 0.6574103387454209, "grad_norm": 0.5328109860420227, "learning_rate": 1.5084596422536377e-05, "loss": 0.29430389404296875, "step": 1817 }, { "epoch": 0.657772149608792, "grad_norm": 1.6454776525497437, "learning_rate": 1.5057556506180165e-05, "loss": 0.279205322265625, "step": 1818 }, { "epoch": 0.6581339604721632, "grad_norm": 0.6656097173690796, "learning_rate": 1.5030526208263398e-05, "loss": 0.305023193359375, "step": 1819 }, { "epoch": 0.6584957713355344, "grad_norm": 1.5917710065841675, "learning_rate": 1.5003505581389476e-05, "loss": 0.26613616943359375, "step": 1820 }, { "epoch": 0.6588575821989056, "grad_norm": 1.26250422000885, "learning_rate": 1.4976494678142956e-05, "loss": 0.28011322021484375, "step": 1821 }, { "epoch": 0.6592193930622767, "grad_norm": 0.8559413552284241, "learning_rate": 1.49494935510895e-05, "loss": 0.300689697265625, "step": 1822 }, { "epoch": 0.6595812039256479, "grad_norm": 1.9032177925109863, "learning_rate": 1.4922502252775726e-05, "loss": 0.2924957275390625, "step": 1823 }, { "epoch": 0.6599430147890191, "grad_norm": 0.639337420463562, "learning_rate": 1.4895520835729116e-05, "loss": 0.28603363037109375, "step": 1824 }, { "epoch": 0.6603048256523902, "grad_norm": 0.97772616147995, "learning_rate": 1.4868549352457945e-05, "loss": 0.2681884765625, "step": 1825 }, { "epoch": 0.6606666365157614, "grad_norm": 0.5707042813301086, "learning_rate": 1.4841587855451143e-05, "loss": 0.286376953125, "step": 1826 }, { "epoch": 0.6610284473791326, "grad_norm": 3.480649948120117, "learning_rate": 1.4814636397178203e-05, "loss": 0.326934814453125, "step": 1827 }, { "epoch": 0.6613902582425037, "grad_norm": 0.5718881487846375, "learning_rate": 1.4787695030089086e-05, "loss": 0.27850341796875, "step": 1828 }, { "epoch": 0.6617520691058749, "grad_norm": 1.398424506187439, "learning_rate": 1.4760763806614122e-05, "loss": 0.308349609375, "step": 1829 }, { "epoch": 0.6621138799692461, "grad_norm": 3.0212531089782715, "learning_rate": 1.473384277916389e-05, "loss": 0.31534576416015625, "step": 1830 }, { "epoch": 0.6624756908326173, "grad_norm": 1.0407787561416626, "learning_rate": 1.4706932000129126e-05, "loss": 0.3101959228515625, "step": 1831 }, { "epoch": 0.6628375016959884, "grad_norm": 1.9225587844848633, "learning_rate": 1.4680031521880626e-05, "loss": 0.29168701171875, "step": 1832 }, { "epoch": 0.6631993125593596, "grad_norm": 1.7230968475341797, "learning_rate": 1.4653141396769137e-05, "loss": 0.295806884765625, "step": 1833 }, { "epoch": 0.6635611234227308, "grad_norm": 1.0379807949066162, "learning_rate": 1.4626261677125276e-05, "loss": 0.28882598876953125, "step": 1834 }, { "epoch": 0.6639229342861019, "grad_norm": 1.206425428390503, "learning_rate": 1.4599392415259384e-05, "loss": 0.30633544921875, "step": 1835 }, { "epoch": 0.6642847451494731, "grad_norm": 2.4167473316192627, "learning_rate": 1.4572533663461456e-05, "loss": 0.28786468505859375, "step": 1836 }, { "epoch": 0.6646465560128443, "grad_norm": 1.4335782527923584, "learning_rate": 1.4545685474001041e-05, "loss": 0.30049896240234375, "step": 1837 }, { "epoch": 0.6650083668762155, "grad_norm": 0.6019370555877686, "learning_rate": 1.4518847899127138e-05, "loss": 0.3096771240234375, "step": 1838 }, { "epoch": 0.6653701777395866, "grad_norm": 0.7620216608047485, "learning_rate": 1.4492020991068072e-05, "loss": 0.310943603515625, "step": 1839 }, { "epoch": 0.6657319886029578, "grad_norm": 0.7491160035133362, "learning_rate": 1.4465204802031423e-05, "loss": 0.3428955078125, "step": 1840 }, { "epoch": 0.666093799466329, "grad_norm": 2.360295534133911, "learning_rate": 1.4438399384203897e-05, "loss": 0.2889556884765625, "step": 1841 }, { "epoch": 0.6664556103297001, "grad_norm": 1.1191056966781616, "learning_rate": 1.4411604789751259e-05, "loss": 0.30863189697265625, "step": 1842 }, { "epoch": 0.6668174211930713, "grad_norm": 4.385706424713135, "learning_rate": 1.4384821070818191e-05, "loss": 0.37225341796875, "step": 1843 }, { "epoch": 0.6671792320564425, "grad_norm": 0.9002503156661987, "learning_rate": 1.435804827952822e-05, "loss": 0.29027557373046875, "step": 1844 }, { "epoch": 0.6675410429198136, "grad_norm": 1.9017900228500366, "learning_rate": 1.4331286467983612e-05, "loss": 0.27904510498046875, "step": 1845 }, { "epoch": 0.6679028537831848, "grad_norm": 1.798837423324585, "learning_rate": 1.4304535688265247e-05, "loss": 0.3024749755859375, "step": 1846 }, { "epoch": 0.668264664646556, "grad_norm": 0.4912078082561493, "learning_rate": 1.4277795992432557e-05, "loss": 0.3080902099609375, "step": 1847 }, { "epoch": 0.6686264755099272, "grad_norm": 1.3905181884765625, "learning_rate": 1.4251067432523393e-05, "loss": 0.28234100341796875, "step": 1848 }, { "epoch": 0.6689882863732983, "grad_norm": 0.5152089595794678, "learning_rate": 1.4224350060553937e-05, "loss": 0.31764984130859375, "step": 1849 }, { "epoch": 0.6693500972366695, "grad_norm": 0.5480913519859314, "learning_rate": 1.419764392851859e-05, "loss": 0.29119110107421875, "step": 1850 }, { "epoch": 0.6697119081000407, "grad_norm": 2.2378838062286377, "learning_rate": 1.4170949088389891e-05, "loss": 0.28234100341796875, "step": 1851 }, { "epoch": 0.6700737189634118, "grad_norm": 0.47529688477516174, "learning_rate": 1.4144265592118402e-05, "loss": 0.27553558349609375, "step": 1852 }, { "epoch": 0.670435529826783, "grad_norm": 0.6186584234237671, "learning_rate": 1.4117593491632606e-05, "loss": 0.27874755859375, "step": 1853 }, { "epoch": 0.6707973406901542, "grad_norm": 0.749733030796051, "learning_rate": 1.40909328388388e-05, "loss": 0.27375030517578125, "step": 1854 }, { "epoch": 0.6711591515535253, "grad_norm": 2.0929818153381348, "learning_rate": 1.406428368562102e-05, "loss": 0.28692626953125, "step": 1855 }, { "epoch": 0.6715209624168965, "grad_norm": 0.8989866375923157, "learning_rate": 1.4037646083840912e-05, "loss": 0.27349090576171875, "step": 1856 }, { "epoch": 0.6718827732802677, "grad_norm": 0.5513809323310852, "learning_rate": 1.4011020085337641e-05, "loss": 0.2786102294921875, "step": 1857 }, { "epoch": 0.672244584143639, "grad_norm": 0.6778003573417664, "learning_rate": 1.3984405741927798e-05, "loss": 0.2830657958984375, "step": 1858 }, { "epoch": 0.6726063950070101, "grad_norm": 1.2918676137924194, "learning_rate": 1.3957803105405287e-05, "loss": 0.31451416015625, "step": 1859 }, { "epoch": 0.6729682058703813, "grad_norm": 1.0771679878234863, "learning_rate": 1.3931212227541235e-05, "loss": 0.29229736328125, "step": 1860 }, { "epoch": 0.6733300167337525, "grad_norm": 0.6623329520225525, "learning_rate": 1.390463316008388e-05, "loss": 0.27898406982421875, "step": 1861 }, { "epoch": 0.6736918275971236, "grad_norm": 1.495482325553894, "learning_rate": 1.3878065954758473e-05, "loss": 0.26702117919921875, "step": 1862 }, { "epoch": 0.6740536384604948, "grad_norm": 0.9737681746482849, "learning_rate": 1.3851510663267184e-05, "loss": 0.30755615234375, "step": 1863 }, { "epoch": 0.674415449323866, "grad_norm": 2.4249513149261475, "learning_rate": 1.3824967337289006e-05, "loss": 0.3329010009765625, "step": 1864 }, { "epoch": 0.6747772601872372, "grad_norm": 0.5945470929145813, "learning_rate": 1.379843602847963e-05, "loss": 0.2916259765625, "step": 1865 }, { "epoch": 0.6751390710506083, "grad_norm": 0.7130574584007263, "learning_rate": 1.3771916788471371e-05, "loss": 0.2920989990234375, "step": 1866 }, { "epoch": 0.6755008819139795, "grad_norm": 0.6342980861663818, "learning_rate": 1.3745409668873052e-05, "loss": 0.27582550048828125, "step": 1867 }, { "epoch": 0.6758626927773507, "grad_norm": 0.8562067747116089, "learning_rate": 1.3718914721269915e-05, "loss": 0.28775787353515625, "step": 1868 }, { "epoch": 0.6762245036407218, "grad_norm": 0.8934643268585205, "learning_rate": 1.3692431997223515e-05, "loss": 0.25424957275390625, "step": 1869 }, { "epoch": 0.676586314504093, "grad_norm": 1.3008344173431396, "learning_rate": 1.36659615482716e-05, "loss": 0.26177215576171875, "step": 1870 }, { "epoch": 0.6769481253674642, "grad_norm": 0.4461919665336609, "learning_rate": 1.3639503425928053e-05, "loss": 0.2693595886230469, "step": 1871 }, { "epoch": 0.6773099362308354, "grad_norm": 1.0597554445266724, "learning_rate": 1.361305768168276e-05, "loss": 0.28696441650390625, "step": 1872 }, { "epoch": 0.6776717470942065, "grad_norm": 1.1273095607757568, "learning_rate": 1.358662436700152e-05, "loss": 0.3224945068359375, "step": 1873 }, { "epoch": 0.6780335579575777, "grad_norm": 2.1342391967773438, "learning_rate": 1.3560203533325933e-05, "loss": 0.29734039306640625, "step": 1874 }, { "epoch": 0.6783953688209489, "grad_norm": 0.7360700368881226, "learning_rate": 1.3533795232073321e-05, "loss": 0.3026275634765625, "step": 1875 }, { "epoch": 0.67875717968432, "grad_norm": 1.0010993480682373, "learning_rate": 1.3507399514636608e-05, "loss": 0.29093170166015625, "step": 1876 }, { "epoch": 0.6791189905476912, "grad_norm": 1.1599187850952148, "learning_rate": 1.3481016432384242e-05, "loss": 0.2641754150390625, "step": 1877 }, { "epoch": 0.6794808014110624, "grad_norm": 1.1629358530044556, "learning_rate": 1.3454646036660068e-05, "loss": 0.30661773681640625, "step": 1878 }, { "epoch": 0.6798426122744335, "grad_norm": 0.969555675983429, "learning_rate": 1.342828837878325e-05, "loss": 0.28432464599609375, "step": 1879 }, { "epoch": 0.6802044231378047, "grad_norm": 0.38826027512550354, "learning_rate": 1.3401943510048154e-05, "loss": 0.2769317626953125, "step": 1880 }, { "epoch": 0.6805662340011759, "grad_norm": 0.5199748873710632, "learning_rate": 1.3375611481724269e-05, "loss": 0.2866058349609375, "step": 1881 }, { "epoch": 0.680928044864547, "grad_norm": 1.005450963973999, "learning_rate": 1.334929234505609e-05, "loss": 0.25467681884765625, "step": 1882 }, { "epoch": 0.6812898557279182, "grad_norm": 0.4405005872249603, "learning_rate": 1.3322986151263013e-05, "loss": 0.28336334228515625, "step": 1883 }, { "epoch": 0.6816516665912894, "grad_norm": 0.9444582462310791, "learning_rate": 1.3296692951539273e-05, "loss": 0.250885009765625, "step": 1884 }, { "epoch": 0.6820134774546606, "grad_norm": 1.3236746788024902, "learning_rate": 1.327041279705379e-05, "loss": 0.2864837646484375, "step": 1885 }, { "epoch": 0.6823752883180317, "grad_norm": 1.6627141237258911, "learning_rate": 1.3244145738950116e-05, "loss": 0.29016876220703125, "step": 1886 }, { "epoch": 0.6827370991814029, "grad_norm": 0.44942906498908997, "learning_rate": 1.32178918283463e-05, "loss": 0.28141021728515625, "step": 1887 }, { "epoch": 0.6830989100447741, "grad_norm": 1.1860507726669312, "learning_rate": 1.319165111633482e-05, "loss": 0.3002471923828125, "step": 1888 }, { "epoch": 0.6834607209081452, "grad_norm": 0.46655505895614624, "learning_rate": 1.3165423653982452e-05, "loss": 0.25936126708984375, "step": 1889 }, { "epoch": 0.6838225317715164, "grad_norm": 1.86506187915802, "learning_rate": 1.3139209492330202e-05, "loss": 0.3155975341796875, "step": 1890 }, { "epoch": 0.6841843426348876, "grad_norm": 1.3893734216690063, "learning_rate": 1.3113008682393185e-05, "loss": 0.272979736328125, "step": 1891 }, { "epoch": 0.6845461534982588, "grad_norm": 0.745888888835907, "learning_rate": 1.308682127516053e-05, "loss": 0.29006195068359375, "step": 1892 }, { "epoch": 0.6849079643616299, "grad_norm": 0.6235277056694031, "learning_rate": 1.3060647321595293e-05, "loss": 0.31145477294921875, "step": 1893 }, { "epoch": 0.6852697752250011, "grad_norm": 1.2837755680084229, "learning_rate": 1.3034486872634334e-05, "loss": 0.268951416015625, "step": 1894 }, { "epoch": 0.6856315860883723, "grad_norm": 2.2188644409179688, "learning_rate": 1.3008339979188243e-05, "loss": 0.2595405578613281, "step": 1895 }, { "epoch": 0.6859933969517434, "grad_norm": 1.6054277420043945, "learning_rate": 1.2982206692141217e-05, "loss": 0.29319000244140625, "step": 1896 }, { "epoch": 0.6863552078151146, "grad_norm": 1.7851876020431519, "learning_rate": 1.2956087062350997e-05, "loss": 0.26064300537109375, "step": 1897 }, { "epoch": 0.6867170186784858, "grad_norm": 2.4257652759552, "learning_rate": 1.2929981140648723e-05, "loss": 0.31874847412109375, "step": 1898 }, { "epoch": 0.687078829541857, "grad_norm": 0.5358285903930664, "learning_rate": 1.2903888977838865e-05, "loss": 0.30321502685546875, "step": 1899 }, { "epoch": 0.6874406404052281, "grad_norm": 1.187419056892395, "learning_rate": 1.2877810624699126e-05, "loss": 0.27140045166015625, "step": 1900 }, { "epoch": 0.6878024512685993, "grad_norm": 3.352274179458618, "learning_rate": 1.2851746131980317e-05, "loss": 0.3161773681640625, "step": 1901 }, { "epoch": 0.6881642621319705, "grad_norm": 0.6482881307601929, "learning_rate": 1.2825695550406283e-05, "loss": 0.280517578125, "step": 1902 }, { "epoch": 0.6885260729953416, "grad_norm": 1.5150667428970337, "learning_rate": 1.2799658930673809e-05, "loss": 0.2764739990234375, "step": 1903 }, { "epoch": 0.6888878838587129, "grad_norm": 0.4436072111129761, "learning_rate": 1.2773636323452491e-05, "loss": 0.2773284912109375, "step": 1904 }, { "epoch": 0.6892496947220841, "grad_norm": 1.6535348892211914, "learning_rate": 1.2747627779384661e-05, "loss": 0.28730010986328125, "step": 1905 }, { "epoch": 0.6896115055854553, "grad_norm": 1.2023143768310547, "learning_rate": 1.2721633349085289e-05, "loss": 0.26726531982421875, "step": 1906 }, { "epoch": 0.6899733164488264, "grad_norm": 0.5597810745239258, "learning_rate": 1.2695653083141872e-05, "loss": 0.296661376953125, "step": 1907 }, { "epoch": 0.6903351273121976, "grad_norm": 0.7866974472999573, "learning_rate": 1.2669687032114335e-05, "loss": 0.30423736572265625, "step": 1908 }, { "epoch": 0.6906969381755688, "grad_norm": 0.7530738711357117, "learning_rate": 1.2643735246534963e-05, "loss": 0.3004302978515625, "step": 1909 }, { "epoch": 0.6910587490389399, "grad_norm": 0.5572252869606018, "learning_rate": 1.261779777690826e-05, "loss": 0.26953125, "step": 1910 }, { "epoch": 0.6914205599023111, "grad_norm": 0.5818307399749756, "learning_rate": 1.2591874673710866e-05, "loss": 0.29727935791015625, "step": 1911 }, { "epoch": 0.6917823707656823, "grad_norm": 0.5290035009384155, "learning_rate": 1.2565965987391483e-05, "loss": 0.3114776611328125, "step": 1912 }, { "epoch": 0.6921441816290534, "grad_norm": 1.0386043787002563, "learning_rate": 1.2540071768370733e-05, "loss": 0.2761993408203125, "step": 1913 }, { "epoch": 0.6925059924924246, "grad_norm": 1.8254995346069336, "learning_rate": 1.2514192067041102e-05, "loss": 0.277191162109375, "step": 1914 }, { "epoch": 0.6928678033557958, "grad_norm": 0.4173400104045868, "learning_rate": 1.248832693376681e-05, "loss": 0.28043365478515625, "step": 1915 }, { "epoch": 0.693229614219167, "grad_norm": 1.2294261455535889, "learning_rate": 1.2462476418883738e-05, "loss": 0.3163909912109375, "step": 1916 }, { "epoch": 0.6935914250825381, "grad_norm": 2.286785840988159, "learning_rate": 1.243664057269931e-05, "loss": 0.2637176513671875, "step": 1917 }, { "epoch": 0.6939532359459093, "grad_norm": 0.8184279203414917, "learning_rate": 1.24108194454924e-05, "loss": 0.2976226806640625, "step": 1918 }, { "epoch": 0.6943150468092805, "grad_norm": 0.7691208124160767, "learning_rate": 1.2385013087513252e-05, "loss": 0.30450439453125, "step": 1919 }, { "epoch": 0.6946768576726516, "grad_norm": 2.2049622535705566, "learning_rate": 1.2359221548983355e-05, "loss": 0.34848785400390625, "step": 1920 }, { "epoch": 0.6950386685360228, "grad_norm": 2.8165414333343506, "learning_rate": 1.2333444880095357e-05, "loss": 0.3437957763671875, "step": 1921 }, { "epoch": 0.695400479399394, "grad_norm": 0.9320873022079468, "learning_rate": 1.2307683131012986e-05, "loss": 0.296356201171875, "step": 1922 }, { "epoch": 0.6957622902627651, "grad_norm": 0.841556966304779, "learning_rate": 1.2281936351870915e-05, "loss": 0.294342041015625, "step": 1923 }, { "epoch": 0.6961241011261363, "grad_norm": 3.1980738639831543, "learning_rate": 1.2256204592774687e-05, "loss": 0.2780914306640625, "step": 1924 }, { "epoch": 0.6964859119895075, "grad_norm": 0.9389556646347046, "learning_rate": 1.2230487903800632e-05, "loss": 0.285064697265625, "step": 1925 }, { "epoch": 0.6968477228528787, "grad_norm": 1.0230523347854614, "learning_rate": 1.2204786334995724e-05, "loss": 0.2755699157714844, "step": 1926 }, { "epoch": 0.6972095337162498, "grad_norm": 1.6127142906188965, "learning_rate": 1.2179099936377534e-05, "loss": 0.2890167236328125, "step": 1927 }, { "epoch": 0.697571344579621, "grad_norm": 0.732151985168457, "learning_rate": 1.2153428757934105e-05, "loss": 0.2842254638671875, "step": 1928 }, { "epoch": 0.6979331554429922, "grad_norm": 0.6219437718391418, "learning_rate": 1.2127772849623858e-05, "loss": 0.28623199462890625, "step": 1929 }, { "epoch": 0.6982949663063633, "grad_norm": 0.6758730411529541, "learning_rate": 1.2102132261375494e-05, "loss": 0.2829475402832031, "step": 1930 }, { "epoch": 0.6986567771697345, "grad_norm": 1.9708343744277954, "learning_rate": 1.2076507043087901e-05, "loss": 0.29859161376953125, "step": 1931 }, { "epoch": 0.6990185880331057, "grad_norm": 0.5874659419059753, "learning_rate": 1.2050897244630066e-05, "loss": 0.2722320556640625, "step": 1932 }, { "epoch": 0.6993803988964769, "grad_norm": 2.142900228500366, "learning_rate": 1.202530291584096e-05, "loss": 0.29903411865234375, "step": 1933 }, { "epoch": 0.699742209759848, "grad_norm": 1.4885762929916382, "learning_rate": 1.1999724106529432e-05, "loss": 0.2975006103515625, "step": 1934 }, { "epoch": 0.7001040206232192, "grad_norm": 0.6597947478294373, "learning_rate": 1.1974160866474165e-05, "loss": 0.289764404296875, "step": 1935 }, { "epoch": 0.7004658314865904, "grad_norm": 0.7343711853027344, "learning_rate": 1.1948613245423516e-05, "loss": 0.3107757568359375, "step": 1936 }, { "epoch": 0.7008276423499615, "grad_norm": 0.49316543340682983, "learning_rate": 1.1923081293095444e-05, "loss": 0.29619598388671875, "step": 1937 }, { "epoch": 0.7011894532133327, "grad_norm": 1.0734009742736816, "learning_rate": 1.1897565059177439e-05, "loss": 0.29296875, "step": 1938 }, { "epoch": 0.7015512640767039, "grad_norm": 2.0416882038116455, "learning_rate": 1.1872064593326375e-05, "loss": 0.2751617431640625, "step": 1939 }, { "epoch": 0.701913074940075, "grad_norm": 1.0863078832626343, "learning_rate": 1.1846579945168455e-05, "loss": 0.32586669921875, "step": 1940 }, { "epoch": 0.7022748858034462, "grad_norm": 4.063940048217773, "learning_rate": 1.1821111164299097e-05, "loss": 0.3345489501953125, "step": 1941 }, { "epoch": 0.7026366966668174, "grad_norm": 0.9210754036903381, "learning_rate": 1.1795658300282839e-05, "loss": 0.2783203125, "step": 1942 }, { "epoch": 0.7029985075301886, "grad_norm": 1.3846694231033325, "learning_rate": 1.1770221402653243e-05, "loss": 0.28131866455078125, "step": 1943 }, { "epoch": 0.7033603183935597, "grad_norm": 1.37974214553833, "learning_rate": 1.1744800520912794e-05, "loss": 0.271026611328125, "step": 1944 }, { "epoch": 0.7037221292569309, "grad_norm": 1.717216968536377, "learning_rate": 1.1719395704532818e-05, "loss": 0.2673187255859375, "step": 1945 }, { "epoch": 0.7040839401203021, "grad_norm": 2.3895423412323, "learning_rate": 1.1694007002953364e-05, "loss": 0.3205413818359375, "step": 1946 }, { "epoch": 0.7044457509836732, "grad_norm": 2.778700113296509, "learning_rate": 1.166863446558315e-05, "loss": 0.26857757568359375, "step": 1947 }, { "epoch": 0.7048075618470444, "grad_norm": 0.8778892159461975, "learning_rate": 1.1643278141799391e-05, "loss": 0.29865264892578125, "step": 1948 }, { "epoch": 0.7051693727104156, "grad_norm": 0.6920444369316101, "learning_rate": 1.1617938080947786e-05, "loss": 0.296875, "step": 1949 }, { "epoch": 0.7055311835737869, "grad_norm": 1.3498008251190186, "learning_rate": 1.159261433234237e-05, "loss": 0.2606201171875, "step": 1950 }, { "epoch": 0.705892994437158, "grad_norm": 0.4344874322414398, "learning_rate": 1.1567306945265435e-05, "loss": 0.28665924072265625, "step": 1951 }, { "epoch": 0.7062548053005292, "grad_norm": 1.0302355289459229, "learning_rate": 1.1542015968967431e-05, "loss": 0.26690673828125, "step": 1952 }, { "epoch": 0.7066166161639004, "grad_norm": 0.45472222566604614, "learning_rate": 1.1516741452666868e-05, "loss": 0.2723731994628906, "step": 1953 }, { "epoch": 0.7069784270272715, "grad_norm": 0.7834783792495728, "learning_rate": 1.1491483445550233e-05, "loss": 0.2692375183105469, "step": 1954 }, { "epoch": 0.7073402378906427, "grad_norm": 1.9879730939865112, "learning_rate": 1.146624199677187e-05, "loss": 0.29998016357421875, "step": 1955 }, { "epoch": 0.7077020487540139, "grad_norm": 0.8603039383888245, "learning_rate": 1.144101715545392e-05, "loss": 0.308441162109375, "step": 1956 }, { "epoch": 0.708063859617385, "grad_norm": 0.7491096258163452, "learning_rate": 1.1415808970686188e-05, "loss": 0.2741241455078125, "step": 1957 }, { "epoch": 0.7084256704807562, "grad_norm": 1.361840009689331, "learning_rate": 1.1390617491526067e-05, "loss": 0.2910614013671875, "step": 1958 }, { "epoch": 0.7087874813441274, "grad_norm": 2.098140239715576, "learning_rate": 1.1365442766998433e-05, "loss": 0.2552642822265625, "step": 1959 }, { "epoch": 0.7091492922074986, "grad_norm": 1.6238418817520142, "learning_rate": 1.1340284846095582e-05, "loss": 0.31760406494140625, "step": 1960 }, { "epoch": 0.7095111030708697, "grad_norm": 0.6774130463600159, "learning_rate": 1.1315143777777076e-05, "loss": 0.292022705078125, "step": 1961 }, { "epoch": 0.7098729139342409, "grad_norm": 1.4175631999969482, "learning_rate": 1.12900196109697e-05, "loss": 0.2745361328125, "step": 1962 }, { "epoch": 0.7102347247976121, "grad_norm": 1.5567262172698975, "learning_rate": 1.126491239456734e-05, "loss": 0.3267822265625, "step": 1963 }, { "epoch": 0.7105965356609832, "grad_norm": 0.8996405601501465, "learning_rate": 1.1239822177430887e-05, "loss": 0.3193511962890625, "step": 1964 }, { "epoch": 0.7109583465243544, "grad_norm": 1.9378345012664795, "learning_rate": 1.121474900838818e-05, "loss": 0.32047271728515625, "step": 1965 }, { "epoch": 0.7113201573877256, "grad_norm": 1.56272554397583, "learning_rate": 1.1189692936233847e-05, "loss": 0.32949066162109375, "step": 1966 }, { "epoch": 0.7116819682510968, "grad_norm": 2.4073646068573, "learning_rate": 1.1164654009729262e-05, "loss": 0.29486083984375, "step": 1967 }, { "epoch": 0.7120437791144679, "grad_norm": 2.0144762992858887, "learning_rate": 1.1139632277602423e-05, "loss": 0.30664825439453125, "step": 1968 }, { "epoch": 0.7124055899778391, "grad_norm": 2.546031951904297, "learning_rate": 1.1114627788547874e-05, "loss": 0.29898834228515625, "step": 1969 }, { "epoch": 0.7127674008412103, "grad_norm": 3.3443851470947266, "learning_rate": 1.1089640591226601e-05, "loss": 0.28545379638671875, "step": 1970 }, { "epoch": 0.7131292117045814, "grad_norm": 2.3311495780944824, "learning_rate": 1.1064670734265924e-05, "loss": 0.27237701416015625, "step": 1971 }, { "epoch": 0.7134910225679526, "grad_norm": 1.8387603759765625, "learning_rate": 1.1039718266259447e-05, "loss": 0.32225799560546875, "step": 1972 }, { "epoch": 0.7138528334313238, "grad_norm": 0.5661828517913818, "learning_rate": 1.1014783235766908e-05, "loss": 0.31927490234375, "step": 1973 }, { "epoch": 0.714214644294695, "grad_norm": 1.3562408685684204, "learning_rate": 1.0989865691314114e-05, "loss": 0.2639923095703125, "step": 1974 }, { "epoch": 0.7145764551580661, "grad_norm": 2.937654972076416, "learning_rate": 1.0964965681392861e-05, "loss": 0.34148406982421875, "step": 1975 }, { "epoch": 0.7149382660214373, "grad_norm": 0.6782722473144531, "learning_rate": 1.0940083254460795e-05, "loss": 0.3165283203125, "step": 1976 }, { "epoch": 0.7153000768848085, "grad_norm": 1.3421870470046997, "learning_rate": 1.0915218458941363e-05, "loss": 0.2919921875, "step": 1977 }, { "epoch": 0.7156618877481796, "grad_norm": 0.6994495391845703, "learning_rate": 1.0890371343223692e-05, "loss": 0.2917633056640625, "step": 1978 }, { "epoch": 0.7160236986115508, "grad_norm": 1.0663303136825562, "learning_rate": 1.0865541955662497e-05, "loss": 0.2981109619140625, "step": 1979 }, { "epoch": 0.716385509474922, "grad_norm": 0.829656720161438, "learning_rate": 1.0840730344578005e-05, "loss": 0.28714752197265625, "step": 1980 }, { "epoch": 0.7167473203382931, "grad_norm": 0.7688905000686646, "learning_rate": 1.081593655825583e-05, "loss": 0.2631988525390625, "step": 1981 }, { "epoch": 0.7171091312016643, "grad_norm": 0.8644232749938965, "learning_rate": 1.0791160644946932e-05, "loss": 0.2881336212158203, "step": 1982 }, { "epoch": 0.7174709420650355, "grad_norm": 1.3190501928329468, "learning_rate": 1.0766402652867454e-05, "loss": 0.28195953369140625, "step": 1983 }, { "epoch": 0.7178327529284066, "grad_norm": 0.792522132396698, "learning_rate": 1.0741662630198664e-05, "loss": 0.29128265380859375, "step": 1984 }, { "epoch": 0.7181945637917778, "grad_norm": 0.992865264415741, "learning_rate": 1.071694062508689e-05, "loss": 0.27388763427734375, "step": 1985 }, { "epoch": 0.718556374655149, "grad_norm": 0.7354349493980408, "learning_rate": 1.0692236685643366e-05, "loss": 0.26235198974609375, "step": 1986 }, { "epoch": 0.7189181855185202, "grad_norm": 2.411576986312866, "learning_rate": 1.0667550859944185e-05, "loss": 0.3293609619140625, "step": 1987 }, { "epoch": 0.7192799963818913, "grad_norm": 0.867729902267456, "learning_rate": 1.0642883196030178e-05, "loss": 0.2689056396484375, "step": 1988 }, { "epoch": 0.7196418072452625, "grad_norm": 0.42565011978149414, "learning_rate": 1.061823374190684e-05, "loss": 0.27832794189453125, "step": 1989 }, { "epoch": 0.7200036181086337, "grad_norm": 0.835253894329071, "learning_rate": 1.0593602545544218e-05, "loss": 0.2739105224609375, "step": 1990 }, { "epoch": 0.7203654289720048, "grad_norm": 0.876404345035553, "learning_rate": 1.0568989654876847e-05, "loss": 0.30486297607421875, "step": 1991 }, { "epoch": 0.720727239835376, "grad_norm": 0.4424457252025604, "learning_rate": 1.0544395117803621e-05, "loss": 0.278167724609375, "step": 1992 }, { "epoch": 0.7210890506987472, "grad_norm": 0.5868606567382812, "learning_rate": 1.051981898218772e-05, "loss": 0.29647064208984375, "step": 1993 }, { "epoch": 0.7214508615621184, "grad_norm": 1.4801784753799438, "learning_rate": 1.0495261295856509e-05, "loss": 0.29256439208984375, "step": 1994 }, { "epoch": 0.7218126724254895, "grad_norm": 1.5305812358856201, "learning_rate": 1.0470722106601461e-05, "loss": 0.32382965087890625, "step": 1995 }, { "epoch": 0.7221744832888608, "grad_norm": 2.881505012512207, "learning_rate": 1.0446201462178033e-05, "loss": 0.305389404296875, "step": 1996 }, { "epoch": 0.722536294152232, "grad_norm": 1.5277271270751953, "learning_rate": 1.0421699410305619e-05, "loss": 0.30657196044921875, "step": 1997 }, { "epoch": 0.7228981050156031, "grad_norm": 1.8103424310684204, "learning_rate": 1.0397215998667412e-05, "loss": 0.250732421875, "step": 1998 }, { "epoch": 0.7232599158789743, "grad_norm": 1.2109979391098022, "learning_rate": 1.0372751274910317e-05, "loss": 0.29747772216796875, "step": 1999 }, { "epoch": 0.7236217267423455, "grad_norm": 0.5790792107582092, "learning_rate": 1.0348305286644908e-05, "loss": 0.277130126953125, "step": 2000 }, { "epoch": 0.7239835376057167, "grad_norm": 1.2400788068771362, "learning_rate": 1.0323878081445268e-05, "loss": 0.3037567138671875, "step": 2001 }, { "epoch": 0.7243453484690878, "grad_norm": 1.585336446762085, "learning_rate": 1.0299469706848932e-05, "loss": 0.3325347900390625, "step": 2002 }, { "epoch": 0.724707159332459, "grad_norm": 3.4473862648010254, "learning_rate": 1.0275080210356792e-05, "loss": 0.33603668212890625, "step": 2003 }, { "epoch": 0.7250689701958302, "grad_norm": 3.1499297618865967, "learning_rate": 1.0250709639433003e-05, "loss": 0.26773834228515625, "step": 2004 }, { "epoch": 0.7254307810592013, "grad_norm": 0.6108013987541199, "learning_rate": 1.0226358041504886e-05, "loss": 0.26811981201171875, "step": 2005 }, { "epoch": 0.7257925919225725, "grad_norm": 1.3173404932022095, "learning_rate": 1.0202025463962844e-05, "loss": 0.31855010986328125, "step": 2006 }, { "epoch": 0.7261544027859437, "grad_norm": 1.0393760204315186, "learning_rate": 1.0177711954160246e-05, "loss": 0.292510986328125, "step": 2007 }, { "epoch": 0.7265162136493148, "grad_norm": 1.1817708015441895, "learning_rate": 1.015341755941339e-05, "loss": 0.30263519287109375, "step": 2008 }, { "epoch": 0.726878024512686, "grad_norm": 1.9298760890960693, "learning_rate": 1.0129142327001334e-05, "loss": 0.26557159423828125, "step": 2009 }, { "epoch": 0.7272398353760572, "grad_norm": 0.5317143201828003, "learning_rate": 1.010488630416588e-05, "loss": 0.2981109619140625, "step": 2010 }, { "epoch": 0.7276016462394284, "grad_norm": 0.6240288019180298, "learning_rate": 1.0080649538111417e-05, "loss": 0.28814697265625, "step": 2011 }, { "epoch": 0.7279634571027995, "grad_norm": 2.3108105659484863, "learning_rate": 1.0056432076004881e-05, "loss": 0.268341064453125, "step": 2012 }, { "epoch": 0.7283252679661707, "grad_norm": 1.3425930738449097, "learning_rate": 1.0032233964975621e-05, "loss": 0.27782440185546875, "step": 2013 }, { "epoch": 0.7286870788295419, "grad_norm": 1.7430672645568848, "learning_rate": 1.0008055252115344e-05, "loss": 0.293914794921875, "step": 2014 }, { "epoch": 0.729048889692913, "grad_norm": 2.1568782329559326, "learning_rate": 9.983895984477994e-06, "loss": 0.26897430419921875, "step": 2015 }, { "epoch": 0.7294107005562842, "grad_norm": 0.9408864378929138, "learning_rate": 9.959756209079672e-06, "loss": 0.31790924072265625, "step": 2016 }, { "epoch": 0.7297725114196554, "grad_norm": 1.5104948282241821, "learning_rate": 9.935635972898568e-06, "loss": 0.2728424072265625, "step": 2017 }, { "epoch": 0.7301343222830265, "grad_norm": 1.46299147605896, "learning_rate": 9.911535322874823e-06, "loss": 0.3113861083984375, "step": 2018 }, { "epoch": 0.7304961331463977, "grad_norm": 0.8574767112731934, "learning_rate": 9.88745430591047e-06, "loss": 0.28537750244140625, "step": 2019 }, { "epoch": 0.7308579440097689, "grad_norm": 1.4546924829483032, "learning_rate": 9.863392968869327e-06, "loss": 0.2559661865234375, "step": 2020 }, { "epoch": 0.7312197548731401, "grad_norm": 1.7424744367599487, "learning_rate": 9.839351358576921e-06, "loss": 0.2868194580078125, "step": 2021 }, { "epoch": 0.7315815657365112, "grad_norm": 0.6353687047958374, "learning_rate": 9.815329521820393e-06, "loss": 0.28891754150390625, "step": 2022 }, { "epoch": 0.7319433765998824, "grad_norm": 1.3329564332962036, "learning_rate": 9.7913275053484e-06, "loss": 0.314300537109375, "step": 2023 }, { "epoch": 0.7323051874632536, "grad_norm": 3.995215654373169, "learning_rate": 9.767345355871017e-06, "loss": 0.33676910400390625, "step": 2024 }, { "epoch": 0.7326669983266247, "grad_norm": 0.9399797916412354, "learning_rate": 9.743383120059655e-06, "loss": 0.26988983154296875, "step": 2025 }, { "epoch": 0.7330288091899959, "grad_norm": 1.6762381792068481, "learning_rate": 9.719440844547002e-06, "loss": 0.2831878662109375, "step": 2026 }, { "epoch": 0.7333906200533671, "grad_norm": 2.343536376953125, "learning_rate": 9.695518575926864e-06, "loss": 0.305419921875, "step": 2027 }, { "epoch": 0.7337524309167383, "grad_norm": 2.0540080070495605, "learning_rate": 9.67161636075413e-06, "loss": 0.257354736328125, "step": 2028 }, { "epoch": 0.7341142417801094, "grad_norm": 0.8803778886795044, "learning_rate": 9.647734245544657e-06, "loss": 0.2823028564453125, "step": 2029 }, { "epoch": 0.7344760526434806, "grad_norm": 0.6621719002723694, "learning_rate": 9.623872276775194e-06, "loss": 0.2985954284667969, "step": 2030 }, { "epoch": 0.7348378635068518, "grad_norm": 0.6565130352973938, "learning_rate": 9.600030500883275e-06, "loss": 0.31298828125, "step": 2031 }, { "epoch": 0.7351996743702229, "grad_norm": 0.7543070912361145, "learning_rate": 9.57620896426714e-06, "loss": 0.286956787109375, "step": 2032 }, { "epoch": 0.7355614852335941, "grad_norm": 0.6199185252189636, "learning_rate": 9.552407713285631e-06, "loss": 0.30333709716796875, "step": 2033 }, { "epoch": 0.7359232960969653, "grad_norm": 0.6806892156600952, "learning_rate": 9.528626794258133e-06, "loss": 0.28037261962890625, "step": 2034 }, { "epoch": 0.7362851069603364, "grad_norm": 0.7412317991256714, "learning_rate": 9.504866253464462e-06, "loss": 0.2850341796875, "step": 2035 }, { "epoch": 0.7366469178237076, "grad_norm": 1.6475956439971924, "learning_rate": 9.481126137144754e-06, "loss": 0.270965576171875, "step": 2036 }, { "epoch": 0.7370087286870788, "grad_norm": 0.8774546384811401, "learning_rate": 9.457406491499414e-06, "loss": 0.271087646484375, "step": 2037 }, { "epoch": 0.73737053955045, "grad_norm": 1.84042489528656, "learning_rate": 9.433707362689005e-06, "loss": 0.26811981201171875, "step": 2038 }, { "epoch": 0.7377323504138211, "grad_norm": 1.3619556427001953, "learning_rate": 9.410028796834161e-06, "loss": 0.2752227783203125, "step": 2039 }, { "epoch": 0.7380941612771923, "grad_norm": 0.4878597557544708, "learning_rate": 9.386370840015501e-06, "loss": 0.2967987060546875, "step": 2040 }, { "epoch": 0.7384559721405635, "grad_norm": 0.48502978682518005, "learning_rate": 9.362733538273539e-06, "loss": 0.300048828125, "step": 2041 }, { "epoch": 0.7388177830039347, "grad_norm": 1.411429524421692, "learning_rate": 9.339116937608576e-06, "loss": 0.30999755859375, "step": 2042 }, { "epoch": 0.7391795938673059, "grad_norm": 0.7514380216598511, "learning_rate": 9.315521083980659e-06, "loss": 0.3119659423828125, "step": 2043 }, { "epoch": 0.7395414047306771, "grad_norm": 0.4231019914150238, "learning_rate": 9.291946023309434e-06, "loss": 0.27204132080078125, "step": 2044 }, { "epoch": 0.7399032155940483, "grad_norm": 0.47059231996536255, "learning_rate": 9.26839180147409e-06, "loss": 0.27874755859375, "step": 2045 }, { "epoch": 0.7402650264574194, "grad_norm": 0.5501687526702881, "learning_rate": 9.244858464313248e-06, "loss": 0.26895904541015625, "step": 2046 }, { "epoch": 0.7406268373207906, "grad_norm": 1.9908087253570557, "learning_rate": 9.221346057624919e-06, "loss": 0.301605224609375, "step": 2047 }, { "epoch": 0.7409886481841618, "grad_norm": 1.5776256322860718, "learning_rate": 9.19785462716635e-06, "loss": 0.3069915771484375, "step": 2048 }, { "epoch": 0.7413504590475329, "grad_norm": 1.9271215200424194, "learning_rate": 9.174384218653975e-06, "loss": 0.307220458984375, "step": 2049 }, { "epoch": 0.7417122699109041, "grad_norm": 1.9860888719558716, "learning_rate": 9.150934877763325e-06, "loss": 0.2961082458496094, "step": 2050 }, { "epoch": 0.7420740807742753, "grad_norm": 2.0440213680267334, "learning_rate": 9.127506650128914e-06, "loss": 0.3061370849609375, "step": 2051 }, { "epoch": 0.7424358916376465, "grad_norm": 2.1767168045043945, "learning_rate": 9.104099581344198e-06, "loss": 0.29422760009765625, "step": 2052 }, { "epoch": 0.7427977025010176, "grad_norm": 1.0910667181015015, "learning_rate": 9.080713716961423e-06, "loss": 0.28588104248046875, "step": 2053 }, { "epoch": 0.7431595133643888, "grad_norm": 2.1298329830169678, "learning_rate": 9.057349102491593e-06, "loss": 0.32523345947265625, "step": 2054 }, { "epoch": 0.74352132422776, "grad_norm": 0.5449501872062683, "learning_rate": 9.034005783404341e-06, "loss": 0.30496978759765625, "step": 2055 }, { "epoch": 0.7438831350911311, "grad_norm": 2.547140121459961, "learning_rate": 9.010683805127871e-06, "loss": 0.2602691650390625, "step": 2056 }, { "epoch": 0.7442449459545023, "grad_norm": 0.47545358538627625, "learning_rate": 8.987383213048846e-06, "loss": 0.2838134765625, "step": 2057 }, { "epoch": 0.7446067568178735, "grad_norm": 0.7316913604736328, "learning_rate": 8.964104052512313e-06, "loss": 0.3028717041015625, "step": 2058 }, { "epoch": 0.7449685676812446, "grad_norm": 1.999995470046997, "learning_rate": 8.940846368821608e-06, "loss": 0.30873870849609375, "step": 2059 }, { "epoch": 0.7453303785446158, "grad_norm": 2.6741111278533936, "learning_rate": 8.917610207238279e-06, "loss": 0.2798004150390625, "step": 2060 }, { "epoch": 0.745692189407987, "grad_norm": 1.5063456296920776, "learning_rate": 8.894395612981991e-06, "loss": 0.2894744873046875, "step": 2061 }, { "epoch": 0.7460540002713582, "grad_norm": 2.208566188812256, "learning_rate": 8.871202631230431e-06, "loss": 0.265716552734375, "step": 2062 }, { "epoch": 0.7464158111347293, "grad_norm": 0.7017208337783813, "learning_rate": 8.848031307119222e-06, "loss": 0.28713226318359375, "step": 2063 }, { "epoch": 0.7467776219981005, "grad_norm": 2.726802110671997, "learning_rate": 8.824881685741847e-06, "loss": 0.25650787353515625, "step": 2064 }, { "epoch": 0.7471394328614717, "grad_norm": 0.726462721824646, "learning_rate": 8.801753812149544e-06, "loss": 0.2965240478515625, "step": 2065 }, { "epoch": 0.7475012437248428, "grad_norm": 0.7167609930038452, "learning_rate": 8.778647731351246e-06, "loss": 0.3104705810546875, "step": 2066 }, { "epoch": 0.747863054588214, "grad_norm": 0.8133559823036194, "learning_rate": 8.755563488313454e-06, "loss": 0.27558135986328125, "step": 2067 }, { "epoch": 0.7482248654515852, "grad_norm": 2.258305549621582, "learning_rate": 8.732501127960177e-06, "loss": 0.314208984375, "step": 2068 }, { "epoch": 0.7485866763149563, "grad_norm": 0.619029700756073, "learning_rate": 8.70946069517286e-06, "loss": 0.30645751953125, "step": 2069 }, { "epoch": 0.7489484871783275, "grad_norm": 2.2734878063201904, "learning_rate": 8.686442234790244e-06, "loss": 0.2738800048828125, "step": 2070 }, { "epoch": 0.7493102980416987, "grad_norm": 1.6631587743759155, "learning_rate": 8.663445791608316e-06, "loss": 0.3127288818359375, "step": 2071 }, { "epoch": 0.7496721089050699, "grad_norm": 1.303459882736206, "learning_rate": 8.64047141038024e-06, "loss": 0.32994842529296875, "step": 2072 }, { "epoch": 0.750033919768441, "grad_norm": 1.1078165769577026, "learning_rate": 8.61751913581622e-06, "loss": 0.26094818115234375, "step": 2073 }, { "epoch": 0.7503957306318122, "grad_norm": 0.8651514649391174, "learning_rate": 8.594589012583442e-06, "loss": 0.2808990478515625, "step": 2074 }, { "epoch": 0.7507575414951834, "grad_norm": 1.2632973194122314, "learning_rate": 8.571681085305994e-06, "loss": 0.27414703369140625, "step": 2075 }, { "epoch": 0.7511193523585545, "grad_norm": 0.5077779293060303, "learning_rate": 8.548795398564755e-06, "loss": 0.26163482666015625, "step": 2076 }, { "epoch": 0.7514811632219257, "grad_norm": 1.473169207572937, "learning_rate": 8.52593199689733e-06, "loss": 0.3092155456542969, "step": 2077 }, { "epoch": 0.7518429740852969, "grad_norm": 0.9638535976409912, "learning_rate": 8.50309092479796e-06, "loss": 0.28199005126953125, "step": 2078 }, { "epoch": 0.752204784948668, "grad_norm": 0.7948732376098633, "learning_rate": 8.480272226717427e-06, "loss": 0.28905487060546875, "step": 2079 }, { "epoch": 0.7525665958120392, "grad_norm": 0.9013261795043945, "learning_rate": 8.457475947062963e-06, "loss": 0.2725982666015625, "step": 2080 }, { "epoch": 0.7529284066754104, "grad_norm": 0.683644711971283, "learning_rate": 8.434702130198181e-06, "loss": 0.29401397705078125, "step": 2081 }, { "epoch": 0.7532902175387816, "grad_norm": 1.225305199623108, "learning_rate": 8.411950820442972e-06, "loss": 0.26263427734375, "step": 2082 }, { "epoch": 0.7536520284021527, "grad_norm": 1.0510221719741821, "learning_rate": 8.389222062073437e-06, "loss": 0.295928955078125, "step": 2083 }, { "epoch": 0.7540138392655239, "grad_norm": 1.279076337814331, "learning_rate": 8.36651589932177e-06, "loss": 0.31679534912109375, "step": 2084 }, { "epoch": 0.7543756501288951, "grad_norm": 2.3481404781341553, "learning_rate": 8.343832376376226e-06, "loss": 0.315948486328125, "step": 2085 }, { "epoch": 0.7547374609922662, "grad_norm": 1.7214516401290894, "learning_rate": 8.321171537380961e-06, "loss": 0.2967071533203125, "step": 2086 }, { "epoch": 0.7550992718556374, "grad_norm": 1.5679672956466675, "learning_rate": 8.298533426436022e-06, "loss": 0.3087921142578125, "step": 2087 }, { "epoch": 0.7554610827190087, "grad_norm": 1.1081656217575073, "learning_rate": 8.2759180875972e-06, "loss": 0.27557373046875, "step": 2088 }, { "epoch": 0.7558228935823799, "grad_norm": 0.8075366020202637, "learning_rate": 8.25332556487598e-06, "loss": 0.2818450927734375, "step": 2089 }, { "epoch": 0.756184704445751, "grad_norm": 1.3013746738433838, "learning_rate": 8.23075590223944e-06, "loss": 0.26068115234375, "step": 2090 }, { "epoch": 0.7565465153091222, "grad_norm": 1.153868556022644, "learning_rate": 8.208209143610176e-06, "loss": 0.285552978515625, "step": 2091 }, { "epoch": 0.7569083261724934, "grad_norm": 1.6037343740463257, "learning_rate": 8.185685332866202e-06, "loss": 0.3035888671875, "step": 2092 }, { "epoch": 0.7572701370358645, "grad_norm": 0.9523524045944214, "learning_rate": 8.163184513840887e-06, "loss": 0.26950836181640625, "step": 2093 }, { "epoch": 0.7576319478992357, "grad_norm": 2.092425584793091, "learning_rate": 8.140706730322833e-06, "loss": 0.30544281005859375, "step": 2094 }, { "epoch": 0.7579937587626069, "grad_norm": 1.0961828231811523, "learning_rate": 8.118252026055848e-06, "loss": 0.336090087890625, "step": 2095 }, { "epoch": 0.758355569625978, "grad_norm": 1.6999764442443848, "learning_rate": 8.095820444738789e-06, "loss": 0.26886749267578125, "step": 2096 }, { "epoch": 0.7587173804893492, "grad_norm": 1.069846272468567, "learning_rate": 8.073412030025545e-06, "loss": 0.30096435546875, "step": 2097 }, { "epoch": 0.7590791913527204, "grad_norm": 0.764700174331665, "learning_rate": 8.0510268255249e-06, "loss": 0.26978302001953125, "step": 2098 }, { "epoch": 0.7594410022160916, "grad_norm": 3.3480124473571777, "learning_rate": 8.02866487480047e-06, "loss": 0.2671966552734375, "step": 2099 }, { "epoch": 0.7598028130794627, "grad_norm": 2.869000196456909, "learning_rate": 8.006326221370627e-06, "loss": 0.25177001953125, "step": 2100 }, { "epoch": 0.7601646239428339, "grad_norm": 1.0870609283447266, "learning_rate": 7.984010908708395e-06, "loss": 0.30254364013671875, "step": 2101 }, { "epoch": 0.7605264348062051, "grad_norm": 0.5578786730766296, "learning_rate": 7.961718980241383e-06, "loss": 0.2845306396484375, "step": 2102 }, { "epoch": 0.7608882456695762, "grad_norm": 0.8165073990821838, "learning_rate": 7.93945047935168e-06, "loss": 0.2877197265625, "step": 2103 }, { "epoch": 0.7612500565329474, "grad_norm": 0.5669277310371399, "learning_rate": 7.917205449375801e-06, "loss": 0.2708740234375, "step": 2104 }, { "epoch": 0.7616118673963186, "grad_norm": 0.5065291523933411, "learning_rate": 7.894983933604576e-06, "loss": 0.28122711181640625, "step": 2105 }, { "epoch": 0.7619736782596898, "grad_norm": 0.6149017214775085, "learning_rate": 7.872785975283066e-06, "loss": 0.286041259765625, "step": 2106 }, { "epoch": 0.7623354891230609, "grad_norm": 1.336867332458496, "learning_rate": 7.850611617610495e-06, "loss": 0.30147552490234375, "step": 2107 }, { "epoch": 0.7626972999864321, "grad_norm": 1.176039457321167, "learning_rate": 7.828460903740158e-06, "loss": 0.28701019287109375, "step": 2108 }, { "epoch": 0.7630591108498033, "grad_norm": 1.5398608446121216, "learning_rate": 7.806333876779329e-06, "loss": 0.2639007568359375, "step": 2109 }, { "epoch": 0.7634209217131744, "grad_norm": 1.3273985385894775, "learning_rate": 7.784230579789205e-06, "loss": 0.2854194641113281, "step": 2110 }, { "epoch": 0.7637827325765456, "grad_norm": 0.9335752129554749, "learning_rate": 7.762151055784783e-06, "loss": 0.2698822021484375, "step": 2111 }, { "epoch": 0.7641445434399168, "grad_norm": 0.7427600026130676, "learning_rate": 7.740095347734794e-06, "loss": 0.28868865966796875, "step": 2112 }, { "epoch": 0.764506354303288, "grad_norm": 0.5394023656845093, "learning_rate": 7.718063498561648e-06, "loss": 0.26763153076171875, "step": 2113 }, { "epoch": 0.7648681651666591, "grad_norm": 0.9127338528633118, "learning_rate": 7.69605555114129e-06, "loss": 0.31565093994140625, "step": 2114 }, { "epoch": 0.7652299760300303, "grad_norm": 0.7843260765075684, "learning_rate": 7.67407154830317e-06, "loss": 0.274444580078125, "step": 2115 }, { "epoch": 0.7655917868934015, "grad_norm": 0.9258513450622559, "learning_rate": 7.652111532830134e-06, "loss": 0.2730712890625, "step": 2116 }, { "epoch": 0.7659535977567726, "grad_norm": 1.0265785455703735, "learning_rate": 7.630175547458343e-06, "loss": 0.271942138671875, "step": 2117 }, { "epoch": 0.7663154086201438, "grad_norm": 0.5068545937538147, "learning_rate": 7.608263634877198e-06, "loss": 0.28342437744140625, "step": 2118 }, { "epoch": 0.766677219483515, "grad_norm": 0.8301880359649658, "learning_rate": 7.586375837729254e-06, "loss": 0.27074432373046875, "step": 2119 }, { "epoch": 0.7670390303468861, "grad_norm": 1.283322811126709, "learning_rate": 7.5645121986101235e-06, "loss": 0.285308837890625, "step": 2120 }, { "epoch": 0.7674008412102573, "grad_norm": 0.9133622646331787, "learning_rate": 7.542672760068422e-06, "loss": 0.2678680419921875, "step": 2121 }, { "epoch": 0.7677626520736285, "grad_norm": 0.6409438252449036, "learning_rate": 7.520857564605666e-06, "loss": 0.26702880859375, "step": 2122 }, { "epoch": 0.7681244629369997, "grad_norm": 0.6496618986129761, "learning_rate": 7.499066654676182e-06, "loss": 0.2498645782470703, "step": 2123 }, { "epoch": 0.7684862738003708, "grad_norm": 1.977553367614746, "learning_rate": 7.477300072687039e-06, "loss": 0.31378173828125, "step": 2124 }, { "epoch": 0.768848084663742, "grad_norm": 0.52681565284729, "learning_rate": 7.455557860997964e-06, "loss": 0.25574493408203125, "step": 2125 }, { "epoch": 0.7692098955271132, "grad_norm": 2.285032033920288, "learning_rate": 7.433840061921256e-06, "loss": 0.3079986572265625, "step": 2126 }, { "epoch": 0.7695717063904843, "grad_norm": 1.0352336168289185, "learning_rate": 7.412146717721702e-06, "loss": 0.2727813720703125, "step": 2127 }, { "epoch": 0.7699335172538555, "grad_norm": 1.6472221612930298, "learning_rate": 7.390477870616504e-06, "loss": 0.2841339111328125, "step": 2128 }, { "epoch": 0.7702953281172267, "grad_norm": 2.3880321979522705, "learning_rate": 7.3688335627751775e-06, "loss": 0.32447052001953125, "step": 2129 }, { "epoch": 0.7706571389805978, "grad_norm": 3.285285234451294, "learning_rate": 7.347213836319506e-06, "loss": 0.2948760986328125, "step": 2130 }, { "epoch": 0.771018949843969, "grad_norm": 2.260315418243408, "learning_rate": 7.3256187333234165e-06, "loss": 0.286376953125, "step": 2131 }, { "epoch": 0.7713807607073402, "grad_norm": 0.8361008167266846, "learning_rate": 7.304048295812918e-06, "loss": 0.2879295349121094, "step": 2132 }, { "epoch": 0.7717425715707114, "grad_norm": 1.9353309869766235, "learning_rate": 7.282502565766023e-06, "loss": 0.304718017578125, "step": 2133 }, { "epoch": 0.7721043824340826, "grad_norm": 0.9782391786575317, "learning_rate": 7.2609815851126546e-06, "loss": 0.321441650390625, "step": 2134 }, { "epoch": 0.7724661932974538, "grad_norm": 1.5381039381027222, "learning_rate": 7.239485395734585e-06, "loss": 0.2505683898925781, "step": 2135 }, { "epoch": 0.772828004160825, "grad_norm": 1.391632080078125, "learning_rate": 7.2180140394653285e-06, "loss": 0.314056396484375, "step": 2136 }, { "epoch": 0.7731898150241961, "grad_norm": 3.120582103729248, "learning_rate": 7.196567558090075e-06, "loss": 0.32390594482421875, "step": 2137 }, { "epoch": 0.7735516258875673, "grad_norm": 1.0119940042495728, "learning_rate": 7.175145993345598e-06, "loss": 0.2953224182128906, "step": 2138 }, { "epoch": 0.7739134367509385, "grad_norm": 1.7341864109039307, "learning_rate": 7.153749386920201e-06, "loss": 0.28229522705078125, "step": 2139 }, { "epoch": 0.7742752476143097, "grad_norm": 1.5073193311691284, "learning_rate": 7.132377780453601e-06, "loss": 0.2589874267578125, "step": 2140 }, { "epoch": 0.7746370584776808, "grad_norm": 2.3288140296936035, "learning_rate": 7.1110312155368635e-06, "loss": 0.2686004638671875, "step": 2141 }, { "epoch": 0.774998869341052, "grad_norm": 1.5973016023635864, "learning_rate": 7.08970973371232e-06, "loss": 0.32098388671875, "step": 2142 }, { "epoch": 0.7753606802044232, "grad_norm": 2.4453887939453125, "learning_rate": 7.0684133764734975e-06, "loss": 0.28322601318359375, "step": 2143 }, { "epoch": 0.7757224910677943, "grad_norm": 1.2843890190124512, "learning_rate": 7.047142185265019e-06, "loss": 0.3222198486328125, "step": 2144 }, { "epoch": 0.7760843019311655, "grad_norm": 1.3322311639785767, "learning_rate": 7.025896201482527e-06, "loss": 0.2637786865234375, "step": 2145 }, { "epoch": 0.7764461127945367, "grad_norm": 0.4535099267959595, "learning_rate": 7.004675466472632e-06, "loss": 0.28910064697265625, "step": 2146 }, { "epoch": 0.7768079236579079, "grad_norm": 2.663909912109375, "learning_rate": 6.983480021532778e-06, "loss": 0.31420135498046875, "step": 2147 }, { "epoch": 0.777169734521279, "grad_norm": 1.7236891984939575, "learning_rate": 6.962309907911222e-06, "loss": 0.27330780029296875, "step": 2148 }, { "epoch": 0.7775315453846502, "grad_norm": 1.7276142835617065, "learning_rate": 6.9411651668069e-06, "loss": 0.31751251220703125, "step": 2149 }, { "epoch": 0.7778933562480214, "grad_norm": 1.6581449508666992, "learning_rate": 6.920045839369381e-06, "loss": 0.31543731689453125, "step": 2150 }, { "epoch": 0.7782551671113925, "grad_norm": 0.9099588394165039, "learning_rate": 6.89895196669877e-06, "loss": 0.275115966796875, "step": 2151 }, { "epoch": 0.7786169779747637, "grad_norm": 0.9963325262069702, "learning_rate": 6.877883589845648e-06, "loss": 0.28224945068359375, "step": 2152 }, { "epoch": 0.7789787888381349, "grad_norm": 1.1063090562820435, "learning_rate": 6.856840749810967e-06, "loss": 0.295379638671875, "step": 2153 }, { "epoch": 0.779340599701506, "grad_norm": 1.4155371189117432, "learning_rate": 6.835823487545983e-06, "loss": 0.26938629150390625, "step": 2154 }, { "epoch": 0.7797024105648772, "grad_norm": 0.7008209824562073, "learning_rate": 6.814831843952177e-06, "loss": 0.2594757080078125, "step": 2155 }, { "epoch": 0.7800642214282484, "grad_norm": 1.2376457452774048, "learning_rate": 6.793865859881184e-06, "loss": 0.32061767578125, "step": 2156 }, { "epoch": 0.7804260322916196, "grad_norm": 1.022017478942871, "learning_rate": 6.772925576134688e-06, "loss": 0.25939178466796875, "step": 2157 }, { "epoch": 0.7807878431549907, "grad_norm": 1.99070143699646, "learning_rate": 6.752011033464368e-06, "loss": 0.316253662109375, "step": 2158 }, { "epoch": 0.7811496540183619, "grad_norm": 0.9455556273460388, "learning_rate": 6.731122272571793e-06, "loss": 0.277862548828125, "step": 2159 }, { "epoch": 0.7815114648817331, "grad_norm": 0.7167611718177795, "learning_rate": 6.710259334108385e-06, "loss": 0.2966880798339844, "step": 2160 }, { "epoch": 0.7818732757451042, "grad_norm": 2.2383902072906494, "learning_rate": 6.6894222586752935e-06, "loss": 0.275909423828125, "step": 2161 }, { "epoch": 0.7822350866084754, "grad_norm": 1.714604377746582, "learning_rate": 6.668611086823338e-06, "loss": 0.31858062744140625, "step": 2162 }, { "epoch": 0.7825968974718466, "grad_norm": 1.129149079322815, "learning_rate": 6.64782585905293e-06, "loss": 0.27459716796875, "step": 2163 }, { "epoch": 0.7829587083352177, "grad_norm": 0.8382195234298706, "learning_rate": 6.627066615813989e-06, "loss": 0.2840728759765625, "step": 2164 }, { "epoch": 0.7833205191985889, "grad_norm": 2.298030138015747, "learning_rate": 6.60633339750588e-06, "loss": 0.2989959716796875, "step": 2165 }, { "epoch": 0.7836823300619601, "grad_norm": 1.9535231590270996, "learning_rate": 6.585626244477301e-06, "loss": 0.28148651123046875, "step": 2166 }, { "epoch": 0.7840441409253313, "grad_norm": 0.5629681944847107, "learning_rate": 6.564945197026238e-06, "loss": 0.25955963134765625, "step": 2167 }, { "epoch": 0.7844059517887024, "grad_norm": 1.0145905017852783, "learning_rate": 6.544290295399867e-06, "loss": 0.29650115966796875, "step": 2168 }, { "epoch": 0.7847677626520736, "grad_norm": 0.6523012518882751, "learning_rate": 6.5236615797944826e-06, "loss": 0.29302978515625, "step": 2169 }, { "epoch": 0.7851295735154448, "grad_norm": 0.6204728484153748, "learning_rate": 6.503059090355421e-06, "loss": 0.28514862060546875, "step": 2170 }, { "epoch": 0.7854913843788159, "grad_norm": 1.0206570625305176, "learning_rate": 6.482482867176973e-06, "loss": 0.2977142333984375, "step": 2171 }, { "epoch": 0.7858531952421871, "grad_norm": 0.8375414609909058, "learning_rate": 6.461932950302332e-06, "loss": 0.29811859130859375, "step": 2172 }, { "epoch": 0.7862150061055583, "grad_norm": 1.4542150497436523, "learning_rate": 6.441409379723473e-06, "loss": 0.2829132080078125, "step": 2173 }, { "epoch": 0.7865768169689294, "grad_norm": 1.714672565460205, "learning_rate": 6.420912195381119e-06, "loss": 0.28368377685546875, "step": 2174 }, { "epoch": 0.7869386278323006, "grad_norm": 0.5524047613143921, "learning_rate": 6.400441437164631e-06, "loss": 0.2947540283203125, "step": 2175 }, { "epoch": 0.7873004386956718, "grad_norm": 0.9212258458137512, "learning_rate": 6.37999714491194e-06, "loss": 0.28049468994140625, "step": 2176 }, { "epoch": 0.787662249559043, "grad_norm": 1.7586625814437866, "learning_rate": 6.3595793584094845e-06, "loss": 0.325927734375, "step": 2177 }, { "epoch": 0.7880240604224141, "grad_norm": 0.5942303538322449, "learning_rate": 6.339188117392105e-06, "loss": 0.29659271240234375, "step": 2178 }, { "epoch": 0.7883858712857854, "grad_norm": 0.6439244747161865, "learning_rate": 6.318823461542995e-06, "loss": 0.29462432861328125, "step": 2179 }, { "epoch": 0.7887476821491566, "grad_norm": 1.8621875047683716, "learning_rate": 6.2984854304936e-06, "loss": 0.28162384033203125, "step": 2180 }, { "epoch": 0.7891094930125278, "grad_norm": 1.375120759010315, "learning_rate": 6.2781740638235675e-06, "loss": 0.30438232421875, "step": 2181 }, { "epoch": 0.7894713038758989, "grad_norm": 0.5320656895637512, "learning_rate": 6.257889401060637e-06, "loss": 0.27581787109375, "step": 2182 }, { "epoch": 0.7898331147392701, "grad_norm": 1.3865851163864136, "learning_rate": 6.237631481680588e-06, "loss": 0.3041534423828125, "step": 2183 }, { "epoch": 0.7901949256026413, "grad_norm": 1.3433527946472168, "learning_rate": 6.217400345107145e-06, "loss": 0.2876930236816406, "step": 2184 }, { "epoch": 0.7905567364660124, "grad_norm": 0.5098855495452881, "learning_rate": 6.197196030711934e-06, "loss": 0.2946929931640625, "step": 2185 }, { "epoch": 0.7909185473293836, "grad_norm": 1.6943466663360596, "learning_rate": 6.177018577814358e-06, "loss": 0.2838935852050781, "step": 2186 }, { "epoch": 0.7912803581927548, "grad_norm": 0.7632755637168884, "learning_rate": 6.156868025681557e-06, "loss": 0.27197265625, "step": 2187 }, { "epoch": 0.7916421690561259, "grad_norm": 1.1439878940582275, "learning_rate": 6.1367444135283174e-06, "loss": 0.31505775451660156, "step": 2188 }, { "epoch": 0.7920039799194971, "grad_norm": 0.5318371057510376, "learning_rate": 6.116647780516995e-06, "loss": 0.29204559326171875, "step": 2189 }, { "epoch": 0.7923657907828683, "grad_norm": 0.872897207736969, "learning_rate": 6.096578165757441e-06, "loss": 0.286651611328125, "step": 2190 }, { "epoch": 0.7927276016462395, "grad_norm": 2.0091850757598877, "learning_rate": 6.076535608306944e-06, "loss": 0.30340576171875, "step": 2191 }, { "epoch": 0.7930894125096106, "grad_norm": 0.7524468898773193, "learning_rate": 6.056520147170113e-06, "loss": 0.27008056640625, "step": 2192 }, { "epoch": 0.7934512233729818, "grad_norm": 2.371096611022949, "learning_rate": 6.036531821298839e-06, "loss": 0.30457305908203125, "step": 2193 }, { "epoch": 0.793813034236353, "grad_norm": 1.5691487789154053, "learning_rate": 6.016570669592197e-06, "loss": 0.309356689453125, "step": 2194 }, { "epoch": 0.7941748450997241, "grad_norm": 1.495584487915039, "learning_rate": 5.996636730896388e-06, "loss": 0.29853057861328125, "step": 2195 }, { "epoch": 0.7945366559630953, "grad_norm": 0.482940673828125, "learning_rate": 5.976730044004639e-06, "loss": 0.27850341796875, "step": 2196 }, { "epoch": 0.7948984668264665, "grad_norm": 3.2953755855560303, "learning_rate": 5.956850647657169e-06, "loss": 0.33624267578125, "step": 2197 }, { "epoch": 0.7952602776898376, "grad_norm": 0.9393661618232727, "learning_rate": 5.9369985805410605e-06, "loss": 0.30047607421875, "step": 2198 }, { "epoch": 0.7956220885532088, "grad_norm": 0.5915073156356812, "learning_rate": 5.917173881290217e-06, "loss": 0.3057098388671875, "step": 2199 }, { "epoch": 0.79598389941658, "grad_norm": 1.2945173978805542, "learning_rate": 5.897376588485297e-06, "loss": 0.25836944580078125, "step": 2200 }, { "epoch": 0.7963457102799512, "grad_norm": 1.433022141456604, "learning_rate": 5.877606740653608e-06, "loss": 0.3271484375, "step": 2201 }, { "epoch": 0.7967075211433223, "grad_norm": 1.056863784790039, "learning_rate": 5.857864376269051e-06, "loss": 0.29337310791015625, "step": 2202 }, { "epoch": 0.7970693320066935, "grad_norm": 1.3162082433700562, "learning_rate": 5.8381495337520425e-06, "loss": 0.2960205078125, "step": 2203 }, { "epoch": 0.7974311428700647, "grad_norm": 1.197184443473816, "learning_rate": 5.8184622514694365e-06, "loss": 0.31475830078125, "step": 2204 }, { "epoch": 0.7977929537334358, "grad_norm": 0.7495045065879822, "learning_rate": 5.798802567734458e-06, "loss": 0.3080291748046875, "step": 2205 }, { "epoch": 0.798154764596807, "grad_norm": 1.4147897958755493, "learning_rate": 5.7791705208066094e-06, "loss": 0.2747955322265625, "step": 2206 }, { "epoch": 0.7985165754601782, "grad_norm": 0.9892336130142212, "learning_rate": 5.7595661488916355e-06, "loss": 0.30677032470703125, "step": 2207 }, { "epoch": 0.7988783863235493, "grad_norm": 1.630575180053711, "learning_rate": 5.739989490141398e-06, "loss": 0.3090972900390625, "step": 2208 }, { "epoch": 0.7992401971869205, "grad_norm": 0.6499831676483154, "learning_rate": 5.72044058265383e-06, "loss": 0.31647491455078125, "step": 2209 }, { "epoch": 0.7996020080502917, "grad_norm": 1.4931848049163818, "learning_rate": 5.700919464472876e-06, "loss": 0.26825714111328125, "step": 2210 }, { "epoch": 0.7999638189136629, "grad_norm": 1.6118346452713013, "learning_rate": 5.681426173588378e-06, "loss": 0.2526512145996094, "step": 2211 }, { "epoch": 0.800325629777034, "grad_norm": 2.5751922130584717, "learning_rate": 5.661960747936037e-06, "loss": 0.3402557373046875, "step": 2212 }, { "epoch": 0.8006874406404052, "grad_norm": 0.4405186176300049, "learning_rate": 5.642523225397321e-06, "loss": 0.28857421875, "step": 2213 }, { "epoch": 0.8010492515037764, "grad_norm": 0.5349907279014587, "learning_rate": 5.623113643799392e-06, "loss": 0.2841949462890625, "step": 2214 }, { "epoch": 0.8014110623671475, "grad_norm": 1.2355293035507202, "learning_rate": 5.603732040915042e-06, "loss": 0.281341552734375, "step": 2215 }, { "epoch": 0.8017728732305187, "grad_norm": 0.5368197560310364, "learning_rate": 5.5843784544626175e-06, "loss": 0.2994537353515625, "step": 2216 }, { "epoch": 0.8021346840938899, "grad_norm": 1.0359838008880615, "learning_rate": 5.5650529221059405e-06, "loss": 0.314483642578125, "step": 2217 }, { "epoch": 0.802496494957261, "grad_norm": 1.3006187677383423, "learning_rate": 5.545755481454227e-06, "loss": 0.30429840087890625, "step": 2218 }, { "epoch": 0.8028583058206322, "grad_norm": 2.3541016578674316, "learning_rate": 5.526486170062035e-06, "loss": 0.3230915069580078, "step": 2219 }, { "epoch": 0.8032201166840034, "grad_norm": 2.3784549236297607, "learning_rate": 5.5072450254291795e-06, "loss": 0.3373260498046875, "step": 2220 }, { "epoch": 0.8035819275473746, "grad_norm": 0.635913074016571, "learning_rate": 5.488032085000647e-06, "loss": 0.30567169189453125, "step": 2221 }, { "epoch": 0.8039437384107457, "grad_norm": 1.3369684219360352, "learning_rate": 5.468847386166563e-06, "loss": 0.28794097900390625, "step": 2222 }, { "epoch": 0.8043055492741169, "grad_norm": 1.2609206438064575, "learning_rate": 5.449690966262067e-06, "loss": 0.3178558349609375, "step": 2223 }, { "epoch": 0.8046673601374881, "grad_norm": 0.5007044672966003, "learning_rate": 5.4305628625672795e-06, "loss": 0.24985504150390625, "step": 2224 }, { "epoch": 0.8050291710008594, "grad_norm": 0.4714759290218353, "learning_rate": 5.4114631123072e-06, "loss": 0.30484771728515625, "step": 2225 }, { "epoch": 0.8053909818642305, "grad_norm": 1.4469444751739502, "learning_rate": 5.392391752651673e-06, "loss": 0.30603790283203125, "step": 2226 }, { "epoch": 0.8057527927276017, "grad_norm": 0.8596948385238647, "learning_rate": 5.373348820715274e-06, "loss": 0.2761688232421875, "step": 2227 }, { "epoch": 0.8061146035909729, "grad_norm": 1.2795765399932861, "learning_rate": 5.35433435355726e-06, "loss": 0.2819061279296875, "step": 2228 }, { "epoch": 0.806476414454344, "grad_norm": 0.9610520005226135, "learning_rate": 5.335348388181496e-06, "loss": 0.3068084716796875, "step": 2229 }, { "epoch": 0.8068382253177152, "grad_norm": 3.6899375915527344, "learning_rate": 5.316390961536375e-06, "loss": 0.342559814453125, "step": 2230 }, { "epoch": 0.8072000361810864, "grad_norm": 0.5941717028617859, "learning_rate": 5.297462110514755e-06, "loss": 0.3025054931640625, "step": 2231 }, { "epoch": 0.8075618470444575, "grad_norm": 0.509285032749176, "learning_rate": 5.278561871953876e-06, "loss": 0.27512359619140625, "step": 2232 }, { "epoch": 0.8079236579078287, "grad_norm": 1.3889234066009521, "learning_rate": 5.259690282635313e-06, "loss": 0.2910003662109375, "step": 2233 }, { "epoch": 0.8082854687711999, "grad_norm": 1.4816780090332031, "learning_rate": 5.240847379284864e-06, "loss": 0.2584228515625, "step": 2234 }, { "epoch": 0.8086472796345711, "grad_norm": 0.9820772409439087, "learning_rate": 5.222033198572527e-06, "loss": 0.2782859802246094, "step": 2235 }, { "epoch": 0.8090090904979422, "grad_norm": 0.9131484031677246, "learning_rate": 5.203247777112381e-06, "loss": 0.31917572021484375, "step": 2236 }, { "epoch": 0.8093709013613134, "grad_norm": 2.221553325653076, "learning_rate": 5.184491151462548e-06, "loss": 0.2743682861328125, "step": 2237 }, { "epoch": 0.8097327122246846, "grad_norm": 2.7362587451934814, "learning_rate": 5.165763358125104e-06, "loss": 0.3211517333984375, "step": 2238 }, { "epoch": 0.8100945230880557, "grad_norm": 1.2823468446731567, "learning_rate": 5.147064433546023e-06, "loss": 0.3033905029296875, "step": 2239 }, { "epoch": 0.8104563339514269, "grad_norm": 1.2528023719787598, "learning_rate": 5.1283944141150925e-06, "loss": 0.26230621337890625, "step": 2240 }, { "epoch": 0.8108181448147981, "grad_norm": 0.691731870174408, "learning_rate": 5.109753336165841e-06, "loss": 0.28717803955078125, "step": 2241 }, { "epoch": 0.8111799556781693, "grad_norm": 0.8452617526054382, "learning_rate": 5.0911412359754965e-06, "loss": 0.2809906005859375, "step": 2242 }, { "epoch": 0.8115417665415404, "grad_norm": 3.220432996749878, "learning_rate": 5.0725581497648725e-06, "loss": 0.32253265380859375, "step": 2243 }, { "epoch": 0.8119035774049116, "grad_norm": 1.139315128326416, "learning_rate": 5.0540041136983255e-06, "loss": 0.27613067626953125, "step": 2244 }, { "epoch": 0.8122653882682828, "grad_norm": 0.7463616728782654, "learning_rate": 5.035479163883678e-06, "loss": 0.300567626953125, "step": 2245 }, { "epoch": 0.8126271991316539, "grad_norm": 0.5989643335342407, "learning_rate": 5.016983336372143e-06, "loss": 0.261077880859375, "step": 2246 }, { "epoch": 0.8129890099950251, "grad_norm": 1.8069732189178467, "learning_rate": 4.998516667158275e-06, "loss": 0.31851959228515625, "step": 2247 }, { "epoch": 0.8133508208583963, "grad_norm": 0.9651872515678406, "learning_rate": 4.98007919217987e-06, "loss": 0.3012542724609375, "step": 2248 }, { "epoch": 0.8137126317217674, "grad_norm": 0.9078238010406494, "learning_rate": 4.961670947317905e-06, "loss": 0.2769317626953125, "step": 2249 }, { "epoch": 0.8140744425851386, "grad_norm": 0.8456000685691833, "learning_rate": 4.943291968396482e-06, "loss": 0.32692718505859375, "step": 2250 }, { "epoch": 0.8144362534485098, "grad_norm": 2.308664083480835, "learning_rate": 4.924942291182753e-06, "loss": 0.2597923278808594, "step": 2251 }, { "epoch": 0.814798064311881, "grad_norm": 2.1748220920562744, "learning_rate": 4.906621951386834e-06, "loss": 0.26613616943359375, "step": 2252 }, { "epoch": 0.8151598751752521, "grad_norm": 0.5640010833740234, "learning_rate": 4.888330984661758e-06, "loss": 0.3013763427734375, "step": 2253 }, { "epoch": 0.8155216860386233, "grad_norm": 0.7334820032119751, "learning_rate": 4.870069426603386e-06, "loss": 0.29294586181640625, "step": 2254 }, { "epoch": 0.8158834969019945, "grad_norm": 1.0253266096115112, "learning_rate": 4.851837312750354e-06, "loss": 0.29473876953125, "step": 2255 }, { "epoch": 0.8162453077653656, "grad_norm": 1.0358951091766357, "learning_rate": 4.833634678583994e-06, "loss": 0.2912139892578125, "step": 2256 }, { "epoch": 0.8166071186287368, "grad_norm": 1.3727002143859863, "learning_rate": 4.8154615595282715e-06, "loss": 0.289825439453125, "step": 2257 }, { "epoch": 0.816968929492108, "grad_norm": 2.7865095138549805, "learning_rate": 4.797317990949701e-06, "loss": 0.2799530029296875, "step": 2258 }, { "epoch": 0.8173307403554791, "grad_norm": 0.8314295411109924, "learning_rate": 4.779204008157314e-06, "loss": 0.29665374755859375, "step": 2259 }, { "epoch": 0.8176925512188503, "grad_norm": 1.7118474245071411, "learning_rate": 4.761119646402534e-06, "loss": 0.31693267822265625, "step": 2260 }, { "epoch": 0.8180543620822215, "grad_norm": 0.4357931315898895, "learning_rate": 4.743064940879169e-06, "loss": 0.3055419921875, "step": 2261 }, { "epoch": 0.8184161729455927, "grad_norm": 1.111654281616211, "learning_rate": 4.725039926723294e-06, "loss": 0.303985595703125, "step": 2262 }, { "epoch": 0.8187779838089638, "grad_norm": 0.573691189289093, "learning_rate": 4.707044639013205e-06, "loss": 0.3031768798828125, "step": 2263 }, { "epoch": 0.819139794672335, "grad_norm": 0.8130781650543213, "learning_rate": 4.6890791127693526e-06, "loss": 0.309844970703125, "step": 2264 }, { "epoch": 0.8195016055357062, "grad_norm": 1.9664064645767212, "learning_rate": 4.671143382954264e-06, "loss": 0.26552581787109375, "step": 2265 }, { "epoch": 0.8198634163990773, "grad_norm": 1.8030704259872437, "learning_rate": 4.653237484472484e-06, "loss": 0.27243804931640625, "step": 2266 }, { "epoch": 0.8202252272624485, "grad_norm": 1.1656068563461304, "learning_rate": 4.635361452170495e-06, "loss": 0.28789520263671875, "step": 2267 }, { "epoch": 0.8205870381258197, "grad_norm": 1.5586278438568115, "learning_rate": 4.6175153208366745e-06, "loss": 0.3110198974609375, "step": 2268 }, { "epoch": 0.8209488489891908, "grad_norm": 0.5388164520263672, "learning_rate": 4.599699125201191e-06, "loss": 0.303802490234375, "step": 2269 }, { "epoch": 0.821310659852562, "grad_norm": 2.5052483081817627, "learning_rate": 4.581912899935965e-06, "loss": 0.25556182861328125, "step": 2270 }, { "epoch": 0.8216724707159333, "grad_norm": 2.106959342956543, "learning_rate": 4.564156679654586e-06, "loss": 0.26091766357421875, "step": 2271 }, { "epoch": 0.8220342815793045, "grad_norm": 1.147621989250183, "learning_rate": 4.546430498912264e-06, "loss": 0.2683258056640625, "step": 2272 }, { "epoch": 0.8223960924426756, "grad_norm": 2.0389187335968018, "learning_rate": 4.528734392205738e-06, "loss": 0.30091094970703125, "step": 2273 }, { "epoch": 0.8227579033060468, "grad_norm": 0.7868401408195496, "learning_rate": 4.51106839397322e-06, "loss": 0.3075065612792969, "step": 2274 }, { "epoch": 0.823119714169418, "grad_norm": 1.1133596897125244, "learning_rate": 4.493432538594331e-06, "loss": 0.28513336181640625, "step": 2275 }, { "epoch": 0.8234815250327892, "grad_norm": 1.0912387371063232, "learning_rate": 4.4758268603900265e-06, "loss": 0.275054931640625, "step": 2276 }, { "epoch": 0.8238433358961603, "grad_norm": 0.6080613732337952, "learning_rate": 4.45825139362255e-06, "loss": 0.271728515625, "step": 2277 }, { "epoch": 0.8242051467595315, "grad_norm": 0.9764197468757629, "learning_rate": 4.440706172495337e-06, "loss": 0.3032684326171875, "step": 2278 }, { "epoch": 0.8245669576229027, "grad_norm": 0.675006091594696, "learning_rate": 4.4231912311529635e-06, "loss": 0.2855377197265625, "step": 2279 }, { "epoch": 0.8249287684862738, "grad_norm": 0.42420098185539246, "learning_rate": 4.405706603681079e-06, "loss": 0.27307891845703125, "step": 2280 }, { "epoch": 0.825290579349645, "grad_norm": 1.1354732513427734, "learning_rate": 4.3882523241063435e-06, "loss": 0.29736328125, "step": 2281 }, { "epoch": 0.8256523902130162, "grad_norm": 0.5796273350715637, "learning_rate": 4.370828426396354e-06, "loss": 0.2523307800292969, "step": 2282 }, { "epoch": 0.8260142010763873, "grad_norm": 2.974097967147827, "learning_rate": 4.35343494445958e-06, "loss": 0.31233978271484375, "step": 2283 }, { "epoch": 0.8263760119397585, "grad_norm": 0.8145573139190674, "learning_rate": 4.336071912145296e-06, "loss": 0.267486572265625, "step": 2284 }, { "epoch": 0.8267378228031297, "grad_norm": 2.5764949321746826, "learning_rate": 4.318739363243533e-06, "loss": 0.30074310302734375, "step": 2285 }, { "epoch": 0.8270996336665009, "grad_norm": 0.9035328030586243, "learning_rate": 4.3014373314849924e-06, "loss": 0.293121337890625, "step": 2286 }, { "epoch": 0.827461444529872, "grad_norm": 0.5864529013633728, "learning_rate": 4.284165850540978e-06, "loss": 0.2516326904296875, "step": 2287 }, { "epoch": 0.8278232553932432, "grad_norm": 1.7834675312042236, "learning_rate": 4.266924954023346e-06, "loss": 0.31320953369140625, "step": 2288 }, { "epoch": 0.8281850662566144, "grad_norm": 0.798608660697937, "learning_rate": 4.249714675484431e-06, "loss": 0.28376007080078125, "step": 2289 }, { "epoch": 0.8285468771199855, "grad_norm": 0.9023197293281555, "learning_rate": 4.232535048416981e-06, "loss": 0.2818145751953125, "step": 2290 }, { "epoch": 0.8289086879833567, "grad_norm": 1.1018948554992676, "learning_rate": 4.215386106254098e-06, "loss": 0.27079010009765625, "step": 2291 }, { "epoch": 0.8292704988467279, "grad_norm": 1.7548854351043701, "learning_rate": 4.198267882369164e-06, "loss": 0.2933197021484375, "step": 2292 }, { "epoch": 0.829632309710099, "grad_norm": 2.842466115951538, "learning_rate": 4.181180410075776e-06, "loss": 0.25597381591796875, "step": 2293 }, { "epoch": 0.8299941205734702, "grad_norm": 1.4402354955673218, "learning_rate": 4.164123722627702e-06, "loss": 0.3159637451171875, "step": 2294 }, { "epoch": 0.8303559314368414, "grad_norm": 1.3882596492767334, "learning_rate": 4.147097853218785e-06, "loss": 0.30483245849609375, "step": 2295 }, { "epoch": 0.8307177423002126, "grad_norm": 1.9652047157287598, "learning_rate": 4.130102834982892e-06, "loss": 0.288116455078125, "step": 2296 }, { "epoch": 0.8310795531635837, "grad_norm": 0.827353835105896, "learning_rate": 4.113138700993866e-06, "loss": 0.3130645751953125, "step": 2297 }, { "epoch": 0.8314413640269549, "grad_norm": 0.5175329446792603, "learning_rate": 4.096205484265436e-06, "loss": 0.3018035888671875, "step": 2298 }, { "epoch": 0.8318031748903261, "grad_norm": 0.6065088510513306, "learning_rate": 4.079303217751161e-06, "loss": 0.2666015625, "step": 2299 }, { "epoch": 0.8321649857536972, "grad_norm": 2.722536087036133, "learning_rate": 4.062431934344371e-06, "loss": 0.2715301513671875, "step": 2300 }, { "epoch": 0.8325267966170684, "grad_norm": 1.8447721004486084, "learning_rate": 4.045591666878101e-06, "loss": 0.2789306640625, "step": 2301 }, { "epoch": 0.8328886074804396, "grad_norm": 0.9914527535438538, "learning_rate": 4.028782448125024e-06, "loss": 0.27855682373046875, "step": 2302 }, { "epoch": 0.8332504183438108, "grad_norm": 3.175940752029419, "learning_rate": 4.012004310797395e-06, "loss": 0.2782745361328125, "step": 2303 }, { "epoch": 0.8336122292071819, "grad_norm": 1.5735642910003662, "learning_rate": 3.9952572875469786e-06, "loss": 0.26641845703125, "step": 2304 }, { "epoch": 0.8339740400705531, "grad_norm": 0.8239665627479553, "learning_rate": 3.978541410964984e-06, "loss": 0.30533599853515625, "step": 2305 }, { "epoch": 0.8343358509339243, "grad_norm": 2.3251049518585205, "learning_rate": 3.9618567135820126e-06, "loss": 0.308624267578125, "step": 2306 }, { "epoch": 0.8346976617972954, "grad_norm": 1.7016351222991943, "learning_rate": 3.945203227867987e-06, "loss": 0.2841033935546875, "step": 2307 }, { "epoch": 0.8350594726606666, "grad_norm": 2.083376407623291, "learning_rate": 3.928580986232082e-06, "loss": 0.3224945068359375, "step": 2308 }, { "epoch": 0.8354212835240378, "grad_norm": 1.7445425987243652, "learning_rate": 3.911990021022676e-06, "loss": 0.2474365234375, "step": 2309 }, { "epoch": 0.8357830943874089, "grad_norm": 0.9819921851158142, "learning_rate": 3.8954303645272836e-06, "loss": 0.2697601318359375, "step": 2310 }, { "epoch": 0.8361449052507801, "grad_norm": 1.2429322004318237, "learning_rate": 3.8789020489724794e-06, "loss": 0.28527069091796875, "step": 2311 }, { "epoch": 0.8365067161141513, "grad_norm": 1.0229772329330444, "learning_rate": 3.862405106523861e-06, "loss": 0.3012542724609375, "step": 2312 }, { "epoch": 0.8368685269775225, "grad_norm": 0.5867646336555481, "learning_rate": 3.845939569285955e-06, "loss": 0.30279541015625, "step": 2313 }, { "epoch": 0.8372303378408936, "grad_norm": 1.3870441913604736, "learning_rate": 3.82950546930218e-06, "loss": 0.2374725341796875, "step": 2314 }, { "epoch": 0.8375921487042648, "grad_norm": 3.194931983947754, "learning_rate": 3.8131028385547696e-06, "loss": 0.3105010986328125, "step": 2315 }, { "epoch": 0.837953959567636, "grad_norm": 1.039365291595459, "learning_rate": 3.7967317089647183e-06, "loss": 0.28398895263671875, "step": 2316 }, { "epoch": 0.8383157704310072, "grad_norm": 2.785952568054199, "learning_rate": 3.7803921123917176e-06, "loss": 0.32610321044921875, "step": 2317 }, { "epoch": 0.8386775812943784, "grad_norm": 1.900559902191162, "learning_rate": 3.7640840806340918e-06, "loss": 0.333770751953125, "step": 2318 }, { "epoch": 0.8390393921577496, "grad_norm": 1.8667652606964111, "learning_rate": 3.7478076454287317e-06, "loss": 0.3119392395019531, "step": 2319 }, { "epoch": 0.8394012030211208, "grad_norm": 1.9208897352218628, "learning_rate": 3.7315628384510527e-06, "loss": 0.31109619140625, "step": 2320 }, { "epoch": 0.8397630138844919, "grad_norm": 0.6207360029220581, "learning_rate": 3.7153496913149025e-06, "loss": 0.25585174560546875, "step": 2321 }, { "epoch": 0.8401248247478631, "grad_norm": 1.0399773120880127, "learning_rate": 3.69916823557253e-06, "loss": 0.26750946044921875, "step": 2322 }, { "epoch": 0.8404866356112343, "grad_norm": 0.4562792479991913, "learning_rate": 3.6830185027144995e-06, "loss": 0.29759979248046875, "step": 2323 }, { "epoch": 0.8408484464746054, "grad_norm": 1.0888195037841797, "learning_rate": 3.666900524169643e-06, "loss": 0.2876739501953125, "step": 2324 }, { "epoch": 0.8412102573379766, "grad_norm": 0.5295800566673279, "learning_rate": 3.6508143313049995e-06, "loss": 0.2576904296875, "step": 2325 }, { "epoch": 0.8415720682013478, "grad_norm": 1.6873384714126587, "learning_rate": 3.6347599554257395e-06, "loss": 0.27767181396484375, "step": 2326 }, { "epoch": 0.841933879064719, "grad_norm": 0.6908431053161621, "learning_rate": 3.6187374277751297e-06, "loss": 0.2905426025390625, "step": 2327 }, { "epoch": 0.8422956899280901, "grad_norm": 3.3715288639068604, "learning_rate": 3.6027467795344386e-06, "loss": 0.3397216796875, "step": 2328 }, { "epoch": 0.8426575007914613, "grad_norm": 3.301405906677246, "learning_rate": 3.5867880418229194e-06, "loss": 0.3484344482421875, "step": 2329 }, { "epoch": 0.8430193116548325, "grad_norm": 1.1306209564208984, "learning_rate": 3.570861245697703e-06, "loss": 0.297454833984375, "step": 2330 }, { "epoch": 0.8433811225182036, "grad_norm": 0.6730585098266602, "learning_rate": 3.5549664221537693e-06, "loss": 0.2631378173828125, "step": 2331 }, { "epoch": 0.8437429333815748, "grad_norm": 1.2122313976287842, "learning_rate": 3.5391036021238746e-06, "loss": 0.2995452880859375, "step": 2332 }, { "epoch": 0.844104744244946, "grad_norm": 0.5499914288520813, "learning_rate": 3.523272816478489e-06, "loss": 0.284759521484375, "step": 2333 }, { "epoch": 0.8444665551083171, "grad_norm": 0.41732051968574524, "learning_rate": 3.5074740960257425e-06, "loss": 0.29337310791015625, "step": 2334 }, { "epoch": 0.8448283659716883, "grad_norm": 1.5894269943237305, "learning_rate": 3.491707471511374e-06, "loss": 0.28768157958984375, "step": 2335 }, { "epoch": 0.8451901768350595, "grad_norm": 0.5527780652046204, "learning_rate": 3.475972973618651e-06, "loss": 0.31212615966796875, "step": 2336 }, { "epoch": 0.8455519876984307, "grad_norm": 1.145338535308838, "learning_rate": 3.4602706329683123e-06, "loss": 0.2928009033203125, "step": 2337 }, { "epoch": 0.8459137985618018, "grad_norm": 0.6073173880577087, "learning_rate": 3.4446004801185385e-06, "loss": 0.29955291748046875, "step": 2338 }, { "epoch": 0.846275609425173, "grad_norm": 0.9840760231018066, "learning_rate": 3.4289625455648466e-06, "loss": 0.2725982666015625, "step": 2339 }, { "epoch": 0.8466374202885442, "grad_norm": 0.9270073771476746, "learning_rate": 3.4133568597400668e-06, "loss": 0.2753448486328125, "step": 2340 }, { "epoch": 0.8469992311519153, "grad_norm": 1.810293197631836, "learning_rate": 3.3977834530142673e-06, "loss": 0.291748046875, "step": 2341 }, { "epoch": 0.8473610420152865, "grad_norm": 3.133788585662842, "learning_rate": 3.3822423556946937e-06, "loss": 0.3504486083984375, "step": 2342 }, { "epoch": 0.8477228528786577, "grad_norm": 3.106889486312866, "learning_rate": 3.3667335980257244e-06, "loss": 0.26869964599609375, "step": 2343 }, { "epoch": 0.8480846637420288, "grad_norm": 0.8451537489891052, "learning_rate": 3.3512572101887918e-06, "loss": 0.2741851806640625, "step": 2344 }, { "epoch": 0.8484464746054, "grad_norm": 1.9349422454833984, "learning_rate": 3.3358132223023355e-06, "loss": 0.2827606201171875, "step": 2345 }, { "epoch": 0.8488082854687712, "grad_norm": 2.234168291091919, "learning_rate": 3.3204016644217484e-06, "loss": 0.3229217529296875, "step": 2346 }, { "epoch": 0.8491700963321424, "grad_norm": 1.5073753595352173, "learning_rate": 3.3050225665393152e-06, "loss": 0.29306793212890625, "step": 2347 }, { "epoch": 0.8495319071955135, "grad_norm": 0.5955642461776733, "learning_rate": 3.2896759585841354e-06, "loss": 0.314788818359375, "step": 2348 }, { "epoch": 0.8498937180588847, "grad_norm": 1.0422202348709106, "learning_rate": 3.274361870422089e-06, "loss": 0.293365478515625, "step": 2349 }, { "epoch": 0.8502555289222559, "grad_norm": 1.0319148302078247, "learning_rate": 3.2590803318557686e-06, "loss": 0.29549407958984375, "step": 2350 }, { "epoch": 0.850617339785627, "grad_norm": 1.1214889287948608, "learning_rate": 3.2438313726244243e-06, "loss": 0.28440093994140625, "step": 2351 }, { "epoch": 0.8509791506489982, "grad_norm": 1.0804264545440674, "learning_rate": 3.2286150224038983e-06, "loss": 0.27991485595703125, "step": 2352 }, { "epoch": 0.8513409615123694, "grad_norm": 2.942901849746704, "learning_rate": 3.2134313108065785e-06, "loss": 0.33631134033203125, "step": 2353 }, { "epoch": 0.8517027723757405, "grad_norm": 1.4523701667785645, "learning_rate": 3.1982802673813283e-06, "loss": 0.32126617431640625, "step": 2354 }, { "epoch": 0.8520645832391117, "grad_norm": 0.6427270174026489, "learning_rate": 3.183161921613449e-06, "loss": 0.2743072509765625, "step": 2355 }, { "epoch": 0.8524263941024829, "grad_norm": 0.7443055510520935, "learning_rate": 3.1680763029245987e-06, "loss": 0.29209136962890625, "step": 2356 }, { "epoch": 0.8527882049658541, "grad_norm": 1.522828221321106, "learning_rate": 3.1530234406727445e-06, "loss": 0.2562408447265625, "step": 2357 }, { "epoch": 0.8531500158292252, "grad_norm": 0.6814577579498291, "learning_rate": 3.1380033641521112e-06, "loss": 0.272674560546875, "step": 2358 }, { "epoch": 0.8535118266925964, "grad_norm": 0.7288751602172852, "learning_rate": 3.1230161025931126e-06, "loss": 0.298919677734375, "step": 2359 }, { "epoch": 0.8538736375559676, "grad_norm": 2.123044013977051, "learning_rate": 3.108061685162318e-06, "loss": 0.3147430419921875, "step": 2360 }, { "epoch": 0.8542354484193387, "grad_norm": 0.6025727391242981, "learning_rate": 3.093140140962365e-06, "loss": 0.29135894775390625, "step": 2361 }, { "epoch": 0.8545972592827099, "grad_norm": 0.6285558938980103, "learning_rate": 3.078251499031917e-06, "loss": 0.2858428955078125, "step": 2362 }, { "epoch": 0.8549590701460812, "grad_norm": 0.5822709798812866, "learning_rate": 3.063395788345609e-06, "loss": 0.28986358642578125, "step": 2363 }, { "epoch": 0.8553208810094524, "grad_norm": 0.9657636284828186, "learning_rate": 3.0485730378139955e-06, "loss": 0.2907562255859375, "step": 2364 }, { "epoch": 0.8556826918728235, "grad_norm": 1.69796621799469, "learning_rate": 3.0337832762834773e-06, "loss": 0.3146672248840332, "step": 2365 }, { "epoch": 0.8560445027361947, "grad_norm": 0.6536872386932373, "learning_rate": 3.0190265325362645e-06, "loss": 0.307769775390625, "step": 2366 }, { "epoch": 0.8564063135995659, "grad_norm": 0.9724010229110718, "learning_rate": 3.0043028352903027e-06, "loss": 0.2917938232421875, "step": 2367 }, { "epoch": 0.856768124462937, "grad_norm": 0.9800344705581665, "learning_rate": 2.9896122131992335e-06, "loss": 0.2830648422241211, "step": 2368 }, { "epoch": 0.8571299353263082, "grad_norm": 2.031968355178833, "learning_rate": 2.974954694852328e-06, "loss": 0.333251953125, "step": 2369 }, { "epoch": 0.8574917461896794, "grad_norm": 0.9909080266952515, "learning_rate": 2.960330308774433e-06, "loss": 0.2594490051269531, "step": 2370 }, { "epoch": 0.8578535570530506, "grad_norm": 1.2877610921859741, "learning_rate": 2.9457390834259157e-06, "loss": 0.3188934326171875, "step": 2371 }, { "epoch": 0.8582153679164217, "grad_norm": 3.3190362453460693, "learning_rate": 2.9311810472026157e-06, "loss": 0.27130889892578125, "step": 2372 }, { "epoch": 0.8585771787797929, "grad_norm": 1.5177143812179565, "learning_rate": 2.916656228435788e-06, "loss": 0.27747344970703125, "step": 2373 }, { "epoch": 0.8589389896431641, "grad_norm": 1.6471965312957764, "learning_rate": 2.9021646553920303e-06, "loss": 0.30120849609375, "step": 2374 }, { "epoch": 0.8593008005065352, "grad_norm": 0.5263684391975403, "learning_rate": 2.8877063562732453e-06, "loss": 0.28437042236328125, "step": 2375 }, { "epoch": 0.8596626113699064, "grad_norm": 0.9101806879043579, "learning_rate": 2.873281359216584e-06, "loss": 0.307098388671875, "step": 2376 }, { "epoch": 0.8600244222332776, "grad_norm": 1.3606419563293457, "learning_rate": 2.8588896922943886e-06, "loss": 0.3192634582519531, "step": 2377 }, { "epoch": 0.8603862330966487, "grad_norm": 0.856496274471283, "learning_rate": 2.844531383514135e-06, "loss": 0.29651641845703125, "step": 2378 }, { "epoch": 0.8607480439600199, "grad_norm": 0.4934034049510956, "learning_rate": 2.8302064608183856e-06, "loss": 0.29648590087890625, "step": 2379 }, { "epoch": 0.8611098548233911, "grad_norm": 0.5638453960418701, "learning_rate": 2.815914952084722e-06, "loss": 0.30132293701171875, "step": 2380 }, { "epoch": 0.8614716656867623, "grad_norm": 1.4481974840164185, "learning_rate": 2.801656885125714e-06, "loss": 0.3098945617675781, "step": 2381 }, { "epoch": 0.8618334765501334, "grad_norm": 2.9060449600219727, "learning_rate": 2.7874322876888382e-06, "loss": 0.25574493408203125, "step": 2382 }, { "epoch": 0.8621952874135046, "grad_norm": 0.7061432003974915, "learning_rate": 2.773241187456439e-06, "loss": 0.29816436767578125, "step": 2383 }, { "epoch": 0.8625570982768758, "grad_norm": 1.2613757848739624, "learning_rate": 2.7590836120456723e-06, "loss": 0.24522018432617188, "step": 2384 }, { "epoch": 0.8629189091402469, "grad_norm": 1.7661710977554321, "learning_rate": 2.744959589008458e-06, "loss": 0.29233551025390625, "step": 2385 }, { "epoch": 0.8632807200036181, "grad_norm": 1.7250845432281494, "learning_rate": 2.7308691458314117e-06, "loss": 0.26497650146484375, "step": 2386 }, { "epoch": 0.8636425308669893, "grad_norm": 1.0888134241104126, "learning_rate": 2.7168123099358033e-06, "loss": 0.30504608154296875, "step": 2387 }, { "epoch": 0.8640043417303604, "grad_norm": 1.0252019166946411, "learning_rate": 2.7027891086774994e-06, "loss": 0.294097900390625, "step": 2388 }, { "epoch": 0.8643661525937316, "grad_norm": 2.0829555988311768, "learning_rate": 2.688799569346907e-06, "loss": 0.31359100341796875, "step": 2389 }, { "epoch": 0.8647279634571028, "grad_norm": 1.0206106901168823, "learning_rate": 2.6748437191689336e-06, "loss": 0.2915802001953125, "step": 2390 }, { "epoch": 0.865089774320474, "grad_norm": 1.0499094724655151, "learning_rate": 2.660921585302918e-06, "loss": 0.32868194580078125, "step": 2391 }, { "epoch": 0.8654515851838451, "grad_norm": 2.5620691776275635, "learning_rate": 2.647033194842581e-06, "loss": 0.26915740966796875, "step": 2392 }, { "epoch": 0.8658133960472163, "grad_norm": 1.0885593891143799, "learning_rate": 2.63317857481598e-06, "loss": 0.29238128662109375, "step": 2393 }, { "epoch": 0.8661752069105875, "grad_norm": 0.8160111904144287, "learning_rate": 2.619357752185454e-06, "loss": 0.30374908447265625, "step": 2394 }, { "epoch": 0.8665370177739586, "grad_norm": 0.7954045534133911, "learning_rate": 2.6055707538475617e-06, "loss": 0.26825714111328125, "step": 2395 }, { "epoch": 0.8668988286373298, "grad_norm": 0.5491597056388855, "learning_rate": 2.5918176066330424e-06, "loss": 0.30797576904296875, "step": 2396 }, { "epoch": 0.867260639500701, "grad_norm": 1.555437684059143, "learning_rate": 2.5780983373067624e-06, "loss": 0.2986259460449219, "step": 2397 }, { "epoch": 0.8676224503640722, "grad_norm": 1.3989065885543823, "learning_rate": 2.564412972567647e-06, "loss": 0.26226043701171875, "step": 2398 }, { "epoch": 0.8679842612274433, "grad_norm": 1.03159761428833, "learning_rate": 2.5507615390486516e-06, "loss": 0.27257537841796875, "step": 2399 }, { "epoch": 0.8683460720908145, "grad_norm": 0.8714413642883301, "learning_rate": 2.5371440633166943e-06, "loss": 0.2921562194824219, "step": 2400 }, { "epoch": 0.8687078829541857, "grad_norm": 0.9787154197692871, "learning_rate": 2.5235605718726033e-06, "loss": 0.2839927673339844, "step": 2401 }, { "epoch": 0.8690696938175568, "grad_norm": 0.5586144924163818, "learning_rate": 2.5100110911510766e-06, "loss": 0.2751312255859375, "step": 2402 }, { "epoch": 0.869431504680928, "grad_norm": 0.8120859861373901, "learning_rate": 2.4964956475206205e-06, "loss": 0.2886199951171875, "step": 2403 }, { "epoch": 0.8697933155442992, "grad_norm": 3.427284002304077, "learning_rate": 2.483014267283503e-06, "loss": 0.270782470703125, "step": 2404 }, { "epoch": 0.8701551264076703, "grad_norm": 1.1265039443969727, "learning_rate": 2.4695669766757037e-06, "loss": 0.31301116943359375, "step": 2405 }, { "epoch": 0.8705169372710415, "grad_norm": 1.642045021057129, "learning_rate": 2.4561538018668564e-06, "loss": 0.272430419921875, "step": 2406 }, { "epoch": 0.8708787481344127, "grad_norm": 1.3550918102264404, "learning_rate": 2.4427747689602098e-06, "loss": 0.2891082763671875, "step": 2407 }, { "epoch": 0.8712405589977839, "grad_norm": 1.1233429908752441, "learning_rate": 2.429429903992564e-06, "loss": 0.2877311706542969, "step": 2408 }, { "epoch": 0.8716023698611551, "grad_norm": 0.7778831124305725, "learning_rate": 2.4161192329342197e-06, "loss": 0.2835235595703125, "step": 2409 }, { "epoch": 0.8719641807245263, "grad_norm": 0.4552399218082428, "learning_rate": 2.4028427816889522e-06, "loss": 0.28662109375, "step": 2410 }, { "epoch": 0.8723259915878975, "grad_norm": 1.265568494796753, "learning_rate": 2.3896005760939202e-06, "loss": 0.29770660400390625, "step": 2411 }, { "epoch": 0.8726878024512686, "grad_norm": 0.9610320329666138, "learning_rate": 2.376392641919649e-06, "loss": 0.29604339599609375, "step": 2412 }, { "epoch": 0.8730496133146398, "grad_norm": 2.5403499603271484, "learning_rate": 2.3632190048699655e-06, "loss": 0.34296417236328125, "step": 2413 }, { "epoch": 0.873411424178011, "grad_norm": 1.3962208032608032, "learning_rate": 2.3500796905819523e-06, "loss": 0.30512237548828125, "step": 2414 }, { "epoch": 0.8737732350413822, "grad_norm": 0.5487320423126221, "learning_rate": 2.3369747246258913e-06, "loss": 0.2966270446777344, "step": 2415 }, { "epoch": 0.8741350459047533, "grad_norm": 0.9295533895492554, "learning_rate": 2.323904132505235e-06, "loss": 0.2998199462890625, "step": 2416 }, { "epoch": 0.8744968567681245, "grad_norm": 2.2646405696868896, "learning_rate": 2.3108679396565247e-06, "loss": 0.33514404296875, "step": 2417 }, { "epoch": 0.8748586676314957, "grad_norm": 1.6003949642181396, "learning_rate": 2.2978661714493656e-06, "loss": 0.27182769775390625, "step": 2418 }, { "epoch": 0.8752204784948668, "grad_norm": 2.3890576362609863, "learning_rate": 2.2848988531863658e-06, "loss": 0.28778839111328125, "step": 2419 }, { "epoch": 0.875582289358238, "grad_norm": 2.097421884536743, "learning_rate": 2.271966010103095e-06, "loss": 0.2760162353515625, "step": 2420 }, { "epoch": 0.8759441002216092, "grad_norm": 0.8860839605331421, "learning_rate": 2.2590676673680225e-06, "loss": 0.3043670654296875, "step": 2421 }, { "epoch": 0.8763059110849803, "grad_norm": 2.198458433151245, "learning_rate": 2.2462038500824934e-06, "loss": 0.32671356201171875, "step": 2422 }, { "epoch": 0.8766677219483515, "grad_norm": 1.0440469980239868, "learning_rate": 2.233374583280645e-06, "loss": 0.32712554931640625, "step": 2423 }, { "epoch": 0.8770295328117227, "grad_norm": 0.8599452972412109, "learning_rate": 2.220579891929384e-06, "loss": 0.28105926513671875, "step": 2424 }, { "epoch": 0.8773913436750939, "grad_norm": 1.3974498510360718, "learning_rate": 2.2078198009283368e-06, "loss": 0.27789306640625, "step": 2425 }, { "epoch": 0.877753154538465, "grad_norm": 0.5608438849449158, "learning_rate": 2.195094335109782e-06, "loss": 0.2916717529296875, "step": 2426 }, { "epoch": 0.8781149654018362, "grad_norm": 0.8236314058303833, "learning_rate": 2.182403519238623e-06, "loss": 0.287445068359375, "step": 2427 }, { "epoch": 0.8784767762652074, "grad_norm": 2.2258548736572266, "learning_rate": 2.169747378012328e-06, "loss": 0.2873382568359375, "step": 2428 }, { "epoch": 0.8788385871285785, "grad_norm": 0.7429357171058655, "learning_rate": 2.1571259360608865e-06, "loss": 0.3089447021484375, "step": 2429 }, { "epoch": 0.8792003979919497, "grad_norm": 0.5878556370735168, "learning_rate": 2.1445392179467573e-06, "loss": 0.27620697021484375, "step": 2430 }, { "epoch": 0.8795622088553209, "grad_norm": 0.721565306186676, "learning_rate": 2.131987248164822e-06, "loss": 0.2819061279296875, "step": 2431 }, { "epoch": 0.879924019718692, "grad_norm": 0.6127999424934387, "learning_rate": 2.1194700511423516e-06, "loss": 0.293853759765625, "step": 2432 }, { "epoch": 0.8802858305820632, "grad_norm": 1.1502107381820679, "learning_rate": 2.106987651238934e-06, "loss": 0.2783203125, "step": 2433 }, { "epoch": 0.8806476414454344, "grad_norm": 0.7562735080718994, "learning_rate": 2.094540072746434e-06, "loss": 0.2949256896972656, "step": 2434 }, { "epoch": 0.8810094523088056, "grad_norm": 1.5575865507125854, "learning_rate": 2.082127339888971e-06, "loss": 0.29620361328125, "step": 2435 }, { "epoch": 0.8813712631721767, "grad_norm": 1.633097529411316, "learning_rate": 2.0697494768228334e-06, "loss": 0.2545013427734375, "step": 2436 }, { "epoch": 0.8817330740355479, "grad_norm": 0.5730903744697571, "learning_rate": 2.0574065076364525e-06, "loss": 0.2899322509765625, "step": 2437 }, { "epoch": 0.8820948848989191, "grad_norm": 0.4490615129470825, "learning_rate": 2.045098456350363e-06, "loss": 0.26306915283203125, "step": 2438 }, { "epoch": 0.8824566957622902, "grad_norm": 2.434112787246704, "learning_rate": 2.0328253469171335e-06, "loss": 0.26007843017578125, "step": 2439 }, { "epoch": 0.8828185066256614, "grad_norm": 1.094253659248352, "learning_rate": 2.0205872032213403e-06, "loss": 0.2783355712890625, "step": 2440 }, { "epoch": 0.8831803174890326, "grad_norm": 0.5732084512710571, "learning_rate": 2.0083840490795103e-06, "loss": 0.2851409912109375, "step": 2441 }, { "epoch": 0.8835421283524038, "grad_norm": 1.8907042741775513, "learning_rate": 1.9962159082400823e-06, "loss": 0.31005859375, "step": 2442 }, { "epoch": 0.8839039392157749, "grad_norm": 0.9677457213401794, "learning_rate": 1.9840828043833494e-06, "loss": 0.277374267578125, "step": 2443 }, { "epoch": 0.8842657500791461, "grad_norm": 0.4350811839103699, "learning_rate": 1.971984761121426e-06, "loss": 0.2986602783203125, "step": 2444 }, { "epoch": 0.8846275609425173, "grad_norm": 1.400561809539795, "learning_rate": 1.959921801998188e-06, "loss": 0.26674652099609375, "step": 2445 }, { "epoch": 0.8849893718058884, "grad_norm": 0.5273115634918213, "learning_rate": 1.9478939504892393e-06, "loss": 0.291717529296875, "step": 2446 }, { "epoch": 0.8853511826692596, "grad_norm": 0.8047792315483093, "learning_rate": 1.935901230001862e-06, "loss": 0.27652740478515625, "step": 2447 }, { "epoch": 0.8857129935326308, "grad_norm": 2.9835476875305176, "learning_rate": 1.92394366387497e-06, "loss": 0.3135490417480469, "step": 2448 }, { "epoch": 0.886074804396002, "grad_norm": 1.2142313718795776, "learning_rate": 1.912021275379061e-06, "loss": 0.26873779296875, "step": 2449 }, { "epoch": 0.8864366152593731, "grad_norm": 0.807934582233429, "learning_rate": 1.90013408771617e-06, "loss": 0.28025054931640625, "step": 2450 }, { "epoch": 0.8867984261227443, "grad_norm": 0.6071181893348694, "learning_rate": 1.8882821240198402e-06, "loss": 0.27320098876953125, "step": 2451 }, { "epoch": 0.8871602369861155, "grad_norm": 2.969365358352661, "learning_rate": 1.8764654073550548e-06, "loss": 0.3360595703125, "step": 2452 }, { "epoch": 0.8875220478494866, "grad_norm": 0.4794723391532898, "learning_rate": 1.86468396071821e-06, "loss": 0.26380157470703125, "step": 2453 }, { "epoch": 0.8878838587128578, "grad_norm": 0.6277179718017578, "learning_rate": 1.8529378070370563e-06, "loss": 0.29814910888671875, "step": 2454 }, { "epoch": 0.8882456695762291, "grad_norm": 1.682379961013794, "learning_rate": 1.8412269691706686e-06, "loss": 0.295623779296875, "step": 2455 }, { "epoch": 0.8886074804396003, "grad_norm": 1.2135430574417114, "learning_rate": 1.8295514699093897e-06, "loss": 0.293792724609375, "step": 2456 }, { "epoch": 0.8889692913029714, "grad_norm": 1.4292277097702026, "learning_rate": 1.8179113319747843e-06, "loss": 0.25185394287109375, "step": 2457 }, { "epoch": 0.8893311021663426, "grad_norm": 0.503707230091095, "learning_rate": 1.8063065780196165e-06, "loss": 0.28069305419921875, "step": 2458 }, { "epoch": 0.8896929130297138, "grad_norm": 2.4467079639434814, "learning_rate": 1.7947372306277743e-06, "loss": 0.3182373046875, "step": 2459 }, { "epoch": 0.8900547238930849, "grad_norm": 0.801544189453125, "learning_rate": 1.7832033123142567e-06, "loss": 0.3092041015625, "step": 2460 }, { "epoch": 0.8904165347564561, "grad_norm": 1.1980706453323364, "learning_rate": 1.7717048455250997e-06, "loss": 0.30864715576171875, "step": 2461 }, { "epoch": 0.8907783456198273, "grad_norm": 2.1375415325164795, "learning_rate": 1.7602418526373522e-06, "loss": 0.3037567138671875, "step": 2462 }, { "epoch": 0.8911401564831984, "grad_norm": 1.2824114561080933, "learning_rate": 1.7488143559590342e-06, "loss": 0.312957763671875, "step": 2463 }, { "epoch": 0.8915019673465696, "grad_norm": 1.3645577430725098, "learning_rate": 1.7374223777290767e-06, "loss": 0.2945556640625, "step": 2464 }, { "epoch": 0.8918637782099408, "grad_norm": 0.6013411283493042, "learning_rate": 1.7260659401172963e-06, "loss": 0.2559394836425781, "step": 2465 }, { "epoch": 0.892225589073312, "grad_norm": 0.9642933011054993, "learning_rate": 1.7147450652243392e-06, "loss": 0.2582359313964844, "step": 2466 }, { "epoch": 0.8925873999366831, "grad_norm": 1.0280359983444214, "learning_rate": 1.7034597750816505e-06, "loss": 0.26473236083984375, "step": 2467 }, { "epoch": 0.8929492108000543, "grad_norm": 2.3164408206939697, "learning_rate": 1.6922100916514161e-06, "loss": 0.330963134765625, "step": 2468 }, { "epoch": 0.8933110216634255, "grad_norm": 0.9764476418495178, "learning_rate": 1.680996036826532e-06, "loss": 0.2960052490234375, "step": 2469 }, { "epoch": 0.8936728325267966, "grad_norm": 0.7567160725593567, "learning_rate": 1.6698176324305592e-06, "loss": 0.28272247314453125, "step": 2470 }, { "epoch": 0.8940346433901678, "grad_norm": 1.1153993606567383, "learning_rate": 1.6586749002176717e-06, "loss": 0.30060577392578125, "step": 2471 }, { "epoch": 0.894396454253539, "grad_norm": 2.7453603744506836, "learning_rate": 1.6475678618726365e-06, "loss": 0.3520050048828125, "step": 2472 }, { "epoch": 0.8947582651169101, "grad_norm": 1.5835955142974854, "learning_rate": 1.6364965390107456e-06, "loss": 0.28305816650390625, "step": 2473 }, { "epoch": 0.8951200759802813, "grad_norm": 1.286450982093811, "learning_rate": 1.6254609531777887e-06, "loss": 0.30609130859375, "step": 2474 }, { "epoch": 0.8954818868436525, "grad_norm": 1.2177766561508179, "learning_rate": 1.6144611258500065e-06, "loss": 0.26275634765625, "step": 2475 }, { "epoch": 0.8958436977070237, "grad_norm": 2.3710954189300537, "learning_rate": 1.603497078434051e-06, "loss": 0.2578887939453125, "step": 2476 }, { "epoch": 0.8962055085703948, "grad_norm": 1.7900912761688232, "learning_rate": 1.5925688322669497e-06, "loss": 0.25583648681640625, "step": 2477 }, { "epoch": 0.896567319433766, "grad_norm": 0.6512370109558105, "learning_rate": 1.5816764086160507e-06, "loss": 0.28632354736328125, "step": 2478 }, { "epoch": 0.8969291302971372, "grad_norm": 1.5020771026611328, "learning_rate": 1.5708198286789867e-06, "loss": 0.27440643310546875, "step": 2479 }, { "epoch": 0.8972909411605083, "grad_norm": 1.0633037090301514, "learning_rate": 1.5599991135836412e-06, "loss": 0.29296112060546875, "step": 2480 }, { "epoch": 0.8976527520238795, "grad_norm": 0.5514442324638367, "learning_rate": 1.5492142843880964e-06, "loss": 0.267181396484375, "step": 2481 }, { "epoch": 0.8980145628872507, "grad_norm": 1.780203938484192, "learning_rate": 1.5384653620806033e-06, "loss": 0.3011932373046875, "step": 2482 }, { "epoch": 0.8983763737506218, "grad_norm": 0.799961507320404, "learning_rate": 1.5277523675795225e-06, "loss": 0.29644012451171875, "step": 2483 }, { "epoch": 0.898738184613993, "grad_norm": 1.1681400537490845, "learning_rate": 1.5170753217333167e-06, "loss": 0.28028106689453125, "step": 2484 }, { "epoch": 0.8990999954773642, "grad_norm": 1.1862269639968872, "learning_rate": 1.5064342453204672e-06, "loss": 0.24827098846435547, "step": 2485 }, { "epoch": 0.8994618063407354, "grad_norm": 0.9190186858177185, "learning_rate": 1.4958291590494734e-06, "loss": 0.2904815673828125, "step": 2486 }, { "epoch": 0.8998236172041065, "grad_norm": 0.5445919036865234, "learning_rate": 1.4852600835587838e-06, "loss": 0.27497100830078125, "step": 2487 }, { "epoch": 0.9001854280674777, "grad_norm": 2.4288086891174316, "learning_rate": 1.474727039416768e-06, "loss": 0.32161712646484375, "step": 2488 }, { "epoch": 0.9005472389308489, "grad_norm": 1.1144391298294067, "learning_rate": 1.4642300471216796e-06, "loss": 0.303619384765625, "step": 2489 }, { "epoch": 0.90090904979422, "grad_norm": 0.5247443914413452, "learning_rate": 1.4537691271016074e-06, "loss": 0.25890350341796875, "step": 2490 }, { "epoch": 0.9012708606575912, "grad_norm": 1.2329001426696777, "learning_rate": 1.443344299714442e-06, "loss": 0.2601318359375, "step": 2491 }, { "epoch": 0.9016326715209624, "grad_norm": 1.564788818359375, "learning_rate": 1.4329555852478327e-06, "loss": 0.30511474609375, "step": 2492 }, { "epoch": 0.9019944823843336, "grad_norm": 2.0046029090881348, "learning_rate": 1.4226030039191586e-06, "loss": 0.30558013916015625, "step": 2493 }, { "epoch": 0.9023562932477047, "grad_norm": 0.7769598960876465, "learning_rate": 1.4122865758754678e-06, "loss": 0.27828216552734375, "step": 2494 }, { "epoch": 0.9027181041110759, "grad_norm": 0.638518214225769, "learning_rate": 1.4020063211934632e-06, "loss": 0.2577247619628906, "step": 2495 }, { "epoch": 0.9030799149744471, "grad_norm": 0.880139946937561, "learning_rate": 1.391762259879439e-06, "loss": 0.25821685791015625, "step": 2496 }, { "epoch": 0.9034417258378182, "grad_norm": 0.7132489085197449, "learning_rate": 1.381554411869257e-06, "loss": 0.28223609924316406, "step": 2497 }, { "epoch": 0.9038035367011894, "grad_norm": 1.083715796470642, "learning_rate": 1.3713827970283179e-06, "loss": 0.2975006103515625, "step": 2498 }, { "epoch": 0.9041653475645606, "grad_norm": 2.223935127258301, "learning_rate": 1.3612474351514914e-06, "loss": 0.30371856689453125, "step": 2499 }, { "epoch": 0.9045271584279317, "grad_norm": 1.5061171054840088, "learning_rate": 1.3511483459631048e-06, "loss": 0.28993988037109375, "step": 2500 }, { "epoch": 0.904888969291303, "grad_norm": 0.5928227305412292, "learning_rate": 1.3410855491168939e-06, "loss": 0.2975616455078125, "step": 2501 }, { "epoch": 0.9052507801546742, "grad_norm": 0.6319429278373718, "learning_rate": 1.3310590641959609e-06, "loss": 0.2927703857421875, "step": 2502 }, { "epoch": 0.9056125910180454, "grad_norm": 0.6753240823745728, "learning_rate": 1.321068910712755e-06, "loss": 0.26483154296875, "step": 2503 }, { "epoch": 0.9059744018814165, "grad_norm": 1.0242915153503418, "learning_rate": 1.3111151081090112e-06, "loss": 0.2959136962890625, "step": 2504 }, { "epoch": 0.9063362127447877, "grad_norm": 3.1280219554901123, "learning_rate": 1.3011976757557233e-06, "loss": 0.319915771484375, "step": 2505 }, { "epoch": 0.9066980236081589, "grad_norm": 0.9787948131561279, "learning_rate": 1.2913166329531035e-06, "loss": 0.28327178955078125, "step": 2506 }, { "epoch": 0.90705983447153, "grad_norm": 1.5217458009719849, "learning_rate": 1.2814719989305546e-06, "loss": 0.2675514221191406, "step": 2507 }, { "epoch": 0.9074216453349012, "grad_norm": 1.7884025573730469, "learning_rate": 1.271663792846618e-06, "loss": 0.265289306640625, "step": 2508 }, { "epoch": 0.9077834561982724, "grad_norm": 0.8021644949913025, "learning_rate": 1.2618920337889385e-06, "loss": 0.301910400390625, "step": 2509 }, { "epoch": 0.9081452670616436, "grad_norm": 1.8356159925460815, "learning_rate": 1.2521567407742485e-06, "loss": 0.3051605224609375, "step": 2510 }, { "epoch": 0.9085070779250147, "grad_norm": 3.2309060096740723, "learning_rate": 1.2424579327482954e-06, "loss": 0.370635986328125, "step": 2511 }, { "epoch": 0.9088688887883859, "grad_norm": 0.5516260266304016, "learning_rate": 1.2327956285858367e-06, "loss": 0.28733062744140625, "step": 2512 }, { "epoch": 0.9092306996517571, "grad_norm": 0.5435688495635986, "learning_rate": 1.223169847090584e-06, "loss": 0.272216796875, "step": 2513 }, { "epoch": 0.9095925105151282, "grad_norm": 1.1946479082107544, "learning_rate": 1.2135806069951706e-06, "loss": 0.260284423828125, "step": 2514 }, { "epoch": 0.9099543213784994, "grad_norm": 1.373612403869629, "learning_rate": 1.2040279269611244e-06, "loss": 0.2939605712890625, "step": 2515 }, { "epoch": 0.9103161322418706, "grad_norm": 0.5063194632530212, "learning_rate": 1.1945118255788168e-06, "loss": 0.27899932861328125, "step": 2516 }, { "epoch": 0.9106779431052417, "grad_norm": 2.168308973312378, "learning_rate": 1.1850323213674365e-06, "loss": 0.31874847412109375, "step": 2517 }, { "epoch": 0.9110397539686129, "grad_norm": 2.9335131645202637, "learning_rate": 1.1755894327749507e-06, "loss": 0.25054931640625, "step": 2518 }, { "epoch": 0.9114015648319841, "grad_norm": 1.3873571157455444, "learning_rate": 1.1661831781780731e-06, "loss": 0.27577972412109375, "step": 2519 }, { "epoch": 0.9117633756953553, "grad_norm": 0.6643727421760559, "learning_rate": 1.1568135758822186e-06, "loss": 0.26581573486328125, "step": 2520 }, { "epoch": 0.9121251865587264, "grad_norm": 2.2170052528381348, "learning_rate": 1.1474806441214748e-06, "loss": 0.30005645751953125, "step": 2521 }, { "epoch": 0.9124869974220976, "grad_norm": 1.8777565956115723, "learning_rate": 1.1381844010585662e-06, "loss": 0.32611846923828125, "step": 2522 }, { "epoch": 0.9128488082854688, "grad_norm": 0.655655026435852, "learning_rate": 1.1289248647848216e-06, "loss": 0.29293060302734375, "step": 2523 }, { "epoch": 0.9132106191488399, "grad_norm": 1.2902592420578003, "learning_rate": 1.1197020533201263e-06, "loss": 0.29927825927734375, "step": 2524 }, { "epoch": 0.9135724300122111, "grad_norm": 0.5554535388946533, "learning_rate": 1.1105159846129033e-06, "loss": 0.288360595703125, "step": 2525 }, { "epoch": 0.9139342408755823, "grad_norm": 0.4680086672306061, "learning_rate": 1.1013666765400699e-06, "loss": 0.27817535400390625, "step": 2526 }, { "epoch": 0.9142960517389535, "grad_norm": 0.993740975856781, "learning_rate": 1.0922541469069946e-06, "loss": 0.315338134765625, "step": 2527 }, { "epoch": 0.9146578626023246, "grad_norm": 1.0824651718139648, "learning_rate": 1.0831784134474898e-06, "loss": 0.2883453369140625, "step": 2528 }, { "epoch": 0.9150196734656958, "grad_norm": 0.8273263573646545, "learning_rate": 1.0741394938237425e-06, "loss": 0.283416748046875, "step": 2529 }, { "epoch": 0.915381484329067, "grad_norm": 1.4887923002243042, "learning_rate": 1.0651374056263042e-06, "loss": 0.259429931640625, "step": 2530 }, { "epoch": 0.9157432951924381, "grad_norm": 1.000060796737671, "learning_rate": 1.05617216637405e-06, "loss": 0.29476165771484375, "step": 2531 }, { "epoch": 0.9161051060558093, "grad_norm": 1.1873610019683838, "learning_rate": 1.0472437935141389e-06, "loss": 0.27886199951171875, "step": 2532 }, { "epoch": 0.9164669169191805, "grad_norm": 0.47407692670822144, "learning_rate": 1.0383523044219879e-06, "loss": 0.28665924072265625, "step": 2533 }, { "epoch": 0.9168287277825516, "grad_norm": 0.6524341106414795, "learning_rate": 1.0294977164012332e-06, "loss": 0.2997283935546875, "step": 2534 }, { "epoch": 0.9171905386459228, "grad_norm": 2.0667271614074707, "learning_rate": 1.020680046683702e-06, "loss": 0.2603607177734375, "step": 2535 }, { "epoch": 0.917552349509294, "grad_norm": 0.8523874282836914, "learning_rate": 1.0118993124293718e-06, "loss": 0.2798309326171875, "step": 2536 }, { "epoch": 0.9179141603726652, "grad_norm": 0.5885987877845764, "learning_rate": 1.0031555307263386e-06, "loss": 0.28189849853515625, "step": 2537 }, { "epoch": 0.9182759712360363, "grad_norm": 0.5933631062507629, "learning_rate": 9.944487185907924e-07, "loss": 0.29064178466796875, "step": 2538 }, { "epoch": 0.9186377820994075, "grad_norm": 1.0640703439712524, "learning_rate": 9.857788929669686e-07, "loss": 0.27349090576171875, "step": 2539 }, { "epoch": 0.9189995929627787, "grad_norm": 0.6804836392402649, "learning_rate": 9.771460707271286e-07, "loss": 0.2998046875, "step": 2540 }, { "epoch": 0.9193614038261498, "grad_norm": 1.7806293964385986, "learning_rate": 9.68550268671522e-07, "loss": 0.3364105224609375, "step": 2541 }, { "epoch": 0.919723214689521, "grad_norm": 3.253384828567505, "learning_rate": 9.599915035283503e-07, "loss": 0.27197265625, "step": 2542 }, { "epoch": 0.9200850255528922, "grad_norm": 1.9773472547531128, "learning_rate": 9.514697919537408e-07, "loss": 0.317138671875, "step": 2543 }, { "epoch": 0.9204468364162633, "grad_norm": 0.4769286811351776, "learning_rate": 9.429851505317078e-07, "loss": 0.29981231689453125, "step": 2544 }, { "epoch": 0.9208086472796345, "grad_norm": 1.7831603288650513, "learning_rate": 9.34537595774132e-07, "loss": 0.2989654541015625, "step": 2545 }, { "epoch": 0.9211704581430058, "grad_norm": 2.170050859451294, "learning_rate": 9.261271441207121e-07, "loss": 0.2631378173828125, "step": 2546 }, { "epoch": 0.921532269006377, "grad_norm": 2.0449304580688477, "learning_rate": 9.177538119389395e-07, "loss": 0.27803802490234375, "step": 2547 }, { "epoch": 0.9218940798697481, "grad_norm": 0.6414024233818054, "learning_rate": 9.094176155240797e-07, "loss": 0.30560302734375, "step": 2548 }, { "epoch": 0.9222558907331193, "grad_norm": 0.4924793243408203, "learning_rate": 9.011185710991133e-07, "loss": 0.28755950927734375, "step": 2549 }, { "epoch": 0.9226177015964905, "grad_norm": 0.5643022060394287, "learning_rate": 8.928566948147322e-07, "loss": 0.28411865234375, "step": 2550 }, { "epoch": 0.9229795124598617, "grad_norm": 1.1448718309402466, "learning_rate": 8.846320027492861e-07, "loss": 0.30138397216796875, "step": 2551 }, { "epoch": 0.9233413233232328, "grad_norm": 1.0328741073608398, "learning_rate": 8.764445109087649e-07, "loss": 0.295806884765625, "step": 2552 }, { "epoch": 0.923703134186604, "grad_norm": 0.585496723651886, "learning_rate": 8.682942352267676e-07, "loss": 0.2922515869140625, "step": 2553 }, { "epoch": 0.9240649450499752, "grad_norm": 0.5926225781440735, "learning_rate": 8.60181191564462e-07, "loss": 0.266204833984375, "step": 2554 }, { "epoch": 0.9244267559133463, "grad_norm": 0.7258228063583374, "learning_rate": 8.521053957105607e-07, "loss": 0.2796783447265625, "step": 2555 }, { "epoch": 0.9247885667767175, "grad_norm": 1.1045770645141602, "learning_rate": 8.440668633812876e-07, "loss": 0.317108154296875, "step": 2556 }, { "epoch": 0.9251503776400887, "grad_norm": 0.5357193946838379, "learning_rate": 8.360656102203513e-07, "loss": 0.28733062744140625, "step": 2557 }, { "epoch": 0.9255121885034598, "grad_norm": 1.6203707456588745, "learning_rate": 8.281016517989093e-07, "loss": 0.27561187744140625, "step": 2558 }, { "epoch": 0.925873999366831, "grad_norm": 1.4568045139312744, "learning_rate": 8.201750036155376e-07, "loss": 0.308624267578125, "step": 2559 }, { "epoch": 0.9262358102302022, "grad_norm": 1.751102328300476, "learning_rate": 8.122856810962121e-07, "loss": 0.2755584716796875, "step": 2560 }, { "epoch": 0.9265976210935734, "grad_norm": 0.6513852477073669, "learning_rate": 8.044336995942581e-07, "loss": 0.28675079345703125, "step": 2561 }, { "epoch": 0.9269594319569445, "grad_norm": 1.7067608833312988, "learning_rate": 7.966190743903412e-07, "loss": 0.301849365234375, "step": 2562 }, { "epoch": 0.9273212428203157, "grad_norm": 0.7905396223068237, "learning_rate": 7.888418206924231e-07, "loss": 0.31462860107421875, "step": 2563 }, { "epoch": 0.9276830536836869, "grad_norm": 1.4812426567077637, "learning_rate": 7.811019536357411e-07, "loss": 0.24989700317382812, "step": 2564 }, { "epoch": 0.928044864547058, "grad_norm": 0.7975465059280396, "learning_rate": 7.733994882827711e-07, "loss": 0.2770843505859375, "step": 2565 }, { "epoch": 0.9284066754104292, "grad_norm": 1.2589937448501587, "learning_rate": 7.657344396232025e-07, "loss": 0.33969879150390625, "step": 2566 }, { "epoch": 0.9287684862738004, "grad_norm": 0.5189456939697266, "learning_rate": 7.581068225739097e-07, "loss": 0.27907562255859375, "step": 2567 }, { "epoch": 0.9291302971371715, "grad_norm": 0.9764813780784607, "learning_rate": 7.505166519789208e-07, "loss": 0.28125762939453125, "step": 2568 }, { "epoch": 0.9294921080005427, "grad_norm": 2.2431585788726807, "learning_rate": 7.429639426093915e-07, "loss": 0.3329315185546875, "step": 2569 }, { "epoch": 0.9298539188639139, "grad_norm": 1.5263967514038086, "learning_rate": 7.354487091635709e-07, "loss": 0.322509765625, "step": 2570 }, { "epoch": 0.9302157297272851, "grad_norm": 0.8828604817390442, "learning_rate": 7.279709662667822e-07, "loss": 0.2867889404296875, "step": 2571 }, { "epoch": 0.9305775405906562, "grad_norm": 0.550834596157074, "learning_rate": 7.205307284713825e-07, "loss": 0.2925567626953125, "step": 2572 }, { "epoch": 0.9309393514540274, "grad_norm": 1.3092032670974731, "learning_rate": 7.131280102567473e-07, "loss": 0.317352294921875, "step": 2573 }, { "epoch": 0.9313011623173986, "grad_norm": 1.1395158767700195, "learning_rate": 7.057628260292282e-07, "loss": 0.264556884765625, "step": 2574 }, { "epoch": 0.9316629731807697, "grad_norm": 1.8801627159118652, "learning_rate": 6.984351901221375e-07, "loss": 0.31855010986328125, "step": 2575 }, { "epoch": 0.9320247840441409, "grad_norm": 0.5576367974281311, "learning_rate": 6.91145116795715e-07, "loss": 0.2961883544921875, "step": 2576 }, { "epoch": 0.9323865949075121, "grad_norm": 0.6465184688568115, "learning_rate": 6.838926202370965e-07, "loss": 0.2735443115234375, "step": 2577 }, { "epoch": 0.9327484057708832, "grad_norm": 1.0311965942382812, "learning_rate": 6.766777145602943e-07, "loss": 0.2815704345703125, "step": 2578 }, { "epoch": 0.9331102166342544, "grad_norm": 1.147888422012329, "learning_rate": 6.69500413806159e-07, "loss": 0.291656494140625, "step": 2579 }, { "epoch": 0.9334720274976256, "grad_norm": 1.0374624729156494, "learning_rate": 6.623607319423708e-07, "loss": 0.299468994140625, "step": 2580 }, { "epoch": 0.9338338383609968, "grad_norm": 0.7190362811088562, "learning_rate": 6.552586828633867e-07, "loss": 0.2732429504394531, "step": 2581 }, { "epoch": 0.9341956492243679, "grad_norm": 1.1514382362365723, "learning_rate": 6.481942803904373e-07, "loss": 0.29286956787109375, "step": 2582 }, { "epoch": 0.9345574600877391, "grad_norm": 0.7998215556144714, "learning_rate": 6.41167538271481e-07, "loss": 0.2855987548828125, "step": 2583 }, { "epoch": 0.9349192709511103, "grad_norm": 0.6164237856864929, "learning_rate": 6.341784701811881e-07, "loss": 0.2768440246582031, "step": 2584 }, { "epoch": 0.9352810818144814, "grad_norm": 1.9107147455215454, "learning_rate": 6.272270897209188e-07, "loss": 0.30057525634765625, "step": 2585 }, { "epoch": 0.9356428926778526, "grad_norm": 0.9469056129455566, "learning_rate": 6.203134104186847e-07, "loss": 0.263641357421875, "step": 2586 }, { "epoch": 0.9360047035412238, "grad_norm": 3.4785807132720947, "learning_rate": 6.134374457291236e-07, "loss": 0.3364391326904297, "step": 2587 }, { "epoch": 0.936366514404595, "grad_norm": 1.8409126996994019, "learning_rate": 6.065992090334805e-07, "loss": 0.28597259521484375, "step": 2588 }, { "epoch": 0.9367283252679661, "grad_norm": 0.8914380073547363, "learning_rate": 5.997987136395833e-07, "loss": 0.301300048828125, "step": 2589 }, { "epoch": 0.9370901361313373, "grad_norm": 1.8868896961212158, "learning_rate": 5.930359727818058e-07, "loss": 0.3243255615234375, "step": 2590 }, { "epoch": 0.9374519469947085, "grad_norm": 0.598539412021637, "learning_rate": 5.86310999621047e-07, "loss": 0.291351318359375, "step": 2591 }, { "epoch": 0.9378137578580797, "grad_norm": 1.6946208477020264, "learning_rate": 5.796238072447135e-07, "loss": 0.2609367370605469, "step": 2592 }, { "epoch": 0.9381755687214509, "grad_norm": 0.9872573018074036, "learning_rate": 5.7297440866668e-07, "loss": 0.2769622802734375, "step": 2593 }, { "epoch": 0.9385373795848221, "grad_norm": 1.1862825155258179, "learning_rate": 5.66362816827275e-07, "loss": 0.24747467041015625, "step": 2594 }, { "epoch": 0.9388991904481933, "grad_norm": 2.325122833251953, "learning_rate": 5.59789044593253e-07, "loss": 0.3300628662109375, "step": 2595 }, { "epoch": 0.9392610013115644, "grad_norm": 1.8748070001602173, "learning_rate": 5.532531047577583e-07, "loss": 0.2910919189453125, "step": 2596 }, { "epoch": 0.9396228121749356, "grad_norm": 2.269803762435913, "learning_rate": 5.467550100403252e-07, "loss": 0.3066139221191406, "step": 2597 }, { "epoch": 0.9399846230383068, "grad_norm": 0.5332046747207642, "learning_rate": 5.402947730868291e-07, "loss": 0.28277587890625, "step": 2598 }, { "epoch": 0.9403464339016779, "grad_norm": 0.5256174802780151, "learning_rate": 5.338724064694711e-07, "loss": 0.2670135498046875, "step": 2599 }, { "epoch": 0.9407082447650491, "grad_norm": 0.6280266642570496, "learning_rate": 5.274879226867535e-07, "loss": 0.2848663330078125, "step": 2600 }, { "epoch": 0.9410700556284203, "grad_norm": 1.7297333478927612, "learning_rate": 5.211413341634552e-07, "loss": 0.30535125732421875, "step": 2601 }, { "epoch": 0.9414318664917914, "grad_norm": 0.6066834330558777, "learning_rate": 5.148326532506098e-07, "loss": 0.2987518310546875, "step": 2602 }, { "epoch": 0.9417936773551626, "grad_norm": 1.4169548749923706, "learning_rate": 5.085618922254765e-07, "loss": 0.2941741943359375, "step": 2603 }, { "epoch": 0.9421554882185338, "grad_norm": 2.1896071434020996, "learning_rate": 5.023290632915223e-07, "loss": 0.28766632080078125, "step": 2604 }, { "epoch": 0.942517299081905, "grad_norm": 0.7530184388160706, "learning_rate": 4.961341785783868e-07, "loss": 0.2797393798828125, "step": 2605 }, { "epoch": 0.9428791099452761, "grad_norm": 0.45595502853393555, "learning_rate": 4.899772501418821e-07, "loss": 0.27935791015625, "step": 2606 }, { "epoch": 0.9432409208086473, "grad_norm": 2.895369052886963, "learning_rate": 4.838582899639366e-07, "loss": 0.32355499267578125, "step": 2607 }, { "epoch": 0.9436027316720185, "grad_norm": 0.8113094568252563, "learning_rate": 4.777773099526006e-07, "loss": 0.2969970703125, "step": 2608 }, { "epoch": 0.9439645425353896, "grad_norm": 0.831231415271759, "learning_rate": 4.717343219420034e-07, "loss": 0.312896728515625, "step": 2609 }, { "epoch": 0.9443263533987608, "grad_norm": 1.067131757736206, "learning_rate": 4.657293376923511e-07, "loss": 0.2985382080078125, "step": 2610 }, { "epoch": 0.944688164262132, "grad_norm": 0.7163575887680054, "learning_rate": 4.597623688898778e-07, "loss": 0.298797607421875, "step": 2611 }, { "epoch": 0.9450499751255031, "grad_norm": 0.4615340530872345, "learning_rate": 4.5383342714684145e-07, "loss": 0.2830047607421875, "step": 2612 }, { "epoch": 0.9454117859888743, "grad_norm": 0.5421106815338135, "learning_rate": 4.4794252400149673e-07, "loss": 0.26653289794921875, "step": 2613 }, { "epoch": 0.9457735968522455, "grad_norm": 1.191899061203003, "learning_rate": 4.4208967091806887e-07, "loss": 0.2982330322265625, "step": 2614 }, { "epoch": 0.9461354077156167, "grad_norm": 2.88940691947937, "learning_rate": 4.3627487928673997e-07, "loss": 0.341583251953125, "step": 2615 }, { "epoch": 0.9464972185789878, "grad_norm": 3.3195080757141113, "learning_rate": 4.304981604236158e-07, "loss": 0.31929779052734375, "step": 2616 }, { "epoch": 0.946859029442359, "grad_norm": 1.9492313861846924, "learning_rate": 4.2475952557071044e-07, "loss": 0.31804656982421875, "step": 2617 }, { "epoch": 0.9472208403057302, "grad_norm": 0.8294345736503601, "learning_rate": 4.1905898589592154e-07, "loss": 0.2690582275390625, "step": 2618 }, { "epoch": 0.9475826511691013, "grad_norm": 1.0662243366241455, "learning_rate": 4.13396552493015e-07, "loss": 0.276153564453125, "step": 2619 }, { "epoch": 0.9479444620324725, "grad_norm": 2.1525068283081055, "learning_rate": 4.077722363815939e-07, "loss": 0.2832794189453125, "step": 2620 }, { "epoch": 0.9483062728958437, "grad_norm": 2.2782909870147705, "learning_rate": 4.0218604850708277e-07, "loss": 0.32158660888671875, "step": 2621 }, { "epoch": 0.9486680837592149, "grad_norm": 1.8483107089996338, "learning_rate": 3.966379997407055e-07, "loss": 0.30249786376953125, "step": 2622 }, { "epoch": 0.949029894622586, "grad_norm": 1.0001353025436401, "learning_rate": 3.9112810087946316e-07, "loss": 0.2836761474609375, "step": 2623 }, { "epoch": 0.9493917054859572, "grad_norm": 0.7217339873313904, "learning_rate": 3.8565636264611627e-07, "loss": 0.286590576171875, "step": 2624 }, { "epoch": 0.9497535163493284, "grad_norm": 0.6162693500518799, "learning_rate": 3.802227956891602e-07, "loss": 0.28006744384765625, "step": 2625 }, { "epoch": 0.9501153272126995, "grad_norm": 2.3420052528381348, "learning_rate": 3.748274105828009e-07, "loss": 0.270416259765625, "step": 2626 }, { "epoch": 0.9504771380760707, "grad_norm": 0.843025803565979, "learning_rate": 3.6947021782694824e-07, "loss": 0.30254364013671875, "step": 2627 }, { "epoch": 0.9508389489394419, "grad_norm": 0.5764593482017517, "learning_rate": 3.6415122784717593e-07, "loss": 0.2944183349609375, "step": 2628 }, { "epoch": 0.951200759802813, "grad_norm": 0.9732195734977722, "learning_rate": 3.588704509947194e-07, "loss": 0.2700042724609375, "step": 2629 }, { "epoch": 0.9515625706661842, "grad_norm": 0.5516036748886108, "learning_rate": 3.536278975464447e-07, "loss": 0.3078155517578125, "step": 2630 }, { "epoch": 0.9519243815295554, "grad_norm": 0.9074336290359497, "learning_rate": 3.484235777048306e-07, "loss": 0.2926788330078125, "step": 2631 }, { "epoch": 0.9522861923929266, "grad_norm": 2.0058062076568604, "learning_rate": 3.4325750159795556e-07, "loss": 0.3009490966796875, "step": 2632 }, { "epoch": 0.9526480032562977, "grad_norm": 0.7261598110198975, "learning_rate": 3.3812967927946415e-07, "loss": 0.2913818359375, "step": 2633 }, { "epoch": 0.9530098141196689, "grad_norm": 1.7157179117202759, "learning_rate": 3.330401207285605e-07, "loss": 0.27094268798828125, "step": 2634 }, { "epoch": 0.9533716249830401, "grad_norm": 1.1158455610275269, "learning_rate": 3.279888358499861e-07, "loss": 0.29095458984375, "step": 2635 }, { "epoch": 0.9537334358464112, "grad_norm": 0.7192267775535583, "learning_rate": 3.2297583447399305e-07, "loss": 0.2877655029296875, "step": 2636 }, { "epoch": 0.9540952467097824, "grad_norm": 3.8849244117736816, "learning_rate": 3.180011263563332e-07, "loss": 0.3429412841796875, "step": 2637 }, { "epoch": 0.9544570575731537, "grad_norm": 1.088169813156128, "learning_rate": 3.130647211782356e-07, "loss": 0.26328277587890625, "step": 2638 }, { "epoch": 0.9548188684365249, "grad_norm": 0.8440806269645691, "learning_rate": 3.081666285463869e-07, "loss": 0.271331787109375, "step": 2639 }, { "epoch": 0.955180679299896, "grad_norm": 0.5708215236663818, "learning_rate": 3.0330685799291993e-07, "loss": 0.26634979248046875, "step": 2640 }, { "epoch": 0.9555424901632672, "grad_norm": 1.918246865272522, "learning_rate": 2.9848541897538054e-07, "loss": 0.30829620361328125, "step": 2641 }, { "epoch": 0.9559043010266384, "grad_norm": 1.0519030094146729, "learning_rate": 2.937023208767276e-07, "loss": 0.28000640869140625, "step": 2642 }, { "epoch": 0.9562661118900095, "grad_norm": 2.230764150619507, "learning_rate": 2.8895757300529734e-07, "loss": 0.31451416015625, "step": 2643 }, { "epoch": 0.9566279227533807, "grad_norm": 0.5079091787338257, "learning_rate": 2.842511845947948e-07, "loss": 0.29084014892578125, "step": 2644 }, { "epoch": 0.9569897336167519, "grad_norm": 1.6294664144515991, "learning_rate": 2.7958316480427574e-07, "loss": 0.2576789855957031, "step": 2645 }, { "epoch": 0.957351544480123, "grad_norm": 1.0953346490859985, "learning_rate": 2.749535227181288e-07, "loss": 0.2910423278808594, "step": 2646 }, { "epoch": 0.9577133553434942, "grad_norm": 1.1819719076156616, "learning_rate": 2.703622673460471e-07, "loss": 0.30403900146484375, "step": 2647 }, { "epoch": 0.9580751662068654, "grad_norm": 0.9575484991073608, "learning_rate": 2.6580940762303444e-07, "loss": 0.2973976135253906, "step": 2648 }, { "epoch": 0.9584369770702366, "grad_norm": 2.316419839859009, "learning_rate": 2.6129495240936107e-07, "loss": 0.2675018310546875, "step": 2649 }, { "epoch": 0.9587987879336077, "grad_norm": 2.136752128601074, "learning_rate": 2.5681891049056385e-07, "loss": 0.3372344970703125, "step": 2650 }, { "epoch": 0.9591605987969789, "grad_norm": 1.3973294496536255, "learning_rate": 2.523812905774237e-07, "loss": 0.2694549560546875, "step": 2651 }, { "epoch": 0.9595224096603501, "grad_norm": 1.8263213634490967, "learning_rate": 2.479821013059458e-07, "loss": 0.3264617919921875, "step": 2652 }, { "epoch": 0.9598842205237212, "grad_norm": 0.9159199595451355, "learning_rate": 2.436213512373486e-07, "loss": 0.297454833984375, "step": 2653 }, { "epoch": 0.9602460313870924, "grad_norm": 1.302946925163269, "learning_rate": 2.3929904885804376e-07, "loss": 0.30030059814453125, "step": 2654 }, { "epoch": 0.9606078422504636, "grad_norm": 1.566523551940918, "learning_rate": 2.3501520257962262e-07, "loss": 0.2879638671875, "step": 2655 }, { "epoch": 0.9609696531138348, "grad_norm": 3.113504409790039, "learning_rate": 2.3076982073883204e-07, "loss": 0.338897705078125, "step": 2656 }, { "epoch": 0.9613314639772059, "grad_norm": 1.2642817497253418, "learning_rate": 2.265629115975676e-07, "loss": 0.273406982421875, "step": 2657 }, { "epoch": 0.9616932748405771, "grad_norm": 0.7675498127937317, "learning_rate": 2.2239448334285374e-07, "loss": 0.27129364013671875, "step": 2658 }, { "epoch": 0.9620550857039483, "grad_norm": 1.0110228061676025, "learning_rate": 2.1826454408682584e-07, "loss": 0.29013824462890625, "step": 2659 }, { "epoch": 0.9624168965673194, "grad_norm": 1.3908288478851318, "learning_rate": 2.1417310186671925e-07, "loss": 0.2802734375, "step": 2660 }, { "epoch": 0.9627787074306906, "grad_norm": 0.6748393774032593, "learning_rate": 2.1012016464484697e-07, "loss": 0.300048828125, "step": 2661 }, { "epoch": 0.9631405182940618, "grad_norm": 1.2853039503097534, "learning_rate": 2.0610574030858865e-07, "loss": 0.3109893798828125, "step": 2662 }, { "epoch": 0.963502329157433, "grad_norm": 1.963425874710083, "learning_rate": 2.02129836670375e-07, "loss": 0.30181121826171875, "step": 2663 }, { "epoch": 0.9638641400208041, "grad_norm": 2.209540605545044, "learning_rate": 1.9819246146767223e-07, "loss": 0.3092193603515625, "step": 2664 }, { "epoch": 0.9642259508841753, "grad_norm": 0.9047132730484009, "learning_rate": 1.9429362236296657e-07, "loss": 0.291015625, "step": 2665 }, { "epoch": 0.9645877617475465, "grad_norm": 0.3930076062679291, "learning_rate": 1.904333269437486e-07, "loss": 0.29039764404296875, "step": 2666 }, { "epoch": 0.9649495726109176, "grad_norm": 1.0070046186447144, "learning_rate": 1.8661158272250234e-07, "loss": 0.3095245361328125, "step": 2667 }, { "epoch": 0.9653113834742888, "grad_norm": 0.48663973808288574, "learning_rate": 1.828283971366851e-07, "loss": 0.29845428466796875, "step": 2668 }, { "epoch": 0.96567319433766, "grad_norm": 0.6960523128509521, "learning_rate": 1.790837775487142e-07, "loss": 0.2812347412109375, "step": 2669 }, { "epoch": 0.9660350052010311, "grad_norm": 1.0662399530410767, "learning_rate": 1.7537773124596037e-07, "loss": 0.3052520751953125, "step": 2670 }, { "epoch": 0.9663968160644023, "grad_norm": 1.704908847808838, "learning_rate": 1.717102654407188e-07, "loss": 0.27591705322265625, "step": 2671 }, { "epoch": 0.9667586269277735, "grad_norm": 0.8210089802742004, "learning_rate": 1.6808138727020696e-07, "loss": 0.28855133056640625, "step": 2672 }, { "epoch": 0.9671204377911446, "grad_norm": 1.0983340740203857, "learning_rate": 1.644911037965513e-07, "loss": 0.27581024169921875, "step": 2673 }, { "epoch": 0.9674822486545158, "grad_norm": 0.8268303275108337, "learning_rate": 1.6093942200676727e-07, "loss": 0.3042793273925781, "step": 2674 }, { "epoch": 0.967844059517887, "grad_norm": 2.0062742233276367, "learning_rate": 1.5742634881274144e-07, "loss": 0.3250274658203125, "step": 2675 }, { "epoch": 0.9682058703812582, "grad_norm": 0.7029966711997986, "learning_rate": 1.539518910512361e-07, "loss": 0.28670501708984375, "step": 2676 }, { "epoch": 0.9685676812446293, "grad_norm": 0.953620970249176, "learning_rate": 1.5051605548385583e-07, "loss": 0.2704315185546875, "step": 2677 }, { "epoch": 0.9689294921080005, "grad_norm": 0.45314693450927734, "learning_rate": 1.4711884879704542e-07, "loss": 0.29128265380859375, "step": 2678 }, { "epoch": 0.9692913029713717, "grad_norm": 1.3073334693908691, "learning_rate": 1.4376027760207635e-07, "loss": 0.2537841796875, "step": 2679 }, { "epoch": 0.9696531138347428, "grad_norm": 0.8803020715713501, "learning_rate": 1.4044034843502696e-07, "loss": 0.27005767822265625, "step": 2680 }, { "epoch": 0.970014924698114, "grad_norm": 3.073544979095459, "learning_rate": 1.3715906775678023e-07, "loss": 0.3312225341796875, "step": 2681 }, { "epoch": 0.9703767355614852, "grad_norm": 0.49244895577430725, "learning_rate": 1.3391644195300146e-07, "loss": 0.275299072265625, "step": 2682 }, { "epoch": 0.9707385464248564, "grad_norm": 0.5694587230682373, "learning_rate": 1.3071247733413395e-07, "loss": 0.3040313720703125, "step": 2683 }, { "epoch": 0.9711003572882276, "grad_norm": 0.8174071907997131, "learning_rate": 1.2754718013538113e-07, "loss": 0.27365875244140625, "step": 2684 }, { "epoch": 0.9714621681515988, "grad_norm": 1.1812822818756104, "learning_rate": 1.244205565166956e-07, "loss": 0.31201171875, "step": 2685 }, { "epoch": 0.97182397901497, "grad_norm": 0.45737123489379883, "learning_rate": 1.2133261256276563e-07, "loss": 0.282501220703125, "step": 2686 }, { "epoch": 0.9721857898783411, "grad_norm": 0.7840861082077026, "learning_rate": 1.1828335428301307e-07, "loss": 0.28726959228515625, "step": 2687 }, { "epoch": 0.9725476007417123, "grad_norm": 1.5193803310394287, "learning_rate": 1.1527278761156225e-07, "loss": 0.2722206115722656, "step": 2688 }, { "epoch": 0.9729094116050835, "grad_norm": 2.2745168209075928, "learning_rate": 1.12300918407251e-07, "loss": 0.24433135986328125, "step": 2689 }, { "epoch": 0.9732712224684547, "grad_norm": 1.7017006874084473, "learning_rate": 1.093677524536041e-07, "loss": 0.26366424560546875, "step": 2690 }, { "epoch": 0.9736330333318258, "grad_norm": 0.5543412566184998, "learning_rate": 1.0647329545882434e-07, "loss": 0.29305267333984375, "step": 2691 }, { "epoch": 0.973994844195197, "grad_norm": 1.6908445358276367, "learning_rate": 1.0361755305578591e-07, "loss": 0.31240081787109375, "step": 2692 }, { "epoch": 0.9743566550585682, "grad_norm": 2.2910079956054688, "learning_rate": 1.0080053080201879e-07, "loss": 0.3258552551269531, "step": 2693 }, { "epoch": 0.9747184659219393, "grad_norm": 2.8846185207366943, "learning_rate": 9.802223417970658e-08, "loss": 0.3227119445800781, "step": 2694 }, { "epoch": 0.9750802767853105, "grad_norm": 3.0458292961120605, "learning_rate": 9.52826685956576e-08, "loss": 0.3610382080078125, "step": 2695 }, { "epoch": 0.9754420876486817, "grad_norm": 0.7219113111495972, "learning_rate": 9.258183938131826e-08, "loss": 0.296783447265625, "step": 2696 }, { "epoch": 0.9758038985120528, "grad_norm": 1.3431719541549683, "learning_rate": 8.991975179274193e-08, "loss": 0.26789093017578125, "step": 2697 }, { "epoch": 0.976165709375424, "grad_norm": 0.6155787110328674, "learning_rate": 8.729641101059117e-08, "loss": 0.28041839599609375, "step": 2698 }, { "epoch": 0.9765275202387952, "grad_norm": 1.408881664276123, "learning_rate": 8.471182214012441e-08, "loss": 0.31640625, "step": 2699 }, { "epoch": 0.9768893311021664, "grad_norm": 1.0961987972259521, "learning_rate": 8.216599021118044e-08, "loss": 0.2751617431640625, "step": 2700 }, { "epoch": 0.9772511419655375, "grad_norm": 1.3200327157974243, "learning_rate": 7.965892017818278e-08, "loss": 0.30368804931640625, "step": 2701 }, { "epoch": 0.9776129528289087, "grad_norm": 0.5494323372840881, "learning_rate": 7.719061692010865e-08, "loss": 0.3017425537109375, "step": 2702 }, { "epoch": 0.9779747636922799, "grad_norm": 3.88070011138916, "learning_rate": 7.476108524050452e-08, "loss": 0.2884521484375, "step": 2703 }, { "epoch": 0.978336574555651, "grad_norm": 0.4575096666812897, "learning_rate": 7.237032986745274e-08, "loss": 0.27349090576171875, "step": 2704 }, { "epoch": 0.9786983854190222, "grad_norm": 1.049872636795044, "learning_rate": 7.00183554535805e-08, "loss": 0.25983428955078125, "step": 2705 }, { "epoch": 0.9790601962823934, "grad_norm": 2.7907564640045166, "learning_rate": 6.770516657603975e-08, "loss": 0.327239990234375, "step": 2706 }, { "epoch": 0.9794220071457646, "grad_norm": 0.5250320434570312, "learning_rate": 6.543076773650514e-08, "loss": 0.29210662841796875, "step": 2707 }, { "epoch": 0.9797838180091357, "grad_norm": 0.6801429986953735, "learning_rate": 6.319516336116271e-08, "loss": 0.28658294677734375, "step": 2708 }, { "epoch": 0.9801456288725069, "grad_norm": 1.355457067489624, "learning_rate": 6.099835780069674e-08, "loss": 0.3055458068847656, "step": 2709 }, { "epoch": 0.9805074397358781, "grad_norm": 0.48738348484039307, "learning_rate": 5.8840355330289645e-08, "loss": 0.2834625244140625, "step": 2710 }, { "epoch": 0.9808692505992492, "grad_norm": 2.740642547607422, "learning_rate": 5.672116014960871e-08, "loss": 0.32575225830078125, "step": 2711 }, { "epoch": 0.9812310614626204, "grad_norm": 0.8898307681083679, "learning_rate": 5.464077638279941e-08, "loss": 0.28102874755859375, "step": 2712 }, { "epoch": 0.9815928723259916, "grad_norm": 1.7008721828460693, "learning_rate": 5.2599208078474294e-08, "loss": 0.29990386962890625, "step": 2713 }, { "epoch": 0.9819546831893627, "grad_norm": 1.1281261444091797, "learning_rate": 5.059645920970857e-08, "loss": 0.2922630310058594, "step": 2714 }, { "epoch": 0.9823164940527339, "grad_norm": 1.504082202911377, "learning_rate": 4.86325336740312e-08, "loss": 0.30413055419921875, "step": 2715 }, { "epoch": 0.9826783049161051, "grad_norm": 0.6996898055076599, "learning_rate": 4.670743529341826e-08, "loss": 0.295989990234375, "step": 2716 }, { "epoch": 0.9830401157794763, "grad_norm": 1.3771064281463623, "learning_rate": 4.4821167814284025e-08, "loss": 0.32257080078125, "step": 2717 }, { "epoch": 0.9834019266428474, "grad_norm": 1.1640180349349976, "learning_rate": 4.297373490747658e-08, "loss": 0.3364105224609375, "step": 2718 }, { "epoch": 0.9837637375062186, "grad_norm": 1.1557284593582153, "learning_rate": 4.116514016826445e-08, "loss": 0.2730560302734375, "step": 2719 }, { "epoch": 0.9841255483695898, "grad_norm": 0.8826087713241577, "learning_rate": 3.939538711633661e-08, "loss": 0.2762603759765625, "step": 2720 }, { "epoch": 0.9844873592329609, "grad_norm": 0.8503260016441345, "learning_rate": 3.766447919579142e-08, "loss": 0.284515380859375, "step": 2721 }, { "epoch": 0.9848491700963321, "grad_norm": 0.6799980998039246, "learning_rate": 3.5972419775129886e-08, "loss": 0.29279327392578125, "step": 2722 }, { "epoch": 0.9852109809597033, "grad_norm": 1.0351145267486572, "learning_rate": 3.431921214725575e-08, "loss": 0.3213348388671875, "step": 2723 }, { "epoch": 0.9855727918230744, "grad_norm": 0.9946504831314087, "learning_rate": 3.270485952945546e-08, "loss": 0.26760101318359375, "step": 2724 }, { "epoch": 0.9859346026864456, "grad_norm": 0.8848947882652283, "learning_rate": 3.1129365063404804e-08, "loss": 0.30088043212890625, "step": 2725 }, { "epoch": 0.9862964135498168, "grad_norm": 1.409818410873413, "learning_rate": 2.9592731815160093e-08, "loss": 0.26781463623046875, "step": 2726 }, { "epoch": 0.986658224413188, "grad_norm": 1.9390428066253662, "learning_rate": 2.8094962775147007e-08, "loss": 0.28475189208984375, "step": 2727 }, { "epoch": 0.9870200352765591, "grad_norm": 0.6075991988182068, "learning_rate": 2.6636060858156178e-08, "loss": 0.27459716796875, "step": 2728 }, { "epoch": 0.9873818461399303, "grad_norm": 0.9043630957603455, "learning_rate": 2.5216028903345403e-08, "loss": 0.290771484375, "step": 2729 }, { "epoch": 0.9877436570033016, "grad_norm": 2.3849358558654785, "learning_rate": 2.3834869674224104e-08, "loss": 0.27349853515625, "step": 2730 }, { "epoch": 0.9881054678666727, "grad_norm": 0.9238505363464355, "learning_rate": 2.2492585858651104e-08, "loss": 0.26332855224609375, "step": 2731 }, { "epoch": 0.9884672787300439, "grad_norm": 0.8614092469215393, "learning_rate": 2.1189180068830195e-08, "loss": 0.313720703125, "step": 2732 }, { "epoch": 0.9888290895934151, "grad_norm": 1.0164947509765625, "learning_rate": 1.992465484131012e-08, "loss": 0.297607421875, "step": 2733 }, { "epoch": 0.9891909004567863, "grad_norm": 1.2086141109466553, "learning_rate": 1.869901263696683e-08, "loss": 0.262725830078125, "step": 2734 }, { "epoch": 0.9895527113201574, "grad_norm": 0.7275430560112, "learning_rate": 1.751225584101013e-08, "loss": 0.28670501708984375, "step": 2735 }, { "epoch": 0.9899145221835286, "grad_norm": 0.5187181830406189, "learning_rate": 1.6364386762979245e-08, "loss": 0.27410888671875, "step": 2736 }, { "epoch": 0.9902763330468998, "grad_norm": 0.6112494468688965, "learning_rate": 1.5255407636727283e-08, "loss": 0.29852294921875, "step": 2737 }, { "epoch": 0.9906381439102709, "grad_norm": 0.685583233833313, "learning_rate": 1.4185320620430098e-08, "loss": 0.31082916259765625, "step": 2738 }, { "epoch": 0.9909999547736421, "grad_norm": 2.053461790084839, "learning_rate": 1.315412779657077e-08, "loss": 0.31024169921875, "step": 2739 }, { "epoch": 0.9913617656370133, "grad_norm": 0.8496238589286804, "learning_rate": 1.2161831171944028e-08, "loss": 0.28568267822265625, "step": 2740 }, { "epoch": 0.9917235765003845, "grad_norm": 1.419298529624939, "learning_rate": 1.1208432677649594e-08, "loss": 0.3068122863769531, "step": 2741 }, { "epoch": 0.9920853873637556, "grad_norm": 0.9541823863983154, "learning_rate": 1.0293934169083309e-08, "loss": 0.25799560546875, "step": 2742 }, { "epoch": 0.9924471982271268, "grad_norm": 0.6911214590072632, "learning_rate": 9.418337425943779e-09, "loss": 0.3120574951171875, "step": 2743 }, { "epoch": 0.992809009090498, "grad_norm": 0.9237205386161804, "learning_rate": 8.581644152223511e-09, "loss": 0.27999114990234375, "step": 2744 }, { "epoch": 0.9931708199538691, "grad_norm": 0.5315385460853577, "learning_rate": 7.783855976197796e-09, "loss": 0.2994232177734375, "step": 2745 }, { "epoch": 0.9935326308172403, "grad_norm": 0.43929028511047363, "learning_rate": 7.024974450438038e-09, "loss": 0.2653694152832031, "step": 2746 }, { "epoch": 0.9938944416806115, "grad_norm": 2.3217287063598633, "learning_rate": 6.3050010517939905e-09, "loss": 0.2875823974609375, "step": 2747 }, { "epoch": 0.9942562525439826, "grad_norm": 1.4251412153244019, "learning_rate": 5.623937181398198e-09, "loss": 0.3192901611328125, "step": 2748 }, { "epoch": 0.9946180634073538, "grad_norm": 1.354678988456726, "learning_rate": 4.981784164663772e-09, "loss": 0.2686004638671875, "step": 2749 }, { "epoch": 0.994979874270725, "grad_norm": 0.642626166343689, "learning_rate": 4.378543251277733e-09, "loss": 0.29425811767578125, "step": 2750 }, { "epoch": 0.9953416851340962, "grad_norm": 0.749987781047821, "learning_rate": 3.814215615201012e-09, "loss": 0.269500732421875, "step": 2751 }, { "epoch": 0.9957034959974673, "grad_norm": 2.056068181991577, "learning_rate": 3.2888023546662205e-09, "loss": 0.31087493896484375, "step": 2752 }, { "epoch": 0.9960653068608385, "grad_norm": 0.4568631649017334, "learning_rate": 2.8023044921732244e-09, "loss": 0.29256439208984375, "step": 2753 }, { "epoch": 0.9964271177242097, "grad_norm": 1.8779361248016357, "learning_rate": 2.3547229744935728e-09, "loss": 0.256561279296875, "step": 2754 }, { "epoch": 0.9967889285875808, "grad_norm": 1.0481544733047485, "learning_rate": 1.9460586726594012e-09, "loss": 0.2962646484375, "step": 2755 }, { "epoch": 0.997150739450952, "grad_norm": 0.5428613424301147, "learning_rate": 1.5763123819656501e-09, "loss": 0.2784309387207031, "step": 2756 }, { "epoch": 0.9975125503143232, "grad_norm": 1.219523549079895, "learning_rate": 1.2454848219767279e-09, "loss": 0.31331634521484375, "step": 2757 }, { "epoch": 0.9978743611776943, "grad_norm": 0.8910971879959106, "learning_rate": 9.535766365109667e-10, "loss": 0.26807403564453125, "step": 2758 }, { "epoch": 0.9982361720410655, "grad_norm": 0.6278151273727417, "learning_rate": 7.00588393645063e-10, "loss": 0.27448272705078125, "step": 2759 }, { "epoch": 0.9985979829044367, "grad_norm": 1.4590727090835571, "learning_rate": 4.865205857207401e-10, "loss": 0.266204833984375, "step": 2760 }, { "epoch": 0.9989597937678079, "grad_norm": 1.292899489402771, "learning_rate": 3.1137362933364446e-10, "loss": 0.3164825439453125, "step": 2761 }, { "epoch": 0.999321604631179, "grad_norm": 2.4243712425231934, "learning_rate": 1.7514786533112672e-10, "loss": 0.26319122314453125, "step": 2762 }, { "epoch": 0.9996834154945502, "grad_norm": 0.8626971244812012, "learning_rate": 7.784355882334283e-11, "loss": 0.2945556640625, "step": 2763 }, { "epoch": 1.0, "grad_norm": 0.502400279045105, "learning_rate": 1.946089917437277e-11, "loss": 0.28357696533203125, "step": 2764 }, { "epoch": 1.0, "step": 2764, "total_flos": 2.2592843227210973e+19, "train_loss": 0.30654678348176906, "train_runtime": 33238.0318, "train_samples_per_second": 21.287, "train_steps_per_second": 0.083 } ], "logging_steps": 1.0, "max_steps": 2764, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.2592843227210973e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }