{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7177, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013934369121438027, "grad_norm": 68.35508728027344, "learning_rate": 0.0, "loss": 0.741943359375, "step": 1 }, { "epoch": 0.00027868738242876054, "grad_norm": 84.05374908447266, "learning_rate": 7.8125e-08, "loss": 0.7530517578125, "step": 2 }, { "epoch": 0.0004180310736431408, "grad_norm": 80.69912719726562, "learning_rate": 1.5625e-07, "loss": 0.74884033203125, "step": 3 }, { "epoch": 0.0005573747648575211, "grad_norm": 82.28425598144531, "learning_rate": 2.3437500000000003e-07, "loss": 0.74957275390625, "step": 4 }, { "epoch": 0.0006967184560719013, "grad_norm": 86.44834899902344, "learning_rate": 3.125e-07, "loss": 0.7557373046875, "step": 5 }, { "epoch": 0.0008360621472862816, "grad_norm": 80.1391830444336, "learning_rate": 3.90625e-07, "loss": 0.74884033203125, "step": 6 }, { "epoch": 0.0009754058385006619, "grad_norm": 86.425537109375, "learning_rate": 4.6875000000000006e-07, "loss": 0.7528076171875, "step": 7 }, { "epoch": 0.0011147495297150422, "grad_norm": 86.5018310546875, "learning_rate": 5.468750000000001e-07, "loss": 0.754638671875, "step": 8 }, { "epoch": 0.0012540932209294225, "grad_norm": 84.22899627685547, "learning_rate": 6.25e-07, "loss": 0.7506103515625, "step": 9 }, { "epoch": 0.0013934369121438026, "grad_norm": 84.8850326538086, "learning_rate": 7.03125e-07, "loss": 0.75335693359375, "step": 10 }, { "epoch": 0.001532780603358183, "grad_norm": 83.78080749511719, "learning_rate": 7.8125e-07, "loss": 0.75177001953125, "step": 11 }, { "epoch": 0.0016721242945725633, "grad_norm": 81.85120391845703, "learning_rate": 8.59375e-07, "loss": 0.74407958984375, "step": 12 }, { "epoch": 0.0018114679857869436, "grad_norm": 82.96898651123047, "learning_rate": 9.375000000000001e-07, "loss": 0.74395751953125, "step": 13 }, { "epoch": 0.0019508116770013237, "grad_norm": 85.06578063964844, "learning_rate": 1.0156250000000001e-06, "loss": 0.7449951171875, "step": 14 }, { "epoch": 0.0020901553682157042, "grad_norm": 80.7475357055664, "learning_rate": 1.0937500000000001e-06, "loss": 0.740966796875, "step": 15 }, { "epoch": 0.0022294990594300844, "grad_norm": 80.70791625976562, "learning_rate": 1.1718750000000001e-06, "loss": 0.726318359375, "step": 16 }, { "epoch": 0.0023688427506444645, "grad_norm": 78.53620147705078, "learning_rate": 1.25e-06, "loss": 0.7271728515625, "step": 17 }, { "epoch": 0.002508186441858845, "grad_norm": 81.7895278930664, "learning_rate": 1.328125e-06, "loss": 0.72772216796875, "step": 18 }, { "epoch": 0.002647530133073225, "grad_norm": 84.47947692871094, "learning_rate": 1.40625e-06, "loss": 0.725341796875, "step": 19 }, { "epoch": 0.0027868738242876052, "grad_norm": 73.71009826660156, "learning_rate": 1.484375e-06, "loss": 0.7186279296875, "step": 20 }, { "epoch": 0.0029262175155019858, "grad_norm": 78.1681900024414, "learning_rate": 1.5625e-06, "loss": 0.71649169921875, "step": 21 }, { "epoch": 0.003065561206716366, "grad_norm": 75.11559295654297, "learning_rate": 1.640625e-06, "loss": 0.66864013671875, "step": 22 }, { "epoch": 0.003204904897930746, "grad_norm": 76.30701446533203, "learning_rate": 1.71875e-06, "loss": 0.66497802734375, "step": 23 }, { "epoch": 0.0033442485891451265, "grad_norm": 71.80799102783203, "learning_rate": 1.796875e-06, "loss": 0.6649169921875, "step": 24 }, { "epoch": 0.0034835922803595066, "grad_norm": 70.86400604248047, "learning_rate": 1.8750000000000003e-06, "loss": 0.66485595703125, "step": 25 }, { "epoch": 0.003622935971573887, "grad_norm": 69.09696197509766, "learning_rate": 1.953125e-06, "loss": 0.6510009765625, "step": 26 }, { "epoch": 0.0037622796627882673, "grad_norm": 69.65038299560547, "learning_rate": 2.0312500000000002e-06, "loss": 0.64556884765625, "step": 27 }, { "epoch": 0.0039016233540026474, "grad_norm": 74.79225158691406, "learning_rate": 2.109375e-06, "loss": 0.6395263671875, "step": 28 }, { "epoch": 0.0040409670452170275, "grad_norm": 69.77416229248047, "learning_rate": 2.1875000000000002e-06, "loss": 0.63531494140625, "step": 29 }, { "epoch": 0.0041803107364314085, "grad_norm": 61.20261001586914, "learning_rate": 2.265625e-06, "loss": 0.54632568359375, "step": 30 }, { "epoch": 0.004319654427645789, "grad_norm": 59.32276916503906, "learning_rate": 2.3437500000000002e-06, "loss": 0.53826904296875, "step": 31 }, { "epoch": 0.004458998118860169, "grad_norm": 61.9091911315918, "learning_rate": 2.421875e-06, "loss": 0.53729248046875, "step": 32 }, { "epoch": 0.004598341810074549, "grad_norm": 59.664695739746094, "learning_rate": 2.5e-06, "loss": 0.52093505859375, "step": 33 }, { "epoch": 0.004737685501288929, "grad_norm": 51.16754150390625, "learning_rate": 2.5781250000000004e-06, "loss": 0.53924560546875, "step": 34 }, { "epoch": 0.004877029192503309, "grad_norm": 53.8609619140625, "learning_rate": 2.65625e-06, "loss": 0.521240234375, "step": 35 }, { "epoch": 0.00501637288371769, "grad_norm": 53.472801208496094, "learning_rate": 2.7343750000000004e-06, "loss": 0.496063232421875, "step": 36 }, { "epoch": 0.00515571657493207, "grad_norm": 47.95134353637695, "learning_rate": 2.8125e-06, "loss": 0.498260498046875, "step": 37 }, { "epoch": 0.00529506026614645, "grad_norm": 47.91554260253906, "learning_rate": 2.8906250000000004e-06, "loss": 0.484832763671875, "step": 38 }, { "epoch": 0.00543440395736083, "grad_norm": 51.750431060791016, "learning_rate": 2.96875e-06, "loss": 0.448883056640625, "step": 39 }, { "epoch": 0.0055737476485752105, "grad_norm": 46.16836166381836, "learning_rate": 3.0468750000000004e-06, "loss": 0.428558349609375, "step": 40 }, { "epoch": 0.005713091339789591, "grad_norm": 31.973407745361328, "learning_rate": 3.125e-06, "loss": 0.42529296875, "step": 41 }, { "epoch": 0.0058524350310039715, "grad_norm": 30.66975975036621, "learning_rate": 3.2031250000000004e-06, "loss": 0.397003173828125, "step": 42 }, { "epoch": 0.005991778722218352, "grad_norm": 24.170297622680664, "learning_rate": 3.28125e-06, "loss": 0.4156494140625, "step": 43 }, { "epoch": 0.006131122413432732, "grad_norm": 29.654438018798828, "learning_rate": 3.3593750000000003e-06, "loss": 0.369110107421875, "step": 44 }, { "epoch": 0.006270466104647112, "grad_norm": 19.66484260559082, "learning_rate": 3.4375e-06, "loss": 0.42010498046875, "step": 45 }, { "epoch": 0.006409809795861492, "grad_norm": 26.667516708374023, "learning_rate": 3.5156250000000003e-06, "loss": 0.364288330078125, "step": 46 }, { "epoch": 0.006549153487075873, "grad_norm": 15.48472785949707, "learning_rate": 3.59375e-06, "loss": 0.424407958984375, "step": 47 }, { "epoch": 0.006688497178290253, "grad_norm": 19.41010856628418, "learning_rate": 3.6718750000000003e-06, "loss": 0.380035400390625, "step": 48 }, { "epoch": 0.006827840869504633, "grad_norm": 24.582704544067383, "learning_rate": 3.7500000000000005e-06, "loss": 0.332916259765625, "step": 49 }, { "epoch": 0.006967184560719013, "grad_norm": 14.221268653869629, "learning_rate": 3.828125000000001e-06, "loss": 0.38800048828125, "step": 50 }, { "epoch": 0.007106528251933393, "grad_norm": 17.17940330505371, "learning_rate": 3.90625e-06, "loss": 0.3555755615234375, "step": 51 }, { "epoch": 0.007245871943147774, "grad_norm": 12.101567268371582, "learning_rate": 3.984375e-06, "loss": 0.3737945556640625, "step": 52 }, { "epoch": 0.0073852156343621545, "grad_norm": 16.21722412109375, "learning_rate": 4.0625000000000005e-06, "loss": 0.318939208984375, "step": 53 }, { "epoch": 0.007524559325576535, "grad_norm": 9.64992904663086, "learning_rate": 4.140625000000001e-06, "loss": 0.364654541015625, "step": 54 }, { "epoch": 0.007663903016790915, "grad_norm": 11.83438491821289, "learning_rate": 4.21875e-06, "loss": 0.32879638671875, "step": 55 }, { "epoch": 0.007803246708005295, "grad_norm": 2.6852619647979736, "learning_rate": 4.296875e-06, "loss": 0.403411865234375, "step": 56 }, { "epoch": 0.007942590399219676, "grad_norm": 4.491562843322754, "learning_rate": 4.3750000000000005e-06, "loss": 0.375762939453125, "step": 57 }, { "epoch": 0.008081934090434055, "grad_norm": 10.005256652832031, "learning_rate": 4.453125000000001e-06, "loss": 0.3044281005859375, "step": 58 }, { "epoch": 0.008221277781648436, "grad_norm": 8.927207946777344, "learning_rate": 4.53125e-06, "loss": 0.302581787109375, "step": 59 }, { "epoch": 0.008360621472862817, "grad_norm": 4.227772235870361, "learning_rate": 4.609375e-06, "loss": 0.339599609375, "step": 60 }, { "epoch": 0.008499965164077196, "grad_norm": 2.812326192855835, "learning_rate": 4.6875000000000004e-06, "loss": 0.3450164794921875, "step": 61 }, { "epoch": 0.008639308855291577, "grad_norm": 3.775153875350952, "learning_rate": 4.765625000000001e-06, "loss": 0.3248138427734375, "step": 62 }, { "epoch": 0.008778652546505956, "grad_norm": 2.7568106651306152, "learning_rate": 4.84375e-06, "loss": 0.379791259765625, "step": 63 }, { "epoch": 0.008917996237720337, "grad_norm": 5.9498748779296875, "learning_rate": 4.921875e-06, "loss": 0.3110198974609375, "step": 64 }, { "epoch": 0.009057339928934717, "grad_norm": 4.063409805297852, "learning_rate": 5e-06, "loss": 0.3192901611328125, "step": 65 }, { "epoch": 0.009196683620149098, "grad_norm": 7.123007297515869, "learning_rate": 5.078125000000001e-06, "loss": 0.258636474609375, "step": 66 }, { "epoch": 0.009336027311363479, "grad_norm": 8.098702430725098, "learning_rate": 5.156250000000001e-06, "loss": 0.225128173828125, "step": 67 }, { "epoch": 0.009475371002577858, "grad_norm": 7.1579155921936035, "learning_rate": 5.234375e-06, "loss": 0.216796875, "step": 68 }, { "epoch": 0.009614714693792239, "grad_norm": 7.160254955291748, "learning_rate": 5.3125e-06, "loss": 0.32779693603515625, "step": 69 }, { "epoch": 0.009754058385006618, "grad_norm": 6.66569185256958, "learning_rate": 5.390625000000001e-06, "loss": 0.231903076171875, "step": 70 }, { "epoch": 0.009893402076220999, "grad_norm": 5.218270778656006, "learning_rate": 5.468750000000001e-06, "loss": 0.3233489990234375, "step": 71 }, { "epoch": 0.01003274576743538, "grad_norm": 7.696992874145508, "learning_rate": 5.546875e-06, "loss": 0.22046661376953125, "step": 72 }, { "epoch": 0.01017208945864976, "grad_norm": 10.82709789276123, "learning_rate": 5.625e-06, "loss": 0.3422698974609375, "step": 73 }, { "epoch": 0.01031143314986414, "grad_norm": 8.295283317565918, "learning_rate": 5.7031250000000006e-06, "loss": 0.22650909423828125, "step": 74 }, { "epoch": 0.01045077684107852, "grad_norm": 20.828475952148438, "learning_rate": 5.781250000000001e-06, "loss": 0.196014404296875, "step": 75 }, { "epoch": 0.0105901205322929, "grad_norm": 15.837902069091797, "learning_rate": 5.859375e-06, "loss": 0.3138885498046875, "step": 76 }, { "epoch": 0.010729464223507281, "grad_norm": 8.306597709655762, "learning_rate": 5.9375e-06, "loss": 0.25958251953125, "step": 77 }, { "epoch": 0.01086880791472166, "grad_norm": 13.491727828979492, "learning_rate": 6.0156250000000005e-06, "loss": 0.22585296630859375, "step": 78 }, { "epoch": 0.011008151605936042, "grad_norm": 11.723892211914062, "learning_rate": 6.093750000000001e-06, "loss": 0.2397003173828125, "step": 79 }, { "epoch": 0.011147495297150421, "grad_norm": 20.432104110717773, "learning_rate": 6.171875e-06, "loss": 0.2823944091796875, "step": 80 }, { "epoch": 0.011286838988364802, "grad_norm": 18.704376220703125, "learning_rate": 6.25e-06, "loss": 0.3331146240234375, "step": 81 }, { "epoch": 0.011426182679579183, "grad_norm": 16.293437957763672, "learning_rate": 6.3281250000000005e-06, "loss": 0.3228607177734375, "step": 82 }, { "epoch": 0.011565526370793562, "grad_norm": 7.765333652496338, "learning_rate": 6.406250000000001e-06, "loss": 0.3239898681640625, "step": 83 }, { "epoch": 0.011704870062007943, "grad_norm": 9.332883834838867, "learning_rate": 6.484375000000001e-06, "loss": 0.24146270751953125, "step": 84 }, { "epoch": 0.011844213753222322, "grad_norm": 5.795038223266602, "learning_rate": 6.5625e-06, "loss": 0.23305511474609375, "step": 85 }, { "epoch": 0.011983557444436703, "grad_norm": 13.246281623840332, "learning_rate": 6.6406250000000005e-06, "loss": 0.2350921630859375, "step": 86 }, { "epoch": 0.012122901135651083, "grad_norm": 12.693568229675293, "learning_rate": 6.718750000000001e-06, "loss": 0.20955657958984375, "step": 87 }, { "epoch": 0.012262244826865464, "grad_norm": 11.37897777557373, "learning_rate": 6.796875000000001e-06, "loss": 0.2831573486328125, "step": 88 }, { "epoch": 0.012401588518079844, "grad_norm": 13.740562438964844, "learning_rate": 6.875e-06, "loss": 0.25287628173828125, "step": 89 }, { "epoch": 0.012540932209294224, "grad_norm": 5.004226207733154, "learning_rate": 6.9531250000000004e-06, "loss": 0.23177337646484375, "step": 90 }, { "epoch": 0.012680275900508605, "grad_norm": 16.822385787963867, "learning_rate": 7.031250000000001e-06, "loss": 0.261810302734375, "step": 91 }, { "epoch": 0.012819619591722984, "grad_norm": 25.2241153717041, "learning_rate": 7.109375000000001e-06, "loss": 0.2317962646484375, "step": 92 }, { "epoch": 0.012958963282937365, "grad_norm": 12.065925598144531, "learning_rate": 7.1875e-06, "loss": 0.323699951171875, "step": 93 }, { "epoch": 0.013098306974151746, "grad_norm": 3.920778512954712, "learning_rate": 7.265625e-06, "loss": 0.22322845458984375, "step": 94 }, { "epoch": 0.013237650665366125, "grad_norm": 8.746639251708984, "learning_rate": 7.343750000000001e-06, "loss": 0.208465576171875, "step": 95 }, { "epoch": 0.013376994356580506, "grad_norm": 4.796881198883057, "learning_rate": 7.421875000000001e-06, "loss": 0.18829345703125, "step": 96 }, { "epoch": 0.013516338047794885, "grad_norm": 6.854090213775635, "learning_rate": 7.500000000000001e-06, "loss": 0.21773910522460938, "step": 97 }, { "epoch": 0.013655681739009266, "grad_norm": 6.848060607910156, "learning_rate": 7.578125e-06, "loss": 0.235931396484375, "step": 98 }, { "epoch": 0.013795025430223647, "grad_norm": 5.420156002044678, "learning_rate": 7.656250000000001e-06, "loss": 0.25177001953125, "step": 99 }, { "epoch": 0.013934369121438027, "grad_norm": 8.819807052612305, "learning_rate": 7.734375e-06, "loss": 0.214385986328125, "step": 100 }, { "epoch": 0.014073712812652408, "grad_norm": 4.034743309020996, "learning_rate": 7.8125e-06, "loss": 0.1653900146484375, "step": 101 }, { "epoch": 0.014213056503866787, "grad_norm": 20.99355697631836, "learning_rate": 7.890625e-06, "loss": 0.28530120849609375, "step": 102 }, { "epoch": 0.014352400195081168, "grad_norm": 15.552170753479004, "learning_rate": 7.96875e-06, "loss": 0.23798370361328125, "step": 103 }, { "epoch": 0.014491743886295549, "grad_norm": 7.223971366882324, "learning_rate": 8.046875e-06, "loss": 0.21608734130859375, "step": 104 }, { "epoch": 0.014631087577509928, "grad_norm": 10.722821235656738, "learning_rate": 8.125000000000001e-06, "loss": 0.17705535888671875, "step": 105 }, { "epoch": 0.014770431268724309, "grad_norm": 11.949408531188965, "learning_rate": 8.203125000000001e-06, "loss": 0.23856353759765625, "step": 106 }, { "epoch": 0.014909774959938688, "grad_norm": 9.366409301757812, "learning_rate": 8.281250000000001e-06, "loss": 0.203033447265625, "step": 107 }, { "epoch": 0.01504911865115307, "grad_norm": 15.073811531066895, "learning_rate": 8.359375e-06, "loss": 0.14673614501953125, "step": 108 }, { "epoch": 0.01518846234236745, "grad_norm": 14.343490600585938, "learning_rate": 8.4375e-06, "loss": 0.2232666015625, "step": 109 }, { "epoch": 0.01532780603358183, "grad_norm": 3.198679208755493, "learning_rate": 8.515625e-06, "loss": 0.2050323486328125, "step": 110 }, { "epoch": 0.01546714972479621, "grad_norm": 16.795978546142578, "learning_rate": 8.59375e-06, "loss": 0.19673538208007812, "step": 111 }, { "epoch": 0.01560649341601059, "grad_norm": 15.135960578918457, "learning_rate": 8.671875e-06, "loss": 0.238311767578125, "step": 112 }, { "epoch": 0.01574583710722497, "grad_norm": 10.938837051391602, "learning_rate": 8.750000000000001e-06, "loss": 0.29259490966796875, "step": 113 }, { "epoch": 0.01588518079843935, "grad_norm": 15.526944160461426, "learning_rate": 8.828125000000001e-06, "loss": 0.235565185546875, "step": 114 }, { "epoch": 0.016024524489653733, "grad_norm": 20.863676071166992, "learning_rate": 8.906250000000001e-06, "loss": 0.2219696044921875, "step": 115 }, { "epoch": 0.01616386818086811, "grad_norm": 15.12514877319336, "learning_rate": 8.984375000000002e-06, "loss": 0.2725677490234375, "step": 116 }, { "epoch": 0.01630321187208249, "grad_norm": 8.126269340515137, "learning_rate": 9.0625e-06, "loss": 0.17389678955078125, "step": 117 }, { "epoch": 0.016442555563296872, "grad_norm": 19.26151466369629, "learning_rate": 9.140625e-06, "loss": 0.1754608154296875, "step": 118 }, { "epoch": 0.016581899254511253, "grad_norm": 25.02276039123535, "learning_rate": 9.21875e-06, "loss": 0.2781982421875, "step": 119 }, { "epoch": 0.016721242945725634, "grad_norm": 18.551816940307617, "learning_rate": 9.296875e-06, "loss": 0.27923583984375, "step": 120 }, { "epoch": 0.01686058663694001, "grad_norm": 9.031113624572754, "learning_rate": 9.375000000000001e-06, "loss": 0.2947731018066406, "step": 121 }, { "epoch": 0.016999930328154392, "grad_norm": 6.5948028564453125, "learning_rate": 9.453125000000001e-06, "loss": 0.19278717041015625, "step": 122 }, { "epoch": 0.017139274019368773, "grad_norm": 8.066917419433594, "learning_rate": 9.531250000000001e-06, "loss": 0.19611740112304688, "step": 123 }, { "epoch": 0.017278617710583154, "grad_norm": 4.516763210296631, "learning_rate": 9.609375000000001e-06, "loss": 0.24005889892578125, "step": 124 }, { "epoch": 0.017417961401797532, "grad_norm": 2.6600706577301025, "learning_rate": 9.6875e-06, "loss": 0.1641998291015625, "step": 125 }, { "epoch": 0.017557305093011913, "grad_norm": 10.423636436462402, "learning_rate": 9.765625e-06, "loss": 0.18814468383789062, "step": 126 }, { "epoch": 0.017696648784226294, "grad_norm": 6.515997409820557, "learning_rate": 9.84375e-06, "loss": 0.2266082763671875, "step": 127 }, { "epoch": 0.017835992475440675, "grad_norm": 8.942510604858398, "learning_rate": 9.921875e-06, "loss": 0.232635498046875, "step": 128 }, { "epoch": 0.017975336166655056, "grad_norm": 11.724843978881836, "learning_rate": 1e-05, "loss": 0.1920928955078125, "step": 129 }, { "epoch": 0.018114679857869433, "grad_norm": 2.1824944019317627, "learning_rate": 1.0078125000000001e-05, "loss": 0.1387939453125, "step": 130 }, { "epoch": 0.018254023549083814, "grad_norm": 16.392610549926758, "learning_rate": 1.0156250000000001e-05, "loss": 0.22784423828125, "step": 131 }, { "epoch": 0.018393367240298195, "grad_norm": 5.487381935119629, "learning_rate": 1.0234375000000001e-05, "loss": 0.2693672180175781, "step": 132 }, { "epoch": 0.018532710931512576, "grad_norm": 7.823380470275879, "learning_rate": 1.0312500000000002e-05, "loss": 0.142913818359375, "step": 133 }, { "epoch": 0.018672054622726957, "grad_norm": 4.9395270347595215, "learning_rate": 1.0390625e-05, "loss": 0.14363861083984375, "step": 134 }, { "epoch": 0.018811398313941335, "grad_norm": 6.06682014465332, "learning_rate": 1.046875e-05, "loss": 0.15842819213867188, "step": 135 }, { "epoch": 0.018950742005155716, "grad_norm": 6.153921604156494, "learning_rate": 1.0546875e-05, "loss": 0.2611846923828125, "step": 136 }, { "epoch": 0.019090085696370097, "grad_norm": 4.895573616027832, "learning_rate": 1.0625e-05, "loss": 0.1944427490234375, "step": 137 }, { "epoch": 0.019229429387584478, "grad_norm": 3.2464022636413574, "learning_rate": 1.0703125000000001e-05, "loss": 0.19963836669921875, "step": 138 }, { "epoch": 0.01936877307879886, "grad_norm": 12.111172676086426, "learning_rate": 1.0781250000000001e-05, "loss": 0.23938751220703125, "step": 139 }, { "epoch": 0.019508116770013236, "grad_norm": 13.649538040161133, "learning_rate": 1.0859375000000001e-05, "loss": 0.179962158203125, "step": 140 }, { "epoch": 0.019647460461227617, "grad_norm": 4.919685363769531, "learning_rate": 1.0937500000000002e-05, "loss": 0.24884796142578125, "step": 141 }, { "epoch": 0.019786804152441998, "grad_norm": 5.723228931427002, "learning_rate": 1.1015625e-05, "loss": 0.19905853271484375, "step": 142 }, { "epoch": 0.01992614784365638, "grad_norm": 2.247729778289795, "learning_rate": 1.109375e-05, "loss": 0.23711776733398438, "step": 143 }, { "epoch": 0.02006549153487076, "grad_norm": 10.623004913330078, "learning_rate": 1.1171875e-05, "loss": 0.21348953247070312, "step": 144 }, { "epoch": 0.020204835226085138, "grad_norm": 6.32315731048584, "learning_rate": 1.125e-05, "loss": 0.18719482421875, "step": 145 }, { "epoch": 0.02034417891729952, "grad_norm": 6.062091827392578, "learning_rate": 1.1328125000000001e-05, "loss": 0.17003250122070312, "step": 146 }, { "epoch": 0.0204835226085139, "grad_norm": 10.080375671386719, "learning_rate": 1.1406250000000001e-05, "loss": 0.21138763427734375, "step": 147 }, { "epoch": 0.02062286629972828, "grad_norm": 7.955326557159424, "learning_rate": 1.1484375000000001e-05, "loss": 0.17786407470703125, "step": 148 }, { "epoch": 0.02076220999094266, "grad_norm": 2.162188768386841, "learning_rate": 1.1562500000000002e-05, "loss": 0.14757537841796875, "step": 149 }, { "epoch": 0.02090155368215704, "grad_norm": 6.504528045654297, "learning_rate": 1.1640625000000002e-05, "loss": 0.17267608642578125, "step": 150 }, { "epoch": 0.02104089737337142, "grad_norm": 4.625658988952637, "learning_rate": 1.171875e-05, "loss": 0.229766845703125, "step": 151 }, { "epoch": 0.0211802410645858, "grad_norm": 5.853835105895996, "learning_rate": 1.1796875e-05, "loss": 0.21680450439453125, "step": 152 }, { "epoch": 0.021319584755800182, "grad_norm": 8.290628433227539, "learning_rate": 1.1875e-05, "loss": 0.12215805053710938, "step": 153 }, { "epoch": 0.021458928447014563, "grad_norm": 6.803259372711182, "learning_rate": 1.1953125000000001e-05, "loss": 0.1794586181640625, "step": 154 }, { "epoch": 0.02159827213822894, "grad_norm": 5.6353440284729, "learning_rate": 1.2031250000000001e-05, "loss": 0.19565582275390625, "step": 155 }, { "epoch": 0.02173761582944332, "grad_norm": 8.689154624938965, "learning_rate": 1.2109375000000001e-05, "loss": 0.1830902099609375, "step": 156 }, { "epoch": 0.021876959520657702, "grad_norm": 5.814373970031738, "learning_rate": 1.2187500000000001e-05, "loss": 0.1688690185546875, "step": 157 }, { "epoch": 0.022016303211872083, "grad_norm": 15.974237442016602, "learning_rate": 1.2265625000000002e-05, "loss": 0.2555427551269531, "step": 158 }, { "epoch": 0.022155646903086464, "grad_norm": 5.6902618408203125, "learning_rate": 1.234375e-05, "loss": 0.15194320678710938, "step": 159 }, { "epoch": 0.022294990594300842, "grad_norm": 9.141617774963379, "learning_rate": 1.2421875e-05, "loss": 0.22792816162109375, "step": 160 }, { "epoch": 0.022434334285515223, "grad_norm": 4.283246994018555, "learning_rate": 1.25e-05, "loss": 0.2251434326171875, "step": 161 }, { "epoch": 0.022573677976729604, "grad_norm": 7.396460056304932, "learning_rate": 1.2578125e-05, "loss": 0.1446075439453125, "step": 162 }, { "epoch": 0.022713021667943985, "grad_norm": 5.757813930511475, "learning_rate": 1.2656250000000001e-05, "loss": 0.18130874633789062, "step": 163 }, { "epoch": 0.022852365359158366, "grad_norm": 3.1287357807159424, "learning_rate": 1.2734375000000001e-05, "loss": 0.118560791015625, "step": 164 }, { "epoch": 0.022991709050372743, "grad_norm": 1.7641234397888184, "learning_rate": 1.2812500000000001e-05, "loss": 0.16223526000976562, "step": 165 }, { "epoch": 0.023131052741587124, "grad_norm": 3.406259536743164, "learning_rate": 1.2890625000000002e-05, "loss": 0.180267333984375, "step": 166 }, { "epoch": 0.023270396432801505, "grad_norm": 11.442927360534668, "learning_rate": 1.2968750000000002e-05, "loss": 0.179901123046875, "step": 167 }, { "epoch": 0.023409740124015886, "grad_norm": 4.9085493087768555, "learning_rate": 1.3046875e-05, "loss": 0.11784744262695312, "step": 168 }, { "epoch": 0.023549083815230267, "grad_norm": 5.811831474304199, "learning_rate": 1.3125e-05, "loss": 0.12638092041015625, "step": 169 }, { "epoch": 0.023688427506444645, "grad_norm": 3.0889089107513428, "learning_rate": 1.3203125e-05, "loss": 0.10998153686523438, "step": 170 }, { "epoch": 0.023827771197659026, "grad_norm": 2.942074775695801, "learning_rate": 1.3281250000000001e-05, "loss": 0.18724441528320312, "step": 171 }, { "epoch": 0.023967114888873407, "grad_norm": 4.158938407897949, "learning_rate": 1.3359375000000001e-05, "loss": 0.1234893798828125, "step": 172 }, { "epoch": 0.024106458580087788, "grad_norm": 4.3631367683410645, "learning_rate": 1.3437500000000001e-05, "loss": 0.1478424072265625, "step": 173 }, { "epoch": 0.024245802271302165, "grad_norm": 5.135581016540527, "learning_rate": 1.3515625000000002e-05, "loss": 0.17469406127929688, "step": 174 }, { "epoch": 0.024385145962516546, "grad_norm": 7.9102301597595215, "learning_rate": 1.3593750000000002e-05, "loss": 0.20298004150390625, "step": 175 }, { "epoch": 0.024524489653730927, "grad_norm": 2.550529956817627, "learning_rate": 1.3671875e-05, "loss": 0.15830612182617188, "step": 176 }, { "epoch": 0.024663833344945308, "grad_norm": 10.557239532470703, "learning_rate": 1.375e-05, "loss": 0.14163589477539062, "step": 177 }, { "epoch": 0.02480317703615969, "grad_norm": 7.7039899826049805, "learning_rate": 1.3828125e-05, "loss": 0.1522216796875, "step": 178 }, { "epoch": 0.024942520727374067, "grad_norm": 4.780228614807129, "learning_rate": 1.3906250000000001e-05, "loss": 0.12770843505859375, "step": 179 }, { "epoch": 0.025081864418588447, "grad_norm": 7.108807563781738, "learning_rate": 1.3984375000000001e-05, "loss": 0.106536865234375, "step": 180 }, { "epoch": 0.02522120810980283, "grad_norm": 10.260586738586426, "learning_rate": 1.4062500000000001e-05, "loss": 0.17214584350585938, "step": 181 }, { "epoch": 0.02536055180101721, "grad_norm": 15.482001304626465, "learning_rate": 1.4140625000000002e-05, "loss": 0.23189544677734375, "step": 182 }, { "epoch": 0.02549989549223159, "grad_norm": 3.990216016769409, "learning_rate": 1.4218750000000002e-05, "loss": 0.2648162841796875, "step": 183 }, { "epoch": 0.025639239183445968, "grad_norm": 11.725672721862793, "learning_rate": 1.4296875000000002e-05, "loss": 0.22397232055664062, "step": 184 }, { "epoch": 0.02577858287466035, "grad_norm": 4.92960786819458, "learning_rate": 1.4375e-05, "loss": 0.15979385375976562, "step": 185 }, { "epoch": 0.02591792656587473, "grad_norm": 4.962447643280029, "learning_rate": 1.4453125e-05, "loss": 0.19137191772460938, "step": 186 }, { "epoch": 0.02605727025708911, "grad_norm": 4.462624549865723, "learning_rate": 1.453125e-05, "loss": 0.1897125244140625, "step": 187 }, { "epoch": 0.026196613948303492, "grad_norm": 4.031778335571289, "learning_rate": 1.4609375000000001e-05, "loss": 0.17697906494140625, "step": 188 }, { "epoch": 0.02633595763951787, "grad_norm": 6.0412139892578125, "learning_rate": 1.4687500000000001e-05, "loss": 0.13458251953125, "step": 189 }, { "epoch": 0.02647530133073225, "grad_norm": 6.443297863006592, "learning_rate": 1.4765625000000001e-05, "loss": 0.19022369384765625, "step": 190 }, { "epoch": 0.02661464502194663, "grad_norm": 1.3980852365493774, "learning_rate": 1.4843750000000002e-05, "loss": 0.1270732879638672, "step": 191 }, { "epoch": 0.026753988713161012, "grad_norm": 8.832712173461914, "learning_rate": 1.4921875000000002e-05, "loss": 0.22046661376953125, "step": 192 }, { "epoch": 0.026893332404375393, "grad_norm": 6.981844425201416, "learning_rate": 1.5000000000000002e-05, "loss": 0.2545623779296875, "step": 193 }, { "epoch": 0.02703267609558977, "grad_norm": 3.730447292327881, "learning_rate": 1.5078125e-05, "loss": 0.13394546508789062, "step": 194 }, { "epoch": 0.02717201978680415, "grad_norm": 3.3465733528137207, "learning_rate": 1.515625e-05, "loss": 0.17260360717773438, "step": 195 }, { "epoch": 0.027311363478018533, "grad_norm": 5.559998989105225, "learning_rate": 1.5234375000000001e-05, "loss": 0.12349700927734375, "step": 196 }, { "epoch": 0.027450707169232914, "grad_norm": 6.5464701652526855, "learning_rate": 1.5312500000000003e-05, "loss": 0.2064361572265625, "step": 197 }, { "epoch": 0.027590050860447295, "grad_norm": 7.421088695526123, "learning_rate": 1.5390625e-05, "loss": 0.167144775390625, "step": 198 }, { "epoch": 0.027729394551661672, "grad_norm": 5.992583274841309, "learning_rate": 1.546875e-05, "loss": 0.15998458862304688, "step": 199 }, { "epoch": 0.027868738242876053, "grad_norm": 2.1265034675598145, "learning_rate": 1.5546875e-05, "loss": 0.17952728271484375, "step": 200 }, { "epoch": 0.028008081934090434, "grad_norm": 4.205785274505615, "learning_rate": 1.5625e-05, "loss": 0.13156509399414062, "step": 201 }, { "epoch": 0.028147425625304815, "grad_norm": 1.694059133529663, "learning_rate": 1.5703125e-05, "loss": 0.0891876220703125, "step": 202 }, { "epoch": 0.028286769316519196, "grad_norm": 2.5963356494903564, "learning_rate": 1.578125e-05, "loss": 0.10303878784179688, "step": 203 }, { "epoch": 0.028426113007733574, "grad_norm": 2.343632698059082, "learning_rate": 1.5859375e-05, "loss": 0.12206268310546875, "step": 204 }, { "epoch": 0.028565456698947955, "grad_norm": 2.1490259170532227, "learning_rate": 1.59375e-05, "loss": 0.13367843627929688, "step": 205 }, { "epoch": 0.028704800390162336, "grad_norm": 3.200058698654175, "learning_rate": 1.6015625e-05, "loss": 0.11779022216796875, "step": 206 }, { "epoch": 0.028844144081376717, "grad_norm": 9.46463680267334, "learning_rate": 1.609375e-05, "loss": 0.15291595458984375, "step": 207 }, { "epoch": 0.028983487772591097, "grad_norm": 5.547858715057373, "learning_rate": 1.6171875000000002e-05, "loss": 0.14309310913085938, "step": 208 }, { "epoch": 0.029122831463805475, "grad_norm": 4.851221561431885, "learning_rate": 1.6250000000000002e-05, "loss": 0.0955352783203125, "step": 209 }, { "epoch": 0.029262175155019856, "grad_norm": 7.888828277587891, "learning_rate": 1.6328125000000002e-05, "loss": 0.20464324951171875, "step": 210 }, { "epoch": 0.029401518846234237, "grad_norm": 3.4378020763397217, "learning_rate": 1.6406250000000002e-05, "loss": 0.12322235107421875, "step": 211 }, { "epoch": 0.029540862537448618, "grad_norm": 4.337862968444824, "learning_rate": 1.6484375000000003e-05, "loss": 0.14989852905273438, "step": 212 }, { "epoch": 0.029680206228663, "grad_norm": 3.9836552143096924, "learning_rate": 1.6562500000000003e-05, "loss": 0.18027496337890625, "step": 213 }, { "epoch": 0.029819549919877376, "grad_norm": 6.980125427246094, "learning_rate": 1.6640625000000003e-05, "loss": 0.196807861328125, "step": 214 }, { "epoch": 0.029958893611091757, "grad_norm": 8.426046371459961, "learning_rate": 1.671875e-05, "loss": 0.21418380737304688, "step": 215 }, { "epoch": 0.03009823730230614, "grad_norm": 7.015588283538818, "learning_rate": 1.6796875e-05, "loss": 0.22662734985351562, "step": 216 }, { "epoch": 0.03023758099352052, "grad_norm": 4.552795886993408, "learning_rate": 1.6875e-05, "loss": 0.15364456176757812, "step": 217 }, { "epoch": 0.0303769246847349, "grad_norm": 1.7086564302444458, "learning_rate": 1.6953125e-05, "loss": 0.15375518798828125, "step": 218 }, { "epoch": 0.030516268375949278, "grad_norm": 0.8052099943161011, "learning_rate": 1.703125e-05, "loss": 0.11156082153320312, "step": 219 }, { "epoch": 0.03065561206716366, "grad_norm": 4.236763954162598, "learning_rate": 1.7109375e-05, "loss": 0.13831329345703125, "step": 220 }, { "epoch": 0.03079495575837804, "grad_norm": 2.7073967456817627, "learning_rate": 1.71875e-05, "loss": 0.1667022705078125, "step": 221 }, { "epoch": 0.03093429944959242, "grad_norm": 4.411389350891113, "learning_rate": 1.7265625e-05, "loss": 0.23344802856445312, "step": 222 }, { "epoch": 0.031073643140806798, "grad_norm": 19.97957992553711, "learning_rate": 1.734375e-05, "loss": 0.21508026123046875, "step": 223 }, { "epoch": 0.03121298683202118, "grad_norm": 10.919645309448242, "learning_rate": 1.7421875e-05, "loss": 0.1551361083984375, "step": 224 }, { "epoch": 0.031352330523235564, "grad_norm": 1.9346929788589478, "learning_rate": 1.7500000000000002e-05, "loss": 0.14713668823242188, "step": 225 }, { "epoch": 0.03149167421444994, "grad_norm": 3.8985822200775146, "learning_rate": 1.7578125000000002e-05, "loss": 0.13894271850585938, "step": 226 }, { "epoch": 0.03163101790566432, "grad_norm": 2.8252902030944824, "learning_rate": 1.7656250000000002e-05, "loss": 0.133819580078125, "step": 227 }, { "epoch": 0.0317703615968787, "grad_norm": 4.966582298278809, "learning_rate": 1.7734375000000002e-05, "loss": 0.17972183227539062, "step": 228 }, { "epoch": 0.03190970528809308, "grad_norm": 11.945621490478516, "learning_rate": 1.7812500000000003e-05, "loss": 0.21394729614257812, "step": 229 }, { "epoch": 0.032049048979307465, "grad_norm": 15.203856468200684, "learning_rate": 1.7890625000000003e-05, "loss": 0.2523040771484375, "step": 230 }, { "epoch": 0.03218839267052184, "grad_norm": 4.826852798461914, "learning_rate": 1.7968750000000003e-05, "loss": 0.14131927490234375, "step": 231 }, { "epoch": 0.03232773636173622, "grad_norm": 9.49569034576416, "learning_rate": 1.8046875e-05, "loss": 0.22652435302734375, "step": 232 }, { "epoch": 0.032467080052950605, "grad_norm": 4.826241970062256, "learning_rate": 1.8125e-05, "loss": 0.13573455810546875, "step": 233 }, { "epoch": 0.03260642374416498, "grad_norm": 9.664764404296875, "learning_rate": 1.8203125e-05, "loss": 0.2448883056640625, "step": 234 }, { "epoch": 0.032745767435379367, "grad_norm": 2.496797561645508, "learning_rate": 1.828125e-05, "loss": 0.1580657958984375, "step": 235 }, { "epoch": 0.032885111126593744, "grad_norm": 9.663092613220215, "learning_rate": 1.8359375e-05, "loss": 0.13799285888671875, "step": 236 }, { "epoch": 0.03302445481780812, "grad_norm": 7.511887550354004, "learning_rate": 1.84375e-05, "loss": 0.14389991760253906, "step": 237 }, { "epoch": 0.033163798509022506, "grad_norm": 6.70507287979126, "learning_rate": 1.8515625e-05, "loss": 0.13750076293945312, "step": 238 }, { "epoch": 0.033303142200236883, "grad_norm": 1.5982306003570557, "learning_rate": 1.859375e-05, "loss": 0.09104156494140625, "step": 239 }, { "epoch": 0.03344248589145127, "grad_norm": 4.109487533569336, "learning_rate": 1.8671875e-05, "loss": 0.17372512817382812, "step": 240 }, { "epoch": 0.033581829582665645, "grad_norm": 5.061545372009277, "learning_rate": 1.8750000000000002e-05, "loss": 0.12432098388671875, "step": 241 }, { "epoch": 0.03372117327388002, "grad_norm": 3.4716570377349854, "learning_rate": 1.8828125000000002e-05, "loss": 0.11724090576171875, "step": 242 }, { "epoch": 0.03386051696509441, "grad_norm": 5.689507007598877, "learning_rate": 1.8906250000000002e-05, "loss": 0.19164276123046875, "step": 243 }, { "epoch": 0.033999860656308785, "grad_norm": 16.495922088623047, "learning_rate": 1.8984375000000002e-05, "loss": 0.20988845825195312, "step": 244 }, { "epoch": 0.03413920434752317, "grad_norm": 18.91434097290039, "learning_rate": 1.9062500000000003e-05, "loss": 0.18117904663085938, "step": 245 }, { "epoch": 0.03427854803873755, "grad_norm": 5.467302322387695, "learning_rate": 1.9140625000000003e-05, "loss": 0.0908203125, "step": 246 }, { "epoch": 0.034417891729951924, "grad_norm": 2.3467586040496826, "learning_rate": 1.9218750000000003e-05, "loss": 0.13916015625, "step": 247 }, { "epoch": 0.03455723542116631, "grad_norm": 3.496011734008789, "learning_rate": 1.9296875000000003e-05, "loss": 0.15574264526367188, "step": 248 }, { "epoch": 0.034696579112380686, "grad_norm": 4.236489295959473, "learning_rate": 1.9375e-05, "loss": 0.18981170654296875, "step": 249 }, { "epoch": 0.034835922803595064, "grad_norm": 3.7420201301574707, "learning_rate": 1.9453125e-05, "loss": 0.11133575439453125, "step": 250 }, { "epoch": 0.03497526649480945, "grad_norm": 4.348264217376709, "learning_rate": 1.953125e-05, "loss": 0.14104843139648438, "step": 251 }, { "epoch": 0.035114610186023826, "grad_norm": 13.566645622253418, "learning_rate": 1.9609375e-05, "loss": 0.23079681396484375, "step": 252 }, { "epoch": 0.03525395387723821, "grad_norm": 10.602228164672852, "learning_rate": 1.96875e-05, "loss": 0.18766403198242188, "step": 253 }, { "epoch": 0.03539329756845259, "grad_norm": 6.669156074523926, "learning_rate": 1.9765625e-05, "loss": 0.134124755859375, "step": 254 }, { "epoch": 0.035532641259666965, "grad_norm": 11.8833646774292, "learning_rate": 1.984375e-05, "loss": 0.19338607788085938, "step": 255 }, { "epoch": 0.03567198495088135, "grad_norm": 10.189719200134277, "learning_rate": 1.9921875e-05, "loss": 0.19855880737304688, "step": 256 }, { "epoch": 0.03581132864209573, "grad_norm": 3.4376537799835205, "learning_rate": 2e-05, "loss": 0.16809844970703125, "step": 257 }, { "epoch": 0.03595067233331011, "grad_norm": 2.0516419410705566, "learning_rate": 2.0078125000000002e-05, "loss": 0.11817741394042969, "step": 258 }, { "epoch": 0.03609001602452449, "grad_norm": 2.8684804439544678, "learning_rate": 2.0156250000000002e-05, "loss": 0.16216659545898438, "step": 259 }, { "epoch": 0.03622935971573887, "grad_norm": 3.486982822418213, "learning_rate": 2.0234375000000002e-05, "loss": 0.1451244354248047, "step": 260 }, { "epoch": 0.03636870340695325, "grad_norm": 12.981520652770996, "learning_rate": 2.0312500000000002e-05, "loss": 0.1826019287109375, "step": 261 }, { "epoch": 0.03650804709816763, "grad_norm": 10.362968444824219, "learning_rate": 2.0390625000000003e-05, "loss": 0.17943191528320312, "step": 262 }, { "epoch": 0.03664739078938201, "grad_norm": 8.4119291305542, "learning_rate": 2.0468750000000003e-05, "loss": 0.19427490234375, "step": 263 }, { "epoch": 0.03678673448059639, "grad_norm": 8.52756404876709, "learning_rate": 2.0546875000000003e-05, "loss": 0.18929672241210938, "step": 264 }, { "epoch": 0.03692607817181077, "grad_norm": 8.45046329498291, "learning_rate": 2.0625000000000003e-05, "loss": 0.20080184936523438, "step": 265 }, { "epoch": 0.03706542186302515, "grad_norm": 4.45372200012207, "learning_rate": 2.0703125e-05, "loss": 0.11498641967773438, "step": 266 }, { "epoch": 0.03720476555423953, "grad_norm": 2.6231820583343506, "learning_rate": 2.078125e-05, "loss": 0.11147689819335938, "step": 267 }, { "epoch": 0.037344109245453914, "grad_norm": 6.669605731964111, "learning_rate": 2.0859375e-05, "loss": 0.17826080322265625, "step": 268 }, { "epoch": 0.03748345293666829, "grad_norm": 3.3699822425842285, "learning_rate": 2.09375e-05, "loss": 0.1303253173828125, "step": 269 }, { "epoch": 0.03762279662788267, "grad_norm": 4.310925006866455, "learning_rate": 2.1015625e-05, "loss": 0.13889694213867188, "step": 270 }, { "epoch": 0.037762140319097054, "grad_norm": 2.983015537261963, "learning_rate": 2.109375e-05, "loss": 0.13807296752929688, "step": 271 }, { "epoch": 0.03790148401031143, "grad_norm": 1.8394473791122437, "learning_rate": 2.1171875e-05, "loss": 0.14680099487304688, "step": 272 }, { "epoch": 0.038040827701525816, "grad_norm": 3.6735901832580566, "learning_rate": 2.125e-05, "loss": 0.13889694213867188, "step": 273 }, { "epoch": 0.03818017139274019, "grad_norm": 1.6112126111984253, "learning_rate": 2.1328125000000002e-05, "loss": 0.09775924682617188, "step": 274 }, { "epoch": 0.03831951508395457, "grad_norm": 7.074946880340576, "learning_rate": 2.1406250000000002e-05, "loss": 0.13819503784179688, "step": 275 }, { "epoch": 0.038458858775168955, "grad_norm": 1.4056625366210938, "learning_rate": 2.1484375000000002e-05, "loss": 0.08742141723632812, "step": 276 }, { "epoch": 0.03859820246638333, "grad_norm": 4.244787216186523, "learning_rate": 2.1562500000000002e-05, "loss": 0.17275238037109375, "step": 277 }, { "epoch": 0.03873754615759772, "grad_norm": 4.395383834838867, "learning_rate": 2.1640625000000003e-05, "loss": 0.15907669067382812, "step": 278 }, { "epoch": 0.038876889848812095, "grad_norm": 2.8337953090667725, "learning_rate": 2.1718750000000003e-05, "loss": 0.13778114318847656, "step": 279 }, { "epoch": 0.03901623354002647, "grad_norm": 3.2872002124786377, "learning_rate": 2.1796875000000003e-05, "loss": 0.11849594116210938, "step": 280 }, { "epoch": 0.03915557723124086, "grad_norm": 5.852701663970947, "learning_rate": 2.1875000000000003e-05, "loss": 0.1680908203125, "step": 281 }, { "epoch": 0.039294920922455234, "grad_norm": 4.413242816925049, "learning_rate": 2.1953125000000003e-05, "loss": 0.11581039428710938, "step": 282 }, { "epoch": 0.03943426461366962, "grad_norm": 4.618696689605713, "learning_rate": 2.203125e-05, "loss": 0.16373443603515625, "step": 283 }, { "epoch": 0.039573608304883996, "grad_norm": 11.284041404724121, "learning_rate": 2.2109375e-05, "loss": 0.2421875, "step": 284 }, { "epoch": 0.039712951996098374, "grad_norm": 5.369922637939453, "learning_rate": 2.21875e-05, "loss": 0.1548919677734375, "step": 285 }, { "epoch": 0.03985229568731276, "grad_norm": 7.530871868133545, "learning_rate": 2.2265625e-05, "loss": 0.15246963500976562, "step": 286 }, { "epoch": 0.039991639378527136, "grad_norm": 3.955171585083008, "learning_rate": 2.234375e-05, "loss": 0.14166259765625, "step": 287 }, { "epoch": 0.04013098306974152, "grad_norm": 5.170267581939697, "learning_rate": 2.2421875e-05, "loss": 0.11017227172851562, "step": 288 }, { "epoch": 0.0402703267609559, "grad_norm": 11.165712356567383, "learning_rate": 2.25e-05, "loss": 0.15656661987304688, "step": 289 }, { "epoch": 0.040409670452170275, "grad_norm": 6.673206329345703, "learning_rate": 2.2578125e-05, "loss": 0.15043258666992188, "step": 290 }, { "epoch": 0.04054901414338466, "grad_norm": 2.142934799194336, "learning_rate": 2.2656250000000002e-05, "loss": 0.144195556640625, "step": 291 }, { "epoch": 0.04068835783459904, "grad_norm": 4.6472954750061035, "learning_rate": 2.2734375000000002e-05, "loss": 0.17383956909179688, "step": 292 }, { "epoch": 0.04082770152581342, "grad_norm": 8.35328483581543, "learning_rate": 2.2812500000000002e-05, "loss": 0.21802902221679688, "step": 293 }, { "epoch": 0.0409670452170278, "grad_norm": 4.26718282699585, "learning_rate": 2.2890625000000002e-05, "loss": 0.14316177368164062, "step": 294 }, { "epoch": 0.04110638890824218, "grad_norm": 2.589521884918213, "learning_rate": 2.2968750000000003e-05, "loss": 0.12925338745117188, "step": 295 }, { "epoch": 0.04124573259945656, "grad_norm": 4.902878761291504, "learning_rate": 2.3046875000000003e-05, "loss": 0.17634201049804688, "step": 296 }, { "epoch": 0.04138507629067094, "grad_norm": 3.3919310569763184, "learning_rate": 2.3125000000000003e-05, "loss": 0.11818695068359375, "step": 297 }, { "epoch": 0.04152441998188532, "grad_norm": 0.8714953660964966, "learning_rate": 2.3203125000000003e-05, "loss": 0.09313583374023438, "step": 298 }, { "epoch": 0.0416637636730997, "grad_norm": 6.001412391662598, "learning_rate": 2.3281250000000003e-05, "loss": 0.09900283813476562, "step": 299 }, { "epoch": 0.04180310736431408, "grad_norm": 7.798113822937012, "learning_rate": 2.3359375e-05, "loss": 0.17302322387695312, "step": 300 }, { "epoch": 0.04194245105552846, "grad_norm": 6.590585231781006, "learning_rate": 2.34375e-05, "loss": 0.18585968017578125, "step": 301 }, { "epoch": 0.04208179474674284, "grad_norm": 1.1921802759170532, "learning_rate": 2.3515625e-05, "loss": 0.1658782958984375, "step": 302 }, { "epoch": 0.042221138437957224, "grad_norm": 4.3022942543029785, "learning_rate": 2.359375e-05, "loss": 0.11975669860839844, "step": 303 }, { "epoch": 0.0423604821291716, "grad_norm": 13.332063674926758, "learning_rate": 2.3671875e-05, "loss": 0.2271881103515625, "step": 304 }, { "epoch": 0.04249982582038598, "grad_norm": 2.675811290740967, "learning_rate": 2.375e-05, "loss": 0.12166213989257812, "step": 305 }, { "epoch": 0.042639169511600364, "grad_norm": 9.621895790100098, "learning_rate": 2.3828125e-05, "loss": 0.21872711181640625, "step": 306 }, { "epoch": 0.04277851320281474, "grad_norm": 16.944185256958008, "learning_rate": 2.3906250000000002e-05, "loss": 0.19298171997070312, "step": 307 }, { "epoch": 0.042917856894029126, "grad_norm": 7.998727321624756, "learning_rate": 2.3984375000000002e-05, "loss": 0.14169692993164062, "step": 308 }, { "epoch": 0.0430572005852435, "grad_norm": 4.063196182250977, "learning_rate": 2.4062500000000002e-05, "loss": 0.07884597778320312, "step": 309 }, { "epoch": 0.04319654427645788, "grad_norm": 3.240393877029419, "learning_rate": 2.4140625000000002e-05, "loss": 0.1863555908203125, "step": 310 }, { "epoch": 0.043335887967672265, "grad_norm": 1.6797086000442505, "learning_rate": 2.4218750000000003e-05, "loss": 0.14199447631835938, "step": 311 }, { "epoch": 0.04347523165888664, "grad_norm": 13.882161140441895, "learning_rate": 2.4296875000000003e-05, "loss": 0.22545623779296875, "step": 312 }, { "epoch": 0.04361457535010103, "grad_norm": 6.29648494720459, "learning_rate": 2.4375000000000003e-05, "loss": 0.14227676391601562, "step": 313 }, { "epoch": 0.043753919041315405, "grad_norm": 5.53711462020874, "learning_rate": 2.4453125000000003e-05, "loss": 0.13063430786132812, "step": 314 }, { "epoch": 0.04389326273252978, "grad_norm": 9.892417907714844, "learning_rate": 2.4531250000000003e-05, "loss": 0.15546798706054688, "step": 315 }, { "epoch": 0.04403260642374417, "grad_norm": 1.3137129545211792, "learning_rate": 2.4609375000000004e-05, "loss": 0.12632369995117188, "step": 316 }, { "epoch": 0.044171950114958544, "grad_norm": 13.980039596557617, "learning_rate": 2.46875e-05, "loss": 0.1938323974609375, "step": 317 }, { "epoch": 0.04431129380617293, "grad_norm": 13.280157089233398, "learning_rate": 2.4765625e-05, "loss": 0.16693878173828125, "step": 318 }, { "epoch": 0.044450637497387306, "grad_norm": 3.1769421100616455, "learning_rate": 2.484375e-05, "loss": 0.12438201904296875, "step": 319 }, { "epoch": 0.044589981188601684, "grad_norm": 6.686270236968994, "learning_rate": 2.4921875e-05, "loss": 0.12722015380859375, "step": 320 }, { "epoch": 0.04472932487981607, "grad_norm": 6.62825870513916, "learning_rate": 2.5e-05, "loss": 0.1279144287109375, "step": 321 }, { "epoch": 0.044868668571030446, "grad_norm": 4.808536529541016, "learning_rate": 2.5078125e-05, "loss": 0.12734603881835938, "step": 322 }, { "epoch": 0.04500801226224483, "grad_norm": 6.01970100402832, "learning_rate": 2.515625e-05, "loss": 0.160400390625, "step": 323 }, { "epoch": 0.04514735595345921, "grad_norm": 5.805986404418945, "learning_rate": 2.5234375000000002e-05, "loss": 0.106903076171875, "step": 324 }, { "epoch": 0.045286699644673585, "grad_norm": 7.291723728179932, "learning_rate": 2.5312500000000002e-05, "loss": 0.16937255859375, "step": 325 }, { "epoch": 0.04542604333588797, "grad_norm": 4.367210388183594, "learning_rate": 2.5390625000000002e-05, "loss": 0.16176605224609375, "step": 326 }, { "epoch": 0.04556538702710235, "grad_norm": 5.535088539123535, "learning_rate": 2.5468750000000002e-05, "loss": 0.10674285888671875, "step": 327 }, { "epoch": 0.04570473071831673, "grad_norm": 6.321102619171143, "learning_rate": 2.5546875000000003e-05, "loss": 0.17055892944335938, "step": 328 }, { "epoch": 0.04584407440953111, "grad_norm": 1.4896820783615112, "learning_rate": 2.5625000000000003e-05, "loss": 0.1331329345703125, "step": 329 }, { "epoch": 0.045983418100745486, "grad_norm": 4.083991527557373, "learning_rate": 2.5703125000000003e-05, "loss": 0.15977096557617188, "step": 330 }, { "epoch": 0.04612276179195987, "grad_norm": 3.0442798137664795, "learning_rate": 2.5781250000000003e-05, "loss": 0.1445465087890625, "step": 331 }, { "epoch": 0.04626210548317425, "grad_norm": 1.5406112670898438, "learning_rate": 2.5859375000000003e-05, "loss": 0.11664199829101562, "step": 332 }, { "epoch": 0.04640144917438863, "grad_norm": 3.74257493019104, "learning_rate": 2.5937500000000004e-05, "loss": 0.11249160766601562, "step": 333 }, { "epoch": 0.04654079286560301, "grad_norm": 2.8007924556732178, "learning_rate": 2.6015625e-05, "loss": 0.13175392150878906, "step": 334 }, { "epoch": 0.04668013655681739, "grad_norm": 2.975522041320801, "learning_rate": 2.609375e-05, "loss": 0.142822265625, "step": 335 }, { "epoch": 0.04681948024803177, "grad_norm": 2.471824884414673, "learning_rate": 2.6171875e-05, "loss": 0.1080474853515625, "step": 336 }, { "epoch": 0.04695882393924615, "grad_norm": 0.7004625797271729, "learning_rate": 2.625e-05, "loss": 0.09199905395507812, "step": 337 }, { "epoch": 0.047098167630460534, "grad_norm": 2.7238762378692627, "learning_rate": 2.6328125e-05, "loss": 0.1332111358642578, "step": 338 }, { "epoch": 0.04723751132167491, "grad_norm": 5.013106346130371, "learning_rate": 2.640625e-05, "loss": 0.11324310302734375, "step": 339 }, { "epoch": 0.04737685501288929, "grad_norm": 3.931610584259033, "learning_rate": 2.6484375000000002e-05, "loss": 0.10091400146484375, "step": 340 }, { "epoch": 0.047516198704103674, "grad_norm": 2.0242345333099365, "learning_rate": 2.6562500000000002e-05, "loss": 0.13353347778320312, "step": 341 }, { "epoch": 0.04765554239531805, "grad_norm": 2.6253623962402344, "learning_rate": 2.6640625000000002e-05, "loss": 0.15896224975585938, "step": 342 }, { "epoch": 0.047794886086532436, "grad_norm": 1.82095468044281, "learning_rate": 2.6718750000000002e-05, "loss": 0.11186981201171875, "step": 343 }, { "epoch": 0.04793422977774681, "grad_norm": 1.6914509534835815, "learning_rate": 2.6796875000000003e-05, "loss": 0.14142608642578125, "step": 344 }, { "epoch": 0.04807357346896119, "grad_norm": 2.262786626815796, "learning_rate": 2.6875000000000003e-05, "loss": 0.120819091796875, "step": 345 }, { "epoch": 0.048212917160175575, "grad_norm": 2.7549540996551514, "learning_rate": 2.6953125000000003e-05, "loss": 0.10667991638183594, "step": 346 }, { "epoch": 0.04835226085138995, "grad_norm": 3.977987051010132, "learning_rate": 2.7031250000000003e-05, "loss": 0.196563720703125, "step": 347 }, { "epoch": 0.04849160454260433, "grad_norm": 1.7890863418579102, "learning_rate": 2.7109375000000003e-05, "loss": 0.12773895263671875, "step": 348 }, { "epoch": 0.048630948233818715, "grad_norm": 10.143086433410645, "learning_rate": 2.7187500000000004e-05, "loss": 0.18898773193359375, "step": 349 }, { "epoch": 0.04877029192503309, "grad_norm": 7.500576972961426, "learning_rate": 2.7265625000000004e-05, "loss": 0.13497543334960938, "step": 350 }, { "epoch": 0.04890963561624748, "grad_norm": 4.198714256286621, "learning_rate": 2.734375e-05, "loss": 0.1424713134765625, "step": 351 }, { "epoch": 0.049048979307461854, "grad_norm": 1.4235715866088867, "learning_rate": 2.7421875e-05, "loss": 0.10097122192382812, "step": 352 }, { "epoch": 0.04918832299867623, "grad_norm": 1.9646923542022705, "learning_rate": 2.75e-05, "loss": 0.0894927978515625, "step": 353 }, { "epoch": 0.049327666689890616, "grad_norm": 3.6632654666900635, "learning_rate": 2.7578125e-05, "loss": 0.1464996337890625, "step": 354 }, { "epoch": 0.049467010381104994, "grad_norm": 5.117610931396484, "learning_rate": 2.765625e-05, "loss": 0.115447998046875, "step": 355 }, { "epoch": 0.04960635407231938, "grad_norm": 3.7435388565063477, "learning_rate": 2.7734375e-05, "loss": 0.18556594848632812, "step": 356 }, { "epoch": 0.049745697763533755, "grad_norm": 2.6390440464019775, "learning_rate": 2.7812500000000002e-05, "loss": 0.14746856689453125, "step": 357 }, { "epoch": 0.04988504145474813, "grad_norm": 1.234492301940918, "learning_rate": 2.7890625000000002e-05, "loss": 0.11200141906738281, "step": 358 }, { "epoch": 0.05002438514596252, "grad_norm": 4.065972328186035, "learning_rate": 2.7968750000000002e-05, "loss": 0.10887908935546875, "step": 359 }, { "epoch": 0.050163728837176895, "grad_norm": 9.417527198791504, "learning_rate": 2.8046875000000002e-05, "loss": 0.21554946899414062, "step": 360 }, { "epoch": 0.05030307252839128, "grad_norm": 2.6062357425689697, "learning_rate": 2.8125000000000003e-05, "loss": 0.12706375122070312, "step": 361 }, { "epoch": 0.05044241621960566, "grad_norm": 4.077699661254883, "learning_rate": 2.8203125000000003e-05, "loss": 0.1599864959716797, "step": 362 }, { "epoch": 0.050581759910820034, "grad_norm": 1.8388473987579346, "learning_rate": 2.8281250000000003e-05, "loss": 0.13700485229492188, "step": 363 }, { "epoch": 0.05072110360203442, "grad_norm": 1.6937845945358276, "learning_rate": 2.8359375000000003e-05, "loss": 0.122955322265625, "step": 364 }, { "epoch": 0.050860447293248796, "grad_norm": 0.7536792159080505, "learning_rate": 2.8437500000000003e-05, "loss": 0.08925247192382812, "step": 365 }, { "epoch": 0.05099979098446318, "grad_norm": 7.48211145401001, "learning_rate": 2.8515625000000004e-05, "loss": 0.15163040161132812, "step": 366 }, { "epoch": 0.05113913467567756, "grad_norm": 3.3823533058166504, "learning_rate": 2.8593750000000004e-05, "loss": 0.12523651123046875, "step": 367 }, { "epoch": 0.051278478366891936, "grad_norm": 3.116825819015503, "learning_rate": 2.8671875e-05, "loss": 0.13550949096679688, "step": 368 }, { "epoch": 0.05141782205810632, "grad_norm": 5.276050090789795, "learning_rate": 2.875e-05, "loss": 0.1670074462890625, "step": 369 }, { "epoch": 0.0515571657493207, "grad_norm": 2.1061322689056396, "learning_rate": 2.8828125e-05, "loss": 0.16495513916015625, "step": 370 }, { "epoch": 0.05169650944053508, "grad_norm": 1.6421290636062622, "learning_rate": 2.890625e-05, "loss": 0.13971328735351562, "step": 371 }, { "epoch": 0.05183585313174946, "grad_norm": 0.5310415029525757, "learning_rate": 2.8984375e-05, "loss": 0.07225799560546875, "step": 372 }, { "epoch": 0.05197519682296384, "grad_norm": 5.098465919494629, "learning_rate": 2.90625e-05, "loss": 0.1422252655029297, "step": 373 }, { "epoch": 0.05211454051417822, "grad_norm": 3.1248228549957275, "learning_rate": 2.9140625000000002e-05, "loss": 0.14033889770507812, "step": 374 }, { "epoch": 0.0522538842053926, "grad_norm": 2.333101987838745, "learning_rate": 2.9218750000000002e-05, "loss": 0.14675140380859375, "step": 375 }, { "epoch": 0.052393227896606984, "grad_norm": 4.245767593383789, "learning_rate": 2.9296875000000002e-05, "loss": 0.17270660400390625, "step": 376 }, { "epoch": 0.05253257158782136, "grad_norm": 1.8721506595611572, "learning_rate": 2.9375000000000003e-05, "loss": 0.13094711303710938, "step": 377 }, { "epoch": 0.05267191527903574, "grad_norm": 4.1797637939453125, "learning_rate": 2.9453125000000003e-05, "loss": 0.13178253173828125, "step": 378 }, { "epoch": 0.05281125897025012, "grad_norm": 2.8279387950897217, "learning_rate": 2.9531250000000003e-05, "loss": 0.20691680908203125, "step": 379 }, { "epoch": 0.0529506026614645, "grad_norm": 1.7068419456481934, "learning_rate": 2.9609375000000003e-05, "loss": 0.1299285888671875, "step": 380 }, { "epoch": 0.053089946352678885, "grad_norm": 2.8568222522735596, "learning_rate": 2.9687500000000003e-05, "loss": 0.11894607543945312, "step": 381 }, { "epoch": 0.05322929004389326, "grad_norm": 2.934680223464966, "learning_rate": 2.9765625000000004e-05, "loss": 0.13636016845703125, "step": 382 }, { "epoch": 0.05336863373510764, "grad_norm": 6.454765796661377, "learning_rate": 2.9843750000000004e-05, "loss": 0.13864517211914062, "step": 383 }, { "epoch": 0.053507977426322025, "grad_norm": 4.41960334777832, "learning_rate": 2.9921875000000004e-05, "loss": 0.11544609069824219, "step": 384 }, { "epoch": 0.0536473211175364, "grad_norm": 2.246563196182251, "learning_rate": 3.0000000000000004e-05, "loss": 0.10828399658203125, "step": 385 }, { "epoch": 0.053786664808750786, "grad_norm": 5.776893138885498, "learning_rate": 3.0078125e-05, "loss": 0.15362930297851562, "step": 386 }, { "epoch": 0.053926008499965164, "grad_norm": 3.4696993827819824, "learning_rate": 3.015625e-05, "loss": 0.12880706787109375, "step": 387 }, { "epoch": 0.05406535219117954, "grad_norm": 6.263038158416748, "learning_rate": 3.0234375e-05, "loss": 0.21252822875976562, "step": 388 }, { "epoch": 0.054204695882393926, "grad_norm": 4.023487091064453, "learning_rate": 3.03125e-05, "loss": 0.10901260375976562, "step": 389 }, { "epoch": 0.0543440395736083, "grad_norm": 1.549216628074646, "learning_rate": 3.0390625000000002e-05, "loss": 0.1363372802734375, "step": 390 }, { "epoch": 0.05448338326482269, "grad_norm": 3.0464742183685303, "learning_rate": 3.0468750000000002e-05, "loss": 0.11837959289550781, "step": 391 }, { "epoch": 0.054622726956037065, "grad_norm": 3.3431432247161865, "learning_rate": 3.0546875e-05, "loss": 0.13915252685546875, "step": 392 }, { "epoch": 0.05476207064725144, "grad_norm": 1.6485289335250854, "learning_rate": 3.0625000000000006e-05, "loss": 0.13364219665527344, "step": 393 }, { "epoch": 0.05490141433846583, "grad_norm": 1.2598912715911865, "learning_rate": 3.0703125e-05, "loss": 0.12411880493164062, "step": 394 }, { "epoch": 0.055040758029680205, "grad_norm": 12.061439514160156, "learning_rate": 3.078125e-05, "loss": 0.16320037841796875, "step": 395 }, { "epoch": 0.05518010172089459, "grad_norm": 6.43983793258667, "learning_rate": 3.0859375e-05, "loss": 0.11378097534179688, "step": 396 }, { "epoch": 0.05531944541210897, "grad_norm": 4.606500148773193, "learning_rate": 3.09375e-05, "loss": 0.14632034301757812, "step": 397 }, { "epoch": 0.055458789103323344, "grad_norm": 6.551544666290283, "learning_rate": 3.1015625000000003e-05, "loss": 0.15517234802246094, "step": 398 }, { "epoch": 0.05559813279453773, "grad_norm": 2.759211778640747, "learning_rate": 3.109375e-05, "loss": 0.11321258544921875, "step": 399 }, { "epoch": 0.055737476485752106, "grad_norm": 1.0862674713134766, "learning_rate": 3.1171875000000004e-05, "loss": 0.07699394226074219, "step": 400 }, { "epoch": 0.05587682017696649, "grad_norm": 5.297611236572266, "learning_rate": 3.125e-05, "loss": 0.179534912109375, "step": 401 }, { "epoch": 0.05601616386818087, "grad_norm": 1.2492098808288574, "learning_rate": 3.1328125000000004e-05, "loss": 0.08292007446289062, "step": 402 }, { "epoch": 0.056155507559395246, "grad_norm": 7.252022743225098, "learning_rate": 3.140625e-05, "loss": 0.18150711059570312, "step": 403 }, { "epoch": 0.05629485125060963, "grad_norm": 1.6567667722702026, "learning_rate": 3.1484375000000005e-05, "loss": 0.09032630920410156, "step": 404 }, { "epoch": 0.05643419494182401, "grad_norm": 1.7720317840576172, "learning_rate": 3.15625e-05, "loss": 0.1399078369140625, "step": 405 }, { "epoch": 0.05657353863303839, "grad_norm": 4.111324787139893, "learning_rate": 3.1640625000000005e-05, "loss": 0.11572647094726562, "step": 406 }, { "epoch": 0.05671288232425277, "grad_norm": 4.905533790588379, "learning_rate": 3.171875e-05, "loss": 0.162567138671875, "step": 407 }, { "epoch": 0.05685222601546715, "grad_norm": 1.1801806688308716, "learning_rate": 3.1796875000000005e-05, "loss": 0.11251449584960938, "step": 408 }, { "epoch": 0.05699156970668153, "grad_norm": 3.205890417098999, "learning_rate": 3.1875e-05, "loss": 0.11421585083007812, "step": 409 }, { "epoch": 0.05713091339789591, "grad_norm": 3.4559805393218994, "learning_rate": 3.1953125000000006e-05, "loss": 0.10849380493164062, "step": 410 }, { "epoch": 0.057270257089110294, "grad_norm": 2.342633008956909, "learning_rate": 3.203125e-05, "loss": 0.1396617889404297, "step": 411 }, { "epoch": 0.05740960078032467, "grad_norm": 2.9076180458068848, "learning_rate": 3.2109375e-05, "loss": 0.111907958984375, "step": 412 }, { "epoch": 0.05754894447153905, "grad_norm": 5.0726237297058105, "learning_rate": 3.21875e-05, "loss": 0.13541793823242188, "step": 413 }, { "epoch": 0.05768828816275343, "grad_norm": 3.734821319580078, "learning_rate": 3.2265625e-05, "loss": 0.14260101318359375, "step": 414 }, { "epoch": 0.05782763185396781, "grad_norm": 1.1307181119918823, "learning_rate": 3.2343750000000004e-05, "loss": 0.11335182189941406, "step": 415 }, { "epoch": 0.057966975545182195, "grad_norm": 2.9182047843933105, "learning_rate": 3.2421875e-05, "loss": 0.10137557983398438, "step": 416 }, { "epoch": 0.05810631923639657, "grad_norm": 2.3167152404785156, "learning_rate": 3.2500000000000004e-05, "loss": 0.13050460815429688, "step": 417 }, { "epoch": 0.05824566292761095, "grad_norm": 2.6518657207489014, "learning_rate": 3.2578125e-05, "loss": 0.16229629516601562, "step": 418 }, { "epoch": 0.058385006618825334, "grad_norm": 3.3731040954589844, "learning_rate": 3.2656250000000004e-05, "loss": 0.1067962646484375, "step": 419 }, { "epoch": 0.05852435031003971, "grad_norm": 5.558904647827148, "learning_rate": 3.2734375e-05, "loss": 0.13186264038085938, "step": 420 }, { "epoch": 0.058663694001254096, "grad_norm": 3.7551989555358887, "learning_rate": 3.2812500000000005e-05, "loss": 0.11958885192871094, "step": 421 }, { "epoch": 0.058803037692468474, "grad_norm": 1.2587206363677979, "learning_rate": 3.2890625e-05, "loss": 0.08900070190429688, "step": 422 }, { "epoch": 0.05894238138368285, "grad_norm": 3.7682955265045166, "learning_rate": 3.2968750000000005e-05, "loss": 0.12882614135742188, "step": 423 }, { "epoch": 0.059081725074897236, "grad_norm": 3.5053179264068604, "learning_rate": 3.3046875e-05, "loss": 0.15697860717773438, "step": 424 }, { "epoch": 0.05922106876611161, "grad_norm": 5.757637977600098, "learning_rate": 3.3125000000000006e-05, "loss": 0.20546913146972656, "step": 425 }, { "epoch": 0.059360412457326, "grad_norm": 4.957822322845459, "learning_rate": 3.3203125e-05, "loss": 0.21409988403320312, "step": 426 }, { "epoch": 0.059499756148540375, "grad_norm": 4.916723251342773, "learning_rate": 3.3281250000000006e-05, "loss": 0.13035202026367188, "step": 427 }, { "epoch": 0.05963909983975475, "grad_norm": 6.045250415802002, "learning_rate": 3.3359375e-05, "loss": 0.15678024291992188, "step": 428 }, { "epoch": 0.05977844353096914, "grad_norm": 2.259713649749756, "learning_rate": 3.34375e-05, "loss": 0.16791152954101562, "step": 429 }, { "epoch": 0.059917787222183515, "grad_norm": 5.927862167358398, "learning_rate": 3.3515625e-05, "loss": 0.10385513305664062, "step": 430 }, { "epoch": 0.0600571309133979, "grad_norm": 6.808668613433838, "learning_rate": 3.359375e-05, "loss": 0.13460254669189453, "step": 431 }, { "epoch": 0.06019647460461228, "grad_norm": 2.4134442806243896, "learning_rate": 3.3671875000000004e-05, "loss": 0.1080322265625, "step": 432 }, { "epoch": 0.060335818295826654, "grad_norm": 2.871001958847046, "learning_rate": 3.375e-05, "loss": 0.10614013671875, "step": 433 }, { "epoch": 0.06047516198704104, "grad_norm": 4.3150434494018555, "learning_rate": 3.3828125000000004e-05, "loss": 0.18203353881835938, "step": 434 }, { "epoch": 0.060614505678255416, "grad_norm": 2.9419655799865723, "learning_rate": 3.390625e-05, "loss": 0.10440826416015625, "step": 435 }, { "epoch": 0.0607538493694698, "grad_norm": 1.0252058506011963, "learning_rate": 3.3984375000000004e-05, "loss": 0.10599517822265625, "step": 436 }, { "epoch": 0.06089319306068418, "grad_norm": 5.004328727722168, "learning_rate": 3.40625e-05, "loss": 0.13195419311523438, "step": 437 }, { "epoch": 0.061032536751898556, "grad_norm": 2.9312875270843506, "learning_rate": 3.4140625000000005e-05, "loss": 0.10009765625, "step": 438 }, { "epoch": 0.06117188044311294, "grad_norm": 1.5946803092956543, "learning_rate": 3.421875e-05, "loss": 0.08932876586914062, "step": 439 }, { "epoch": 0.06131122413432732, "grad_norm": 2.1674325466156006, "learning_rate": 3.4296875000000005e-05, "loss": 0.1359996795654297, "step": 440 }, { "epoch": 0.0614505678255417, "grad_norm": 1.8363351821899414, "learning_rate": 3.4375e-05, "loss": 0.12226295471191406, "step": 441 }, { "epoch": 0.06158991151675608, "grad_norm": 3.685462474822998, "learning_rate": 3.4453125000000006e-05, "loss": 0.1614227294921875, "step": 442 }, { "epoch": 0.06172925520797046, "grad_norm": 1.6046937704086304, "learning_rate": 3.453125e-05, "loss": 0.149322509765625, "step": 443 }, { "epoch": 0.06186859889918484, "grad_norm": 2.0443170070648193, "learning_rate": 3.4609375000000006e-05, "loss": 0.14379310607910156, "step": 444 }, { "epoch": 0.06200794259039922, "grad_norm": 2.646277904510498, "learning_rate": 3.46875e-05, "loss": 0.17947006225585938, "step": 445 }, { "epoch": 0.062147286281613597, "grad_norm": 1.5388216972351074, "learning_rate": 3.4765625e-05, "loss": 0.10263633728027344, "step": 446 }, { "epoch": 0.06228662997282798, "grad_norm": 1.6689749956130981, "learning_rate": 3.484375e-05, "loss": 0.11783790588378906, "step": 447 }, { "epoch": 0.06242597366404236, "grad_norm": 1.6699832677841187, "learning_rate": 3.4921875e-05, "loss": 0.11031818389892578, "step": 448 }, { "epoch": 0.06256531735525674, "grad_norm": 1.9755361080169678, "learning_rate": 3.5000000000000004e-05, "loss": 0.10696601867675781, "step": 449 }, { "epoch": 0.06270466104647113, "grad_norm": 3.1808133125305176, "learning_rate": 3.5078125e-05, "loss": 0.20627593994140625, "step": 450 }, { "epoch": 0.0628440047376855, "grad_norm": 4.196004390716553, "learning_rate": 3.5156250000000004e-05, "loss": 0.1648101806640625, "step": 451 }, { "epoch": 0.06298334842889988, "grad_norm": 5.337859153747559, "learning_rate": 3.5234375e-05, "loss": 0.16832733154296875, "step": 452 }, { "epoch": 0.06312269212011426, "grad_norm": 1.4184443950653076, "learning_rate": 3.5312500000000005e-05, "loss": 0.11067008972167969, "step": 453 }, { "epoch": 0.06326203581132864, "grad_norm": 1.2673379182815552, "learning_rate": 3.5390625e-05, "loss": 0.10303115844726562, "step": 454 }, { "epoch": 0.06340137950254303, "grad_norm": 1.0493322610855103, "learning_rate": 3.5468750000000005e-05, "loss": 0.08939552307128906, "step": 455 }, { "epoch": 0.0635407231937574, "grad_norm": 0.7348464131355286, "learning_rate": 3.5546875e-05, "loss": 0.08004379272460938, "step": 456 }, { "epoch": 0.06368006688497178, "grad_norm": 1.8887932300567627, "learning_rate": 3.5625000000000005e-05, "loss": 0.11274528503417969, "step": 457 }, { "epoch": 0.06381941057618616, "grad_norm": 4.843327522277832, "learning_rate": 3.5703125e-05, "loss": 0.1579742431640625, "step": 458 }, { "epoch": 0.06395875426740054, "grad_norm": 4.328675746917725, "learning_rate": 3.5781250000000006e-05, "loss": 0.1220855712890625, "step": 459 }, { "epoch": 0.06409809795861493, "grad_norm": 3.246380567550659, "learning_rate": 3.5859375e-05, "loss": 0.157257080078125, "step": 460 }, { "epoch": 0.06423744164982931, "grad_norm": 1.911177635192871, "learning_rate": 3.5937500000000006e-05, "loss": 0.14912033081054688, "step": 461 }, { "epoch": 0.06437678534104369, "grad_norm": 6.6284942626953125, "learning_rate": 3.6015625e-05, "loss": 0.12305450439453125, "step": 462 }, { "epoch": 0.06451612903225806, "grad_norm": 3.0399420261383057, "learning_rate": 3.609375e-05, "loss": 0.09240055084228516, "step": 463 }, { "epoch": 0.06465547272347244, "grad_norm": 2.766639471054077, "learning_rate": 3.6171875000000003e-05, "loss": 0.09424209594726562, "step": 464 }, { "epoch": 0.06479481641468683, "grad_norm": 1.9291465282440186, "learning_rate": 3.625e-05, "loss": 0.09503936767578125, "step": 465 }, { "epoch": 0.06493416010590121, "grad_norm": 3.721626043319702, "learning_rate": 3.6328125000000004e-05, "loss": 0.13573837280273438, "step": 466 }, { "epoch": 0.06507350379711559, "grad_norm": 4.3513617515563965, "learning_rate": 3.640625e-05, "loss": 0.189666748046875, "step": 467 }, { "epoch": 0.06521284748832996, "grad_norm": 1.758995771408081, "learning_rate": 3.6484375000000004e-05, "loss": 0.09131813049316406, "step": 468 }, { "epoch": 0.06535219117954434, "grad_norm": 1.7660777568817139, "learning_rate": 3.65625e-05, "loss": 0.10843467712402344, "step": 469 }, { "epoch": 0.06549153487075873, "grad_norm": 1.3174102306365967, "learning_rate": 3.6640625000000005e-05, "loss": 0.08036994934082031, "step": 470 }, { "epoch": 0.06563087856197311, "grad_norm": 2.3893778324127197, "learning_rate": 3.671875e-05, "loss": 0.13868331909179688, "step": 471 }, { "epoch": 0.06577022225318749, "grad_norm": 3.097043514251709, "learning_rate": 3.6796875000000005e-05, "loss": 0.1457672119140625, "step": 472 }, { "epoch": 0.06590956594440187, "grad_norm": 7.362919807434082, "learning_rate": 3.6875e-05, "loss": 0.15611648559570312, "step": 473 }, { "epoch": 0.06604890963561624, "grad_norm": 4.099730014801025, "learning_rate": 3.6953125000000005e-05, "loss": 0.10845565795898438, "step": 474 }, { "epoch": 0.06618825332683063, "grad_norm": 3.938004493713379, "learning_rate": 3.703125e-05, "loss": 0.13916778564453125, "step": 475 }, { "epoch": 0.06632759701804501, "grad_norm": 1.572511076927185, "learning_rate": 3.7109375000000006e-05, "loss": 0.0987396240234375, "step": 476 }, { "epoch": 0.06646694070925939, "grad_norm": 1.9598009586334229, "learning_rate": 3.71875e-05, "loss": 0.1291370391845703, "step": 477 }, { "epoch": 0.06660628440047377, "grad_norm": 0.9886524677276611, "learning_rate": 3.7265625000000006e-05, "loss": 0.08448982238769531, "step": 478 }, { "epoch": 0.06674562809168814, "grad_norm": 3.3393375873565674, "learning_rate": 3.734375e-05, "loss": 0.163421630859375, "step": 479 }, { "epoch": 0.06688497178290254, "grad_norm": 3.6082115173339844, "learning_rate": 3.7421875e-05, "loss": 0.15908050537109375, "step": 480 }, { "epoch": 0.06702431547411691, "grad_norm": 4.145823955535889, "learning_rate": 3.7500000000000003e-05, "loss": 0.138275146484375, "step": 481 }, { "epoch": 0.06716365916533129, "grad_norm": 2.339674234390259, "learning_rate": 3.7578125e-05, "loss": 0.115234375, "step": 482 }, { "epoch": 0.06730300285654567, "grad_norm": 4.810821056365967, "learning_rate": 3.7656250000000004e-05, "loss": 0.17440032958984375, "step": 483 }, { "epoch": 0.06744234654776005, "grad_norm": 3.7228634357452393, "learning_rate": 3.7734375e-05, "loss": 0.13367462158203125, "step": 484 }, { "epoch": 0.06758169023897444, "grad_norm": 1.312095284461975, "learning_rate": 3.7812500000000004e-05, "loss": 0.14334487915039062, "step": 485 }, { "epoch": 0.06772103393018881, "grad_norm": 0.9424468874931335, "learning_rate": 3.7890625e-05, "loss": 0.09403419494628906, "step": 486 }, { "epoch": 0.06786037762140319, "grad_norm": 2.94515061378479, "learning_rate": 3.7968750000000005e-05, "loss": 0.10296058654785156, "step": 487 }, { "epoch": 0.06799972131261757, "grad_norm": 0.9378728866577148, "learning_rate": 3.8046875e-05, "loss": 0.10338401794433594, "step": 488 }, { "epoch": 0.06813906500383195, "grad_norm": 1.9752261638641357, "learning_rate": 3.8125000000000005e-05, "loss": 0.09709548950195312, "step": 489 }, { "epoch": 0.06827840869504634, "grad_norm": 2.080390453338623, "learning_rate": 3.8203125e-05, "loss": 0.12378692626953125, "step": 490 }, { "epoch": 0.06841775238626072, "grad_norm": 2.281527519226074, "learning_rate": 3.8281250000000006e-05, "loss": 0.11740684509277344, "step": 491 }, { "epoch": 0.0685570960774751, "grad_norm": 3.26632022857666, "learning_rate": 3.8359375e-05, "loss": 0.11805152893066406, "step": 492 }, { "epoch": 0.06869643976868947, "grad_norm": 1.0444692373275757, "learning_rate": 3.8437500000000006e-05, "loss": 0.08564186096191406, "step": 493 }, { "epoch": 0.06883578345990385, "grad_norm": 5.619449615478516, "learning_rate": 3.8515625e-05, "loss": 0.20371627807617188, "step": 494 }, { "epoch": 0.06897512715111823, "grad_norm": 2.2947535514831543, "learning_rate": 3.8593750000000006e-05, "loss": 0.11037445068359375, "step": 495 }, { "epoch": 0.06911447084233262, "grad_norm": 3.0819923877716064, "learning_rate": 3.8671875e-05, "loss": 0.095550537109375, "step": 496 }, { "epoch": 0.069253814533547, "grad_norm": 1.4950711727142334, "learning_rate": 3.875e-05, "loss": 0.07056236267089844, "step": 497 }, { "epoch": 0.06939315822476137, "grad_norm": 3.981626033782959, "learning_rate": 3.8828125000000004e-05, "loss": 0.12194633483886719, "step": 498 }, { "epoch": 0.06953250191597575, "grad_norm": 2.9882898330688477, "learning_rate": 3.890625e-05, "loss": 0.11988258361816406, "step": 499 }, { "epoch": 0.06967184560719013, "grad_norm": 4.005314350128174, "learning_rate": 3.8984375000000004e-05, "loss": 0.14714622497558594, "step": 500 }, { "epoch": 0.06981118929840452, "grad_norm": 4.22213077545166, "learning_rate": 3.90625e-05, "loss": 0.12076568603515625, "step": 501 }, { "epoch": 0.0699505329896189, "grad_norm": 2.8216464519500732, "learning_rate": 3.9140625000000004e-05, "loss": 0.1625518798828125, "step": 502 }, { "epoch": 0.07008987668083327, "grad_norm": 1.3072019815444946, "learning_rate": 3.921875e-05, "loss": 0.11846733093261719, "step": 503 }, { "epoch": 0.07022922037204765, "grad_norm": 5.674465656280518, "learning_rate": 3.9296875000000005e-05, "loss": 0.1237335205078125, "step": 504 }, { "epoch": 0.07036856406326203, "grad_norm": 7.615915298461914, "learning_rate": 3.9375e-05, "loss": 0.1512165069580078, "step": 505 }, { "epoch": 0.07050790775447642, "grad_norm": 3.9900150299072266, "learning_rate": 3.9453125000000005e-05, "loss": 0.13902664184570312, "step": 506 }, { "epoch": 0.0706472514456908, "grad_norm": 3.043977737426758, "learning_rate": 3.953125e-05, "loss": 0.12251091003417969, "step": 507 }, { "epoch": 0.07078659513690518, "grad_norm": 2.323931932449341, "learning_rate": 3.9609375000000006e-05, "loss": 0.11706161499023438, "step": 508 }, { "epoch": 0.07092593882811955, "grad_norm": 2.765200614929199, "learning_rate": 3.96875e-05, "loss": 0.10280609130859375, "step": 509 }, { "epoch": 0.07106528251933393, "grad_norm": 3.848862648010254, "learning_rate": 3.9765625000000006e-05, "loss": 0.13304519653320312, "step": 510 }, { "epoch": 0.07120462621054832, "grad_norm": 3.36061429977417, "learning_rate": 3.984375e-05, "loss": 0.17521286010742188, "step": 511 }, { "epoch": 0.0713439699017627, "grad_norm": 2.0747926235198975, "learning_rate": 3.9921875000000006e-05, "loss": 0.13555145263671875, "step": 512 }, { "epoch": 0.07148331359297708, "grad_norm": 3.8085427284240723, "learning_rate": 4e-05, "loss": 0.14349365234375, "step": 513 }, { "epoch": 0.07162265728419145, "grad_norm": 3.5413095951080322, "learning_rate": 3.999999777822831e-05, "loss": 0.125152587890625, "step": 514 }, { "epoch": 0.07176200097540583, "grad_norm": 1.5531997680664062, "learning_rate": 3.999999111291371e-05, "loss": 0.11183738708496094, "step": 515 }, { "epoch": 0.07190134466662022, "grad_norm": 4.626058578491211, "learning_rate": 3.9999980004057696e-05, "loss": 0.13446426391601562, "step": 516 }, { "epoch": 0.0720406883578346, "grad_norm": 8.27481746673584, "learning_rate": 3.999996445166274e-05, "loss": 0.17162132263183594, "step": 517 }, { "epoch": 0.07218003204904898, "grad_norm": 6.6619954109191895, "learning_rate": 3.9999944455732284e-05, "loss": 0.21932220458984375, "step": 518 }, { "epoch": 0.07231937574026336, "grad_norm": 4.600326061248779, "learning_rate": 3.9999920016270776e-05, "loss": 0.173004150390625, "step": 519 }, { "epoch": 0.07245871943147773, "grad_norm": 4.08194637298584, "learning_rate": 3.999989113328364e-05, "loss": 0.134185791015625, "step": 520 }, { "epoch": 0.07259806312269212, "grad_norm": 4.183300971984863, "learning_rate": 3.999985780677731e-05, "loss": 0.15105056762695312, "step": 521 }, { "epoch": 0.0727374068139065, "grad_norm": 1.868992567062378, "learning_rate": 3.999982003675918e-05, "loss": 0.09186744689941406, "step": 522 }, { "epoch": 0.07287675050512088, "grad_norm": 2.105400562286377, "learning_rate": 3.999977782323763e-05, "loss": 0.1583728790283203, "step": 523 }, { "epoch": 0.07301609419633526, "grad_norm": 3.8349478244781494, "learning_rate": 3.9999731166222065e-05, "loss": 0.14019012451171875, "step": 524 }, { "epoch": 0.07315543788754963, "grad_norm": 5.373597621917725, "learning_rate": 3.9999680065722826e-05, "loss": 0.17078781127929688, "step": 525 }, { "epoch": 0.07329478157876403, "grad_norm": 4.404664993286133, "learning_rate": 3.999962452175128e-05, "loss": 0.15452957153320312, "step": 526 }, { "epoch": 0.0734341252699784, "grad_norm": 2.5311670303344727, "learning_rate": 3.9999564534319764e-05, "loss": 0.10028266906738281, "step": 527 }, { "epoch": 0.07357346896119278, "grad_norm": 1.473449945449829, "learning_rate": 3.9999500103441604e-05, "loss": 0.11912918090820312, "step": 528 }, { "epoch": 0.07371281265240716, "grad_norm": 3.460256338119507, "learning_rate": 3.999943122913112e-05, "loss": 0.2085418701171875, "step": 529 }, { "epoch": 0.07385215634362154, "grad_norm": 1.859515905380249, "learning_rate": 3.9999357911403613e-05, "loss": 0.10864639282226562, "step": 530 }, { "epoch": 0.07399150003483593, "grad_norm": 0.9655384421348572, "learning_rate": 3.9999280150275375e-05, "loss": 0.10744380950927734, "step": 531 }, { "epoch": 0.0741308437260503, "grad_norm": 0.957709550857544, "learning_rate": 3.999919794576367e-05, "loss": 0.13984298706054688, "step": 532 }, { "epoch": 0.07427018741726468, "grad_norm": 2.0696256160736084, "learning_rate": 3.9999111297886774e-05, "loss": 0.08688831329345703, "step": 533 }, { "epoch": 0.07440953110847906, "grad_norm": 0.8702329993247986, "learning_rate": 3.999902020666394e-05, "loss": 0.08815574645996094, "step": 534 }, { "epoch": 0.07454887479969344, "grad_norm": 1.270948052406311, "learning_rate": 3.99989246721154e-05, "loss": 0.11433601379394531, "step": 535 }, { "epoch": 0.07468821849090783, "grad_norm": 2.3735063076019287, "learning_rate": 3.9998824694262376e-05, "loss": 0.10658454895019531, "step": 536 }, { "epoch": 0.0748275621821222, "grad_norm": 3.470846176147461, "learning_rate": 3.999872027312709e-05, "loss": 0.11987113952636719, "step": 537 }, { "epoch": 0.07496690587333658, "grad_norm": 2.8136346340179443, "learning_rate": 3.999861140873274e-05, "loss": 0.14839935302734375, "step": 538 }, { "epoch": 0.07510624956455096, "grad_norm": 4.242218494415283, "learning_rate": 3.99984981011035e-05, "loss": 0.15454864501953125, "step": 539 }, { "epoch": 0.07524559325576534, "grad_norm": 2.2143540382385254, "learning_rate": 3.999838035026456e-05, "loss": 0.1316814422607422, "step": 540 }, { "epoch": 0.07538493694697973, "grad_norm": 1.0337727069854736, "learning_rate": 3.999825815624208e-05, "loss": 0.08818244934082031, "step": 541 }, { "epoch": 0.07552428063819411, "grad_norm": 2.531571626663208, "learning_rate": 3.9998131519063204e-05, "loss": 0.12324905395507812, "step": 542 }, { "epoch": 0.07566362432940849, "grad_norm": 1.8717105388641357, "learning_rate": 3.999800043875607e-05, "loss": 0.14538955688476562, "step": 543 }, { "epoch": 0.07580296802062286, "grad_norm": 0.8989245891571045, "learning_rate": 3.999786491534981e-05, "loss": 0.08049392700195312, "step": 544 }, { "epoch": 0.07594231171183724, "grad_norm": 0.8702663779258728, "learning_rate": 3.9997724948874514e-05, "loss": 0.11128807067871094, "step": 545 }, { "epoch": 0.07608165540305163, "grad_norm": 1.7686147689819336, "learning_rate": 3.999758053936129e-05, "loss": 0.1466217041015625, "step": 546 }, { "epoch": 0.07622099909426601, "grad_norm": 0.9515817761421204, "learning_rate": 3.999743168684223e-05, "loss": 0.08151435852050781, "step": 547 }, { "epoch": 0.07636034278548039, "grad_norm": 5.572052001953125, "learning_rate": 3.9997278391350395e-05, "loss": 0.15542984008789062, "step": 548 }, { "epoch": 0.07649968647669476, "grad_norm": 2.3883323669433594, "learning_rate": 3.999712065291984e-05, "loss": 0.12106704711914062, "step": 549 }, { "epoch": 0.07663903016790914, "grad_norm": 1.4061657190322876, "learning_rate": 3.999695847158562e-05, "loss": 0.15160751342773438, "step": 550 }, { "epoch": 0.07677837385912353, "grad_norm": 2.090426206588745, "learning_rate": 3.999679184738377e-05, "loss": 0.11965560913085938, "step": 551 }, { "epoch": 0.07691771755033791, "grad_norm": 4.149261474609375, "learning_rate": 3.9996620780351306e-05, "loss": 0.12900733947753906, "step": 552 }, { "epoch": 0.07705706124155229, "grad_norm": 2.549879789352417, "learning_rate": 3.9996445270526235e-05, "loss": 0.12584304809570312, "step": 553 }, { "epoch": 0.07719640493276667, "grad_norm": 5.93257999420166, "learning_rate": 3.999626531794755e-05, "loss": 0.16759109497070312, "step": 554 }, { "epoch": 0.07733574862398104, "grad_norm": 0.6048027873039246, "learning_rate": 3.9996080922655236e-05, "loss": 0.0752716064453125, "step": 555 }, { "epoch": 0.07747509231519543, "grad_norm": 2.0776069164276123, "learning_rate": 3.9995892084690256e-05, "loss": 0.12067604064941406, "step": 556 }, { "epoch": 0.07761443600640981, "grad_norm": 0.923984944820404, "learning_rate": 3.999569880409458e-05, "loss": 0.13747787475585938, "step": 557 }, { "epoch": 0.07775377969762419, "grad_norm": 0.7828582525253296, "learning_rate": 3.9995501080911124e-05, "loss": 0.12700462341308594, "step": 558 }, { "epoch": 0.07789312338883857, "grad_norm": 1.2990708351135254, "learning_rate": 3.999529891518384e-05, "loss": 0.10113906860351562, "step": 559 }, { "epoch": 0.07803246708005294, "grad_norm": 1.3646270036697388, "learning_rate": 3.9995092306957636e-05, "loss": 0.12221717834472656, "step": 560 }, { "epoch": 0.07817181077126734, "grad_norm": 1.6404664516448975, "learning_rate": 3.9994881256278424e-05, "loss": 0.10071563720703125, "step": 561 }, { "epoch": 0.07831115446248171, "grad_norm": 2.1226539611816406, "learning_rate": 3.999466576319308e-05, "loss": 0.16773414611816406, "step": 562 }, { "epoch": 0.07845049815369609, "grad_norm": 1.2778706550598145, "learning_rate": 3.999444582774949e-05, "loss": 0.14202308654785156, "step": 563 }, { "epoch": 0.07858984184491047, "grad_norm": 3.3511712551116943, "learning_rate": 3.999422144999652e-05, "loss": 0.18107986450195312, "step": 564 }, { "epoch": 0.07872918553612485, "grad_norm": 2.2309207916259766, "learning_rate": 3.9993992629984016e-05, "loss": 0.1256866455078125, "step": 565 }, { "epoch": 0.07886852922733924, "grad_norm": 1.0485955476760864, "learning_rate": 3.9993759367762825e-05, "loss": 0.07908248901367188, "step": 566 }, { "epoch": 0.07900787291855361, "grad_norm": 2.0486409664154053, "learning_rate": 3.9993521663384766e-05, "loss": 0.12654876708984375, "step": 567 }, { "epoch": 0.07914721660976799, "grad_norm": 3.582606792449951, "learning_rate": 3.999327951690265e-05, "loss": 0.17953109741210938, "step": 568 }, { "epoch": 0.07928656030098237, "grad_norm": 2.542111396789551, "learning_rate": 3.9993032928370284e-05, "loss": 0.14783859252929688, "step": 569 }, { "epoch": 0.07942590399219675, "grad_norm": 2.0648233890533447, "learning_rate": 3.999278189784245e-05, "loss": 0.10119247436523438, "step": 570 }, { "epoch": 0.07956524768341114, "grad_norm": 2.798393487930298, "learning_rate": 3.9992526425374916e-05, "loss": 0.12763214111328125, "step": 571 }, { "epoch": 0.07970459137462552, "grad_norm": 2.1240041255950928, "learning_rate": 3.999226651102445e-05, "loss": 0.1259765625, "step": 572 }, { "epoch": 0.0798439350658399, "grad_norm": 1.9640324115753174, "learning_rate": 3.99920021548488e-05, "loss": 0.13722610473632812, "step": 573 }, { "epoch": 0.07998327875705427, "grad_norm": 2.151902675628662, "learning_rate": 3.99917333569067e-05, "loss": 0.13800811767578125, "step": 574 }, { "epoch": 0.08012262244826865, "grad_norm": 2.2750742435455322, "learning_rate": 3.9991460117257856e-05, "loss": 0.15391159057617188, "step": 575 }, { "epoch": 0.08026196613948304, "grad_norm": 1.5828267335891724, "learning_rate": 3.9991182435962994e-05, "loss": 0.1822643280029297, "step": 576 }, { "epoch": 0.08040130983069742, "grad_norm": 2.76873517036438, "learning_rate": 3.99909003130838e-05, "loss": 0.11618995666503906, "step": 577 }, { "epoch": 0.0805406535219118, "grad_norm": 3.414393424987793, "learning_rate": 3.9990613748682954e-05, "loss": 0.1348114013671875, "step": 578 }, { "epoch": 0.08067999721312617, "grad_norm": 1.2800782918930054, "learning_rate": 3.9990322742824126e-05, "loss": 0.10827255249023438, "step": 579 }, { "epoch": 0.08081934090434055, "grad_norm": 3.1290533542633057, "learning_rate": 3.999002729557197e-05, "loss": 0.10898971557617188, "step": 580 }, { "epoch": 0.08095868459555494, "grad_norm": 3.069255828857422, "learning_rate": 3.9989727406992136e-05, "loss": 0.1311187744140625, "step": 581 }, { "epoch": 0.08109802828676932, "grad_norm": 1.0050139427185059, "learning_rate": 3.998942307715124e-05, "loss": 0.11010932922363281, "step": 582 }, { "epoch": 0.0812373719779837, "grad_norm": 2.393117666244507, "learning_rate": 3.998911430611691e-05, "loss": 0.14769744873046875, "step": 583 }, { "epoch": 0.08137671566919807, "grad_norm": 1.805238127708435, "learning_rate": 3.9988801093957735e-05, "loss": 0.10110664367675781, "step": 584 }, { "epoch": 0.08151605936041245, "grad_norm": 0.9078276753425598, "learning_rate": 3.9988483440743306e-05, "loss": 0.08968353271484375, "step": 585 }, { "epoch": 0.08165540305162684, "grad_norm": 2.747990131378174, "learning_rate": 3.998816134654421e-05, "loss": 0.1086568832397461, "step": 586 }, { "epoch": 0.08179474674284122, "grad_norm": 4.451938152313232, "learning_rate": 3.9987834811431986e-05, "loss": 0.1411724090576172, "step": 587 }, { "epoch": 0.0819340904340556, "grad_norm": 1.203033685684204, "learning_rate": 3.998750383547921e-05, "loss": 0.09840202331542969, "step": 588 }, { "epoch": 0.08207343412526998, "grad_norm": 1.3895381689071655, "learning_rate": 3.9987168418759396e-05, "loss": 0.11197090148925781, "step": 589 }, { "epoch": 0.08221277781648435, "grad_norm": 2.0719799995422363, "learning_rate": 3.998682856134708e-05, "loss": 0.10160064697265625, "step": 590 }, { "epoch": 0.08235212150769874, "grad_norm": 3.161625385284424, "learning_rate": 3.9986484263317766e-05, "loss": 0.115814208984375, "step": 591 }, { "epoch": 0.08249146519891312, "grad_norm": 1.7461684942245483, "learning_rate": 3.9986135524747945e-05, "loss": 0.10396003723144531, "step": 592 }, { "epoch": 0.0826308088901275, "grad_norm": 0.9677025675773621, "learning_rate": 3.9985782345715105e-05, "loss": 0.11051368713378906, "step": 593 }, { "epoch": 0.08277015258134188, "grad_norm": 1.5929374694824219, "learning_rate": 3.99854247262977e-05, "loss": 0.09242630004882812, "step": 594 }, { "epoch": 0.08290949627255625, "grad_norm": 1.2520278692245483, "learning_rate": 3.99850626665752e-05, "loss": 0.1183319091796875, "step": 595 }, { "epoch": 0.08304883996377065, "grad_norm": 1.7947602272033691, "learning_rate": 3.998469616662805e-05, "loss": 0.10550308227539062, "step": 596 }, { "epoch": 0.08318818365498502, "grad_norm": 1.553332805633545, "learning_rate": 3.9984325226537665e-05, "loss": 0.09752845764160156, "step": 597 }, { "epoch": 0.0833275273461994, "grad_norm": 0.8475018739700317, "learning_rate": 3.998394984638647e-05, "loss": 0.08748245239257812, "step": 598 }, { "epoch": 0.08346687103741378, "grad_norm": 1.5756542682647705, "learning_rate": 3.9983570026257844e-05, "loss": 0.1014251708984375, "step": 599 }, { "epoch": 0.08360621472862816, "grad_norm": 0.6557784676551819, "learning_rate": 3.998318576623621e-05, "loss": 0.10439300537109375, "step": 600 }, { "epoch": 0.08374555841984255, "grad_norm": 1.7600188255310059, "learning_rate": 3.998279706640691e-05, "loss": 0.09660530090332031, "step": 601 }, { "epoch": 0.08388490211105692, "grad_norm": 1.3026453256607056, "learning_rate": 3.998240392685633e-05, "loss": 0.10809516906738281, "step": 602 }, { "epoch": 0.0840242458022713, "grad_norm": 1.9691437482833862, "learning_rate": 3.9982006347671796e-05, "loss": 0.11550712585449219, "step": 603 }, { "epoch": 0.08416358949348568, "grad_norm": 1.722626805305481, "learning_rate": 3.998160432894164e-05, "loss": 0.09701156616210938, "step": 604 }, { "epoch": 0.08430293318470006, "grad_norm": 0.47721943259239197, "learning_rate": 3.99811978707552e-05, "loss": 0.06611824035644531, "step": 605 }, { "epoch": 0.08444227687591445, "grad_norm": 1.7810955047607422, "learning_rate": 3.998078697320277e-05, "loss": 0.14276695251464844, "step": 606 }, { "epoch": 0.08458162056712883, "grad_norm": 1.7148725986480713, "learning_rate": 3.9980371636375645e-05, "loss": 0.10901641845703125, "step": 607 }, { "epoch": 0.0847209642583432, "grad_norm": 1.0007349252700806, "learning_rate": 3.99799518603661e-05, "loss": 0.09817886352539062, "step": 608 }, { "epoch": 0.08486030794955758, "grad_norm": 0.8453435897827148, "learning_rate": 3.9979527645267395e-05, "loss": 0.1143951416015625, "step": 609 }, { "epoch": 0.08499965164077196, "grad_norm": 1.9799214601516724, "learning_rate": 3.99790989911738e-05, "loss": 0.09308624267578125, "step": 610 }, { "epoch": 0.08513899533198635, "grad_norm": 1.374338150024414, "learning_rate": 3.997866589818053e-05, "loss": 0.11553955078125, "step": 611 }, { "epoch": 0.08527833902320073, "grad_norm": 1.1998134851455688, "learning_rate": 3.997822836638382e-05, "loss": 0.11280250549316406, "step": 612 }, { "epoch": 0.0854176827144151, "grad_norm": 1.1562647819519043, "learning_rate": 3.997778639588088e-05, "loss": 0.09814262390136719, "step": 613 }, { "epoch": 0.08555702640562948, "grad_norm": 2.2079992294311523, "learning_rate": 3.9977339986769905e-05, "loss": 0.11584091186523438, "step": 614 }, { "epoch": 0.08569637009684386, "grad_norm": 2.0636987686157227, "learning_rate": 3.9976889139150074e-05, "loss": 0.1290607452392578, "step": 615 }, { "epoch": 0.08583571378805825, "grad_norm": 1.7909809350967407, "learning_rate": 3.997643385312156e-05, "loss": 0.11124229431152344, "step": 616 }, { "epoch": 0.08597505747927263, "grad_norm": 2.6300296783447266, "learning_rate": 3.9975974128785505e-05, "loss": 0.11059188842773438, "step": 617 }, { "epoch": 0.086114401170487, "grad_norm": 3.255582809448242, "learning_rate": 3.997550996624406e-05, "loss": 0.12091636657714844, "step": 618 }, { "epoch": 0.08625374486170138, "grad_norm": 3.166318655014038, "learning_rate": 3.997504136560036e-05, "loss": 0.10851001739501953, "step": 619 }, { "epoch": 0.08639308855291576, "grad_norm": 1.74764883518219, "learning_rate": 3.997456832695849e-05, "loss": 0.0956878662109375, "step": 620 }, { "epoch": 0.08653243224413015, "grad_norm": 0.7127701044082642, "learning_rate": 3.997409085042358e-05, "loss": 0.1027069091796875, "step": 621 }, { "epoch": 0.08667177593534453, "grad_norm": 2.7335541248321533, "learning_rate": 3.997360893610169e-05, "loss": 0.11098098754882812, "step": 622 }, { "epoch": 0.08681111962655891, "grad_norm": 5.717250347137451, "learning_rate": 3.99731225840999e-05, "loss": 0.17713165283203125, "step": 623 }, { "epoch": 0.08695046331777329, "grad_norm": 2.7387754917144775, "learning_rate": 3.9972631794526265e-05, "loss": 0.13355255126953125, "step": 624 }, { "epoch": 0.08708980700898766, "grad_norm": 1.081800937652588, "learning_rate": 3.9972136567489836e-05, "loss": 0.172821044921875, "step": 625 }, { "epoch": 0.08722915070020205, "grad_norm": 1.5363692045211792, "learning_rate": 3.997163690310063e-05, "loss": 0.11891937255859375, "step": 626 }, { "epoch": 0.08736849439141643, "grad_norm": 1.4293453693389893, "learning_rate": 3.997113280146966e-05, "loss": 0.15371322631835938, "step": 627 }, { "epoch": 0.08750783808263081, "grad_norm": 1.8178093433380127, "learning_rate": 3.9970624262708934e-05, "loss": 0.1451873779296875, "step": 628 }, { "epoch": 0.08764718177384519, "grad_norm": 0.8165032863616943, "learning_rate": 3.997011128693143e-05, "loss": 0.07885360717773438, "step": 629 }, { "epoch": 0.08778652546505956, "grad_norm": 0.7978515028953552, "learning_rate": 3.996959387425113e-05, "loss": 0.08555412292480469, "step": 630 }, { "epoch": 0.08792586915627396, "grad_norm": 1.210871696472168, "learning_rate": 3.996907202478298e-05, "loss": 0.1194610595703125, "step": 631 }, { "epoch": 0.08806521284748833, "grad_norm": 1.2033125162124634, "learning_rate": 3.996854573864293e-05, "loss": 0.11191177368164062, "step": 632 }, { "epoch": 0.08820455653870271, "grad_norm": 0.672520101070404, "learning_rate": 3.9968015015947904e-05, "loss": 0.10470771789550781, "step": 633 }, { "epoch": 0.08834390022991709, "grad_norm": 0.7171827554702759, "learning_rate": 3.996747985681582e-05, "loss": 0.10395431518554688, "step": 634 }, { "epoch": 0.08848324392113147, "grad_norm": 1.9401113986968994, "learning_rate": 3.9966940261365576e-05, "loss": 0.12075424194335938, "step": 635 }, { "epoch": 0.08862258761234586, "grad_norm": 1.069329023361206, "learning_rate": 3.996639622971706e-05, "loss": 0.08725929260253906, "step": 636 }, { "epoch": 0.08876193130356023, "grad_norm": 1.750138521194458, "learning_rate": 3.996584776199114e-05, "loss": 0.1767597198486328, "step": 637 }, { "epoch": 0.08890127499477461, "grad_norm": 1.0362471342086792, "learning_rate": 3.9965294858309685e-05, "loss": 0.10419464111328125, "step": 638 }, { "epoch": 0.08904061868598899, "grad_norm": 1.1169592142105103, "learning_rate": 3.996473751879552e-05, "loss": 0.10972785949707031, "step": 639 }, { "epoch": 0.08917996237720337, "grad_norm": 0.9762294292449951, "learning_rate": 3.996417574357248e-05, "loss": 0.12237358093261719, "step": 640 }, { "epoch": 0.08931930606841776, "grad_norm": 2.073392868041992, "learning_rate": 3.996360953276538e-05, "loss": 0.15527725219726562, "step": 641 }, { "epoch": 0.08945864975963214, "grad_norm": 0.6941425204277039, "learning_rate": 3.996303888650002e-05, "loss": 0.10612106323242188, "step": 642 }, { "epoch": 0.08959799345084651, "grad_norm": 0.5668501257896423, "learning_rate": 3.996246380490319e-05, "loss": 0.10363006591796875, "step": 643 }, { "epoch": 0.08973733714206089, "grad_norm": 2.4187774658203125, "learning_rate": 3.996188428810264e-05, "loss": 0.15231704711914062, "step": 644 }, { "epoch": 0.08987668083327527, "grad_norm": 1.8461713790893555, "learning_rate": 3.9961300336227146e-05, "loss": 0.12401580810546875, "step": 645 }, { "epoch": 0.09001602452448966, "grad_norm": 1.252118468284607, "learning_rate": 3.996071194940644e-05, "loss": 0.10144615173339844, "step": 646 }, { "epoch": 0.09015536821570404, "grad_norm": 0.6942083835601807, "learning_rate": 3.996011912777126e-05, "loss": 0.09859275817871094, "step": 647 }, { "epoch": 0.09029471190691842, "grad_norm": 2.204408884048462, "learning_rate": 3.995952187145329e-05, "loss": 0.1428050994873047, "step": 648 }, { "epoch": 0.09043405559813279, "grad_norm": 0.7580353617668152, "learning_rate": 3.995892018058525e-05, "loss": 0.09755802154541016, "step": 649 }, { "epoch": 0.09057339928934717, "grad_norm": 2.8576738834381104, "learning_rate": 3.995831405530082e-05, "loss": 0.1951770782470703, "step": 650 }, { "epoch": 0.09071274298056156, "grad_norm": 2.6492650508880615, "learning_rate": 3.9957703495734666e-05, "loss": 0.1256256103515625, "step": 651 }, { "epoch": 0.09085208667177594, "grad_norm": 1.157537817955017, "learning_rate": 3.9957088502022426e-05, "loss": 0.09515380859375, "step": 652 }, { "epoch": 0.09099143036299032, "grad_norm": 1.6019444465637207, "learning_rate": 3.995646907430076e-05, "loss": 0.09901046752929688, "step": 653 }, { "epoch": 0.0911307740542047, "grad_norm": 4.919183254241943, "learning_rate": 3.995584521270727e-05, "loss": 0.14474105834960938, "step": 654 }, { "epoch": 0.09127011774541907, "grad_norm": 0.8669459819793701, "learning_rate": 3.995521691738058e-05, "loss": 0.11705398559570312, "step": 655 }, { "epoch": 0.09140946143663346, "grad_norm": 1.8246808052062988, "learning_rate": 3.995458418846028e-05, "loss": 0.10042953491210938, "step": 656 }, { "epoch": 0.09154880512784784, "grad_norm": 3.4547104835510254, "learning_rate": 3.995394702608693e-05, "loss": 0.15300750732421875, "step": 657 }, { "epoch": 0.09168814881906222, "grad_norm": 1.3384592533111572, "learning_rate": 3.995330543040212e-05, "loss": 0.08542442321777344, "step": 658 }, { "epoch": 0.0918274925102766, "grad_norm": 1.164960503578186, "learning_rate": 3.995265940154838e-05, "loss": 0.1305999755859375, "step": 659 }, { "epoch": 0.09196683620149097, "grad_norm": 1.5734573602676392, "learning_rate": 3.995200893966925e-05, "loss": 0.19118690490722656, "step": 660 }, { "epoch": 0.09210617989270536, "grad_norm": 2.191829204559326, "learning_rate": 3.9951354044909246e-05, "loss": 0.11335182189941406, "step": 661 }, { "epoch": 0.09224552358391974, "grad_norm": 1.332816243171692, "learning_rate": 3.995069471741387e-05, "loss": 0.10511970520019531, "step": 662 }, { "epoch": 0.09238486727513412, "grad_norm": 1.7221275568008423, "learning_rate": 3.9950030957329604e-05, "loss": 0.13498878479003906, "step": 663 }, { "epoch": 0.0925242109663485, "grad_norm": 1.7761082649230957, "learning_rate": 3.9949362764803934e-05, "loss": 0.0808258056640625, "step": 664 }, { "epoch": 0.09266355465756287, "grad_norm": 1.271019697189331, "learning_rate": 3.9948690139985305e-05, "loss": 0.10305976867675781, "step": 665 }, { "epoch": 0.09280289834877727, "grad_norm": 1.014792799949646, "learning_rate": 3.994801308302316e-05, "loss": 0.07171058654785156, "step": 666 }, { "epoch": 0.09294224203999164, "grad_norm": 2.9300577640533447, "learning_rate": 3.994733159406794e-05, "loss": 0.12253570556640625, "step": 667 }, { "epoch": 0.09308158573120602, "grad_norm": 2.817939043045044, "learning_rate": 3.9946645673271034e-05, "loss": 0.13839149475097656, "step": 668 }, { "epoch": 0.0932209294224204, "grad_norm": 1.1543421745300293, "learning_rate": 3.994595532078486e-05, "loss": 0.11301040649414062, "step": 669 }, { "epoch": 0.09336027311363478, "grad_norm": 1.3884141445159912, "learning_rate": 3.9945260536762775e-05, "loss": 0.06984329223632812, "step": 670 }, { "epoch": 0.09349961680484917, "grad_norm": 3.0776305198669434, "learning_rate": 3.994456132135916e-05, "loss": 0.15468692779541016, "step": 671 }, { "epoch": 0.09363896049606354, "grad_norm": 2.6249451637268066, "learning_rate": 3.994385767472937e-05, "loss": 0.14349746704101562, "step": 672 }, { "epoch": 0.09377830418727792, "grad_norm": 0.8873428106307983, "learning_rate": 3.9943149597029724e-05, "loss": 0.08478355407714844, "step": 673 }, { "epoch": 0.0939176478784923, "grad_norm": 1.360435962677002, "learning_rate": 3.994243708841755e-05, "loss": 0.1453990936279297, "step": 674 }, { "epoch": 0.09405699156970668, "grad_norm": 2.7673938274383545, "learning_rate": 3.9941720149051146e-05, "loss": 0.16139602661132812, "step": 675 }, { "epoch": 0.09419633526092107, "grad_norm": 1.5676738023757935, "learning_rate": 3.99409987790898e-05, "loss": 0.11298370361328125, "step": 676 }, { "epoch": 0.09433567895213545, "grad_norm": 0.7559939622879028, "learning_rate": 3.9940272978693795e-05, "loss": 0.08307456970214844, "step": 677 }, { "epoch": 0.09447502264334982, "grad_norm": 3.1241300106048584, "learning_rate": 3.993954274802437e-05, "loss": 0.17406082153320312, "step": 678 }, { "epoch": 0.0946143663345642, "grad_norm": 2.981058359146118, "learning_rate": 3.993880808724378e-05, "loss": 0.14940643310546875, "step": 679 }, { "epoch": 0.09475371002577858, "grad_norm": 1.125257134437561, "learning_rate": 3.993806899651524e-05, "loss": 0.10942649841308594, "step": 680 }, { "epoch": 0.09489305371699297, "grad_norm": 1.3269895315170288, "learning_rate": 3.9937325476002955e-05, "loss": 0.103607177734375, "step": 681 }, { "epoch": 0.09503239740820735, "grad_norm": 0.6538723111152649, "learning_rate": 3.993657752587214e-05, "loss": 0.09619331359863281, "step": 682 }, { "epoch": 0.09517174109942172, "grad_norm": 1.5624948740005493, "learning_rate": 3.993582514628895e-05, "loss": 0.14385604858398438, "step": 683 }, { "epoch": 0.0953110847906361, "grad_norm": 1.3760669231414795, "learning_rate": 3.9935068337420564e-05, "loss": 0.09527206420898438, "step": 684 }, { "epoch": 0.09545042848185048, "grad_norm": 0.34413033723831177, "learning_rate": 3.99343070994351e-05, "loss": 0.07621192932128906, "step": 685 }, { "epoch": 0.09558977217306487, "grad_norm": 1.0510213375091553, "learning_rate": 3.993354143250173e-05, "loss": 0.09285545349121094, "step": 686 }, { "epoch": 0.09572911586427925, "grad_norm": 1.4344948530197144, "learning_rate": 3.993277133679053e-05, "loss": 0.12052345275878906, "step": 687 }, { "epoch": 0.09586845955549363, "grad_norm": 1.9553276300430298, "learning_rate": 3.993199681247261e-05, "loss": 0.08988189697265625, "step": 688 }, { "epoch": 0.096007803246708, "grad_norm": 2.0521926879882812, "learning_rate": 3.9931217859720066e-05, "loss": 0.14418792724609375, "step": 689 }, { "epoch": 0.09614714693792238, "grad_norm": 0.8239341378211975, "learning_rate": 3.993043447870594e-05, "loss": 0.12273788452148438, "step": 690 }, { "epoch": 0.09628649062913676, "grad_norm": 1.9166098833084106, "learning_rate": 3.99296466696043e-05, "loss": 0.11394309997558594, "step": 691 }, { "epoch": 0.09642583432035115, "grad_norm": 3.601027011871338, "learning_rate": 3.9928854432590166e-05, "loss": 0.1578388214111328, "step": 692 }, { "epoch": 0.09656517801156553, "grad_norm": 2.117563009262085, "learning_rate": 3.9928057767839565e-05, "loss": 0.11327934265136719, "step": 693 }, { "epoch": 0.0967045217027799, "grad_norm": 0.7808533906936646, "learning_rate": 3.992725667552948e-05, "loss": 0.101318359375, "step": 694 }, { "epoch": 0.09684386539399428, "grad_norm": 1.9542772769927979, "learning_rate": 3.9926451155837925e-05, "loss": 0.11246109008789062, "step": 695 }, { "epoch": 0.09698320908520866, "grad_norm": 2.3494012355804443, "learning_rate": 3.9925641208943846e-05, "loss": 0.11851882934570312, "step": 696 }, { "epoch": 0.09712255277642305, "grad_norm": 1.7434897422790527, "learning_rate": 3.99248268350272e-05, "loss": 0.1325836181640625, "step": 697 }, { "epoch": 0.09726189646763743, "grad_norm": 1.3294047117233276, "learning_rate": 3.992400803426892e-05, "loss": 0.11371994018554688, "step": 698 }, { "epoch": 0.0974012401588518, "grad_norm": 1.560742974281311, "learning_rate": 3.992318480685094e-05, "loss": 0.11021995544433594, "step": 699 }, { "epoch": 0.09754058385006618, "grad_norm": 1.5074039697647095, "learning_rate": 3.992235715295614e-05, "loss": 0.11312103271484375, "step": 700 }, { "epoch": 0.09767992754128056, "grad_norm": 0.7238860726356506, "learning_rate": 3.992152507276841e-05, "loss": 0.09823417663574219, "step": 701 }, { "epoch": 0.09781927123249495, "grad_norm": 1.6033929586410522, "learning_rate": 3.9920688566472636e-05, "loss": 0.13399314880371094, "step": 702 }, { "epoch": 0.09795861492370933, "grad_norm": 0.9758917093276978, "learning_rate": 3.991984763425465e-05, "loss": 0.0968780517578125, "step": 703 }, { "epoch": 0.09809795861492371, "grad_norm": 0.8494994640350342, "learning_rate": 3.99190022763013e-05, "loss": 0.105987548828125, "step": 704 }, { "epoch": 0.09823730230613809, "grad_norm": 1.23043692111969, "learning_rate": 3.991815249280041e-05, "loss": 0.1386699676513672, "step": 705 }, { "epoch": 0.09837664599735246, "grad_norm": 0.9754325747489929, "learning_rate": 3.991729828394078e-05, "loss": 0.09955787658691406, "step": 706 }, { "epoch": 0.09851598968856685, "grad_norm": 0.7522068023681641, "learning_rate": 3.9916439649912175e-05, "loss": 0.10589981079101562, "step": 707 }, { "epoch": 0.09865533337978123, "grad_norm": 0.9560259580612183, "learning_rate": 3.9915576590905385e-05, "loss": 0.17127609252929688, "step": 708 }, { "epoch": 0.09879467707099561, "grad_norm": 1.1445032358169556, "learning_rate": 3.991470910711216e-05, "loss": 0.08207130432128906, "step": 709 }, { "epoch": 0.09893402076220999, "grad_norm": 1.9059768915176392, "learning_rate": 3.9913837198725224e-05, "loss": 0.13786697387695312, "step": 710 }, { "epoch": 0.09907336445342436, "grad_norm": 0.48259973526000977, "learning_rate": 3.9912960865938316e-05, "loss": 0.09104347229003906, "step": 711 }, { "epoch": 0.09921270814463876, "grad_norm": 0.6930782198905945, "learning_rate": 3.9912080108946115e-05, "loss": 0.09864234924316406, "step": 712 }, { "epoch": 0.09935205183585313, "grad_norm": 0.9861940741539001, "learning_rate": 3.9911194927944315e-05, "loss": 0.07714271545410156, "step": 713 }, { "epoch": 0.09949139552706751, "grad_norm": 0.9397109150886536, "learning_rate": 3.991030532312959e-05, "loss": 0.14679718017578125, "step": 714 }, { "epoch": 0.09963073921828189, "grad_norm": 0.7344309687614441, "learning_rate": 3.990941129469957e-05, "loss": 0.0991067886352539, "step": 715 }, { "epoch": 0.09977008290949627, "grad_norm": 1.9518712759017944, "learning_rate": 3.9908512842852906e-05, "loss": 0.1284027099609375, "step": 716 }, { "epoch": 0.09990942660071066, "grad_norm": 1.9064795970916748, "learning_rate": 3.990760996778921e-05, "loss": 0.118255615234375, "step": 717 }, { "epoch": 0.10004877029192503, "grad_norm": 1.0310511589050293, "learning_rate": 3.9906702669709074e-05, "loss": 0.10207366943359375, "step": 718 }, { "epoch": 0.10018811398313941, "grad_norm": 2.126049757003784, "learning_rate": 3.9905790948814086e-05, "loss": 0.16246414184570312, "step": 719 }, { "epoch": 0.10032745767435379, "grad_norm": 1.5851454734802246, "learning_rate": 3.9904874805306804e-05, "loss": 0.1069793701171875, "step": 720 }, { "epoch": 0.10046680136556817, "grad_norm": 1.3377631902694702, "learning_rate": 3.990395423939077e-05, "loss": 0.1281890869140625, "step": 721 }, { "epoch": 0.10060614505678256, "grad_norm": 0.7208927273750305, "learning_rate": 3.9903029251270535e-05, "loss": 0.07527732849121094, "step": 722 }, { "epoch": 0.10074548874799694, "grad_norm": 0.5075308680534363, "learning_rate": 3.990209984115158e-05, "loss": 0.07642745971679688, "step": 723 }, { "epoch": 0.10088483243921131, "grad_norm": 1.675383448600769, "learning_rate": 3.990116600924042e-05, "loss": 0.09997749328613281, "step": 724 }, { "epoch": 0.10102417613042569, "grad_norm": 1.5877801179885864, "learning_rate": 3.9900227755744515e-05, "loss": 0.07759857177734375, "step": 725 }, { "epoch": 0.10116351982164007, "grad_norm": 2.3962759971618652, "learning_rate": 3.9899285080872346e-05, "loss": 0.19809341430664062, "step": 726 }, { "epoch": 0.10130286351285446, "grad_norm": 0.696289598941803, "learning_rate": 3.9898337984833334e-05, "loss": 0.08775138854980469, "step": 727 }, { "epoch": 0.10144220720406884, "grad_norm": 3.3508143424987793, "learning_rate": 3.98973864678379e-05, "loss": 0.15690231323242188, "step": 728 }, { "epoch": 0.10158155089528322, "grad_norm": 1.2874993085861206, "learning_rate": 3.989643053009747e-05, "loss": 0.12796401977539062, "step": 729 }, { "epoch": 0.10172089458649759, "grad_norm": 0.5695470571517944, "learning_rate": 3.989547017182442e-05, "loss": 0.10289192199707031, "step": 730 }, { "epoch": 0.10186023827771197, "grad_norm": 1.7321703433990479, "learning_rate": 3.989450539323211e-05, "loss": 0.10570526123046875, "step": 731 }, { "epoch": 0.10199958196892636, "grad_norm": 1.1347671747207642, "learning_rate": 3.989353619453491e-05, "loss": 0.11036109924316406, "step": 732 }, { "epoch": 0.10213892566014074, "grad_norm": 0.5553204417228699, "learning_rate": 3.989256257594814e-05, "loss": 0.07574272155761719, "step": 733 }, { "epoch": 0.10227826935135512, "grad_norm": 1.4166901111602783, "learning_rate": 3.989158453768812e-05, "loss": 0.1366710662841797, "step": 734 }, { "epoch": 0.1024176130425695, "grad_norm": 0.8847742676734924, "learning_rate": 3.989060207997215e-05, "loss": 0.11824798583984375, "step": 735 }, { "epoch": 0.10255695673378387, "grad_norm": 1.0696070194244385, "learning_rate": 3.98896152030185e-05, "loss": 0.11084556579589844, "step": 736 }, { "epoch": 0.10269630042499826, "grad_norm": 0.5786455869674683, "learning_rate": 3.988862390704645e-05, "loss": 0.10807418823242188, "step": 737 }, { "epoch": 0.10283564411621264, "grad_norm": 1.4585732221603394, "learning_rate": 3.988762819227623e-05, "loss": 0.10635185241699219, "step": 738 }, { "epoch": 0.10297498780742702, "grad_norm": 1.1260654926300049, "learning_rate": 3.988662805892907e-05, "loss": 0.12812042236328125, "step": 739 }, { "epoch": 0.1031143314986414, "grad_norm": 0.5539922118186951, "learning_rate": 3.988562350722717e-05, "loss": 0.09383201599121094, "step": 740 }, { "epoch": 0.10325367518985577, "grad_norm": 0.560459315776825, "learning_rate": 3.9884614537393724e-05, "loss": 0.0829010009765625, "step": 741 }, { "epoch": 0.10339301888107016, "grad_norm": 1.5291521549224854, "learning_rate": 3.98836011496529e-05, "loss": 0.12047004699707031, "step": 742 }, { "epoch": 0.10353236257228454, "grad_norm": 1.3131424188613892, "learning_rate": 3.9882583344229856e-05, "loss": 0.11011505126953125, "step": 743 }, { "epoch": 0.10367170626349892, "grad_norm": 1.412013292312622, "learning_rate": 3.9881561121350725e-05, "loss": 0.143463134765625, "step": 744 }, { "epoch": 0.1038110499547133, "grad_norm": 1.620991826057434, "learning_rate": 3.988053448124261e-05, "loss": 0.1324920654296875, "step": 745 }, { "epoch": 0.10395039364592767, "grad_norm": 1.2912758588790894, "learning_rate": 3.9879503424133606e-05, "loss": 0.1139373779296875, "step": 746 }, { "epoch": 0.10408973733714207, "grad_norm": 1.3240872621536255, "learning_rate": 3.9878467950252807e-05, "loss": 0.11318206787109375, "step": 747 }, { "epoch": 0.10422908102835644, "grad_norm": 1.0506455898284912, "learning_rate": 3.9877428059830256e-05, "loss": 0.0853271484375, "step": 748 }, { "epoch": 0.10436842471957082, "grad_norm": 0.7446185946464539, "learning_rate": 3.9876383753097004e-05, "loss": 0.11227798461914062, "step": 749 }, { "epoch": 0.1045077684107852, "grad_norm": 0.5639259219169617, "learning_rate": 3.987533503028507e-05, "loss": 0.09223556518554688, "step": 750 }, { "epoch": 0.10464711210199958, "grad_norm": 0.9279486536979675, "learning_rate": 3.987428189162745e-05, "loss": 0.09468841552734375, "step": 751 }, { "epoch": 0.10478645579321397, "grad_norm": 1.568182110786438, "learning_rate": 3.9873224337358134e-05, "loss": 0.12052536010742188, "step": 752 }, { "epoch": 0.10492579948442834, "grad_norm": 1.6366362571716309, "learning_rate": 3.987216236771208e-05, "loss": 0.13929367065429688, "step": 753 }, { "epoch": 0.10506514317564272, "grad_norm": 2.1157543659210205, "learning_rate": 3.987109598292524e-05, "loss": 0.13171768188476562, "step": 754 }, { "epoch": 0.1052044868668571, "grad_norm": 1.521085262298584, "learning_rate": 3.9870025183234536e-05, "loss": 0.10310173034667969, "step": 755 }, { "epoch": 0.10534383055807148, "grad_norm": 1.1753852367401123, "learning_rate": 3.986894996887788e-05, "loss": 0.11014366149902344, "step": 756 }, { "epoch": 0.10548317424928587, "grad_norm": 0.9424460530281067, "learning_rate": 3.986787034009416e-05, "loss": 0.10788917541503906, "step": 757 }, { "epoch": 0.10562251794050025, "grad_norm": 3.1568048000335693, "learning_rate": 3.986678629712323e-05, "loss": 0.13332176208496094, "step": 758 }, { "epoch": 0.10576186163171462, "grad_norm": 1.9697014093399048, "learning_rate": 3.9865697840205955e-05, "loss": 0.1415119171142578, "step": 759 }, { "epoch": 0.105901205322929, "grad_norm": 1.201587200164795, "learning_rate": 3.986460496958416e-05, "loss": 0.12663841247558594, "step": 760 }, { "epoch": 0.10604054901414338, "grad_norm": 1.1773841381072998, "learning_rate": 3.986350768550066e-05, "loss": 0.10579490661621094, "step": 761 }, { "epoch": 0.10617989270535777, "grad_norm": 1.499826431274414, "learning_rate": 3.986240598819925e-05, "loss": 0.13878631591796875, "step": 762 }, { "epoch": 0.10631923639657215, "grad_norm": 0.4687274992465973, "learning_rate": 3.986129987792469e-05, "loss": 0.08551979064941406, "step": 763 }, { "epoch": 0.10645858008778653, "grad_norm": 1.034368634223938, "learning_rate": 3.986018935492274e-05, "loss": 0.1614990234375, "step": 764 }, { "epoch": 0.1065979237790009, "grad_norm": 1.261549472808838, "learning_rate": 3.985907441944013e-05, "loss": 0.0895071029663086, "step": 765 }, { "epoch": 0.10673726747021528, "grad_norm": 1.2000449895858765, "learning_rate": 3.9857955071724575e-05, "loss": 0.11925697326660156, "step": 766 }, { "epoch": 0.10687661116142967, "grad_norm": 2.438431978225708, "learning_rate": 3.9856831312024765e-05, "loss": 0.1375598907470703, "step": 767 }, { "epoch": 0.10701595485264405, "grad_norm": 1.2121024131774902, "learning_rate": 3.985570314059038e-05, "loss": 0.10234451293945312, "step": 768 }, { "epoch": 0.10715529854385843, "grad_norm": 1.5067170858383179, "learning_rate": 3.9854570557672073e-05, "loss": 0.10780715942382812, "step": 769 }, { "epoch": 0.1072946422350728, "grad_norm": 0.8413577079772949, "learning_rate": 3.985343356352147e-05, "loss": 0.13018798828125, "step": 770 }, { "epoch": 0.10743398592628718, "grad_norm": 3.81839656829834, "learning_rate": 3.985229215839119e-05, "loss": 0.15117835998535156, "step": 771 }, { "epoch": 0.10757332961750157, "grad_norm": 1.1108512878417969, "learning_rate": 3.985114634253483e-05, "loss": 0.08644676208496094, "step": 772 }, { "epoch": 0.10771267330871595, "grad_norm": 0.8536274433135986, "learning_rate": 3.9849996116206966e-05, "loss": 0.08495903015136719, "step": 773 }, { "epoch": 0.10785201699993033, "grad_norm": 0.5108976364135742, "learning_rate": 3.9848841479663146e-05, "loss": 0.10399341583251953, "step": 774 }, { "epoch": 0.1079913606911447, "grad_norm": 0.9988969564437866, "learning_rate": 3.984768243315991e-05, "loss": 0.10687065124511719, "step": 775 }, { "epoch": 0.10813070438235908, "grad_norm": 0.6871193647384644, "learning_rate": 3.984651897695476e-05, "loss": 0.09221076965332031, "step": 776 }, { "epoch": 0.10827004807357347, "grad_norm": 2.1453189849853516, "learning_rate": 3.9845351111306196e-05, "loss": 0.118377685546875, "step": 777 }, { "epoch": 0.10840939176478785, "grad_norm": 2.358560800552368, "learning_rate": 3.98441788364737e-05, "loss": 0.1477518081665039, "step": 778 }, { "epoch": 0.10854873545600223, "grad_norm": 0.4648269712924957, "learning_rate": 3.984300215271771e-05, "loss": 0.0981597900390625, "step": 779 }, { "epoch": 0.1086880791472166, "grad_norm": 1.3363220691680908, "learning_rate": 3.984182106029967e-05, "loss": 0.08282470703125, "step": 780 }, { "epoch": 0.10882742283843098, "grad_norm": 1.3709101676940918, "learning_rate": 3.984063555948198e-05, "loss": 0.11137962341308594, "step": 781 }, { "epoch": 0.10896676652964538, "grad_norm": 1.0121681690216064, "learning_rate": 3.9839445650528046e-05, "loss": 0.1295604705810547, "step": 782 }, { "epoch": 0.10910611022085975, "grad_norm": 2.054105520248413, "learning_rate": 3.983825133370223e-05, "loss": 0.12508392333984375, "step": 783 }, { "epoch": 0.10924545391207413, "grad_norm": 0.7920443415641785, "learning_rate": 3.983705260926988e-05, "loss": 0.14387130737304688, "step": 784 }, { "epoch": 0.10938479760328851, "grad_norm": 2.895064353942871, "learning_rate": 3.983584947749733e-05, "loss": 0.14847755432128906, "step": 785 }, { "epoch": 0.10952414129450289, "grad_norm": 0.8737805485725403, "learning_rate": 3.983464193865188e-05, "loss": 0.11993789672851562, "step": 786 }, { "epoch": 0.10966348498571728, "grad_norm": 0.842776358127594, "learning_rate": 3.9833429993001827e-05, "loss": 0.08897781372070312, "step": 787 }, { "epoch": 0.10980282867693165, "grad_norm": 2.2271933555603027, "learning_rate": 3.983221364081644e-05, "loss": 0.15197372436523438, "step": 788 }, { "epoch": 0.10994217236814603, "grad_norm": 0.9457564353942871, "learning_rate": 3.983099288236595e-05, "loss": 0.09394645690917969, "step": 789 }, { "epoch": 0.11008151605936041, "grad_norm": 0.9889166355133057, "learning_rate": 3.98297677179216e-05, "loss": 0.1539154052734375, "step": 790 }, { "epoch": 0.11022085975057479, "grad_norm": 2.6877601146698, "learning_rate": 3.982853814775558e-05, "loss": 0.15602493286132812, "step": 791 }, { "epoch": 0.11036020344178918, "grad_norm": 2.448507308959961, "learning_rate": 3.982730417214107e-05, "loss": 0.12117385864257812, "step": 792 }, { "epoch": 0.11049954713300356, "grad_norm": 2.2778351306915283, "learning_rate": 3.982606579135225e-05, "loss": 0.1409473419189453, "step": 793 }, { "epoch": 0.11063889082421793, "grad_norm": 0.857330858707428, "learning_rate": 3.982482300566424e-05, "loss": 0.09689712524414062, "step": 794 }, { "epoch": 0.11077823451543231, "grad_norm": 1.0533100366592407, "learning_rate": 3.982357581535317e-05, "loss": 0.10053062438964844, "step": 795 }, { "epoch": 0.11091757820664669, "grad_norm": 1.054516077041626, "learning_rate": 3.9822324220696134e-05, "loss": 0.11700630187988281, "step": 796 }, { "epoch": 0.11105692189786108, "grad_norm": 1.549259901046753, "learning_rate": 3.98210682219712e-05, "loss": 0.12303924560546875, "step": 797 }, { "epoch": 0.11119626558907546, "grad_norm": 1.1947675943374634, "learning_rate": 3.9819807819457444e-05, "loss": 0.12286567687988281, "step": 798 }, { "epoch": 0.11133560928028984, "grad_norm": 1.2768586874008179, "learning_rate": 3.9818543013434874e-05, "loss": 0.13268661499023438, "step": 799 }, { "epoch": 0.11147495297150421, "grad_norm": 1.2711249589920044, "learning_rate": 3.9817273804184514e-05, "loss": 0.10602188110351562, "step": 800 }, { "epoch": 0.11161429666271859, "grad_norm": 2.547293186187744, "learning_rate": 3.981600019198835e-05, "loss": 0.14692306518554688, "step": 801 }, { "epoch": 0.11175364035393298, "grad_norm": 1.1622370481491089, "learning_rate": 3.981472217712935e-05, "loss": 0.10278034210205078, "step": 802 }, { "epoch": 0.11189298404514736, "grad_norm": 0.5008900761604309, "learning_rate": 3.9813439759891466e-05, "loss": 0.09461212158203125, "step": 803 }, { "epoch": 0.11203232773636174, "grad_norm": 1.5992443561553955, "learning_rate": 3.981215294055961e-05, "loss": 0.11413002014160156, "step": 804 }, { "epoch": 0.11217167142757611, "grad_norm": 1.0794358253479004, "learning_rate": 3.981086171941969e-05, "loss": 0.105010986328125, "step": 805 }, { "epoch": 0.11231101511879049, "grad_norm": 1.069921612739563, "learning_rate": 3.9809566096758586e-05, "loss": 0.09340667724609375, "step": 806 }, { "epoch": 0.11245035881000488, "grad_norm": 1.005802035331726, "learning_rate": 3.9808266072864156e-05, "loss": 0.11477279663085938, "step": 807 }, { "epoch": 0.11258970250121926, "grad_norm": 0.7565615177154541, "learning_rate": 3.980696164802523e-05, "loss": 0.1026611328125, "step": 808 }, { "epoch": 0.11272904619243364, "grad_norm": 1.95918607711792, "learning_rate": 3.980565282253164e-05, "loss": 0.13885116577148438, "step": 809 }, { "epoch": 0.11286838988364802, "grad_norm": 2.142925262451172, "learning_rate": 3.9804339596674146e-05, "loss": 0.1059713363647461, "step": 810 }, { "epoch": 0.11300773357486239, "grad_norm": 1.4271775484085083, "learning_rate": 3.980302197074455e-05, "loss": 0.08303260803222656, "step": 811 }, { "epoch": 0.11314707726607678, "grad_norm": 1.2232320308685303, "learning_rate": 3.9801699945035573e-05, "loss": 0.10942459106445312, "step": 812 }, { "epoch": 0.11328642095729116, "grad_norm": 1.390616774559021, "learning_rate": 3.980037351984095e-05, "loss": 0.10650444030761719, "step": 813 }, { "epoch": 0.11342576464850554, "grad_norm": 2.2584481239318848, "learning_rate": 3.979904269545538e-05, "loss": 0.12761497497558594, "step": 814 }, { "epoch": 0.11356510833971992, "grad_norm": 1.9571324586868286, "learning_rate": 3.979770747217455e-05, "loss": 0.10061168670654297, "step": 815 }, { "epoch": 0.1137044520309343, "grad_norm": 1.5997616052627563, "learning_rate": 3.97963678502951e-05, "loss": 0.09047698974609375, "step": 816 }, { "epoch": 0.11384379572214869, "grad_norm": 0.9450207948684692, "learning_rate": 3.979502383011468e-05, "loss": 0.07411384582519531, "step": 817 }, { "epoch": 0.11398313941336306, "grad_norm": 1.723956823348999, "learning_rate": 3.979367541193189e-05, "loss": 0.14814186096191406, "step": 818 }, { "epoch": 0.11412248310457744, "grad_norm": 1.6721744537353516, "learning_rate": 3.9792322596046326e-05, "loss": 0.12276840209960938, "step": 819 }, { "epoch": 0.11426182679579182, "grad_norm": 1.1367372274398804, "learning_rate": 3.979096538275854e-05, "loss": 0.11266899108886719, "step": 820 }, { "epoch": 0.1144011704870062, "grad_norm": 1.3513717651367188, "learning_rate": 3.978960377237009e-05, "loss": 0.16111183166503906, "step": 821 }, { "epoch": 0.11454051417822059, "grad_norm": 1.443853735923767, "learning_rate": 3.978823776518348e-05, "loss": 0.15060043334960938, "step": 822 }, { "epoch": 0.11467985786943496, "grad_norm": 2.0307443141937256, "learning_rate": 3.978686736150221e-05, "loss": 0.1636981964111328, "step": 823 }, { "epoch": 0.11481920156064934, "grad_norm": 1.1807634830474854, "learning_rate": 3.978549256163075e-05, "loss": 0.12828445434570312, "step": 824 }, { "epoch": 0.11495854525186372, "grad_norm": 0.577113687992096, "learning_rate": 3.978411336587457e-05, "loss": 0.10004234313964844, "step": 825 }, { "epoch": 0.1150978889430781, "grad_norm": 1.1561959981918335, "learning_rate": 3.978272977454006e-05, "loss": 0.08631706237792969, "step": 826 }, { "epoch": 0.11523723263429249, "grad_norm": 0.4761141240596771, "learning_rate": 3.978134178793465e-05, "loss": 0.09083938598632812, "step": 827 }, { "epoch": 0.11537657632550687, "grad_norm": 0.4336426258087158, "learning_rate": 3.977994940636671e-05, "loss": 0.10993766784667969, "step": 828 }, { "epoch": 0.11551592001672124, "grad_norm": 0.4969298839569092, "learning_rate": 3.9778552630145595e-05, "loss": 0.08218574523925781, "step": 829 }, { "epoch": 0.11565526370793562, "grad_norm": 2.006443738937378, "learning_rate": 3.977715145958164e-05, "loss": 0.2059001922607422, "step": 830 }, { "epoch": 0.11579460739915, "grad_norm": 0.9672330021858215, "learning_rate": 3.9775745894986155e-05, "loss": 0.12047958374023438, "step": 831 }, { "epoch": 0.11593395109036439, "grad_norm": 1.297072410583496, "learning_rate": 3.9774335936671414e-05, "loss": 0.11907958984375, "step": 832 }, { "epoch": 0.11607329478157877, "grad_norm": 0.895574688911438, "learning_rate": 3.977292158495068e-05, "loss": 0.09975624084472656, "step": 833 }, { "epoch": 0.11621263847279314, "grad_norm": 0.6433578133583069, "learning_rate": 3.9771502840138196e-05, "loss": 0.08802413940429688, "step": 834 }, { "epoch": 0.11635198216400752, "grad_norm": 0.6772577166557312, "learning_rate": 3.9770079702549174e-05, "loss": 0.09848785400390625, "step": 835 }, { "epoch": 0.1164913258552219, "grad_norm": 0.8166855573654175, "learning_rate": 3.9768652172499804e-05, "loss": 0.09238052368164062, "step": 836 }, { "epoch": 0.11663066954643629, "grad_norm": 1.3394217491149902, "learning_rate": 3.9767220250307244e-05, "loss": 0.09633636474609375, "step": 837 }, { "epoch": 0.11677001323765067, "grad_norm": 0.967022716999054, "learning_rate": 3.976578393628963e-05, "loss": 0.10444450378417969, "step": 838 }, { "epoch": 0.11690935692886505, "grad_norm": 0.5620585680007935, "learning_rate": 3.9764343230766096e-05, "loss": 0.10424232482910156, "step": 839 }, { "epoch": 0.11704870062007942, "grad_norm": 0.8165538311004639, "learning_rate": 3.976289813405672e-05, "loss": 0.09117507934570312, "step": 840 }, { "epoch": 0.1171880443112938, "grad_norm": 0.811055064201355, "learning_rate": 3.9761448646482576e-05, "loss": 0.09662818908691406, "step": 841 }, { "epoch": 0.11732738800250819, "grad_norm": 1.4214133024215698, "learning_rate": 3.975999476836571e-05, "loss": 0.10876274108886719, "step": 842 }, { "epoch": 0.11746673169372257, "grad_norm": 0.5501617789268494, "learning_rate": 3.9758536500029116e-05, "loss": 0.08095359802246094, "step": 843 }, { "epoch": 0.11760607538493695, "grad_norm": 0.5954941511154175, "learning_rate": 3.975707384179682e-05, "loss": 0.09542465209960938, "step": 844 }, { "epoch": 0.11774541907615133, "grad_norm": 1.2494981288909912, "learning_rate": 3.9755606793993776e-05, "loss": 0.10699462890625, "step": 845 }, { "epoch": 0.1178847627673657, "grad_norm": 1.5655019283294678, "learning_rate": 3.9754135356945934e-05, "loss": 0.12392997741699219, "step": 846 }, { "epoch": 0.1180241064585801, "grad_norm": 0.6734741926193237, "learning_rate": 3.9752659530980205e-05, "loss": 0.10831451416015625, "step": 847 }, { "epoch": 0.11816345014979447, "grad_norm": 2.138023853302002, "learning_rate": 3.975117931642449e-05, "loss": 0.17435073852539062, "step": 848 }, { "epoch": 0.11830279384100885, "grad_norm": 0.9267823696136475, "learning_rate": 3.9749694713607654e-05, "loss": 0.09423065185546875, "step": 849 }, { "epoch": 0.11844213753222323, "grad_norm": 0.5772899985313416, "learning_rate": 3.974820572285955e-05, "loss": 0.07808494567871094, "step": 850 }, { "epoch": 0.1185814812234376, "grad_norm": 0.5302408933639526, "learning_rate": 3.9746712344510996e-05, "loss": 0.094329833984375, "step": 851 }, { "epoch": 0.118720824914652, "grad_norm": 0.8932099342346191, "learning_rate": 3.9745214578893784e-05, "loss": 0.09273433685302734, "step": 852 }, { "epoch": 0.11886016860586637, "grad_norm": 1.260711669921875, "learning_rate": 3.974371242634068e-05, "loss": 0.16221237182617188, "step": 853 }, { "epoch": 0.11899951229708075, "grad_norm": 0.7477459907531738, "learning_rate": 3.9742205887185434e-05, "loss": 0.09038543701171875, "step": 854 }, { "epoch": 0.11913885598829513, "grad_norm": 1.0658917427062988, "learning_rate": 3.974069496176277e-05, "loss": 0.11784934997558594, "step": 855 }, { "epoch": 0.1192781996795095, "grad_norm": 0.6801803708076477, "learning_rate": 3.973917965040836e-05, "loss": 0.11389923095703125, "step": 856 }, { "epoch": 0.1194175433707239, "grad_norm": 0.9303820133209229, "learning_rate": 3.973765995345889e-05, "loss": 0.1399993896484375, "step": 857 }, { "epoch": 0.11955688706193827, "grad_norm": 1.3556439876556396, "learning_rate": 3.9736135871251994e-05, "loss": 0.1107940673828125, "step": 858 }, { "epoch": 0.11969623075315265, "grad_norm": 0.8433564901351929, "learning_rate": 3.9734607404126293e-05, "loss": 0.11928367614746094, "step": 859 }, { "epoch": 0.11983557444436703, "grad_norm": 0.6797468066215515, "learning_rate": 3.973307455242138e-05, "loss": 0.10315418243408203, "step": 860 }, { "epoch": 0.11997491813558141, "grad_norm": 1.3954963684082031, "learning_rate": 3.9731537316477806e-05, "loss": 0.12972259521484375, "step": 861 }, { "epoch": 0.1201142618267958, "grad_norm": 0.9100663661956787, "learning_rate": 3.9729995696637125e-05, "loss": 0.1049652099609375, "step": 862 }, { "epoch": 0.12025360551801018, "grad_norm": 0.7759761810302734, "learning_rate": 3.972844969324184e-05, "loss": 0.08396720886230469, "step": 863 }, { "epoch": 0.12039294920922455, "grad_norm": 4.135852336883545, "learning_rate": 3.9726899306635446e-05, "loss": 0.21060943603515625, "step": 864 }, { "epoch": 0.12053229290043893, "grad_norm": 1.6119786500930786, "learning_rate": 3.9725344537162394e-05, "loss": 0.13420677185058594, "step": 865 }, { "epoch": 0.12067163659165331, "grad_norm": 0.5810844898223877, "learning_rate": 3.972378538516813e-05, "loss": 0.11736488342285156, "step": 866 }, { "epoch": 0.1208109802828677, "grad_norm": 1.3256585597991943, "learning_rate": 3.972222185099905e-05, "loss": 0.13250732421875, "step": 867 }, { "epoch": 0.12095032397408208, "grad_norm": 0.759692370891571, "learning_rate": 3.972065393500254e-05, "loss": 0.07603263854980469, "step": 868 }, { "epoch": 0.12108966766529645, "grad_norm": 1.2146332263946533, "learning_rate": 3.971908163752696e-05, "loss": 0.14098024368286133, "step": 869 }, { "epoch": 0.12122901135651083, "grad_norm": 0.5106416940689087, "learning_rate": 3.9717504958921634e-05, "loss": 0.10230636596679688, "step": 870 }, { "epoch": 0.12136835504772521, "grad_norm": 0.8769246935844421, "learning_rate": 3.971592389953686e-05, "loss": 0.11970901489257812, "step": 871 }, { "epoch": 0.1215076987389396, "grad_norm": 0.863617479801178, "learning_rate": 3.9714338459723924e-05, "loss": 0.12817001342773438, "step": 872 }, { "epoch": 0.12164704243015398, "grad_norm": 0.7919303774833679, "learning_rate": 3.9712748639835056e-05, "loss": 0.10923385620117188, "step": 873 }, { "epoch": 0.12178638612136836, "grad_norm": 0.9119038581848145, "learning_rate": 3.97111544402235e-05, "loss": 0.1362323760986328, "step": 874 }, { "epoch": 0.12192572981258273, "grad_norm": 0.9829415082931519, "learning_rate": 3.970955586124344e-05, "loss": 0.10671043395996094, "step": 875 }, { "epoch": 0.12206507350379711, "grad_norm": 0.785045862197876, "learning_rate": 3.9707952903250045e-05, "loss": 0.14325714111328125, "step": 876 }, { "epoch": 0.1222044171950115, "grad_norm": 0.38225650787353516, "learning_rate": 3.9706345566599454e-05, "loss": 0.06811904907226562, "step": 877 }, { "epoch": 0.12234376088622588, "grad_norm": 1.3383413553237915, "learning_rate": 3.9704733851648785e-05, "loss": 0.1023101806640625, "step": 878 }, { "epoch": 0.12248310457744026, "grad_norm": 1.366274118423462, "learning_rate": 3.970311775875611e-05, "loss": 0.12511253356933594, "step": 879 }, { "epoch": 0.12262244826865464, "grad_norm": 0.8313373923301697, "learning_rate": 3.9701497288280506e-05, "loss": 0.1096954345703125, "step": 880 }, { "epoch": 0.12276179195986901, "grad_norm": 0.739745557308197, "learning_rate": 3.9699872440582e-05, "loss": 0.11933517456054688, "step": 881 }, { "epoch": 0.1229011356510834, "grad_norm": 0.6557244658470154, "learning_rate": 3.969824321602159e-05, "loss": 0.09633255004882812, "step": 882 }, { "epoch": 0.12304047934229778, "grad_norm": 1.0902858972549438, "learning_rate": 3.969660961496126e-05, "loss": 0.11241912841796875, "step": 883 }, { "epoch": 0.12317982303351216, "grad_norm": 1.4936882257461548, "learning_rate": 3.969497163776395e-05, "loss": 0.13085269927978516, "step": 884 }, { "epoch": 0.12331916672472654, "grad_norm": 2.4463281631469727, "learning_rate": 3.9693329284793586e-05, "loss": 0.16193389892578125, "step": 885 }, { "epoch": 0.12345851041594091, "grad_norm": 0.9028124213218689, "learning_rate": 3.9691682556415064e-05, "loss": 0.14135169982910156, "step": 886 }, { "epoch": 0.12359785410715529, "grad_norm": 1.8216041326522827, "learning_rate": 3.969003145299424e-05, "loss": 0.1624908447265625, "step": 887 }, { "epoch": 0.12373719779836968, "grad_norm": 0.8420243263244629, "learning_rate": 3.968837597489797e-05, "loss": 0.1012115478515625, "step": 888 }, { "epoch": 0.12387654148958406, "grad_norm": 0.6212747097015381, "learning_rate": 3.968671612249404e-05, "loss": 0.10637664794921875, "step": 889 }, { "epoch": 0.12401588518079844, "grad_norm": 0.8405910730361938, "learning_rate": 3.968505189615125e-05, "loss": 0.10589408874511719, "step": 890 }, { "epoch": 0.12415522887201282, "grad_norm": 1.072261095046997, "learning_rate": 3.9683383296239345e-05, "loss": 0.09920883178710938, "step": 891 }, { "epoch": 0.12429457256322719, "grad_norm": 2.0888612270355225, "learning_rate": 3.968171032312905e-05, "loss": 0.1413726806640625, "step": 892 }, { "epoch": 0.12443391625444158, "grad_norm": 0.7281012535095215, "learning_rate": 3.968003297719206e-05, "loss": 0.09989166259765625, "step": 893 }, { "epoch": 0.12457325994565596, "grad_norm": 0.6744201183319092, "learning_rate": 3.9678351258801046e-05, "loss": 0.10813140869140625, "step": 894 }, { "epoch": 0.12471260363687034, "grad_norm": 1.953149676322937, "learning_rate": 3.9676665168329655e-05, "loss": 0.1544818878173828, "step": 895 }, { "epoch": 0.12485194732808472, "grad_norm": 1.028326153755188, "learning_rate": 3.967497470615248e-05, "loss": 0.13013648986816406, "step": 896 }, { "epoch": 0.1249912910192991, "grad_norm": 0.8883559703826904, "learning_rate": 3.967327987264512e-05, "loss": 0.12459373474121094, "step": 897 }, { "epoch": 0.12513063471051347, "grad_norm": 0.9527366757392883, "learning_rate": 3.967158066818411e-05, "loss": 0.13219070434570312, "step": 898 }, { "epoch": 0.12526997840172785, "grad_norm": 0.9646626114845276, "learning_rate": 3.9669877093146995e-05, "loss": 0.11056900024414062, "step": 899 }, { "epoch": 0.12540932209294225, "grad_norm": 0.8447823524475098, "learning_rate": 3.966816914791226e-05, "loss": 0.10135078430175781, "step": 900 }, { "epoch": 0.12554866578415663, "grad_norm": 1.1658153533935547, "learning_rate": 3.9666456832859365e-05, "loss": 0.1222381591796875, "step": 901 }, { "epoch": 0.125688009475371, "grad_norm": 1.094548225402832, "learning_rate": 3.966474014836876e-05, "loss": 0.09234237670898438, "step": 902 }, { "epoch": 0.1258273531665854, "grad_norm": 1.0840222835540771, "learning_rate": 3.9663019094821843e-05, "loss": 0.1737346649169922, "step": 903 }, { "epoch": 0.12596669685779976, "grad_norm": 0.8250463604927063, "learning_rate": 3.9661293672601006e-05, "loss": 0.10967826843261719, "step": 904 }, { "epoch": 0.12610604054901414, "grad_norm": 0.7635146379470825, "learning_rate": 3.965956388208959e-05, "loss": 0.10135078430175781, "step": 905 }, { "epoch": 0.12624538424022852, "grad_norm": 2.660240411758423, "learning_rate": 3.965782972367191e-05, "loss": 0.1481761932373047, "step": 906 }, { "epoch": 0.1263847279314429, "grad_norm": 1.2389339208602905, "learning_rate": 3.965609119773326e-05, "loss": 0.13171768188476562, "step": 907 }, { "epoch": 0.12652407162265727, "grad_norm": 0.7233284115791321, "learning_rate": 3.9654348304659905e-05, "loss": 0.10956573486328125, "step": 908 }, { "epoch": 0.12666341531387165, "grad_norm": 0.9505659341812134, "learning_rate": 3.965260104483907e-05, "loss": 0.12093353271484375, "step": 909 }, { "epoch": 0.12680275900508606, "grad_norm": 0.5517993569374084, "learning_rate": 3.965084941865896e-05, "loss": 0.10152244567871094, "step": 910 }, { "epoch": 0.12694210269630044, "grad_norm": 0.6311435699462891, "learning_rate": 3.964909342650875e-05, "loss": 0.10203361511230469, "step": 911 }, { "epoch": 0.1270814463875148, "grad_norm": 0.8900637030601501, "learning_rate": 3.964733306877857e-05, "loss": 0.15645980834960938, "step": 912 }, { "epoch": 0.1272207900787292, "grad_norm": 0.28614675998687744, "learning_rate": 3.964556834585954e-05, "loss": 0.06302642822265625, "step": 913 }, { "epoch": 0.12736013376994357, "grad_norm": 0.8549137711524963, "learning_rate": 3.9643799258143745e-05, "loss": 0.12442493438720703, "step": 914 }, { "epoch": 0.12749947746115795, "grad_norm": 0.5446961522102356, "learning_rate": 3.9642025806024226e-05, "loss": 0.09725761413574219, "step": 915 }, { "epoch": 0.12763882115237232, "grad_norm": 0.457271933555603, "learning_rate": 3.964024798989501e-05, "loss": 0.06842231750488281, "step": 916 }, { "epoch": 0.1277781648435867, "grad_norm": 0.5276815295219421, "learning_rate": 3.963846581015109e-05, "loss": 0.09306907653808594, "step": 917 }, { "epoch": 0.12791750853480108, "grad_norm": 0.6969532370567322, "learning_rate": 3.963667926718841e-05, "loss": 0.08910179138183594, "step": 918 }, { "epoch": 0.12805685222601546, "grad_norm": 1.6845422983169556, "learning_rate": 3.9634888361403916e-05, "loss": 0.11983680725097656, "step": 919 }, { "epoch": 0.12819619591722986, "grad_norm": 0.48111677169799805, "learning_rate": 3.96330930931955e-05, "loss": 0.08892822265625, "step": 920 }, { "epoch": 0.12833553960844424, "grad_norm": 0.7504842877388, "learning_rate": 3.963129346296203e-05, "loss": 0.09876632690429688, "step": 921 }, { "epoch": 0.12847488329965862, "grad_norm": 0.8554426431655884, "learning_rate": 3.9629489471103334e-05, "loss": 0.09718894958496094, "step": 922 }, { "epoch": 0.128614226990873, "grad_norm": 0.825312614440918, "learning_rate": 3.962768111802023e-05, "loss": 0.10096549987792969, "step": 923 }, { "epoch": 0.12875357068208737, "grad_norm": 1.7146801948547363, "learning_rate": 3.96258684041145e-05, "loss": 0.21262741088867188, "step": 924 }, { "epoch": 0.12889291437330175, "grad_norm": 0.929318368434906, "learning_rate": 3.9624051329788875e-05, "loss": 0.14669322967529297, "step": 925 }, { "epoch": 0.12903225806451613, "grad_norm": 1.0911771059036255, "learning_rate": 3.9622229895447054e-05, "loss": 0.10153388977050781, "step": 926 }, { "epoch": 0.1291716017557305, "grad_norm": 1.5078743696212769, "learning_rate": 3.962040410149375e-05, "loss": 0.14362144470214844, "step": 927 }, { "epoch": 0.12931094544694488, "grad_norm": 0.8618125915527344, "learning_rate": 3.961857394833459e-05, "loss": 0.10207939147949219, "step": 928 }, { "epoch": 0.12945028913815926, "grad_norm": 0.47343820333480835, "learning_rate": 3.96167394363762e-05, "loss": 0.08660888671875, "step": 929 }, { "epoch": 0.12958963282937366, "grad_norm": 0.825143039226532, "learning_rate": 3.9614900566026154e-05, "loss": 0.08667755126953125, "step": 930 }, { "epoch": 0.12972897652058804, "grad_norm": 0.7102736234664917, "learning_rate": 3.961305733769303e-05, "loss": 0.1053924560546875, "step": 931 }, { "epoch": 0.12986832021180242, "grad_norm": 0.8108076453208923, "learning_rate": 3.961120975178634e-05, "loss": 0.0892791748046875, "step": 932 }, { "epoch": 0.1300076639030168, "grad_norm": 1.125351071357727, "learning_rate": 3.960935780871657e-05, "loss": 0.13264846801757812, "step": 933 }, { "epoch": 0.13014700759423117, "grad_norm": 1.4581644535064697, "learning_rate": 3.9607501508895185e-05, "loss": 0.16429901123046875, "step": 934 }, { "epoch": 0.13028635128544555, "grad_norm": 0.5428231954574585, "learning_rate": 3.960564085273461e-05, "loss": 0.1180422306060791, "step": 935 }, { "epoch": 0.13042569497665993, "grad_norm": 1.9891698360443115, "learning_rate": 3.9603775840648243e-05, "loss": 0.19652366638183594, "step": 936 }, { "epoch": 0.1305650386678743, "grad_norm": 0.7734001874923706, "learning_rate": 3.9601906473050446e-05, "loss": 0.10776519775390625, "step": 937 }, { "epoch": 0.13070438235908868, "grad_norm": 0.48310205340385437, "learning_rate": 3.960003275035655e-05, "loss": 0.08874893188476562, "step": 938 }, { "epoch": 0.13084372605030306, "grad_norm": 0.6201640963554382, "learning_rate": 3.959815467298285e-05, "loss": 0.08957290649414062, "step": 939 }, { "epoch": 0.13098306974151747, "grad_norm": 1.7165266275405884, "learning_rate": 3.9596272241346625e-05, "loss": 0.11921882629394531, "step": 940 }, { "epoch": 0.13112241343273184, "grad_norm": 0.4996984004974365, "learning_rate": 3.959438545586609e-05, "loss": 0.10185050964355469, "step": 941 }, { "epoch": 0.13126175712394622, "grad_norm": 0.6177219152450562, "learning_rate": 3.959249431696046e-05, "loss": 0.10330581665039062, "step": 942 }, { "epoch": 0.1314011008151606, "grad_norm": 0.3020438551902771, "learning_rate": 3.9590598825049896e-05, "loss": 0.06919097900390625, "step": 943 }, { "epoch": 0.13154044450637498, "grad_norm": 0.6501237750053406, "learning_rate": 3.958869898055553e-05, "loss": 0.10583877563476562, "step": 944 }, { "epoch": 0.13167978819758935, "grad_norm": 0.5967585444450378, "learning_rate": 3.9586794783899464e-05, "loss": 0.10979461669921875, "step": 945 }, { "epoch": 0.13181913188880373, "grad_norm": 0.8810577392578125, "learning_rate": 3.958488623550478e-05, "loss": 0.09739875793457031, "step": 946 }, { "epoch": 0.1319584755800181, "grad_norm": 1.4574545621871948, "learning_rate": 3.95829733357955e-05, "loss": 0.12108612060546875, "step": 947 }, { "epoch": 0.1320978192712325, "grad_norm": 0.6736417412757874, "learning_rate": 3.958105608519663e-05, "loss": 0.07841110229492188, "step": 948 }, { "epoch": 0.13223716296244686, "grad_norm": 1.2602319717407227, "learning_rate": 3.957913448413415e-05, "loss": 0.11762428283691406, "step": 949 }, { "epoch": 0.13237650665366127, "grad_norm": 0.9865601658821106, "learning_rate": 3.957720853303499e-05, "loss": 0.182098388671875, "step": 950 }, { "epoch": 0.13251585034487565, "grad_norm": 1.799268126487732, "learning_rate": 3.9575278232327036e-05, "loss": 0.09648895263671875, "step": 951 }, { "epoch": 0.13265519403609002, "grad_norm": 3.0479214191436768, "learning_rate": 3.957334358243917e-05, "loss": 0.16586875915527344, "step": 952 }, { "epoch": 0.1327945377273044, "grad_norm": 1.4267535209655762, "learning_rate": 3.957140458380123e-05, "loss": 0.13303184509277344, "step": 953 }, { "epoch": 0.13293388141851878, "grad_norm": 0.8347949385643005, "learning_rate": 3.956946123684402e-05, "loss": 0.10215568542480469, "step": 954 }, { "epoch": 0.13307322510973316, "grad_norm": 0.5874918103218079, "learning_rate": 3.95675135419993e-05, "loss": 0.10650253295898438, "step": 955 }, { "epoch": 0.13321256880094753, "grad_norm": 1.711199164390564, "learning_rate": 3.9565561499699795e-05, "loss": 0.0952911376953125, "step": 956 }, { "epoch": 0.1333519124921619, "grad_norm": 2.169102191925049, "learning_rate": 3.9563605110379224e-05, "loss": 0.1320209503173828, "step": 957 }, { "epoch": 0.1334912561833763, "grad_norm": 0.47926661372184753, "learning_rate": 3.956164437447224e-05, "loss": 0.08258724212646484, "step": 958 }, { "epoch": 0.13363059987459067, "grad_norm": 0.9093363881111145, "learning_rate": 3.955967929241447e-05, "loss": 0.09659385681152344, "step": 959 }, { "epoch": 0.13376994356580507, "grad_norm": 1.6823540925979614, "learning_rate": 3.955770986464253e-05, "loss": 0.159820556640625, "step": 960 }, { "epoch": 0.13390928725701945, "grad_norm": 0.6723421216011047, "learning_rate": 3.955573609159395e-05, "loss": 0.08875465393066406, "step": 961 }, { "epoch": 0.13404863094823383, "grad_norm": 0.49799638986587524, "learning_rate": 3.95537579737073e-05, "loss": 0.11854362487792969, "step": 962 }, { "epoch": 0.1341879746394482, "grad_norm": 0.9390732049942017, "learning_rate": 3.955177551142202e-05, "loss": 0.10270500183105469, "step": 963 }, { "epoch": 0.13432731833066258, "grad_norm": 0.7228181958198547, "learning_rate": 3.954978870517861e-05, "loss": 0.10239601135253906, "step": 964 }, { "epoch": 0.13446666202187696, "grad_norm": 0.4605611264705658, "learning_rate": 3.954779755541848e-05, "loss": 0.08382606506347656, "step": 965 }, { "epoch": 0.13460600571309134, "grad_norm": 0.44877952337265015, "learning_rate": 3.954580206258402e-05, "loss": 0.11230278015136719, "step": 966 }, { "epoch": 0.13474534940430571, "grad_norm": 0.6742639541625977, "learning_rate": 3.9543802227118574e-05, "loss": 0.10134696960449219, "step": 967 }, { "epoch": 0.1348846930955201, "grad_norm": 0.8711440563201904, "learning_rate": 3.954179804946647e-05, "loss": 0.12663841247558594, "step": 968 }, { "epoch": 0.13502403678673447, "grad_norm": 0.756864607334137, "learning_rate": 3.953978953007299e-05, "loss": 0.0988168716430664, "step": 969 }, { "epoch": 0.13516338047794887, "grad_norm": 1.2521171569824219, "learning_rate": 3.953777666938436e-05, "loss": 0.10729122161865234, "step": 970 }, { "epoch": 0.13530272416916325, "grad_norm": 0.9330423474311829, "learning_rate": 3.953575946784782e-05, "loss": 0.1218109130859375, "step": 971 }, { "epoch": 0.13544206786037763, "grad_norm": 1.0559850931167603, "learning_rate": 3.953373792591154e-05, "loss": 0.12021064758300781, "step": 972 }, { "epoch": 0.135581411551592, "grad_norm": 0.5243715047836304, "learning_rate": 3.953171204402465e-05, "loss": 0.1219024658203125, "step": 973 }, { "epoch": 0.13572075524280638, "grad_norm": 0.8299070596694946, "learning_rate": 3.952968182263726e-05, "loss": 0.11920547485351562, "step": 974 }, { "epoch": 0.13586009893402076, "grad_norm": 0.9031861424446106, "learning_rate": 3.9527647262200444e-05, "loss": 0.10433006286621094, "step": 975 }, { "epoch": 0.13599944262523514, "grad_norm": 0.9984228014945984, "learning_rate": 3.9525608363166225e-05, "loss": 0.11217880249023438, "step": 976 }, { "epoch": 0.13613878631644952, "grad_norm": 1.5464277267456055, "learning_rate": 3.9523565125987606e-05, "loss": 0.1273517608642578, "step": 977 }, { "epoch": 0.1362781300076639, "grad_norm": 0.6148502230644226, "learning_rate": 3.952151755111855e-05, "loss": 0.07797527313232422, "step": 978 }, { "epoch": 0.13641747369887827, "grad_norm": 1.7016847133636475, "learning_rate": 3.951946563901397e-05, "loss": 0.1307659149169922, "step": 979 }, { "epoch": 0.13655681739009268, "grad_norm": 0.8098174333572388, "learning_rate": 3.951740939012977e-05, "loss": 0.1078033447265625, "step": 980 }, { "epoch": 0.13669616108130705, "grad_norm": 0.7261725664138794, "learning_rate": 3.951534880492279e-05, "loss": 0.10004234313964844, "step": 981 }, { "epoch": 0.13683550477252143, "grad_norm": 0.9728497862815857, "learning_rate": 3.951328388385085e-05, "loss": 0.14611434936523438, "step": 982 }, { "epoch": 0.1369748484637358, "grad_norm": 0.500342845916748, "learning_rate": 3.951121462737273e-05, "loss": 0.08334541320800781, "step": 983 }, { "epoch": 0.1371141921549502, "grad_norm": 1.0606285333633423, "learning_rate": 3.9509141035948156e-05, "loss": 0.14150047302246094, "step": 984 }, { "epoch": 0.13725353584616456, "grad_norm": 1.2684240341186523, "learning_rate": 3.950706311003785e-05, "loss": 0.09601974487304688, "step": 985 }, { "epoch": 0.13739287953737894, "grad_norm": 1.4431241750717163, "learning_rate": 3.950498085010348e-05, "loss": 0.10696983337402344, "step": 986 }, { "epoch": 0.13753222322859332, "grad_norm": 1.647621512413025, "learning_rate": 3.950289425660767e-05, "loss": 0.1531829833984375, "step": 987 }, { "epoch": 0.1376715669198077, "grad_norm": 0.8080485463142395, "learning_rate": 3.950080333001402e-05, "loss": 0.11075592041015625, "step": 988 }, { "epoch": 0.13781091061102207, "grad_norm": 0.9858983755111694, "learning_rate": 3.9498708070787076e-05, "loss": 0.11331939697265625, "step": 989 }, { "epoch": 0.13795025430223645, "grad_norm": 0.6059591174125671, "learning_rate": 3.949660847939236e-05, "loss": 0.10280036926269531, "step": 990 }, { "epoch": 0.13808959799345086, "grad_norm": 0.8609289526939392, "learning_rate": 3.949450455629635e-05, "loss": 0.131927490234375, "step": 991 }, { "epoch": 0.13822894168466524, "grad_norm": 1.354343295097351, "learning_rate": 3.9492396301966504e-05, "loss": 0.09990119934082031, "step": 992 }, { "epoch": 0.1383682853758796, "grad_norm": 0.821421205997467, "learning_rate": 3.9490283716871214e-05, "loss": 0.08404922485351562, "step": 993 }, { "epoch": 0.138507629067094, "grad_norm": 0.9992891550064087, "learning_rate": 3.948816680147986e-05, "loss": 0.12139129638671875, "step": 994 }, { "epoch": 0.13864697275830837, "grad_norm": 0.6031719446182251, "learning_rate": 3.9486045556262756e-05, "loss": 0.11076164245605469, "step": 995 }, { "epoch": 0.13878631644952275, "grad_norm": 1.0184024572372437, "learning_rate": 3.948391998169121e-05, "loss": 0.12169456481933594, "step": 996 }, { "epoch": 0.13892566014073712, "grad_norm": 0.4988100230693817, "learning_rate": 3.948179007823746e-05, "loss": 0.08377456665039062, "step": 997 }, { "epoch": 0.1390650038319515, "grad_norm": 0.44008979201316833, "learning_rate": 3.947965584637474e-05, "loss": 0.07970237731933594, "step": 998 }, { "epoch": 0.13920434752316588, "grad_norm": 0.5558716654777527, "learning_rate": 3.947751728657722e-05, "loss": 0.110076904296875, "step": 999 }, { "epoch": 0.13934369121438026, "grad_norm": 0.829971194267273, "learning_rate": 3.9475374399320036e-05, "loss": 0.14620590209960938, "step": 1000 }, { "epoch": 0.13948303490559466, "grad_norm": 0.3911738395690918, "learning_rate": 3.947322718507929e-05, "loss": 0.10377883911132812, "step": 1001 }, { "epoch": 0.13962237859680904, "grad_norm": 0.6939563751220703, "learning_rate": 3.947107564433204e-05, "loss": 0.09389305114746094, "step": 1002 }, { "epoch": 0.13976172228802342, "grad_norm": 2.19252872467041, "learning_rate": 3.946891977755632e-05, "loss": 0.1535186767578125, "step": 1003 }, { "epoch": 0.1399010659792378, "grad_norm": 0.3593987822532654, "learning_rate": 3.946675958523111e-05, "loss": 0.09176063537597656, "step": 1004 }, { "epoch": 0.14004040967045217, "grad_norm": 0.787429690361023, "learning_rate": 3.946459506783635e-05, "loss": 0.08251571655273438, "step": 1005 }, { "epoch": 0.14017975336166655, "grad_norm": 0.5198335647583008, "learning_rate": 3.9462426225852954e-05, "loss": 0.08567237854003906, "step": 1006 }, { "epoch": 0.14031909705288093, "grad_norm": 0.5118117928504944, "learning_rate": 3.946025305976278e-05, "loss": 0.07783889770507812, "step": 1007 }, { "epoch": 0.1404584407440953, "grad_norm": 0.898271381855011, "learning_rate": 3.9458075570048666e-05, "loss": 0.09580421447753906, "step": 1008 }, { "epoch": 0.14059778443530968, "grad_norm": 1.2402944564819336, "learning_rate": 3.945589375719439e-05, "loss": 0.1432056427001953, "step": 1009 }, { "epoch": 0.14073712812652406, "grad_norm": 0.44301941990852356, "learning_rate": 3.9453707621684714e-05, "loss": 0.09371280670166016, "step": 1010 }, { "epoch": 0.14087647181773846, "grad_norm": 0.6207379698753357, "learning_rate": 3.945151716400534e-05, "loss": 0.07216835021972656, "step": 1011 }, { "epoch": 0.14101581550895284, "grad_norm": 1.2328846454620361, "learning_rate": 3.944932238464293e-05, "loss": 0.17153167724609375, "step": 1012 }, { "epoch": 0.14115515920016722, "grad_norm": 0.9434775710105896, "learning_rate": 3.944712328408513e-05, "loss": 0.09087181091308594, "step": 1013 }, { "epoch": 0.1412945028913816, "grad_norm": 0.9901787638664246, "learning_rate": 3.9444919862820514e-05, "loss": 0.08065032958984375, "step": 1014 }, { "epoch": 0.14143384658259597, "grad_norm": 1.2479723691940308, "learning_rate": 3.944271212133864e-05, "loss": 0.1379070281982422, "step": 1015 }, { "epoch": 0.14157319027381035, "grad_norm": 0.5679019093513489, "learning_rate": 3.9440500060130025e-05, "loss": 0.08939170837402344, "step": 1016 }, { "epoch": 0.14171253396502473, "grad_norm": 0.9982486963272095, "learning_rate": 3.943828367968613e-05, "loss": 0.10690498352050781, "step": 1017 }, { "epoch": 0.1418518776562391, "grad_norm": 0.4452458322048187, "learning_rate": 3.9436062980499376e-05, "loss": 0.079620361328125, "step": 1018 }, { "epoch": 0.14199122134745348, "grad_norm": 0.6532194018363953, "learning_rate": 3.943383796306317e-05, "loss": 0.10463333129882812, "step": 1019 }, { "epoch": 0.14213056503866786, "grad_norm": 0.9193634986877441, "learning_rate": 3.9431608627871845e-05, "loss": 0.10005378723144531, "step": 1020 }, { "epoch": 0.14226990872988227, "grad_norm": 0.35800793766975403, "learning_rate": 3.9429374975420714e-05, "loss": 0.08039093017578125, "step": 1021 }, { "epoch": 0.14240925242109664, "grad_norm": 0.5265731811523438, "learning_rate": 3.942713700620605e-05, "loss": 0.10545730590820312, "step": 1022 }, { "epoch": 0.14254859611231102, "grad_norm": 0.4224061369895935, "learning_rate": 3.942489472072507e-05, "loss": 0.09466171264648438, "step": 1023 }, { "epoch": 0.1426879398035254, "grad_norm": 0.9892920255661011, "learning_rate": 3.942264811947596e-05, "loss": 0.16904067993164062, "step": 1024 }, { "epoch": 0.14282728349473978, "grad_norm": 0.5567622184753418, "learning_rate": 3.9420397202957854e-05, "loss": 0.11565399169921875, "step": 1025 }, { "epoch": 0.14296662718595415, "grad_norm": 0.6593400835990906, "learning_rate": 3.941814197167087e-05, "loss": 0.09443092346191406, "step": 1026 }, { "epoch": 0.14310597087716853, "grad_norm": 0.9876372218132019, "learning_rate": 3.941588242611607e-05, "loss": 0.06507492065429688, "step": 1027 }, { "epoch": 0.1432453145683829, "grad_norm": 1.0169603824615479, "learning_rate": 3.9413618566795465e-05, "loss": 0.103790283203125, "step": 1028 }, { "epoch": 0.1433846582595973, "grad_norm": 0.7091437578201294, "learning_rate": 3.941135039421204e-05, "loss": 0.11845016479492188, "step": 1029 }, { "epoch": 0.14352400195081166, "grad_norm": 0.9706575274467468, "learning_rate": 3.940907790886971e-05, "loss": 0.10301971435546875, "step": 1030 }, { "epoch": 0.14366334564202607, "grad_norm": 1.2931971549987793, "learning_rate": 3.94068011112734e-05, "loss": 0.132659912109375, "step": 1031 }, { "epoch": 0.14380268933324045, "grad_norm": 1.0244883298873901, "learning_rate": 3.9404520001928945e-05, "loss": 0.09354782104492188, "step": 1032 }, { "epoch": 0.14394203302445482, "grad_norm": 1.0689970254898071, "learning_rate": 3.940223458134316e-05, "loss": 0.104095458984375, "step": 1033 }, { "epoch": 0.1440813767156692, "grad_norm": 0.8139352798461914, "learning_rate": 3.939994485002381e-05, "loss": 0.11745643615722656, "step": 1034 }, { "epoch": 0.14422072040688358, "grad_norm": 0.3693269193172455, "learning_rate": 3.939765080847962e-05, "loss": 0.0854644775390625, "step": 1035 }, { "epoch": 0.14436006409809796, "grad_norm": 1.1354705095291138, "learning_rate": 3.9395352457220275e-05, "loss": 0.10104179382324219, "step": 1036 }, { "epoch": 0.14449940778931233, "grad_norm": 0.8560324907302856, "learning_rate": 3.939304979675642e-05, "loss": 0.12490081787109375, "step": 1037 }, { "epoch": 0.1446387514805267, "grad_norm": 1.307073950767517, "learning_rate": 3.939074282759965e-05, "loss": 0.12902259826660156, "step": 1038 }, { "epoch": 0.1447780951717411, "grad_norm": 1.1590675115585327, "learning_rate": 3.938843155026252e-05, "loss": 0.09788894653320312, "step": 1039 }, { "epoch": 0.14491743886295547, "grad_norm": 0.3905137777328491, "learning_rate": 3.938611596525855e-05, "loss": 0.08150291442871094, "step": 1040 }, { "epoch": 0.14505678255416987, "grad_norm": 1.2704097032546997, "learning_rate": 3.9383796073102206e-05, "loss": 0.11468887329101562, "step": 1041 }, { "epoch": 0.14519612624538425, "grad_norm": 1.2073370218276978, "learning_rate": 3.9381471874308916e-05, "loss": 0.11696434020996094, "step": 1042 }, { "epoch": 0.14533546993659863, "grad_norm": 0.9552823305130005, "learning_rate": 3.9379143369395054e-05, "loss": 0.10514163970947266, "step": 1043 }, { "epoch": 0.145474813627813, "grad_norm": 0.6865363121032715, "learning_rate": 3.937681055887797e-05, "loss": 0.1565380096435547, "step": 1044 }, { "epoch": 0.14561415731902738, "grad_norm": 0.7349458932876587, "learning_rate": 3.937447344327596e-05, "loss": 0.11250877380371094, "step": 1045 }, { "epoch": 0.14575350101024176, "grad_norm": 1.6060528755187988, "learning_rate": 3.937213202310828e-05, "loss": 0.12238121032714844, "step": 1046 }, { "epoch": 0.14589284470145614, "grad_norm": 0.7500627636909485, "learning_rate": 3.9369786298895144e-05, "loss": 0.08775520324707031, "step": 1047 }, { "epoch": 0.14603218839267051, "grad_norm": 0.4673171937465668, "learning_rate": 3.93674362711577e-05, "loss": 0.11436271667480469, "step": 1048 }, { "epoch": 0.1461715320838849, "grad_norm": 1.5575499534606934, "learning_rate": 3.936508194041809e-05, "loss": 0.12291145324707031, "step": 1049 }, { "epoch": 0.14631087577509927, "grad_norm": 0.48581135272979736, "learning_rate": 3.936272330719938e-05, "loss": 0.07053375244140625, "step": 1050 }, { "epoch": 0.14645021946631367, "grad_norm": 0.7019978761672974, "learning_rate": 3.936036037202561e-05, "loss": 0.10374259948730469, "step": 1051 }, { "epoch": 0.14658956315752805, "grad_norm": 0.9891214370727539, "learning_rate": 3.935799313542178e-05, "loss": 0.11409378051757812, "step": 1052 }, { "epoch": 0.14672890684874243, "grad_norm": 1.101868987083435, "learning_rate": 3.935562159791381e-05, "loss": 0.09305763244628906, "step": 1053 }, { "epoch": 0.1468682505399568, "grad_norm": 0.5298671722412109, "learning_rate": 3.9353245760028634e-05, "loss": 0.08376121520996094, "step": 1054 }, { "epoch": 0.14700759423117118, "grad_norm": 0.5000380873680115, "learning_rate": 3.935086562229408e-05, "loss": 0.09716224670410156, "step": 1055 }, { "epoch": 0.14714693792238556, "grad_norm": 1.1321380138397217, "learning_rate": 3.9348481185238976e-05, "loss": 0.21497344970703125, "step": 1056 }, { "epoch": 0.14728628161359994, "grad_norm": 0.9351024627685547, "learning_rate": 3.9346092449393084e-05, "loss": 0.11600494384765625, "step": 1057 }, { "epoch": 0.14742562530481432, "grad_norm": 1.4278959035873413, "learning_rate": 3.934369941528713e-05, "loss": 0.10428047180175781, "step": 1058 }, { "epoch": 0.1475649689960287, "grad_norm": 1.376546859741211, "learning_rate": 3.93413020834528e-05, "loss": 0.11654090881347656, "step": 1059 }, { "epoch": 0.14770431268724307, "grad_norm": 0.989070475101471, "learning_rate": 3.9338900454422704e-05, "loss": 0.11585235595703125, "step": 1060 }, { "epoch": 0.14784365637845748, "grad_norm": 0.3304140865802765, "learning_rate": 3.933649452873044e-05, "loss": 0.07465457916259766, "step": 1061 }, { "epoch": 0.14798300006967186, "grad_norm": 0.8556541800498962, "learning_rate": 3.933408430691055e-05, "loss": 0.09260177612304688, "step": 1062 }, { "epoch": 0.14812234376088623, "grad_norm": 0.8454442620277405, "learning_rate": 3.933166978949855e-05, "loss": 0.09960174560546875, "step": 1063 }, { "epoch": 0.1482616874521006, "grad_norm": 0.848351776599884, "learning_rate": 3.932925097703086e-05, "loss": 0.09482097625732422, "step": 1064 }, { "epoch": 0.148401031143315, "grad_norm": 1.447342038154602, "learning_rate": 3.932682787004489e-05, "loss": 0.12227439880371094, "step": 1065 }, { "epoch": 0.14854037483452937, "grad_norm": 0.9167925715446472, "learning_rate": 3.932440046907902e-05, "loss": 0.12813568115234375, "step": 1066 }, { "epoch": 0.14867971852574374, "grad_norm": 0.6223666071891785, "learning_rate": 3.932196877467254e-05, "loss": 0.08332061767578125, "step": 1067 }, { "epoch": 0.14881906221695812, "grad_norm": 1.1545028686523438, "learning_rate": 3.9319532787365733e-05, "loss": 0.11682510375976562, "step": 1068 }, { "epoch": 0.1489584059081725, "grad_norm": 0.7685822248458862, "learning_rate": 3.931709250769981e-05, "loss": 0.0981903076171875, "step": 1069 }, { "epoch": 0.14909774959938688, "grad_norm": 0.5964155197143555, "learning_rate": 3.931464793621695e-05, "loss": 0.10311508178710938, "step": 1070 }, { "epoch": 0.14923709329060128, "grad_norm": 0.506064772605896, "learning_rate": 3.931219907346028e-05, "loss": 0.08450508117675781, "step": 1071 }, { "epoch": 0.14937643698181566, "grad_norm": 0.288532555103302, "learning_rate": 3.930974591997387e-05, "loss": 0.07798957824707031, "step": 1072 }, { "epoch": 0.14951578067303004, "grad_norm": 0.3222642242908478, "learning_rate": 3.930728847630278e-05, "loss": 0.07794952392578125, "step": 1073 }, { "epoch": 0.1496551243642444, "grad_norm": 0.8542212843894958, "learning_rate": 3.930482674299297e-05, "loss": 0.11896514892578125, "step": 1074 }, { "epoch": 0.1497944680554588, "grad_norm": 0.4300534725189209, "learning_rate": 3.930236072059141e-05, "loss": 0.07259368896484375, "step": 1075 }, { "epoch": 0.14993381174667317, "grad_norm": 0.7836169600486755, "learning_rate": 3.929989040964596e-05, "loss": 0.09284210205078125, "step": 1076 }, { "epoch": 0.15007315543788755, "grad_norm": 0.5698338150978088, "learning_rate": 3.92974158107055e-05, "loss": 0.11429595947265625, "step": 1077 }, { "epoch": 0.15021249912910192, "grad_norm": 0.4571463167667389, "learning_rate": 3.929493692431981e-05, "loss": 0.08469867706298828, "step": 1078 }, { "epoch": 0.1503518428203163, "grad_norm": 0.9946839809417725, "learning_rate": 3.929245375103965e-05, "loss": 0.12943077087402344, "step": 1079 }, { "epoch": 0.15049118651153068, "grad_norm": 0.6127795577049255, "learning_rate": 3.928996629141671e-05, "loss": 0.1240234375, "step": 1080 }, { "epoch": 0.15063053020274508, "grad_norm": 0.6540313363075256, "learning_rate": 3.928747454600367e-05, "loss": 0.11668777465820312, "step": 1081 }, { "epoch": 0.15076987389395946, "grad_norm": 0.44004571437835693, "learning_rate": 3.928497851535412e-05, "loss": 0.08819961547851562, "step": 1082 }, { "epoch": 0.15090921758517384, "grad_norm": 0.7766216993331909, "learning_rate": 3.9282478200022624e-05, "loss": 0.1381053924560547, "step": 1083 }, { "epoch": 0.15104856127638822, "grad_norm": 0.49621301889419556, "learning_rate": 3.9279973600564706e-05, "loss": 0.0928659439086914, "step": 1084 }, { "epoch": 0.1511879049676026, "grad_norm": 0.658751904964447, "learning_rate": 3.9277464717536815e-05, "loss": 0.10831832885742188, "step": 1085 }, { "epoch": 0.15132724865881697, "grad_norm": 0.6189517974853516, "learning_rate": 3.927495155149639e-05, "loss": 0.09692955017089844, "step": 1086 }, { "epoch": 0.15146659235003135, "grad_norm": 0.7042515277862549, "learning_rate": 3.927243410300177e-05, "loss": 0.09633827209472656, "step": 1087 }, { "epoch": 0.15160593604124573, "grad_norm": 0.7039734125137329, "learning_rate": 3.9269912372612295e-05, "loss": 0.0843353271484375, "step": 1088 }, { "epoch": 0.1517452797324601, "grad_norm": 1.2230112552642822, "learning_rate": 3.926738636088823e-05, "loss": 0.11907577514648438, "step": 1089 }, { "epoch": 0.15188462342367448, "grad_norm": 0.6470591425895691, "learning_rate": 3.92648560683908e-05, "loss": 0.08072471618652344, "step": 1090 }, { "epoch": 0.15202396711488889, "grad_norm": 0.7570467591285706, "learning_rate": 3.926232149568217e-05, "loss": 0.09984970092773438, "step": 1091 }, { "epoch": 0.15216331080610326, "grad_norm": 0.9719909429550171, "learning_rate": 3.925978264332548e-05, "loss": 0.09764480590820312, "step": 1092 }, { "epoch": 0.15230265449731764, "grad_norm": 1.181147575378418, "learning_rate": 3.925723951188478e-05, "loss": 0.09545707702636719, "step": 1093 }, { "epoch": 0.15244199818853202, "grad_norm": 0.8334307074546814, "learning_rate": 3.925469210192512e-05, "loss": 0.0977621078491211, "step": 1094 }, { "epoch": 0.1525813418797464, "grad_norm": 0.49978500604629517, "learning_rate": 3.9252140414012465e-05, "loss": 0.09285736083984375, "step": 1095 }, { "epoch": 0.15272068557096077, "grad_norm": 1.2212903499603271, "learning_rate": 3.9249584448713746e-05, "loss": 0.11314582824707031, "step": 1096 }, { "epoch": 0.15286002926217515, "grad_norm": 1.1671745777130127, "learning_rate": 3.9247024206596836e-05, "loss": 0.125457763671875, "step": 1097 }, { "epoch": 0.15299937295338953, "grad_norm": 1.296875238418579, "learning_rate": 3.924445968823057e-05, "loss": 0.11758708953857422, "step": 1098 }, { "epoch": 0.1531387166446039, "grad_norm": 0.8075811862945557, "learning_rate": 3.924189089418471e-05, "loss": 0.10266685485839844, "step": 1099 }, { "epoch": 0.15327806033581828, "grad_norm": 0.6042525768280029, "learning_rate": 3.923931782503e-05, "loss": 0.09482955932617188, "step": 1100 }, { "epoch": 0.1534174040270327, "grad_norm": 0.48374706506729126, "learning_rate": 3.923674048133811e-05, "loss": 0.11186790466308594, "step": 1101 }, { "epoch": 0.15355674771824707, "grad_norm": 1.056330680847168, "learning_rate": 3.923415886368166e-05, "loss": 0.1139373779296875, "step": 1102 }, { "epoch": 0.15369609140946144, "grad_norm": 0.6797927618026733, "learning_rate": 3.923157297263425e-05, "loss": 0.09133529663085938, "step": 1103 }, { "epoch": 0.15383543510067582, "grad_norm": 0.6420765519142151, "learning_rate": 3.922898280877037e-05, "loss": 0.10420417785644531, "step": 1104 }, { "epoch": 0.1539747787918902, "grad_norm": 0.38202783465385437, "learning_rate": 3.922638837266552e-05, "loss": 0.09093666076660156, "step": 1105 }, { "epoch": 0.15411412248310458, "grad_norm": 0.39757218956947327, "learning_rate": 3.9223789664896136e-05, "loss": 0.09690666198730469, "step": 1106 }, { "epoch": 0.15425346617431895, "grad_norm": 0.8744697570800781, "learning_rate": 3.922118668603956e-05, "loss": 0.12566852569580078, "step": 1107 }, { "epoch": 0.15439280986553333, "grad_norm": 0.34353718161582947, "learning_rate": 3.9218579436674134e-05, "loss": 0.07489585876464844, "step": 1108 }, { "epoch": 0.1545321535567477, "grad_norm": 0.5745982527732849, "learning_rate": 3.921596791737912e-05, "loss": 0.11295318603515625, "step": 1109 }, { "epoch": 0.1546714972479621, "grad_norm": 0.5885826349258423, "learning_rate": 3.9213352128734746e-05, "loss": 0.08493804931640625, "step": 1110 }, { "epoch": 0.1548108409391765, "grad_norm": 1.3089457750320435, "learning_rate": 3.9210732071322175e-05, "loss": 0.10105705261230469, "step": 1111 }, { "epoch": 0.15495018463039087, "grad_norm": 1.3515362739562988, "learning_rate": 3.920810774572353e-05, "loss": 0.10039710998535156, "step": 1112 }, { "epoch": 0.15508952832160525, "grad_norm": 0.6509199142456055, "learning_rate": 3.9205479152521874e-05, "loss": 0.10841178894042969, "step": 1113 }, { "epoch": 0.15522887201281962, "grad_norm": 0.9489114880561829, "learning_rate": 3.920284629230121e-05, "loss": 0.10499000549316406, "step": 1114 }, { "epoch": 0.155368215704034, "grad_norm": 0.9219138622283936, "learning_rate": 3.920020916564652e-05, "loss": 0.1313457489013672, "step": 1115 }, { "epoch": 0.15550755939524838, "grad_norm": 0.8144559264183044, "learning_rate": 3.919756777314369e-05, "loss": 0.10344886779785156, "step": 1116 }, { "epoch": 0.15564690308646276, "grad_norm": 1.4209098815917969, "learning_rate": 3.9194922115379596e-05, "loss": 0.12770843505859375, "step": 1117 }, { "epoch": 0.15578624677767713, "grad_norm": 0.4071964919567108, "learning_rate": 3.919227219294204e-05, "loss": 0.10225105285644531, "step": 1118 }, { "epoch": 0.1559255904688915, "grad_norm": 0.663932204246521, "learning_rate": 3.918961800641976e-05, "loss": 0.11481094360351562, "step": 1119 }, { "epoch": 0.1560649341601059, "grad_norm": 0.38957080245018005, "learning_rate": 3.918695955640247e-05, "loss": 0.0826263427734375, "step": 1120 }, { "epoch": 0.1562042778513203, "grad_norm": 1.3244646787643433, "learning_rate": 3.9184296843480816e-05, "loss": 0.15844154357910156, "step": 1121 }, { "epoch": 0.15634362154253467, "grad_norm": 0.7474139332771301, "learning_rate": 3.918162986824638e-05, "loss": 0.12818527221679688, "step": 1122 }, { "epoch": 0.15648296523374905, "grad_norm": 1.1751627922058105, "learning_rate": 3.9178958631291715e-05, "loss": 0.1099395751953125, "step": 1123 }, { "epoch": 0.15662230892496343, "grad_norm": 0.6349981427192688, "learning_rate": 3.91762831332103e-05, "loss": 0.10024833679199219, "step": 1124 }, { "epoch": 0.1567616526161778, "grad_norm": 0.6428050398826599, "learning_rate": 3.917360337459658e-05, "loss": 0.08183479309082031, "step": 1125 }, { "epoch": 0.15690099630739218, "grad_norm": 0.7403628826141357, "learning_rate": 3.9170919356045935e-05, "loss": 0.12365913391113281, "step": 1126 }, { "epoch": 0.15704033999860656, "grad_norm": 0.5638999938964844, "learning_rate": 3.916823107815469e-05, "loss": 0.14251708984375, "step": 1127 }, { "epoch": 0.15717968368982094, "grad_norm": 0.8675004839897156, "learning_rate": 3.916553854152011e-05, "loss": 0.09070968627929688, "step": 1128 }, { "epoch": 0.15731902738103531, "grad_norm": 0.9647676348686218, "learning_rate": 3.916284174674042e-05, "loss": 0.10032272338867188, "step": 1129 }, { "epoch": 0.1574583710722497, "grad_norm": 0.8367294073104858, "learning_rate": 3.9160140694414796e-05, "loss": 0.10583114624023438, "step": 1130 }, { "epoch": 0.1575977147634641, "grad_norm": 0.7427570819854736, "learning_rate": 3.915743538514334e-05, "loss": 0.10660457611083984, "step": 1131 }, { "epoch": 0.15773705845467847, "grad_norm": 1.2409693002700806, "learning_rate": 3.915472581952711e-05, "loss": 0.15674591064453125, "step": 1132 }, { "epoch": 0.15787640214589285, "grad_norm": 1.4524359703063965, "learning_rate": 3.915201199816812e-05, "loss": 0.1296405792236328, "step": 1133 }, { "epoch": 0.15801574583710723, "grad_norm": 0.6520192623138428, "learning_rate": 3.914929392166931e-05, "loss": 0.07789421081542969, "step": 1134 }, { "epoch": 0.1581550895283216, "grad_norm": 0.8502861261367798, "learning_rate": 3.914657159063458e-05, "loss": 0.09886455535888672, "step": 1135 }, { "epoch": 0.15829443321953598, "grad_norm": 0.5840429067611694, "learning_rate": 3.914384500566876e-05, "loss": 0.10323143005371094, "step": 1136 }, { "epoch": 0.15843377691075036, "grad_norm": 1.6788294315338135, "learning_rate": 3.9141114167377636e-05, "loss": 0.13954925537109375, "step": 1137 }, { "epoch": 0.15857312060196474, "grad_norm": 0.4864406883716583, "learning_rate": 3.9138379076367956e-05, "loss": 0.08394241333007812, "step": 1138 }, { "epoch": 0.15871246429317912, "grad_norm": 0.914016842842102, "learning_rate": 3.913563973324738e-05, "loss": 0.10073471069335938, "step": 1139 }, { "epoch": 0.1588518079843935, "grad_norm": 1.6128093004226685, "learning_rate": 3.913289613862452e-05, "loss": 0.12928390502929688, "step": 1140 }, { "epoch": 0.1589911516756079, "grad_norm": 0.9807238578796387, "learning_rate": 3.913014829310895e-05, "loss": 0.12134933471679688, "step": 1141 }, { "epoch": 0.15913049536682228, "grad_norm": 0.9340755939483643, "learning_rate": 3.9127396197311185e-05, "loss": 0.10314369201660156, "step": 1142 }, { "epoch": 0.15926983905803666, "grad_norm": 0.6124973297119141, "learning_rate": 3.9124639851842666e-05, "loss": 0.10646629333496094, "step": 1143 }, { "epoch": 0.15940918274925103, "grad_norm": 1.4228442907333374, "learning_rate": 3.91218792573158e-05, "loss": 0.12665557861328125, "step": 1144 }, { "epoch": 0.1595485264404654, "grad_norm": 1.0365920066833496, "learning_rate": 3.911911441434392e-05, "loss": 0.13811492919921875, "step": 1145 }, { "epoch": 0.1596878701316798, "grad_norm": 0.6849114298820496, "learning_rate": 3.911634532354131e-05, "loss": 0.11650276184082031, "step": 1146 }, { "epoch": 0.15982721382289417, "grad_norm": 1.1287925243377686, "learning_rate": 3.911357198552321e-05, "loss": 0.10275745391845703, "step": 1147 }, { "epoch": 0.15996655751410854, "grad_norm": 0.7633118033409119, "learning_rate": 3.9110794400905785e-05, "loss": 0.09051322937011719, "step": 1148 }, { "epoch": 0.16010590120532292, "grad_norm": 0.43828871846199036, "learning_rate": 3.9108012570306143e-05, "loss": 0.10183525085449219, "step": 1149 }, { "epoch": 0.1602452448965373, "grad_norm": 0.5595589280128479, "learning_rate": 3.910522649434236e-05, "loss": 0.12334251403808594, "step": 1150 }, { "epoch": 0.1603845885877517, "grad_norm": 0.7032245993614197, "learning_rate": 3.9102436173633425e-05, "loss": 0.10942268371582031, "step": 1151 }, { "epoch": 0.16052393227896608, "grad_norm": 0.4530486464500427, "learning_rate": 3.9099641608799286e-05, "loss": 0.07540130615234375, "step": 1152 }, { "epoch": 0.16066327597018046, "grad_norm": 0.7467103004455566, "learning_rate": 3.9096842800460836e-05, "loss": 0.10842704772949219, "step": 1153 }, { "epoch": 0.16080261966139484, "grad_norm": 0.7465243935585022, "learning_rate": 3.909403974923991e-05, "loss": 0.12427902221679688, "step": 1154 }, { "epoch": 0.1609419633526092, "grad_norm": 0.8461906909942627, "learning_rate": 3.9091232455759274e-05, "loss": 0.13983154296875, "step": 1155 }, { "epoch": 0.1610813070438236, "grad_norm": 0.33672839403152466, "learning_rate": 3.908842092064264e-05, "loss": 0.08561325073242188, "step": 1156 }, { "epoch": 0.16122065073503797, "grad_norm": 0.6950038075447083, "learning_rate": 3.9085605144514674e-05, "loss": 0.09231948852539062, "step": 1157 }, { "epoch": 0.16135999442625235, "grad_norm": 1.0353983640670776, "learning_rate": 3.908278512800098e-05, "loss": 0.1248016357421875, "step": 1158 }, { "epoch": 0.16149933811746672, "grad_norm": 0.8223158121109009, "learning_rate": 3.9079960871728094e-05, "loss": 0.09426498413085938, "step": 1159 }, { "epoch": 0.1616386818086811, "grad_norm": 0.9021525382995605, "learning_rate": 3.907713237632351e-05, "loss": 0.13553619384765625, "step": 1160 }, { "epoch": 0.1617780254998955, "grad_norm": 0.7501322031021118, "learning_rate": 3.907429964241565e-05, "loss": 0.1172332763671875, "step": 1161 }, { "epoch": 0.16191736919110988, "grad_norm": 0.27710118889808655, "learning_rate": 3.907146267063389e-05, "loss": 0.06795883178710938, "step": 1162 }, { "epoch": 0.16205671288232426, "grad_norm": 0.34633028507232666, "learning_rate": 3.906862146160852e-05, "loss": 0.09498214721679688, "step": 1163 }, { "epoch": 0.16219605657353864, "grad_norm": 0.6766300201416016, "learning_rate": 3.9065776015970815e-05, "loss": 0.09692192077636719, "step": 1164 }, { "epoch": 0.16233540026475302, "grad_norm": 1.2373223304748535, "learning_rate": 3.906292633435295e-05, "loss": 0.11261558532714844, "step": 1165 }, { "epoch": 0.1624747439559674, "grad_norm": 1.3194924592971802, "learning_rate": 3.906007241738807e-05, "loss": 0.1446857452392578, "step": 1166 }, { "epoch": 0.16261408764718177, "grad_norm": 1.782264232635498, "learning_rate": 3.9057214265710245e-05, "loss": 0.1426067352294922, "step": 1167 }, { "epoch": 0.16275343133839615, "grad_norm": 0.9703130722045898, "learning_rate": 3.9054351879954505e-05, "loss": 0.08833503723144531, "step": 1168 }, { "epoch": 0.16289277502961053, "grad_norm": 2.350311756134033, "learning_rate": 3.905148526075679e-05, "loss": 0.15925025939941406, "step": 1169 }, { "epoch": 0.1630321187208249, "grad_norm": 1.0752640962600708, "learning_rate": 3.9048614408754e-05, "loss": 0.1352405548095703, "step": 1170 }, { "epoch": 0.1631714624120393, "grad_norm": 0.7407616972923279, "learning_rate": 3.904573932458398e-05, "loss": 0.08530235290527344, "step": 1171 }, { "epoch": 0.1633108061032537, "grad_norm": 0.7590469717979431, "learning_rate": 3.90428600088855e-05, "loss": 0.09467887878417969, "step": 1172 }, { "epoch": 0.16345014979446806, "grad_norm": 0.5962256193161011, "learning_rate": 3.9039976462298284e-05, "loss": 0.09307670593261719, "step": 1173 }, { "epoch": 0.16358949348568244, "grad_norm": 0.6715301871299744, "learning_rate": 3.9037088685462985e-05, "loss": 0.11414718627929688, "step": 1174 }, { "epoch": 0.16372883717689682, "grad_norm": 0.5469124913215637, "learning_rate": 3.9034196679021206e-05, "loss": 0.09520244598388672, "step": 1175 }, { "epoch": 0.1638681808681112, "grad_norm": 0.6244786381721497, "learning_rate": 3.903130044361549e-05, "loss": 0.07747459411621094, "step": 1176 }, { "epoch": 0.16400752455932557, "grad_norm": 1.0193482637405396, "learning_rate": 3.902839997988929e-05, "loss": 0.11363029479980469, "step": 1177 }, { "epoch": 0.16414686825053995, "grad_norm": 0.6269405484199524, "learning_rate": 3.902549528848705e-05, "loss": 0.08823013305664062, "step": 1178 }, { "epoch": 0.16428621194175433, "grad_norm": 0.9852508306503296, "learning_rate": 3.902258637005412e-05, "loss": 0.13587188720703125, "step": 1179 }, { "epoch": 0.1644255556329687, "grad_norm": 0.9998050332069397, "learning_rate": 3.901967322523679e-05, "loss": 0.09872817993164062, "step": 1180 }, { "epoch": 0.16456489932418308, "grad_norm": 1.4214932918548584, "learning_rate": 3.901675585468229e-05, "loss": 0.07799911499023438, "step": 1181 }, { "epoch": 0.1647042430153975, "grad_norm": 1.1722124814987183, "learning_rate": 3.9013834259038805e-05, "loss": 0.14375686645507812, "step": 1182 }, { "epoch": 0.16484358670661187, "grad_norm": 0.5825862288475037, "learning_rate": 3.9010908438955436e-05, "loss": 0.11171340942382812, "step": 1183 }, { "epoch": 0.16498293039782624, "grad_norm": 0.3763311207294464, "learning_rate": 3.900797839508225e-05, "loss": 0.08905601501464844, "step": 1184 }, { "epoch": 0.16512227408904062, "grad_norm": 1.557778000831604, "learning_rate": 3.900504412807021e-05, "loss": 0.13108444213867188, "step": 1185 }, { "epoch": 0.165261617780255, "grad_norm": 0.5098754167556763, "learning_rate": 3.900210563857127e-05, "loss": 0.09978866577148438, "step": 1186 }, { "epoch": 0.16540096147146938, "grad_norm": 0.5490496754646301, "learning_rate": 3.8999162927238274e-05, "loss": 0.09748268127441406, "step": 1187 }, { "epoch": 0.16554030516268375, "grad_norm": 0.7629265785217285, "learning_rate": 3.899621599472504e-05, "loss": 0.1368427276611328, "step": 1188 }, { "epoch": 0.16567964885389813, "grad_norm": 0.4778222143650055, "learning_rate": 3.899326484168629e-05, "loss": 0.09628868103027344, "step": 1189 }, { "epoch": 0.1658189925451125, "grad_norm": 0.4263807535171509, "learning_rate": 3.899030946877773e-05, "loss": 0.07402992248535156, "step": 1190 }, { "epoch": 0.1659583362363269, "grad_norm": 0.2838360667228699, "learning_rate": 3.898734987665596e-05, "loss": 0.07217597961425781, "step": 1191 }, { "epoch": 0.1660976799275413, "grad_norm": 0.4744914472103119, "learning_rate": 3.898438606597853e-05, "loss": 0.09826469421386719, "step": 1192 }, { "epoch": 0.16623702361875567, "grad_norm": 0.7216706275939941, "learning_rate": 3.898141803740393e-05, "loss": 0.10869979858398438, "step": 1193 }, { "epoch": 0.16637636730997005, "grad_norm": 0.6144678592681885, "learning_rate": 3.897844579159161e-05, "loss": 0.09122848510742188, "step": 1194 }, { "epoch": 0.16651571100118442, "grad_norm": 0.5022600293159485, "learning_rate": 3.897546932920191e-05, "loss": 0.08252906799316406, "step": 1195 }, { "epoch": 0.1666550546923988, "grad_norm": 0.6524208784103394, "learning_rate": 3.897248865089615e-05, "loss": 0.09522819519042969, "step": 1196 }, { "epoch": 0.16679439838361318, "grad_norm": 1.0800602436065674, "learning_rate": 3.8969503757336564e-05, "loss": 0.11676979064941406, "step": 1197 }, { "epoch": 0.16693374207482756, "grad_norm": 0.49601659178733826, "learning_rate": 3.896651464918632e-05, "loss": 0.07990837097167969, "step": 1198 }, { "epoch": 0.16707308576604193, "grad_norm": 0.4810534417629242, "learning_rate": 3.896352132710953e-05, "loss": 0.0899658203125, "step": 1199 }, { "epoch": 0.1672124294572563, "grad_norm": 1.1268420219421387, "learning_rate": 3.896052379177125e-05, "loss": 0.11725997924804688, "step": 1200 }, { "epoch": 0.1673517731484707, "grad_norm": 0.6495039463043213, "learning_rate": 3.895752204383746e-05, "loss": 0.09661197662353516, "step": 1201 }, { "epoch": 0.1674911168396851, "grad_norm": 0.819868803024292, "learning_rate": 3.8954516083975075e-05, "loss": 0.08772659301757812, "step": 1202 }, { "epoch": 0.16763046053089947, "grad_norm": 0.6955235004425049, "learning_rate": 3.8951505912851956e-05, "loss": 0.11422920227050781, "step": 1203 }, { "epoch": 0.16776980422211385, "grad_norm": 0.8201819658279419, "learning_rate": 3.89484915311369e-05, "loss": 0.09656143188476562, "step": 1204 }, { "epoch": 0.16790914791332823, "grad_norm": 0.7101025581359863, "learning_rate": 3.8945472939499616e-05, "loss": 0.11252784729003906, "step": 1205 }, { "epoch": 0.1680484916045426, "grad_norm": 0.3870401680469513, "learning_rate": 3.894245013861079e-05, "loss": 0.09090042114257812, "step": 1206 }, { "epoch": 0.16818783529575698, "grad_norm": 0.5592317581176758, "learning_rate": 3.8939423129141996e-05, "loss": 0.07826042175292969, "step": 1207 }, { "epoch": 0.16832717898697136, "grad_norm": 1.2223643064498901, "learning_rate": 3.8936391911765784e-05, "loss": 0.1281757354736328, "step": 1208 }, { "epoch": 0.16846652267818574, "grad_norm": 1.2784321308135986, "learning_rate": 3.893335648715561e-05, "loss": 0.14121246337890625, "step": 1209 }, { "epoch": 0.16860586636940011, "grad_norm": 0.6280313730239868, "learning_rate": 3.893031685598588e-05, "loss": 0.12414360046386719, "step": 1210 }, { "epoch": 0.1687452100606145, "grad_norm": 0.5173035264015198, "learning_rate": 3.8927273018931934e-05, "loss": 0.09871864318847656, "step": 1211 }, { "epoch": 0.1688845537518289, "grad_norm": 0.5425534248352051, "learning_rate": 3.892422497667004e-05, "loss": 0.07556819915771484, "step": 1212 }, { "epoch": 0.16902389744304328, "grad_norm": 0.7019771337509155, "learning_rate": 3.89211727298774e-05, "loss": 0.09800529479980469, "step": 1213 }, { "epoch": 0.16916324113425765, "grad_norm": 0.6072948575019836, "learning_rate": 3.891811627923216e-05, "loss": 0.09136390686035156, "step": 1214 }, { "epoch": 0.16930258482547203, "grad_norm": 1.2019731998443604, "learning_rate": 3.89150556254134e-05, "loss": 0.1353759765625, "step": 1215 }, { "epoch": 0.1694419285166864, "grad_norm": 0.5014553070068359, "learning_rate": 3.89119907691011e-05, "loss": 0.09474945068359375, "step": 1216 }, { "epoch": 0.16958127220790079, "grad_norm": 0.509661853313446, "learning_rate": 3.8908921710976234e-05, "loss": 0.13306427001953125, "step": 1217 }, { "epoch": 0.16972061589911516, "grad_norm": 0.9827958941459656, "learning_rate": 3.890584845172066e-05, "loss": 0.1262645721435547, "step": 1218 }, { "epoch": 0.16985995959032954, "grad_norm": 0.6204047203063965, "learning_rate": 3.890277099201718e-05, "loss": 0.09029579162597656, "step": 1219 }, { "epoch": 0.16999930328154392, "grad_norm": 0.5655670166015625, "learning_rate": 3.889968933254954e-05, "loss": 0.11165618896484375, "step": 1220 }, { "epoch": 0.1701386469727583, "grad_norm": 0.8767064809799194, "learning_rate": 3.889660347400243e-05, "loss": 0.10636234283447266, "step": 1221 }, { "epoch": 0.1702779906639727, "grad_norm": 1.2394474744796753, "learning_rate": 3.889351341706144e-05, "loss": 0.11235618591308594, "step": 1222 }, { "epoch": 0.17041733435518708, "grad_norm": 1.1138149499893188, "learning_rate": 3.8890419162413114e-05, "loss": 0.1050882339477539, "step": 1223 }, { "epoch": 0.17055667804640146, "grad_norm": 0.616955578327179, "learning_rate": 3.8887320710744923e-05, "loss": 0.07587432861328125, "step": 1224 }, { "epoch": 0.17069602173761583, "grad_norm": 0.2585721015930176, "learning_rate": 3.888421806274528e-05, "loss": 0.06199359893798828, "step": 1225 }, { "epoch": 0.1708353654288302, "grad_norm": 1.174164891242981, "learning_rate": 3.8881111219103516e-05, "loss": 0.08356475830078125, "step": 1226 }, { "epoch": 0.1709747091200446, "grad_norm": 1.522684097290039, "learning_rate": 3.88780001805099e-05, "loss": 0.1101837158203125, "step": 1227 }, { "epoch": 0.17111405281125897, "grad_norm": 1.34418523311615, "learning_rate": 3.8874884947655636e-05, "loss": 0.10564422607421875, "step": 1228 }, { "epoch": 0.17125339650247334, "grad_norm": 1.5710673332214355, "learning_rate": 3.8871765521232865e-05, "loss": 0.11625289916992188, "step": 1229 }, { "epoch": 0.17139274019368772, "grad_norm": 0.6479690670967102, "learning_rate": 3.8868641901934636e-05, "loss": 0.11217880249023438, "step": 1230 }, { "epoch": 0.1715320838849021, "grad_norm": 0.7428600788116455, "learning_rate": 3.886551409045496e-05, "loss": 0.1382732391357422, "step": 1231 }, { "epoch": 0.1716714275761165, "grad_norm": 1.1228066682815552, "learning_rate": 3.886238208748876e-05, "loss": 0.08400344848632812, "step": 1232 }, { "epoch": 0.17181077126733088, "grad_norm": 1.0170844793319702, "learning_rate": 3.885924589373189e-05, "loss": 0.10818862915039062, "step": 1233 }, { "epoch": 0.17195011495854526, "grad_norm": 1.093823790550232, "learning_rate": 3.885610550988115e-05, "loss": 0.08206653594970703, "step": 1234 }, { "epoch": 0.17208945864975964, "grad_norm": 1.119759440422058, "learning_rate": 3.885296093663426e-05, "loss": 0.11375808715820312, "step": 1235 }, { "epoch": 0.172228802340974, "grad_norm": 0.4808886647224426, "learning_rate": 3.884981217468987e-05, "loss": 0.08153915405273438, "step": 1236 }, { "epoch": 0.1723681460321884, "grad_norm": 0.4569849967956543, "learning_rate": 3.884665922474756e-05, "loss": 0.08654022216796875, "step": 1237 }, { "epoch": 0.17250748972340277, "grad_norm": 1.3318341970443726, "learning_rate": 3.884350208750784e-05, "loss": 0.12014198303222656, "step": 1238 }, { "epoch": 0.17264683341461715, "grad_norm": 0.34421777725219727, "learning_rate": 3.884034076367218e-05, "loss": 0.08241653442382812, "step": 1239 }, { "epoch": 0.17278617710583152, "grad_norm": 1.0937418937683105, "learning_rate": 3.883717525394292e-05, "loss": 0.120574951171875, "step": 1240 }, { "epoch": 0.1729255207970459, "grad_norm": 0.6259034872055054, "learning_rate": 3.883400555902338e-05, "loss": 0.09210014343261719, "step": 1241 }, { "epoch": 0.1730648644882603, "grad_norm": 0.722667396068573, "learning_rate": 3.88308316796178e-05, "loss": 0.089874267578125, "step": 1242 }, { "epoch": 0.17320420817947468, "grad_norm": 0.677371084690094, "learning_rate": 3.882765361643133e-05, "loss": 0.12020492553710938, "step": 1243 }, { "epoch": 0.17334355187068906, "grad_norm": 0.6265414357185364, "learning_rate": 3.882447137017007e-05, "loss": 0.10531425476074219, "step": 1244 }, { "epoch": 0.17348289556190344, "grad_norm": 0.8198543787002563, "learning_rate": 3.882128494154104e-05, "loss": 0.11435508728027344, "step": 1245 }, { "epoch": 0.17362223925311782, "grad_norm": 0.7933017611503601, "learning_rate": 3.8818094331252194e-05, "loss": 0.10502433776855469, "step": 1246 }, { "epoch": 0.1737615829443322, "grad_norm": 0.855887770652771, "learning_rate": 3.881489954001241e-05, "loss": 0.11224746704101562, "step": 1247 }, { "epoch": 0.17390092663554657, "grad_norm": 0.5570697784423828, "learning_rate": 3.88117005685315e-05, "loss": 0.08007240295410156, "step": 1248 }, { "epoch": 0.17404027032676095, "grad_norm": 0.36361071467399597, "learning_rate": 3.88084974175202e-05, "loss": 0.07812881469726562, "step": 1249 }, { "epoch": 0.17417961401797533, "grad_norm": 0.6572778224945068, "learning_rate": 3.8805290087690196e-05, "loss": 0.09654045104980469, "step": 1250 }, { "epoch": 0.1743189577091897, "grad_norm": 1.4070894718170166, "learning_rate": 3.880207857975405e-05, "loss": 0.16957855224609375, "step": 1251 }, { "epoch": 0.1744583014004041, "grad_norm": 0.8828078508377075, "learning_rate": 3.879886289442531e-05, "loss": 0.11992454528808594, "step": 1252 }, { "epoch": 0.1745976450916185, "grad_norm": 0.34186825156211853, "learning_rate": 3.879564303241841e-05, "loss": 0.07204246520996094, "step": 1253 }, { "epoch": 0.17473698878283286, "grad_norm": 2.0179011821746826, "learning_rate": 3.8792418994448746e-05, "loss": 0.17632484436035156, "step": 1254 }, { "epoch": 0.17487633247404724, "grad_norm": 0.9922803044319153, "learning_rate": 3.8789190781232626e-05, "loss": 0.1372966766357422, "step": 1255 }, { "epoch": 0.17501567616526162, "grad_norm": 0.3026036024093628, "learning_rate": 3.878595839348727e-05, "loss": 0.0642852783203125, "step": 1256 }, { "epoch": 0.175155019856476, "grad_norm": 0.7335386872291565, "learning_rate": 3.878272183193085e-05, "loss": 0.10975360870361328, "step": 1257 }, { "epoch": 0.17529436354769037, "grad_norm": 0.6881363391876221, "learning_rate": 3.8779481097282464e-05, "loss": 0.09327507019042969, "step": 1258 }, { "epoch": 0.17543370723890475, "grad_norm": 1.0665518045425415, "learning_rate": 3.8776236190262114e-05, "loss": 0.11595344543457031, "step": 1259 }, { "epoch": 0.17557305093011913, "grad_norm": 0.3905077278614044, "learning_rate": 3.877298711159076e-05, "loss": 0.08664131164550781, "step": 1260 }, { "epoch": 0.1757123946213335, "grad_norm": 0.4281877875328064, "learning_rate": 3.876973386199025e-05, "loss": 0.059622764587402344, "step": 1261 }, { "epoch": 0.1758517383125479, "grad_norm": 0.8304551839828491, "learning_rate": 3.87664764421834e-05, "loss": 0.10391426086425781, "step": 1262 }, { "epoch": 0.1759910820037623, "grad_norm": 0.7611992359161377, "learning_rate": 3.876321485289394e-05, "loss": 0.10839557647705078, "step": 1263 }, { "epoch": 0.17613042569497667, "grad_norm": 0.44577690958976746, "learning_rate": 3.87599490948465e-05, "loss": 0.092315673828125, "step": 1264 }, { "epoch": 0.17626976938619104, "grad_norm": 0.5830724239349365, "learning_rate": 3.875667916876668e-05, "loss": 0.09503555297851562, "step": 1265 }, { "epoch": 0.17640911307740542, "grad_norm": 0.49108240008354187, "learning_rate": 3.875340507538096e-05, "loss": 0.07281875610351562, "step": 1266 }, { "epoch": 0.1765484567686198, "grad_norm": 0.4723402261734009, "learning_rate": 3.875012681541678e-05, "loss": 0.11981391906738281, "step": 1267 }, { "epoch": 0.17668780045983418, "grad_norm": 0.7398895621299744, "learning_rate": 3.87468443896025e-05, "loss": 0.098968505859375, "step": 1268 }, { "epoch": 0.17682714415104855, "grad_norm": 0.6669520139694214, "learning_rate": 3.8743557798667395e-05, "loss": 0.10626983642578125, "step": 1269 }, { "epoch": 0.17696648784226293, "grad_norm": 1.3382683992385864, "learning_rate": 3.874026704334167e-05, "loss": 0.14954757690429688, "step": 1270 }, { "epoch": 0.1771058315334773, "grad_norm": 0.5279415845870972, "learning_rate": 3.873697212435645e-05, "loss": 0.13609886169433594, "step": 1271 }, { "epoch": 0.17724517522469171, "grad_norm": 0.9191502928733826, "learning_rate": 3.87336730424438e-05, "loss": 0.11865234375, "step": 1272 }, { "epoch": 0.1773845189159061, "grad_norm": 0.9079429507255554, "learning_rate": 3.87303697983367e-05, "loss": 0.11186027526855469, "step": 1273 }, { "epoch": 0.17752386260712047, "grad_norm": 1.1797435283660889, "learning_rate": 3.872706239276904e-05, "loss": 0.11644363403320312, "step": 1274 }, { "epoch": 0.17766320629833485, "grad_norm": 0.5337759256362915, "learning_rate": 3.8723750826475674e-05, "loss": 0.12830543518066406, "step": 1275 }, { "epoch": 0.17780254998954922, "grad_norm": 0.5210356712341309, "learning_rate": 3.872043510019235e-05, "loss": 0.09801864624023438, "step": 1276 }, { "epoch": 0.1779418936807636, "grad_norm": 0.7031455039978027, "learning_rate": 3.871711521465573e-05, "loss": 0.09349822998046875, "step": 1277 }, { "epoch": 0.17808123737197798, "grad_norm": 0.5507082939147949, "learning_rate": 3.871379117060343e-05, "loss": 0.09299468994140625, "step": 1278 }, { "epoch": 0.17822058106319236, "grad_norm": 0.5519847869873047, "learning_rate": 3.871046296877398e-05, "loss": 0.10372352600097656, "step": 1279 }, { "epoch": 0.17835992475440673, "grad_norm": 0.41661226749420166, "learning_rate": 3.870713060990682e-05, "loss": 0.08462715148925781, "step": 1280 }, { "epoch": 0.1784992684456211, "grad_norm": 0.5839899778366089, "learning_rate": 3.870379409474233e-05, "loss": 0.11013317108154297, "step": 1281 }, { "epoch": 0.17863861213683552, "grad_norm": 0.6457597017288208, "learning_rate": 3.870045342402181e-05, "loss": 0.08987808227539062, "step": 1282 }, { "epoch": 0.1787779558280499, "grad_norm": 0.3778478801250458, "learning_rate": 3.8697108598487474e-05, "loss": 0.0996694564819336, "step": 1283 }, { "epoch": 0.17891729951926427, "grad_norm": 0.8293259143829346, "learning_rate": 3.8693759618882475e-05, "loss": 0.11539459228515625, "step": 1284 }, { "epoch": 0.17905664321047865, "grad_norm": 0.920018196105957, "learning_rate": 3.8690406485950874e-05, "loss": 0.09552955627441406, "step": 1285 }, { "epoch": 0.17919598690169303, "grad_norm": 0.4179550111293793, "learning_rate": 3.868704920043766e-05, "loss": 0.08619308471679688, "step": 1286 }, { "epoch": 0.1793353305929074, "grad_norm": 0.8999516367912292, "learning_rate": 3.8683687763088745e-05, "loss": 0.16223907470703125, "step": 1287 }, { "epoch": 0.17947467428412178, "grad_norm": 1.5918910503387451, "learning_rate": 3.868032217465097e-05, "loss": 0.15572357177734375, "step": 1288 }, { "epoch": 0.17961401797533616, "grad_norm": 0.9245583415031433, "learning_rate": 3.867695243587207e-05, "loss": 0.12650299072265625, "step": 1289 }, { "epoch": 0.17975336166655054, "grad_norm": 0.6573454737663269, "learning_rate": 3.8673578547500754e-05, "loss": 0.10108566284179688, "step": 1290 }, { "epoch": 0.17989270535776491, "grad_norm": 0.7187705039978027, "learning_rate": 3.867020051028661e-05, "loss": 0.10828971862792969, "step": 1291 }, { "epoch": 0.18003204904897932, "grad_norm": 0.686320960521698, "learning_rate": 3.8666818324980165e-05, "loss": 0.14690113067626953, "step": 1292 }, { "epoch": 0.1801713927401937, "grad_norm": 1.088727355003357, "learning_rate": 3.866343199233285e-05, "loss": 0.10896492004394531, "step": 1293 }, { "epoch": 0.18031073643140808, "grad_norm": 0.9809849858283997, "learning_rate": 3.866004151309704e-05, "loss": 0.1352214813232422, "step": 1294 }, { "epoch": 0.18045008012262245, "grad_norm": 0.3993098735809326, "learning_rate": 3.8656646888026026e-05, "loss": 0.08288383483886719, "step": 1295 }, { "epoch": 0.18058942381383683, "grad_norm": 0.683144748210907, "learning_rate": 3.8653248117874015e-05, "loss": 0.11868476867675781, "step": 1296 }, { "epoch": 0.1807287675050512, "grad_norm": 0.6973060965538025, "learning_rate": 3.8649845203396125e-05, "loss": 0.07170772552490234, "step": 1297 }, { "epoch": 0.18086811119626559, "grad_norm": 0.8602525591850281, "learning_rate": 3.8646438145348415e-05, "loss": 0.14191436767578125, "step": 1298 }, { "epoch": 0.18100745488747996, "grad_norm": 0.7572188377380371, "learning_rate": 3.8643026944487856e-05, "loss": 0.12969970703125, "step": 1299 }, { "epoch": 0.18114679857869434, "grad_norm": 0.35731810331344604, "learning_rate": 3.8639611601572345e-05, "loss": 0.09122085571289062, "step": 1300 }, { "epoch": 0.18128614226990872, "grad_norm": 0.39366886019706726, "learning_rate": 3.8636192117360676e-05, "loss": 0.1086273193359375, "step": 1301 }, { "epoch": 0.18142548596112312, "grad_norm": 0.46176785230636597, "learning_rate": 3.8632768492612596e-05, "loss": 0.08187675476074219, "step": 1302 }, { "epoch": 0.1815648296523375, "grad_norm": 1.469296932220459, "learning_rate": 3.862934072808875e-05, "loss": 0.11510562896728516, "step": 1303 }, { "epoch": 0.18170417334355188, "grad_norm": 1.019187331199646, "learning_rate": 3.86259088245507e-05, "loss": 0.12382888793945312, "step": 1304 }, { "epoch": 0.18184351703476626, "grad_norm": 0.5867264866828918, "learning_rate": 3.8622472782760956e-05, "loss": 0.15715408325195312, "step": 1305 }, { "epoch": 0.18198286072598063, "grad_norm": 0.5373423099517822, "learning_rate": 3.861903260348291e-05, "loss": 0.11078834533691406, "step": 1306 }, { "epoch": 0.182122204417195, "grad_norm": 0.8571909666061401, "learning_rate": 3.8615588287480906e-05, "loss": 0.13559341430664062, "step": 1307 }, { "epoch": 0.1822615481084094, "grad_norm": 1.1384767293930054, "learning_rate": 3.861213983552018e-05, "loss": 0.10149955749511719, "step": 1308 }, { "epoch": 0.18240089179962377, "grad_norm": 0.8413253426551819, "learning_rate": 3.860868724836691e-05, "loss": 0.1054534912109375, "step": 1309 }, { "epoch": 0.18254023549083814, "grad_norm": 0.5824040770530701, "learning_rate": 3.860523052678818e-05, "loss": 0.10660362243652344, "step": 1310 }, { "epoch": 0.18267957918205252, "grad_norm": 0.576574981212616, "learning_rate": 3.860176967155198e-05, "loss": 0.08987808227539062, "step": 1311 }, { "epoch": 0.18281892287326693, "grad_norm": 0.6395757794380188, "learning_rate": 3.8598304683427257e-05, "loss": 0.08406829833984375, "step": 1312 }, { "epoch": 0.1829582665644813, "grad_norm": 0.6899009943008423, "learning_rate": 3.859483556318384e-05, "loss": 0.09299850463867188, "step": 1313 }, { "epoch": 0.18309761025569568, "grad_norm": 0.7416677474975586, "learning_rate": 3.859136231159248e-05, "loss": 0.08332252502441406, "step": 1314 }, { "epoch": 0.18323695394691006, "grad_norm": 0.5145108103752136, "learning_rate": 3.858788492942486e-05, "loss": 0.12900733947753906, "step": 1315 }, { "epoch": 0.18337629763812444, "grad_norm": 0.480636864900589, "learning_rate": 3.8584403417453586e-05, "loss": 0.08121299743652344, "step": 1316 }, { "epoch": 0.1835156413293388, "grad_norm": 1.873380184173584, "learning_rate": 3.858091777645216e-05, "loss": 0.14214706420898438, "step": 1317 }, { "epoch": 0.1836549850205532, "grad_norm": 1.2113927602767944, "learning_rate": 3.857742800719501e-05, "loss": 0.08494377136230469, "step": 1318 }, { "epoch": 0.18379432871176757, "grad_norm": 0.44741958379745483, "learning_rate": 3.857393411045749e-05, "loss": 0.085693359375, "step": 1319 }, { "epoch": 0.18393367240298195, "grad_norm": 0.5662305355072021, "learning_rate": 3.8570436087015855e-05, "loss": 0.10150623321533203, "step": 1320 }, { "epoch": 0.18407301609419632, "grad_norm": 0.5008440613746643, "learning_rate": 3.8566933937647294e-05, "loss": 0.08993721008300781, "step": 1321 }, { "epoch": 0.18421235978541073, "grad_norm": 0.8278573751449585, "learning_rate": 3.856342766312991e-05, "loss": 0.1232757568359375, "step": 1322 }, { "epoch": 0.1843517034766251, "grad_norm": 0.5242437720298767, "learning_rate": 3.85599172642427e-05, "loss": 0.11133146286010742, "step": 1323 }, { "epoch": 0.18449104716783948, "grad_norm": 0.5966528058052063, "learning_rate": 3.855640274176561e-05, "loss": 0.08792304992675781, "step": 1324 }, { "epoch": 0.18463039085905386, "grad_norm": 0.3977065682411194, "learning_rate": 3.8552884096479476e-05, "loss": 0.09332084655761719, "step": 1325 }, { "epoch": 0.18476973455026824, "grad_norm": 0.689285933971405, "learning_rate": 3.854936132916607e-05, "loss": 0.11291122436523438, "step": 1326 }, { "epoch": 0.18490907824148262, "grad_norm": 1.0005016326904297, "learning_rate": 3.854583444060806e-05, "loss": 0.12997817993164062, "step": 1327 }, { "epoch": 0.185048421932697, "grad_norm": 0.4531738758087158, "learning_rate": 3.854230343158906e-05, "loss": 0.08896446228027344, "step": 1328 }, { "epoch": 0.18518776562391137, "grad_norm": 0.4677441418170929, "learning_rate": 3.8538768302893544e-05, "loss": 0.08194732666015625, "step": 1329 }, { "epoch": 0.18532710931512575, "grad_norm": 1.2755392789840698, "learning_rate": 3.853522905530698e-05, "loss": 0.11968421936035156, "step": 1330 }, { "epoch": 0.18546645300634013, "grad_norm": 0.7076494693756104, "learning_rate": 3.853168568961567e-05, "loss": 0.10310173034667969, "step": 1331 }, { "epoch": 0.18560579669755453, "grad_norm": 0.7455466389656067, "learning_rate": 3.852813820660689e-05, "loss": 0.13688278198242188, "step": 1332 }, { "epoch": 0.1857451403887689, "grad_norm": 0.9865037202835083, "learning_rate": 3.852458660706881e-05, "loss": 0.12118148803710938, "step": 1333 }, { "epoch": 0.1858844840799833, "grad_norm": 0.9995445013046265, "learning_rate": 3.85210308917905e-05, "loss": 0.0994405746459961, "step": 1334 }, { "epoch": 0.18602382777119766, "grad_norm": 0.48521867394447327, "learning_rate": 3.8517471061561974e-05, "loss": 0.08416080474853516, "step": 1335 }, { "epoch": 0.18616317146241204, "grad_norm": 0.42504364252090454, "learning_rate": 3.851390711717414e-05, "loss": 0.08894538879394531, "step": 1336 }, { "epoch": 0.18630251515362642, "grad_norm": 0.24130935966968536, "learning_rate": 3.851033905941882e-05, "loss": 0.06106853485107422, "step": 1337 }, { "epoch": 0.1864418588448408, "grad_norm": 0.8676820397377014, "learning_rate": 3.850676688908877e-05, "loss": 0.10792732238769531, "step": 1338 }, { "epoch": 0.18658120253605517, "grad_norm": 0.7004497051239014, "learning_rate": 3.8503190606977624e-05, "loss": 0.07880973815917969, "step": 1339 }, { "epoch": 0.18672054622726955, "grad_norm": 0.48882248997688293, "learning_rate": 3.849961021387996e-05, "loss": 0.09171295166015625, "step": 1340 }, { "epoch": 0.18685988991848393, "grad_norm": 0.9736695289611816, "learning_rate": 3.849602571059127e-05, "loss": 0.1238861083984375, "step": 1341 }, { "epoch": 0.18699923360969833, "grad_norm": 0.3489600419998169, "learning_rate": 3.849243709790793e-05, "loss": 0.07259559631347656, "step": 1342 }, { "epoch": 0.1871385773009127, "grad_norm": 0.7062949538230896, "learning_rate": 3.848884437662725e-05, "loss": 0.10225868225097656, "step": 1343 }, { "epoch": 0.1872779209921271, "grad_norm": 0.7970095276832581, "learning_rate": 3.8485247547547465e-05, "loss": 0.1495208740234375, "step": 1344 }, { "epoch": 0.18741726468334147, "grad_norm": 0.3726244568824768, "learning_rate": 3.8481646611467704e-05, "loss": 0.09688758850097656, "step": 1345 }, { "epoch": 0.18755660837455584, "grad_norm": 0.7556194067001343, "learning_rate": 3.8478041569188e-05, "loss": 0.12447166442871094, "step": 1346 }, { "epoch": 0.18769595206577022, "grad_norm": 1.2556729316711426, "learning_rate": 3.8474432421509324e-05, "loss": 0.1305103302001953, "step": 1347 }, { "epoch": 0.1878352957569846, "grad_norm": 0.9894823431968689, "learning_rate": 3.847081916923355e-05, "loss": 0.12862586975097656, "step": 1348 }, { "epoch": 0.18797463944819898, "grad_norm": 0.37070226669311523, "learning_rate": 3.846720181316344e-05, "loss": 0.08415603637695312, "step": 1349 }, { "epoch": 0.18811398313941335, "grad_norm": 1.2234729528427124, "learning_rate": 3.846358035410271e-05, "loss": 0.12853240966796875, "step": 1350 }, { "epoch": 0.18825332683062773, "grad_norm": 0.48727330565452576, "learning_rate": 3.845995479285595e-05, "loss": 0.08751487731933594, "step": 1351 }, { "epoch": 0.18839267052184214, "grad_norm": 0.5283886194229126, "learning_rate": 3.845632513022869e-05, "loss": 0.1024932861328125, "step": 1352 }, { "epoch": 0.18853201421305651, "grad_norm": 1.0089560747146606, "learning_rate": 3.845269136702734e-05, "loss": 0.11927986145019531, "step": 1353 }, { "epoch": 0.1886713579042709, "grad_norm": 1.1472110748291016, "learning_rate": 3.844905350405926e-05, "loss": 0.14356613159179688, "step": 1354 }, { "epoch": 0.18881070159548527, "grad_norm": 0.8192573189735413, "learning_rate": 3.8445411542132684e-05, "loss": 0.156646728515625, "step": 1355 }, { "epoch": 0.18895004528669965, "grad_norm": 0.3285615146160126, "learning_rate": 3.8441765482056783e-05, "loss": 0.0782928466796875, "step": 1356 }, { "epoch": 0.18908938897791402, "grad_norm": 1.471805453300476, "learning_rate": 3.843811532464163e-05, "loss": 0.16009140014648438, "step": 1357 }, { "epoch": 0.1892287326691284, "grad_norm": 0.6613411903381348, "learning_rate": 3.8434461070698194e-05, "loss": 0.14098548889160156, "step": 1358 }, { "epoch": 0.18936807636034278, "grad_norm": 0.34900209307670593, "learning_rate": 3.843080272103837e-05, "loss": 0.10255050659179688, "step": 1359 }, { "epoch": 0.18950742005155716, "grad_norm": 0.5859901905059814, "learning_rate": 3.842714027647497e-05, "loss": 0.11478805541992188, "step": 1360 }, { "epoch": 0.18964676374277153, "grad_norm": 0.4439310133457184, "learning_rate": 3.8423473737821705e-05, "loss": 0.09545707702636719, "step": 1361 }, { "epoch": 0.18978610743398594, "grad_norm": 0.5977876782417297, "learning_rate": 3.8419803105893175e-05, "loss": 0.103790283203125, "step": 1362 }, { "epoch": 0.18992545112520032, "grad_norm": 0.6895452737808228, "learning_rate": 3.841612838150494e-05, "loss": 0.11831474304199219, "step": 1363 }, { "epoch": 0.1900647948164147, "grad_norm": 0.6594695448875427, "learning_rate": 3.8412449565473414e-05, "loss": 0.11533546447753906, "step": 1364 }, { "epoch": 0.19020413850762907, "grad_norm": 0.6502701640129089, "learning_rate": 3.840876665861597e-05, "loss": 0.11409378051757812, "step": 1365 }, { "epoch": 0.19034348219884345, "grad_norm": 0.7259324789047241, "learning_rate": 3.840507966175085e-05, "loss": 0.125457763671875, "step": 1366 }, { "epoch": 0.19048282589005783, "grad_norm": 0.34324994683265686, "learning_rate": 3.840138857569722e-05, "loss": 0.08785438537597656, "step": 1367 }, { "epoch": 0.1906221695812722, "grad_norm": 0.6383782029151917, "learning_rate": 3.8397693401275165e-05, "loss": 0.10250473022460938, "step": 1368 }, { "epoch": 0.19076151327248658, "grad_norm": 0.4695620834827423, "learning_rate": 3.8393994139305656e-05, "loss": 0.08772659301757812, "step": 1369 }, { "epoch": 0.19090085696370096, "grad_norm": 0.5953870415687561, "learning_rate": 3.8390290790610595e-05, "loss": 0.09298896789550781, "step": 1370 }, { "epoch": 0.19104020065491534, "grad_norm": 0.20506471395492554, "learning_rate": 3.838658335601278e-05, "loss": 0.06280708312988281, "step": 1371 }, { "epoch": 0.19117954434612974, "grad_norm": 0.7083378434181213, "learning_rate": 3.838287183633591e-05, "loss": 0.12908172607421875, "step": 1372 }, { "epoch": 0.19131888803734412, "grad_norm": 0.6947250962257385, "learning_rate": 3.837915623240462e-05, "loss": 0.14439964294433594, "step": 1373 }, { "epoch": 0.1914582317285585, "grad_norm": 0.7821818590164185, "learning_rate": 3.837543654504441e-05, "loss": 0.10806083679199219, "step": 1374 }, { "epoch": 0.19159757541977288, "grad_norm": 1.120538592338562, "learning_rate": 3.837171277508171e-05, "loss": 0.11051559448242188, "step": 1375 }, { "epoch": 0.19173691911098725, "grad_norm": 0.6469658017158508, "learning_rate": 3.836798492334387e-05, "loss": 0.12801742553710938, "step": 1376 }, { "epoch": 0.19187626280220163, "grad_norm": 0.38692253828048706, "learning_rate": 3.836425299065913e-05, "loss": 0.09635066986083984, "step": 1377 }, { "epoch": 0.192015606493416, "grad_norm": 0.3441038131713867, "learning_rate": 3.836051697785664e-05, "loss": 0.08381271362304688, "step": 1378 }, { "epoch": 0.19215495018463039, "grad_norm": 0.7907981276512146, "learning_rate": 3.8356776885766456e-05, "loss": 0.10181045532226562, "step": 1379 }, { "epoch": 0.19229429387584476, "grad_norm": 0.6198570132255554, "learning_rate": 3.8353032715219534e-05, "loss": 0.09758758544921875, "step": 1380 }, { "epoch": 0.19243363756705914, "grad_norm": 0.4087033271789551, "learning_rate": 3.834928446704775e-05, "loss": 0.09213066101074219, "step": 1381 }, { "epoch": 0.19257298125827352, "grad_norm": 0.8019723296165466, "learning_rate": 3.834553214208389e-05, "loss": 0.09819984436035156, "step": 1382 }, { "epoch": 0.19271232494948792, "grad_norm": 0.8303682804107666, "learning_rate": 3.834177574116161e-05, "loss": 0.08117103576660156, "step": 1383 }, { "epoch": 0.1928516686407023, "grad_norm": 0.6511688828468323, "learning_rate": 3.833801526511552e-05, "loss": 0.0792388916015625, "step": 1384 }, { "epoch": 0.19299101233191668, "grad_norm": 0.850060760974884, "learning_rate": 3.83342507147811e-05, "loss": 0.10252189636230469, "step": 1385 }, { "epoch": 0.19313035602313106, "grad_norm": 0.3934380114078522, "learning_rate": 3.833048209099474e-05, "loss": 0.07392120361328125, "step": 1386 }, { "epoch": 0.19326969971434543, "grad_norm": 0.7533586025238037, "learning_rate": 3.832670939459376e-05, "loss": 0.09762191772460938, "step": 1387 }, { "epoch": 0.1934090434055598, "grad_norm": 0.4304204285144806, "learning_rate": 3.832293262641636e-05, "loss": 0.07915878295898438, "step": 1388 }, { "epoch": 0.1935483870967742, "grad_norm": 1.1906611919403076, "learning_rate": 3.8319151787301644e-05, "loss": 0.12559127807617188, "step": 1389 }, { "epoch": 0.19368773078798857, "grad_norm": 0.693774402141571, "learning_rate": 3.831536687808964e-05, "loss": 0.09670066833496094, "step": 1390 }, { "epoch": 0.19382707447920294, "grad_norm": 0.7913341522216797, "learning_rate": 3.831157789962126e-05, "loss": 0.10721015930175781, "step": 1391 }, { "epoch": 0.19396641817041732, "grad_norm": 0.48556143045425415, "learning_rate": 3.830778485273833e-05, "loss": 0.09945297241210938, "step": 1392 }, { "epoch": 0.19410576186163173, "grad_norm": 0.5833231210708618, "learning_rate": 3.830398773828358e-05, "loss": 0.1147308349609375, "step": 1393 }, { "epoch": 0.1942451055528461, "grad_norm": 0.9020781517028809, "learning_rate": 3.830018655710064e-05, "loss": 0.11287307739257812, "step": 1394 }, { "epoch": 0.19438444924406048, "grad_norm": 0.34590286016464233, "learning_rate": 3.829638131003405e-05, "loss": 0.0658721923828125, "step": 1395 }, { "epoch": 0.19452379293527486, "grad_norm": 1.3885587453842163, "learning_rate": 3.829257199792925e-05, "loss": 0.11473846435546875, "step": 1396 }, { "epoch": 0.19466313662648924, "grad_norm": 0.45876994729042053, "learning_rate": 3.828875862163258e-05, "loss": 0.09295082092285156, "step": 1397 }, { "epoch": 0.1948024803177036, "grad_norm": 0.8232770562171936, "learning_rate": 3.828494118199127e-05, "loss": 0.11212730407714844, "step": 1398 }, { "epoch": 0.194941824008918, "grad_norm": 0.7585610747337341, "learning_rate": 3.828111967985349e-05, "loss": 0.10973358154296875, "step": 1399 }, { "epoch": 0.19508116770013237, "grad_norm": 0.4467112123966217, "learning_rate": 3.8277294116068285e-05, "loss": 0.09208488464355469, "step": 1400 }, { "epoch": 0.19522051139134675, "grad_norm": 2.012354850769043, "learning_rate": 3.8273464491485596e-05, "loss": 0.15551376342773438, "step": 1401 }, { "epoch": 0.19535985508256112, "grad_norm": 1.0755252838134766, "learning_rate": 3.82696308069563e-05, "loss": 0.10248565673828125, "step": 1402 }, { "epoch": 0.19549919877377553, "grad_norm": 0.7562090158462524, "learning_rate": 3.8265793063332135e-05, "loss": 0.09842872619628906, "step": 1403 }, { "epoch": 0.1956385424649899, "grad_norm": 0.823768138885498, "learning_rate": 3.826195126146576e-05, "loss": 0.1348285675048828, "step": 1404 }, { "epoch": 0.19577788615620428, "grad_norm": 0.3633580207824707, "learning_rate": 3.8258105402210755e-05, "loss": 0.09007930755615234, "step": 1405 }, { "epoch": 0.19591722984741866, "grad_norm": 0.4143328368663788, "learning_rate": 3.825425548642156e-05, "loss": 0.07435894012451172, "step": 1406 }, { "epoch": 0.19605657353863304, "grad_norm": 0.6795492172241211, "learning_rate": 3.8250401514953557e-05, "loss": 0.1013193130493164, "step": 1407 }, { "epoch": 0.19619591722984742, "grad_norm": 0.8466312885284424, "learning_rate": 3.824654348866299e-05, "loss": 0.08840751647949219, "step": 1408 }, { "epoch": 0.1963352609210618, "grad_norm": 0.8269075155258179, "learning_rate": 3.824268140840704e-05, "loss": 0.11049461364746094, "step": 1409 }, { "epoch": 0.19647460461227617, "grad_norm": 1.1659709215164185, "learning_rate": 3.823881527504377e-05, "loss": 0.14566993713378906, "step": 1410 }, { "epoch": 0.19661394830349055, "grad_norm": 0.4802590608596802, "learning_rate": 3.823494508943214e-05, "loss": 0.10031318664550781, "step": 1411 }, { "epoch": 0.19675329199470493, "grad_norm": 1.2405728101730347, "learning_rate": 3.8231070852432035e-05, "loss": 0.17720794677734375, "step": 1412 }, { "epoch": 0.19689263568591933, "grad_norm": 0.33376267552375793, "learning_rate": 3.82271925649042e-05, "loss": 0.08036231994628906, "step": 1413 }, { "epoch": 0.1970319793771337, "grad_norm": 0.7798534035682678, "learning_rate": 3.822331022771031e-05, "loss": 0.09315681457519531, "step": 1414 }, { "epoch": 0.1971713230683481, "grad_norm": 0.26651304960250854, "learning_rate": 3.8219423841712935e-05, "loss": 0.06585693359375, "step": 1415 }, { "epoch": 0.19731066675956246, "grad_norm": 0.33895596861839294, "learning_rate": 3.821553340777553e-05, "loss": 0.07099342346191406, "step": 1416 }, { "epoch": 0.19745001045077684, "grad_norm": 0.7491000890731812, "learning_rate": 3.821163892676248e-05, "loss": 0.1431446075439453, "step": 1417 }, { "epoch": 0.19758935414199122, "grad_norm": 0.48909980058670044, "learning_rate": 3.820774039953904e-05, "loss": 0.10382652282714844, "step": 1418 }, { "epoch": 0.1977286978332056, "grad_norm": 0.596738874912262, "learning_rate": 3.820383782697136e-05, "loss": 0.09536552429199219, "step": 1419 }, { "epoch": 0.19786804152441997, "grad_norm": 0.5971279144287109, "learning_rate": 3.819993120992653e-05, "loss": 0.10203933715820312, "step": 1420 }, { "epoch": 0.19800738521563435, "grad_norm": 0.37086930871009827, "learning_rate": 3.819602054927249e-05, "loss": 0.09574699401855469, "step": 1421 }, { "epoch": 0.19814672890684873, "grad_norm": 0.8277623057365417, "learning_rate": 3.8192105845878106e-05, "loss": 0.11203765869140625, "step": 1422 }, { "epoch": 0.19828607259806313, "grad_norm": 0.38132745027542114, "learning_rate": 3.818818710061314e-05, "loss": 0.08694267272949219, "step": 1423 }, { "epoch": 0.1984254162892775, "grad_norm": 0.7666764259338379, "learning_rate": 3.818426431434824e-05, "loss": 0.13161468505859375, "step": 1424 }, { "epoch": 0.1985647599804919, "grad_norm": 0.892369270324707, "learning_rate": 3.818033748795497e-05, "loss": 0.1602935791015625, "step": 1425 }, { "epoch": 0.19870410367170627, "grad_norm": 0.7345067262649536, "learning_rate": 3.817640662230576e-05, "loss": 0.11780166625976562, "step": 1426 }, { "epoch": 0.19884344736292064, "grad_norm": 0.22475594282150269, "learning_rate": 3.8172471718273986e-05, "loss": 0.06590080261230469, "step": 1427 }, { "epoch": 0.19898279105413502, "grad_norm": 0.31588122248649597, "learning_rate": 3.8168532776733874e-05, "loss": 0.08957290649414062, "step": 1428 }, { "epoch": 0.1991221347453494, "grad_norm": 0.40985652804374695, "learning_rate": 3.816458979856058e-05, "loss": 0.09764671325683594, "step": 1429 }, { "epoch": 0.19926147843656378, "grad_norm": 0.5436139702796936, "learning_rate": 3.816064278463013e-05, "loss": 0.12009620666503906, "step": 1430 }, { "epoch": 0.19940082212777815, "grad_norm": 0.6112926602363586, "learning_rate": 3.815669173581947e-05, "loss": 0.08493614196777344, "step": 1431 }, { "epoch": 0.19954016581899253, "grad_norm": 0.8178542852401733, "learning_rate": 3.8152736653006434e-05, "loss": 0.10950279235839844, "step": 1432 }, { "epoch": 0.19967950951020694, "grad_norm": 0.958256721496582, "learning_rate": 3.8148777537069745e-05, "loss": 0.13604164123535156, "step": 1433 }, { "epoch": 0.19981885320142131, "grad_norm": 0.5863037705421448, "learning_rate": 3.8144814388889034e-05, "loss": 0.10473060607910156, "step": 1434 }, { "epoch": 0.1999581968926357, "grad_norm": 0.38497716188430786, "learning_rate": 3.814084720934482e-05, "loss": 0.08390045166015625, "step": 1435 }, { "epoch": 0.20009754058385007, "grad_norm": 1.1983671188354492, "learning_rate": 3.813687599931851e-05, "loss": 0.10915184020996094, "step": 1436 }, { "epoch": 0.20023688427506445, "grad_norm": 0.5354716181755066, "learning_rate": 3.813290075969243e-05, "loss": 0.08707809448242188, "step": 1437 }, { "epoch": 0.20037622796627882, "grad_norm": 0.7907063364982605, "learning_rate": 3.812892149134978e-05, "loss": 0.09860992431640625, "step": 1438 }, { "epoch": 0.2005155716574932, "grad_norm": 1.1451308727264404, "learning_rate": 3.812493819517467e-05, "loss": 0.1474018096923828, "step": 1439 }, { "epoch": 0.20065491534870758, "grad_norm": 0.5149797797203064, "learning_rate": 3.812095087205209e-05, "loss": 0.09700965881347656, "step": 1440 }, { "epoch": 0.20079425903992196, "grad_norm": 0.588095486164093, "learning_rate": 3.811695952286793e-05, "loss": 0.09161949157714844, "step": 1441 }, { "epoch": 0.20093360273113633, "grad_norm": 1.0182271003723145, "learning_rate": 3.8112964148508986e-05, "loss": 0.1202545166015625, "step": 1442 }, { "epoch": 0.20107294642235074, "grad_norm": 0.3629237115383148, "learning_rate": 3.810896474986294e-05, "loss": 0.07866287231445312, "step": 1443 }, { "epoch": 0.20121229011356512, "grad_norm": 0.6830618381500244, "learning_rate": 3.8104961327818354e-05, "loss": 0.1313323974609375, "step": 1444 }, { "epoch": 0.2013516338047795, "grad_norm": 1.473646640777588, "learning_rate": 3.8100953883264705e-05, "loss": 0.11252784729003906, "step": 1445 }, { "epoch": 0.20149097749599387, "grad_norm": 0.8773378133773804, "learning_rate": 3.809694241709235e-05, "loss": 0.11846733093261719, "step": 1446 }, { "epoch": 0.20163032118720825, "grad_norm": 0.29767510294914246, "learning_rate": 3.8092926930192555e-05, "loss": 0.07369613647460938, "step": 1447 }, { "epoch": 0.20176966487842263, "grad_norm": 0.3374917805194855, "learning_rate": 3.8088907423457466e-05, "loss": 0.06881523132324219, "step": 1448 }, { "epoch": 0.201909008569637, "grad_norm": 0.33682623505592346, "learning_rate": 3.8084883897780126e-05, "loss": 0.06980705261230469, "step": 1449 }, { "epoch": 0.20204835226085138, "grad_norm": 0.9696002006530762, "learning_rate": 3.808085635405446e-05, "loss": 0.09891414642333984, "step": 1450 }, { "epoch": 0.20218769595206576, "grad_norm": 0.8802271485328674, "learning_rate": 3.807682479317531e-05, "loss": 0.10804176330566406, "step": 1451 }, { "epoch": 0.20232703964328014, "grad_norm": 0.3295086920261383, "learning_rate": 3.80727892160384e-05, "loss": 0.09547042846679688, "step": 1452 }, { "epoch": 0.20246638333449454, "grad_norm": 0.7114360332489014, "learning_rate": 3.806874962354033e-05, "loss": 0.13481712341308594, "step": 1453 }, { "epoch": 0.20260572702570892, "grad_norm": 0.49520426988601685, "learning_rate": 3.806470601657861e-05, "loss": 0.09775924682617188, "step": 1454 }, { "epoch": 0.2027450707169233, "grad_norm": 0.4204261302947998, "learning_rate": 3.806065839605163e-05, "loss": 0.08002281188964844, "step": 1455 }, { "epoch": 0.20288441440813768, "grad_norm": 0.5384228825569153, "learning_rate": 3.805660676285869e-05, "loss": 0.09677314758300781, "step": 1456 }, { "epoch": 0.20302375809935205, "grad_norm": 0.31039959192276, "learning_rate": 3.805255111789997e-05, "loss": 0.059444427490234375, "step": 1457 }, { "epoch": 0.20316310179056643, "grad_norm": 0.6278024315834045, "learning_rate": 3.804849146207654e-05, "loss": 0.1160116195678711, "step": 1458 }, { "epoch": 0.2033024454817808, "grad_norm": 0.37210944294929504, "learning_rate": 3.804442779629035e-05, "loss": 0.08463001251220703, "step": 1459 }, { "epoch": 0.20344178917299519, "grad_norm": 0.5949891805648804, "learning_rate": 3.804036012144428e-05, "loss": 0.06990814208984375, "step": 1460 }, { "epoch": 0.20358113286420956, "grad_norm": 0.4826754033565521, "learning_rate": 3.8036288438442056e-05, "loss": 0.09526634216308594, "step": 1461 }, { "epoch": 0.20372047655542394, "grad_norm": 0.30970972776412964, "learning_rate": 3.8032212748188306e-05, "loss": 0.068878173828125, "step": 1462 }, { "epoch": 0.20385982024663835, "grad_norm": 0.4353504776954651, "learning_rate": 3.802813305158857e-05, "loss": 0.08207130432128906, "step": 1463 }, { "epoch": 0.20399916393785272, "grad_norm": 0.31890976428985596, "learning_rate": 3.802404934954926e-05, "loss": 0.06662368774414062, "step": 1464 }, { "epoch": 0.2041385076290671, "grad_norm": 0.4211995005607605, "learning_rate": 3.801996164297769e-05, "loss": 0.11152839660644531, "step": 1465 }, { "epoch": 0.20427785132028148, "grad_norm": 1.0828603506088257, "learning_rate": 3.8015869932782034e-05, "loss": 0.14220237731933594, "step": 1466 }, { "epoch": 0.20441719501149586, "grad_norm": 0.4392499625682831, "learning_rate": 3.801177421987139e-05, "loss": 0.08716583251953125, "step": 1467 }, { "epoch": 0.20455653870271023, "grad_norm": 0.5736841559410095, "learning_rate": 3.800767450515574e-05, "loss": 0.10194778442382812, "step": 1468 }, { "epoch": 0.2046958823939246, "grad_norm": 0.7148897647857666, "learning_rate": 3.800357078954593e-05, "loss": 0.11293697357177734, "step": 1469 }, { "epoch": 0.204835226085139, "grad_norm": 0.5559396147727966, "learning_rate": 3.7999463073953715e-05, "loss": 0.11779403686523438, "step": 1470 }, { "epoch": 0.20497456977635337, "grad_norm": 0.5491883754730225, "learning_rate": 3.7995351359291743e-05, "loss": 0.10757064819335938, "step": 1471 }, { "epoch": 0.20511391346756774, "grad_norm": 0.7194819450378418, "learning_rate": 3.799123564647354e-05, "loss": 0.12995338439941406, "step": 1472 }, { "epoch": 0.20525325715878215, "grad_norm": 0.5404316782951355, "learning_rate": 3.7987115936413526e-05, "loss": 0.11382675170898438, "step": 1473 }, { "epoch": 0.20539260084999653, "grad_norm": 0.8655185103416443, "learning_rate": 3.7982992230027e-05, "loss": 0.11959648132324219, "step": 1474 }, { "epoch": 0.2055319445412109, "grad_norm": 0.678997814655304, "learning_rate": 3.797886452823016e-05, "loss": 0.10365104675292969, "step": 1475 }, { "epoch": 0.20567128823242528, "grad_norm": 0.8390398621559143, "learning_rate": 3.797473283194009e-05, "loss": 0.12501144409179688, "step": 1476 }, { "epoch": 0.20581063192363966, "grad_norm": 1.0266169309616089, "learning_rate": 3.797059714207475e-05, "loss": 0.12553787231445312, "step": 1477 }, { "epoch": 0.20594997561485404, "grad_norm": 0.30015701055526733, "learning_rate": 3.7966457459553e-05, "loss": 0.07081985473632812, "step": 1478 }, { "epoch": 0.2060893193060684, "grad_norm": 0.538057267665863, "learning_rate": 3.796231378529458e-05, "loss": 0.10770225524902344, "step": 1479 }, { "epoch": 0.2062286629972828, "grad_norm": 0.4557957053184509, "learning_rate": 3.795816612022014e-05, "loss": 0.10361862182617188, "step": 1480 }, { "epoch": 0.20636800668849717, "grad_norm": 0.5970976948738098, "learning_rate": 3.795401446525117e-05, "loss": 0.13608360290527344, "step": 1481 }, { "epoch": 0.20650735037971155, "grad_norm": 1.1295959949493408, "learning_rate": 3.794985882131008e-05, "loss": 0.11529922485351562, "step": 1482 }, { "epoch": 0.20664669407092595, "grad_norm": 0.8950627446174622, "learning_rate": 3.794569918932016e-05, "loss": 0.08821678161621094, "step": 1483 }, { "epoch": 0.20678603776214033, "grad_norm": 0.955672562122345, "learning_rate": 3.79415355702056e-05, "loss": 0.10879707336425781, "step": 1484 }, { "epoch": 0.2069253814533547, "grad_norm": 0.6705617308616638, "learning_rate": 3.793736796489143e-05, "loss": 0.10563850402832031, "step": 1485 }, { "epoch": 0.20706472514456908, "grad_norm": 0.437788188457489, "learning_rate": 3.7933196374303636e-05, "loss": 0.08569145202636719, "step": 1486 }, { "epoch": 0.20720406883578346, "grad_norm": 0.7959402799606323, "learning_rate": 3.792902079936902e-05, "loss": 0.08927726745605469, "step": 1487 }, { "epoch": 0.20734341252699784, "grad_norm": 0.936450183391571, "learning_rate": 3.79248412410153e-05, "loss": 0.10674571990966797, "step": 1488 }, { "epoch": 0.20748275621821222, "grad_norm": 0.7912521958351135, "learning_rate": 3.79206577001711e-05, "loss": 0.09006309509277344, "step": 1489 }, { "epoch": 0.2076220999094266, "grad_norm": 0.491150438785553, "learning_rate": 3.791647017776589e-05, "loss": 0.06692314147949219, "step": 1490 }, { "epoch": 0.20776144360064097, "grad_norm": 0.40566304326057434, "learning_rate": 3.791227867473004e-05, "loss": 0.09125900268554688, "step": 1491 }, { "epoch": 0.20790078729185535, "grad_norm": 0.3640974462032318, "learning_rate": 3.790808319199483e-05, "loss": 0.0757293701171875, "step": 1492 }, { "epoch": 0.20804013098306975, "grad_norm": 0.3945387899875641, "learning_rate": 3.790388373049236e-05, "loss": 0.06107330322265625, "step": 1493 }, { "epoch": 0.20817947467428413, "grad_norm": 1.162039875984192, "learning_rate": 3.78996802911557e-05, "loss": 0.12734031677246094, "step": 1494 }, { "epoch": 0.2083188183654985, "grad_norm": 0.4436519742012024, "learning_rate": 3.789547287491872e-05, "loss": 0.06573104858398438, "step": 1495 }, { "epoch": 0.2084581620567129, "grad_norm": 0.4242134392261505, "learning_rate": 3.789126148271624e-05, "loss": 0.088714599609375, "step": 1496 }, { "epoch": 0.20859750574792726, "grad_norm": 0.4224456548690796, "learning_rate": 3.7887046115483914e-05, "loss": 0.0977325439453125, "step": 1497 }, { "epoch": 0.20873684943914164, "grad_norm": 0.5047667026519775, "learning_rate": 3.788282677415831e-05, "loss": 0.07758140563964844, "step": 1498 }, { "epoch": 0.20887619313035602, "grad_norm": 0.6682465076446533, "learning_rate": 3.787860345967687e-05, "loss": 0.09996986389160156, "step": 1499 }, { "epoch": 0.2090155368215704, "grad_norm": 0.4682313799858093, "learning_rate": 3.787437617297792e-05, "loss": 0.113739013671875, "step": 1500 }, { "epoch": 0.20915488051278477, "grad_norm": 0.6314793229103088, "learning_rate": 3.787014491500066e-05, "loss": 0.0919036865234375, "step": 1501 }, { "epoch": 0.20929422420399915, "grad_norm": 0.3438890874385834, "learning_rate": 3.786590968668518e-05, "loss": 0.09236717224121094, "step": 1502 }, { "epoch": 0.20943356789521356, "grad_norm": 0.4249441921710968, "learning_rate": 3.7861670488972464e-05, "loss": 0.08953285217285156, "step": 1503 }, { "epoch": 0.20957291158642793, "grad_norm": 0.47448834776878357, "learning_rate": 3.7857427322804346e-05, "loss": 0.08986759185791016, "step": 1504 }, { "epoch": 0.2097122552776423, "grad_norm": 0.5752113461494446, "learning_rate": 3.785318018912357e-05, "loss": 0.11834144592285156, "step": 1505 }, { "epoch": 0.2098515989688567, "grad_norm": 0.8963272571563721, "learning_rate": 3.784892908887375e-05, "loss": 0.10187339782714844, "step": 1506 }, { "epoch": 0.20999094266007107, "grad_norm": 0.9021040797233582, "learning_rate": 3.7844674022999387e-05, "loss": 0.13229751586914062, "step": 1507 }, { "epoch": 0.21013028635128544, "grad_norm": 0.4937811493873596, "learning_rate": 3.784041499244585e-05, "loss": 0.09146881103515625, "step": 1508 }, { "epoch": 0.21026963004249982, "grad_norm": 0.9222509264945984, "learning_rate": 3.783615199815941e-05, "loss": 0.09781455993652344, "step": 1509 }, { "epoch": 0.2104089737337142, "grad_norm": 0.7012776732444763, "learning_rate": 3.78318850410872e-05, "loss": 0.1532745361328125, "step": 1510 }, { "epoch": 0.21054831742492858, "grad_norm": 0.325534850358963, "learning_rate": 3.782761412217725e-05, "loss": 0.08605194091796875, "step": 1511 }, { "epoch": 0.21068766111614295, "grad_norm": 0.6870124936103821, "learning_rate": 3.7823339242378445e-05, "loss": 0.12457275390625, "step": 1512 }, { "epoch": 0.21082700480735736, "grad_norm": 0.43713685870170593, "learning_rate": 3.7819060402640577e-05, "loss": 0.08042716979980469, "step": 1513 }, { "epoch": 0.21096634849857174, "grad_norm": 0.29261136054992676, "learning_rate": 3.7814777603914305e-05, "loss": 0.07395172119140625, "step": 1514 }, { "epoch": 0.21110569218978611, "grad_norm": 0.8282667994499207, "learning_rate": 3.781049084715117e-05, "loss": 0.09408378601074219, "step": 1515 }, { "epoch": 0.2112450358810005, "grad_norm": 0.6816376447677612, "learning_rate": 3.780620013330358e-05, "loss": 0.14255142211914062, "step": 1516 }, { "epoch": 0.21138437957221487, "grad_norm": 0.8154549598693848, "learning_rate": 3.7801905463324855e-05, "loss": 0.17228317260742188, "step": 1517 }, { "epoch": 0.21152372326342925, "grad_norm": 0.551977813243866, "learning_rate": 3.7797606838169156e-05, "loss": 0.0966033935546875, "step": 1518 }, { "epoch": 0.21166306695464362, "grad_norm": 0.600788414478302, "learning_rate": 3.7793304258791544e-05, "loss": 0.08553504943847656, "step": 1519 }, { "epoch": 0.211802410645858, "grad_norm": 0.3054784834384918, "learning_rate": 3.778899772614795e-05, "loss": 0.06509208679199219, "step": 1520 }, { "epoch": 0.21194175433707238, "grad_norm": 0.7253639698028564, "learning_rate": 3.7784687241195195e-05, "loss": 0.12289810180664062, "step": 1521 }, { "epoch": 0.21208109802828676, "grad_norm": 0.7096953988075256, "learning_rate": 3.778037280489096e-05, "loss": 0.09976005554199219, "step": 1522 }, { "epoch": 0.21222044171950116, "grad_norm": 0.348112553358078, "learning_rate": 3.777605441819383e-05, "loss": 0.09137344360351562, "step": 1523 }, { "epoch": 0.21235978541071554, "grad_norm": 0.5054752826690674, "learning_rate": 3.777173208206323e-05, "loss": 0.06853675842285156, "step": 1524 }, { "epoch": 0.21249912910192992, "grad_norm": 0.5703735947608948, "learning_rate": 3.776740579745951e-05, "loss": 0.12443733215332031, "step": 1525 }, { "epoch": 0.2126384727931443, "grad_norm": 0.7497169971466064, "learning_rate": 3.776307556534385e-05, "loss": 0.09358978271484375, "step": 1526 }, { "epoch": 0.21277781648435867, "grad_norm": 0.8233155608177185, "learning_rate": 3.775874138667834e-05, "loss": 0.1333599090576172, "step": 1527 }, { "epoch": 0.21291716017557305, "grad_norm": 0.395746111869812, "learning_rate": 3.775440326242593e-05, "loss": 0.08567619323730469, "step": 1528 }, { "epoch": 0.21305650386678743, "grad_norm": 0.5177441239356995, "learning_rate": 3.775006119355047e-05, "loss": 0.08508682250976562, "step": 1529 }, { "epoch": 0.2131958475580018, "grad_norm": 0.7314834594726562, "learning_rate": 3.7745715181016634e-05, "loss": 0.12312889099121094, "step": 1530 }, { "epoch": 0.21333519124921618, "grad_norm": 0.4594397246837616, "learning_rate": 3.774136522579004e-05, "loss": 0.08123970031738281, "step": 1531 }, { "epoch": 0.21347453494043056, "grad_norm": 0.6907541155815125, "learning_rate": 3.773701132883712e-05, "loss": 0.1280040740966797, "step": 1532 }, { "epoch": 0.21361387863164497, "grad_norm": 0.7328150868415833, "learning_rate": 3.773265349112524e-05, "loss": 0.10618972778320312, "step": 1533 }, { "epoch": 0.21375322232285934, "grad_norm": 0.3529180586338043, "learning_rate": 3.772829171362259e-05, "loss": 0.07922935485839844, "step": 1534 }, { "epoch": 0.21389256601407372, "grad_norm": 0.3982096314430237, "learning_rate": 3.772392599729827e-05, "loss": 0.09147262573242188, "step": 1535 }, { "epoch": 0.2140319097052881, "grad_norm": 0.38216084241867065, "learning_rate": 3.7719556343122236e-05, "loss": 0.08507156372070312, "step": 1536 }, { "epoch": 0.21417125339650248, "grad_norm": 0.5215528607368469, "learning_rate": 3.771518275206532e-05, "loss": 0.0823822021484375, "step": 1537 }, { "epoch": 0.21431059708771685, "grad_norm": 0.38749194145202637, "learning_rate": 3.771080522509925e-05, "loss": 0.10201835632324219, "step": 1538 }, { "epoch": 0.21444994077893123, "grad_norm": 0.46555837988853455, "learning_rate": 3.77064237631966e-05, "loss": 0.09074020385742188, "step": 1539 }, { "epoch": 0.2145892844701456, "grad_norm": 0.47891974449157715, "learning_rate": 3.770203836733084e-05, "loss": 0.08310127258300781, "step": 1540 }, { "epoch": 0.21472862816135999, "grad_norm": 0.43048909306526184, "learning_rate": 3.769764903847629e-05, "loss": 0.09546852111816406, "step": 1541 }, { "epoch": 0.21486797185257436, "grad_norm": 0.7208696603775024, "learning_rate": 3.769325577760817e-05, "loss": 0.09722328186035156, "step": 1542 }, { "epoch": 0.21500731554378877, "grad_norm": 0.4891384243965149, "learning_rate": 3.7688858585702564e-05, "loss": 0.10654830932617188, "step": 1543 }, { "epoch": 0.21514665923500315, "grad_norm": 0.5755232572555542, "learning_rate": 3.768445746373642e-05, "loss": 0.12471199035644531, "step": 1544 }, { "epoch": 0.21528600292621752, "grad_norm": 0.2976595163345337, "learning_rate": 3.768005241268757e-05, "loss": 0.07563018798828125, "step": 1545 }, { "epoch": 0.2154253466174319, "grad_norm": 0.8567584753036499, "learning_rate": 3.7675643433534725e-05, "loss": 0.12109756469726562, "step": 1546 }, { "epoch": 0.21556469030864628, "grad_norm": 0.3276172876358032, "learning_rate": 3.767123052725744e-05, "loss": 0.07333564758300781, "step": 1547 }, { "epoch": 0.21570403399986066, "grad_norm": 0.4382215142250061, "learning_rate": 3.7666813694836176e-05, "loss": 0.10529899597167969, "step": 1548 }, { "epoch": 0.21584337769107503, "grad_norm": 0.369022011756897, "learning_rate": 3.7662392937252255e-05, "loss": 0.07841300964355469, "step": 1549 }, { "epoch": 0.2159827213822894, "grad_norm": 0.8088367581367493, "learning_rate": 3.7657968255487854e-05, "loss": 0.11362838745117188, "step": 1550 }, { "epoch": 0.2161220650735038, "grad_norm": 0.8999560475349426, "learning_rate": 3.765353965052605e-05, "loss": 0.11891365051269531, "step": 1551 }, { "epoch": 0.21626140876471817, "grad_norm": 0.6453534364700317, "learning_rate": 3.764910712335077e-05, "loss": 0.1423654556274414, "step": 1552 }, { "epoch": 0.21640075245593257, "grad_norm": 0.2893756628036499, "learning_rate": 3.764467067494683e-05, "loss": 0.08387374877929688, "step": 1553 }, { "epoch": 0.21654009614714695, "grad_norm": 0.5407136082649231, "learning_rate": 3.7640230306299895e-05, "loss": 0.09052658081054688, "step": 1554 }, { "epoch": 0.21667943983836133, "grad_norm": 0.7949237823486328, "learning_rate": 3.7635786018396524e-05, "loss": 0.11100196838378906, "step": 1555 }, { "epoch": 0.2168187835295757, "grad_norm": 1.0256104469299316, "learning_rate": 3.763133781222412e-05, "loss": 0.11709213256835938, "step": 1556 }, { "epoch": 0.21695812722079008, "grad_norm": 0.6628548502922058, "learning_rate": 3.762688568877099e-05, "loss": 0.13437652587890625, "step": 1557 }, { "epoch": 0.21709747091200446, "grad_norm": 0.3250269889831543, "learning_rate": 3.762242964902629e-05, "loss": 0.08572006225585938, "step": 1558 }, { "epoch": 0.21723681460321884, "grad_norm": 0.7066490650177002, "learning_rate": 3.761796969398005e-05, "loss": 0.10484790802001953, "step": 1559 }, { "epoch": 0.2173761582944332, "grad_norm": 0.4039357602596283, "learning_rate": 3.761350582462317e-05, "loss": 0.09900856018066406, "step": 1560 }, { "epoch": 0.2175155019856476, "grad_norm": 0.7094775438308716, "learning_rate": 3.760903804194742e-05, "loss": 0.1310443878173828, "step": 1561 }, { "epoch": 0.21765484567686197, "grad_norm": 0.798835039138794, "learning_rate": 3.7604566346945437e-05, "loss": 0.10375595092773438, "step": 1562 }, { "epoch": 0.21779418936807637, "grad_norm": 0.8003969192504883, "learning_rate": 3.760009074061073e-05, "loss": 0.17800521850585938, "step": 1563 }, { "epoch": 0.21793353305929075, "grad_norm": 0.5331871509552002, "learning_rate": 3.759561122393767e-05, "loss": 0.12304496765136719, "step": 1564 }, { "epoch": 0.21807287675050513, "grad_norm": 0.45761337876319885, "learning_rate": 3.7591127797921523e-05, "loss": 0.09998798370361328, "step": 1565 }, { "epoch": 0.2182122204417195, "grad_norm": 0.36916062235832214, "learning_rate": 3.7586640463558384e-05, "loss": 0.10434627532958984, "step": 1566 }, { "epoch": 0.21835156413293388, "grad_norm": 0.9214900732040405, "learning_rate": 3.758214922184525e-05, "loss": 0.1218109130859375, "step": 1567 }, { "epoch": 0.21849090782414826, "grad_norm": 0.27142274379730225, "learning_rate": 3.7577654073779956e-05, "loss": 0.08076286315917969, "step": 1568 }, { "epoch": 0.21863025151536264, "grad_norm": 0.41765594482421875, "learning_rate": 3.757315502036124e-05, "loss": 0.10165786743164062, "step": 1569 }, { "epoch": 0.21876959520657702, "grad_norm": 0.4707895517349243, "learning_rate": 3.756865206258868e-05, "loss": 0.1075286865234375, "step": 1570 }, { "epoch": 0.2189089388977914, "grad_norm": 0.6539376974105835, "learning_rate": 3.7564145201462726e-05, "loss": 0.09635591506958008, "step": 1571 }, { "epoch": 0.21904828258900577, "grad_norm": 0.6401981711387634, "learning_rate": 3.755963443798471e-05, "loss": 0.11151885986328125, "step": 1572 }, { "epoch": 0.21918762628022015, "grad_norm": 0.3274432420730591, "learning_rate": 3.7555119773156815e-05, "loss": 0.07125282287597656, "step": 1573 }, { "epoch": 0.21932696997143455, "grad_norm": 0.3339978754520416, "learning_rate": 3.755060120798209e-05, "loss": 0.09644699096679688, "step": 1574 }, { "epoch": 0.21946631366264893, "grad_norm": 0.6201578974723816, "learning_rate": 3.754607874346447e-05, "loss": 0.09425735473632812, "step": 1575 }, { "epoch": 0.2196056573538633, "grad_norm": 0.518813967704773, "learning_rate": 3.7541552380608734e-05, "loss": 0.10203742980957031, "step": 1576 }, { "epoch": 0.2197450010450777, "grad_norm": 0.6740968823432922, "learning_rate": 3.753702212042054e-05, "loss": 0.14271163940429688, "step": 1577 }, { "epoch": 0.21988434473629206, "grad_norm": 0.5926182270050049, "learning_rate": 3.753248796390641e-05, "loss": 0.1084604263305664, "step": 1578 }, { "epoch": 0.22002368842750644, "grad_norm": 0.3018651604652405, "learning_rate": 3.7527949912073725e-05, "loss": 0.08038520812988281, "step": 1579 }, { "epoch": 0.22016303211872082, "grad_norm": 0.5300496220588684, "learning_rate": 3.752340796593074e-05, "loss": 0.12004852294921875, "step": 1580 }, { "epoch": 0.2203023758099352, "grad_norm": 0.5574085116386414, "learning_rate": 3.751886212648657e-05, "loss": 0.12952613830566406, "step": 1581 }, { "epoch": 0.22044171950114957, "grad_norm": 0.9039328694343567, "learning_rate": 3.75143123947512e-05, "loss": 0.1310100555419922, "step": 1582 }, { "epoch": 0.22058106319236395, "grad_norm": 0.317617267370224, "learning_rate": 3.7509758771735475e-05, "loss": 0.07017707824707031, "step": 1583 }, { "epoch": 0.22072040688357836, "grad_norm": 0.21542635560035706, "learning_rate": 3.75052012584511e-05, "loss": 0.06874847412109375, "step": 1584 }, { "epoch": 0.22085975057479273, "grad_norm": 0.5125584602355957, "learning_rate": 3.750063985591067e-05, "loss": 0.12099838256835938, "step": 1585 }, { "epoch": 0.2209990942660071, "grad_norm": 0.49988046288490295, "learning_rate": 3.749607456512759e-05, "loss": 0.10702323913574219, "step": 1586 }, { "epoch": 0.2211384379572215, "grad_norm": 0.5726056694984436, "learning_rate": 3.74915053871162e-05, "loss": 0.07961177825927734, "step": 1587 }, { "epoch": 0.22127778164843587, "grad_norm": 0.34733280539512634, "learning_rate": 3.7486932322891646e-05, "loss": 0.07981109619140625, "step": 1588 }, { "epoch": 0.22141712533965024, "grad_norm": 0.5153899192810059, "learning_rate": 3.748235537346996e-05, "loss": 0.09248733520507812, "step": 1589 }, { "epoch": 0.22155646903086462, "grad_norm": 0.30907776951789856, "learning_rate": 3.747777453986804e-05, "loss": 0.08660602569580078, "step": 1590 }, { "epoch": 0.221695812722079, "grad_norm": 0.6186462640762329, "learning_rate": 3.7473189823103645e-05, "loss": 0.15837669372558594, "step": 1591 }, { "epoch": 0.22183515641329338, "grad_norm": 0.2438896894454956, "learning_rate": 3.746860122419539e-05, "loss": 0.0655527114868164, "step": 1592 }, { "epoch": 0.22197450010450775, "grad_norm": 0.34336549043655396, "learning_rate": 3.746400874416276e-05, "loss": 0.0745391845703125, "step": 1593 }, { "epoch": 0.22211384379572216, "grad_norm": 0.4868924617767334, "learning_rate": 3.745941238402609e-05, "loss": 0.11593246459960938, "step": 1594 }, { "epoch": 0.22225318748693654, "grad_norm": 0.6077494025230408, "learning_rate": 3.74548121448066e-05, "loss": 0.11920928955078125, "step": 1595 }, { "epoch": 0.22239253117815092, "grad_norm": 0.36852696537971497, "learning_rate": 3.745020802752635e-05, "loss": 0.07178401947021484, "step": 1596 }, { "epoch": 0.2225318748693653, "grad_norm": 0.6349460482597351, "learning_rate": 3.744560003320827e-05, "loss": 0.121734619140625, "step": 1597 }, { "epoch": 0.22267121856057967, "grad_norm": 0.27636298537254333, "learning_rate": 3.744098816287616e-05, "loss": 0.07383441925048828, "step": 1598 }, { "epoch": 0.22281056225179405, "grad_norm": 0.7512742877006531, "learning_rate": 3.743637241755465e-05, "loss": 0.11266708374023438, "step": 1599 }, { "epoch": 0.22294990594300843, "grad_norm": 0.47874268889427185, "learning_rate": 3.743175279826928e-05, "loss": 0.10490226745605469, "step": 1600 }, { "epoch": 0.2230892496342228, "grad_norm": 1.0714977979660034, "learning_rate": 3.7427129306046406e-05, "loss": 0.16431427001953125, "step": 1601 }, { "epoch": 0.22322859332543718, "grad_norm": 0.9433776140213013, "learning_rate": 3.7422501941913274e-05, "loss": 0.15381336212158203, "step": 1602 }, { "epoch": 0.22336793701665156, "grad_norm": 0.5465538501739502, "learning_rate": 3.7417870706897964e-05, "loss": 0.11712074279785156, "step": 1603 }, { "epoch": 0.22350728070786596, "grad_norm": 0.6326457262039185, "learning_rate": 3.7413235602029445e-05, "loss": 0.10058212280273438, "step": 1604 }, { "epoch": 0.22364662439908034, "grad_norm": 0.6444109678268433, "learning_rate": 3.740859662833753e-05, "loss": 0.13872146606445312, "step": 1605 }, { "epoch": 0.22378596809029472, "grad_norm": 0.5282261371612549, "learning_rate": 3.7403953786852884e-05, "loss": 0.1089630126953125, "step": 1606 }, { "epoch": 0.2239253117815091, "grad_norm": 0.5039969086647034, "learning_rate": 3.739930707860705e-05, "loss": 0.09267616271972656, "step": 1607 }, { "epoch": 0.22406465547272347, "grad_norm": 0.7581688165664673, "learning_rate": 3.739465650463241e-05, "loss": 0.13608360290527344, "step": 1608 }, { "epoch": 0.22420399916393785, "grad_norm": 0.40179604291915894, "learning_rate": 3.739000206596222e-05, "loss": 0.06851577758789062, "step": 1609 }, { "epoch": 0.22434334285515223, "grad_norm": 0.5212522149085999, "learning_rate": 3.7385343763630594e-05, "loss": 0.10344123840332031, "step": 1610 }, { "epoch": 0.2244826865463666, "grad_norm": 0.6378124356269836, "learning_rate": 3.738068159867251e-05, "loss": 0.09065818786621094, "step": 1611 }, { "epoch": 0.22462203023758098, "grad_norm": 0.27570831775665283, "learning_rate": 3.7376015572123766e-05, "loss": 0.058109283447265625, "step": 1612 }, { "epoch": 0.22476137392879536, "grad_norm": 0.40337181091308594, "learning_rate": 3.737134568502107e-05, "loss": 0.07692718505859375, "step": 1613 }, { "epoch": 0.22490071762000977, "grad_norm": 0.8806300163269043, "learning_rate": 3.7366671938401954e-05, "loss": 0.12884902954101562, "step": 1614 }, { "epoch": 0.22504006131122414, "grad_norm": 0.9037048816680908, "learning_rate": 3.736199433330483e-05, "loss": 0.10411643981933594, "step": 1615 }, { "epoch": 0.22517940500243852, "grad_norm": 0.19295407831668854, "learning_rate": 3.735731287076893e-05, "loss": 0.05302619934082031, "step": 1616 }, { "epoch": 0.2253187486936529, "grad_norm": 0.7655208110809326, "learning_rate": 3.73526275518344e-05, "loss": 0.11087894439697266, "step": 1617 }, { "epoch": 0.22545809238486728, "grad_norm": 0.4710557758808136, "learning_rate": 3.734793837754219e-05, "loss": 0.09324455261230469, "step": 1618 }, { "epoch": 0.22559743607608165, "grad_norm": 0.3750317096710205, "learning_rate": 3.734324534893413e-05, "loss": 0.08462333679199219, "step": 1619 }, { "epoch": 0.22573677976729603, "grad_norm": 0.5708099007606506, "learning_rate": 3.733854846705291e-05, "loss": 0.08513259887695312, "step": 1620 }, { "epoch": 0.2258761234585104, "grad_norm": 0.3139928877353668, "learning_rate": 3.733384773294207e-05, "loss": 0.08656501770019531, "step": 1621 }, { "epoch": 0.22601546714972479, "grad_norm": 0.8058632612228394, "learning_rate": 3.7329143147645994e-05, "loss": 0.11630439758300781, "step": 1622 }, { "epoch": 0.22615481084093916, "grad_norm": 0.5171427726745605, "learning_rate": 3.732443471220994e-05, "loss": 0.09388160705566406, "step": 1623 }, { "epoch": 0.22629415453215357, "grad_norm": 0.8346560597419739, "learning_rate": 3.731972242768002e-05, "loss": 0.1201171875, "step": 1624 }, { "epoch": 0.22643349822336795, "grad_norm": 0.4235908091068268, "learning_rate": 3.73150062951032e-05, "loss": 0.09433937072753906, "step": 1625 }, { "epoch": 0.22657284191458232, "grad_norm": 0.4598454236984253, "learning_rate": 3.731028631552728e-05, "loss": 0.06681632995605469, "step": 1626 }, { "epoch": 0.2267121856057967, "grad_norm": 0.7440026998519897, "learning_rate": 3.7305562490000944e-05, "loss": 0.10416984558105469, "step": 1627 }, { "epoch": 0.22685152929701108, "grad_norm": 0.49010762572288513, "learning_rate": 3.730083481957372e-05, "loss": 0.07461071014404297, "step": 1628 }, { "epoch": 0.22699087298822546, "grad_norm": 1.408625602722168, "learning_rate": 3.729610330529598e-05, "loss": 0.12647628784179688, "step": 1629 }, { "epoch": 0.22713021667943983, "grad_norm": 0.26831820607185364, "learning_rate": 3.7291367948218964e-05, "loss": 0.06605100631713867, "step": 1630 }, { "epoch": 0.2272695603706542, "grad_norm": 0.3455255627632141, "learning_rate": 3.7286628749394754e-05, "loss": 0.06838417053222656, "step": 1631 }, { "epoch": 0.2274089040618686, "grad_norm": 0.8549287915229797, "learning_rate": 3.72818857098763e-05, "loss": 0.10059928894042969, "step": 1632 }, { "epoch": 0.22754824775308297, "grad_norm": 1.1519582271575928, "learning_rate": 3.727713883071739e-05, "loss": 0.106719970703125, "step": 1633 }, { "epoch": 0.22768759144429737, "grad_norm": 1.2846801280975342, "learning_rate": 3.727238811297268e-05, "loss": 0.16149425506591797, "step": 1634 }, { "epoch": 0.22782693513551175, "grad_norm": 0.37317559123039246, "learning_rate": 3.7267633557697666e-05, "loss": 0.08448028564453125, "step": 1635 }, { "epoch": 0.22796627882672613, "grad_norm": 0.994754433631897, "learning_rate": 3.72628751659487e-05, "loss": 0.1497478485107422, "step": 1636 }, { "epoch": 0.2281056225179405, "grad_norm": 0.520328164100647, "learning_rate": 3.725811293878299e-05, "loss": 0.09092330932617188, "step": 1637 }, { "epoch": 0.22824496620915488, "grad_norm": 0.4498412311077118, "learning_rate": 3.72533468772586e-05, "loss": 0.11457443237304688, "step": 1638 }, { "epoch": 0.22838430990036926, "grad_norm": 1.1230679750442505, "learning_rate": 3.724857698243443e-05, "loss": 0.1331329345703125, "step": 1639 }, { "epoch": 0.22852365359158364, "grad_norm": 0.7834765315055847, "learning_rate": 3.724380325537024e-05, "loss": 0.12019729614257812, "step": 1640 }, { "epoch": 0.22866299728279801, "grad_norm": 0.4696687161922455, "learning_rate": 3.723902569712666e-05, "loss": 0.09833908081054688, "step": 1641 }, { "epoch": 0.2288023409740124, "grad_norm": 0.4047260880470276, "learning_rate": 3.7234244308765136e-05, "loss": 0.10163688659667969, "step": 1642 }, { "epoch": 0.22894168466522677, "grad_norm": 0.31232279539108276, "learning_rate": 3.7229459091348e-05, "loss": 0.06556320190429688, "step": 1643 }, { "epoch": 0.22908102835644117, "grad_norm": 0.43527713418006897, "learning_rate": 3.7224670045938406e-05, "loss": 0.08668136596679688, "step": 1644 }, { "epoch": 0.22922037204765555, "grad_norm": 0.5505969524383545, "learning_rate": 3.721987717360037e-05, "loss": 0.08183765411376953, "step": 1645 }, { "epoch": 0.22935971573886993, "grad_norm": 1.3112683296203613, "learning_rate": 3.721508047539877e-05, "loss": 0.15229225158691406, "step": 1646 }, { "epoch": 0.2294990594300843, "grad_norm": 0.725774884223938, "learning_rate": 3.72102799523993e-05, "loss": 0.10330009460449219, "step": 1647 }, { "epoch": 0.22963840312129868, "grad_norm": 1.221149206161499, "learning_rate": 3.720547560566855e-05, "loss": 0.13100051879882812, "step": 1648 }, { "epoch": 0.22977774681251306, "grad_norm": 0.651091456413269, "learning_rate": 3.720066743627393e-05, "loss": 0.09765815734863281, "step": 1649 }, { "epoch": 0.22991709050372744, "grad_norm": 0.5582399368286133, "learning_rate": 3.719585544528371e-05, "loss": 0.10725593566894531, "step": 1650 }, { "epoch": 0.23005643419494182, "grad_norm": 0.3642353415489197, "learning_rate": 3.719103963376699e-05, "loss": 0.07566070556640625, "step": 1651 }, { "epoch": 0.2301957778861562, "grad_norm": 0.9978375434875488, "learning_rate": 3.718622000279374e-05, "loss": 0.0920867919921875, "step": 1652 }, { "epoch": 0.23033512157737057, "grad_norm": 0.8710653781890869, "learning_rate": 3.718139655343477e-05, "loss": 0.09995841979980469, "step": 1653 }, { "epoch": 0.23047446526858498, "grad_norm": 0.6143290400505066, "learning_rate": 3.717656928676175e-05, "loss": 0.08996772766113281, "step": 1654 }, { "epoch": 0.23061380895979935, "grad_norm": 0.30669403076171875, "learning_rate": 3.7171738203847185e-05, "loss": 0.07484626770019531, "step": 1655 }, { "epoch": 0.23075315265101373, "grad_norm": 1.0691511631011963, "learning_rate": 3.7166903305764426e-05, "loss": 0.15988540649414062, "step": 1656 }, { "epoch": 0.2308924963422281, "grad_norm": 1.2117648124694824, "learning_rate": 3.716206459358768e-05, "loss": 0.09419822692871094, "step": 1657 }, { "epoch": 0.2310318400334425, "grad_norm": 0.63372802734375, "learning_rate": 3.7157222068392e-05, "loss": 0.09680366516113281, "step": 1658 }, { "epoch": 0.23117118372465686, "grad_norm": 0.8234993815422058, "learning_rate": 3.715237573125328e-05, "loss": 0.07779693603515625, "step": 1659 }, { "epoch": 0.23131052741587124, "grad_norm": 0.9243489503860474, "learning_rate": 3.7147525583248264e-05, "loss": 0.14867782592773438, "step": 1660 }, { "epoch": 0.23144987110708562, "grad_norm": 0.2812195122241974, "learning_rate": 3.714267162545455e-05, "loss": 0.0814666748046875, "step": 1661 }, { "epoch": 0.2315892147983, "grad_norm": 0.42775958776474, "learning_rate": 3.7137813858950576e-05, "loss": 0.08056831359863281, "step": 1662 }, { "epoch": 0.23172855848951437, "grad_norm": 0.6298354864120483, "learning_rate": 3.713295228481563e-05, "loss": 0.10430717468261719, "step": 1663 }, { "epoch": 0.23186790218072878, "grad_norm": 1.0439720153808594, "learning_rate": 3.712808690412983e-05, "loss": 0.09417533874511719, "step": 1664 }, { "epoch": 0.23200724587194316, "grad_norm": 0.9177694320678711, "learning_rate": 3.7123217717974166e-05, "loss": 0.11417770385742188, "step": 1665 }, { "epoch": 0.23214658956315753, "grad_norm": 0.6626611351966858, "learning_rate": 3.711834472743045e-05, "loss": 0.10962867736816406, "step": 1666 }, { "epoch": 0.2322859332543719, "grad_norm": 0.5949239730834961, "learning_rate": 3.7113467933581364e-05, "loss": 0.08469581604003906, "step": 1667 }, { "epoch": 0.2324252769455863, "grad_norm": 0.9350483417510986, "learning_rate": 3.7108587337510405e-05, "loss": 0.13500404357910156, "step": 1668 }, { "epoch": 0.23256462063680067, "grad_norm": 0.43504223227500916, "learning_rate": 3.7103702940301934e-05, "loss": 0.08293819427490234, "step": 1669 }, { "epoch": 0.23270396432801504, "grad_norm": 0.43150565028190613, "learning_rate": 3.709881474304115e-05, "loss": 0.05774497985839844, "step": 1670 }, { "epoch": 0.23284330801922942, "grad_norm": 0.5862683653831482, "learning_rate": 3.7093922746814104e-05, "loss": 0.09478378295898438, "step": 1671 }, { "epoch": 0.2329826517104438, "grad_norm": 0.41796886920928955, "learning_rate": 3.7089026952707695e-05, "loss": 0.06665515899658203, "step": 1672 }, { "epoch": 0.23312199540165818, "grad_norm": 0.7624088525772095, "learning_rate": 3.7084127361809636e-05, "loss": 0.18622589111328125, "step": 1673 }, { "epoch": 0.23326133909287258, "grad_norm": 0.45114749670028687, "learning_rate": 3.707922397520852e-05, "loss": 0.07932472229003906, "step": 1674 }, { "epoch": 0.23340068278408696, "grad_norm": 0.6560578346252441, "learning_rate": 3.707431679399375e-05, "loss": 0.11545562744140625, "step": 1675 }, { "epoch": 0.23354002647530134, "grad_norm": 0.5277032852172852, "learning_rate": 3.7069405819255615e-05, "loss": 0.1023702621459961, "step": 1676 }, { "epoch": 0.23367937016651572, "grad_norm": 0.47121530771255493, "learning_rate": 3.706449105208521e-05, "loss": 0.09769821166992188, "step": 1677 }, { "epoch": 0.2338187138577301, "grad_norm": 0.6647270917892456, "learning_rate": 3.705957249357447e-05, "loss": 0.10782814025878906, "step": 1678 }, { "epoch": 0.23395805754894447, "grad_norm": 0.5387539863586426, "learning_rate": 3.70546501448162e-05, "loss": 0.11089038848876953, "step": 1679 }, { "epoch": 0.23409740124015885, "grad_norm": 0.6609600782394409, "learning_rate": 3.704972400690404e-05, "loss": 0.11353492736816406, "step": 1680 }, { "epoch": 0.23423674493137323, "grad_norm": 0.5125299096107483, "learning_rate": 3.704479408093245e-05, "loss": 0.12015151977539062, "step": 1681 }, { "epoch": 0.2343760886225876, "grad_norm": 0.3488992154598236, "learning_rate": 3.703986036799676e-05, "loss": 0.08428764343261719, "step": 1682 }, { "epoch": 0.23451543231380198, "grad_norm": 0.33674168586730957, "learning_rate": 3.7034922869193125e-05, "loss": 0.0851583480834961, "step": 1683 }, { "epoch": 0.23465477600501639, "grad_norm": 0.9553260207176208, "learning_rate": 3.702998158561854e-05, "loss": 0.11856651306152344, "step": 1684 }, { "epoch": 0.23479411969623076, "grad_norm": 0.35411953926086426, "learning_rate": 3.7025036518370846e-05, "loss": 0.09574508666992188, "step": 1685 }, { "epoch": 0.23493346338744514, "grad_norm": 0.6003347039222717, "learning_rate": 3.7020087668548725e-05, "loss": 0.10250091552734375, "step": 1686 }, { "epoch": 0.23507280707865952, "grad_norm": 0.44941961765289307, "learning_rate": 3.7015135037251696e-05, "loss": 0.11246109008789062, "step": 1687 }, { "epoch": 0.2352121507698739, "grad_norm": 0.7339297533035278, "learning_rate": 3.7010178625580134e-05, "loss": 0.09427452087402344, "step": 1688 }, { "epoch": 0.23535149446108827, "grad_norm": 0.7725930213928223, "learning_rate": 3.700521843463522e-05, "loss": 0.10149765014648438, "step": 1689 }, { "epoch": 0.23549083815230265, "grad_norm": 0.6277204155921936, "learning_rate": 3.7000254465519014e-05, "loss": 0.12342453002929688, "step": 1690 }, { "epoch": 0.23563018184351703, "grad_norm": 0.8732476830482483, "learning_rate": 3.6995286719334385e-05, "loss": 0.12891197204589844, "step": 1691 }, { "epoch": 0.2357695255347314, "grad_norm": 0.5717354416847229, "learning_rate": 3.6990315197185054e-05, "loss": 0.09356117248535156, "step": 1692 }, { "epoch": 0.23590886922594578, "grad_norm": 0.6999872922897339, "learning_rate": 3.6985339900175584e-05, "loss": 0.1067495346069336, "step": 1693 }, { "epoch": 0.2360482129171602, "grad_norm": 0.8686468005180359, "learning_rate": 3.698036082941137e-05, "loss": 0.11345100402832031, "step": 1694 }, { "epoch": 0.23618755660837457, "grad_norm": 0.7100614905357361, "learning_rate": 3.6975377985998645e-05, "loss": 0.10672760009765625, "step": 1695 }, { "epoch": 0.23632690029958894, "grad_norm": 0.9861154556274414, "learning_rate": 3.697039137104449e-05, "loss": 0.10477638244628906, "step": 1696 }, { "epoch": 0.23646624399080332, "grad_norm": 1.8330574035644531, "learning_rate": 3.696540098565681e-05, "loss": 0.14670944213867188, "step": 1697 }, { "epoch": 0.2366055876820177, "grad_norm": 0.48330506682395935, "learning_rate": 3.696040683094436e-05, "loss": 0.1071624755859375, "step": 1698 }, { "epoch": 0.23674493137323208, "grad_norm": 0.93628990650177, "learning_rate": 3.695540890801672e-05, "loss": 0.1279582977294922, "step": 1699 }, { "epoch": 0.23688427506444645, "grad_norm": 0.6754939556121826, "learning_rate": 3.6950407217984326e-05, "loss": 0.07445335388183594, "step": 1700 }, { "epoch": 0.23702361875566083, "grad_norm": 1.127946376800537, "learning_rate": 3.694540176195843e-05, "loss": 0.11805343627929688, "step": 1701 }, { "epoch": 0.2371629624468752, "grad_norm": 0.5287918448448181, "learning_rate": 3.694039254105113e-05, "loss": 0.0856943130493164, "step": 1702 }, { "epoch": 0.23730230613808959, "grad_norm": 0.4176771640777588, "learning_rate": 3.693537955637537e-05, "loss": 0.09712791442871094, "step": 1703 }, { "epoch": 0.237441649829304, "grad_norm": 0.418195903301239, "learning_rate": 3.6930362809044906e-05, "loss": 0.10551071166992188, "step": 1704 }, { "epoch": 0.23758099352051837, "grad_norm": 0.3771388828754425, "learning_rate": 3.692534230017436e-05, "loss": 0.1008758544921875, "step": 1705 }, { "epoch": 0.23772033721173275, "grad_norm": 1.5284042358398438, "learning_rate": 3.692031803087916e-05, "loss": 0.11960411071777344, "step": 1706 }, { "epoch": 0.23785968090294712, "grad_norm": 0.515853762626648, "learning_rate": 3.691529000227559e-05, "loss": 0.11124038696289062, "step": 1707 }, { "epoch": 0.2379990245941615, "grad_norm": 0.6337981224060059, "learning_rate": 3.691025821548077e-05, "loss": 0.10515499114990234, "step": 1708 }, { "epoch": 0.23813836828537588, "grad_norm": 0.6571255922317505, "learning_rate": 3.6905222671612634e-05, "loss": 0.11510848999023438, "step": 1709 }, { "epoch": 0.23827771197659026, "grad_norm": 0.4082661271095276, "learning_rate": 3.6900183371789984e-05, "loss": 0.08769607543945312, "step": 1710 }, { "epoch": 0.23841705566780463, "grad_norm": 0.4568444490432739, "learning_rate": 3.689514031713242e-05, "loss": 0.07398223876953125, "step": 1711 }, { "epoch": 0.238556399359019, "grad_norm": 0.41724255681037903, "learning_rate": 3.68900935087604e-05, "loss": 0.08727788925170898, "step": 1712 }, { "epoch": 0.2386957430502334, "grad_norm": 0.29547029733657837, "learning_rate": 3.688504294779521e-05, "loss": 0.08379745483398438, "step": 1713 }, { "epoch": 0.2388350867414478, "grad_norm": 0.36507725715637207, "learning_rate": 3.687998863535897e-05, "loss": 0.093017578125, "step": 1714 }, { "epoch": 0.23897443043266217, "grad_norm": 0.9347231388092041, "learning_rate": 3.687493057257464e-05, "loss": 0.10626411437988281, "step": 1715 }, { "epoch": 0.23911377412387655, "grad_norm": 0.30378079414367676, "learning_rate": 3.686986876056599e-05, "loss": 0.07088088989257812, "step": 1716 }, { "epoch": 0.23925311781509093, "grad_norm": 0.40904057025909424, "learning_rate": 3.6864803200457646e-05, "loss": 0.08650398254394531, "step": 1717 }, { "epoch": 0.2393924615063053, "grad_norm": 0.5277330279350281, "learning_rate": 3.685973389337506e-05, "loss": 0.10448455810546875, "step": 1718 }, { "epoch": 0.23953180519751968, "grad_norm": 0.7666447162628174, "learning_rate": 3.6854660840444524e-05, "loss": 0.11308670043945312, "step": 1719 }, { "epoch": 0.23967114888873406, "grad_norm": 0.959380030632019, "learning_rate": 3.6849584042793145e-05, "loss": 0.10850334167480469, "step": 1720 }, { "epoch": 0.23981049257994844, "grad_norm": 0.3588741719722748, "learning_rate": 3.6844503501548866e-05, "loss": 0.06542205810546875, "step": 1721 }, { "epoch": 0.23994983627116281, "grad_norm": 0.32009726762771606, "learning_rate": 3.6839419217840486e-05, "loss": 0.06527137756347656, "step": 1722 }, { "epoch": 0.2400891799623772, "grad_norm": 1.0041871070861816, "learning_rate": 3.6834331192797606e-05, "loss": 0.1396961212158203, "step": 1723 }, { "epoch": 0.2402285236535916, "grad_norm": 0.7159838080406189, "learning_rate": 3.682923942755066e-05, "loss": 0.10334014892578125, "step": 1724 }, { "epoch": 0.24036786734480597, "grad_norm": 0.5270859003067017, "learning_rate": 3.6824143923230936e-05, "loss": 0.11982345581054688, "step": 1725 }, { "epoch": 0.24050721103602035, "grad_norm": 0.3235551118850708, "learning_rate": 3.681904468097054e-05, "loss": 0.08557510375976562, "step": 1726 }, { "epoch": 0.24064655472723473, "grad_norm": 0.3953123688697815, "learning_rate": 3.681394170190239e-05, "loss": 0.08114433288574219, "step": 1727 }, { "epoch": 0.2407858984184491, "grad_norm": 0.7354132533073425, "learning_rate": 3.6808834987160276e-05, "loss": 0.09092140197753906, "step": 1728 }, { "epoch": 0.24092524210966348, "grad_norm": 0.40588700771331787, "learning_rate": 3.680372453787877e-05, "loss": 0.09728336334228516, "step": 1729 }, { "epoch": 0.24106458580087786, "grad_norm": 0.314870148897171, "learning_rate": 3.679861035519331e-05, "loss": 0.08397102355957031, "step": 1730 }, { "epoch": 0.24120392949209224, "grad_norm": 0.7342545986175537, "learning_rate": 3.679349244024015e-05, "loss": 0.09243202209472656, "step": 1731 }, { "epoch": 0.24134327318330662, "grad_norm": 1.2203713655471802, "learning_rate": 3.6788370794156366e-05, "loss": 0.14073753356933594, "step": 1732 }, { "epoch": 0.241482616874521, "grad_norm": 0.954064667224884, "learning_rate": 3.678324541807988e-05, "loss": 0.140106201171875, "step": 1733 }, { "epoch": 0.2416219605657354, "grad_norm": 0.49176448583602905, "learning_rate": 3.677811631314943e-05, "loss": 0.10079383850097656, "step": 1734 }, { "epoch": 0.24176130425694978, "grad_norm": 0.4475259482860565, "learning_rate": 3.677298348050459e-05, "loss": 0.09651851654052734, "step": 1735 }, { "epoch": 0.24190064794816415, "grad_norm": 0.616462767124176, "learning_rate": 3.676784692128575e-05, "loss": 0.09023666381835938, "step": 1736 }, { "epoch": 0.24203999163937853, "grad_norm": 0.7639544606208801, "learning_rate": 3.676270663663414e-05, "loss": 0.09742546081542969, "step": 1737 }, { "epoch": 0.2421793353305929, "grad_norm": 0.5675891041755676, "learning_rate": 3.675756262769182e-05, "loss": 0.07899665832519531, "step": 1738 }, { "epoch": 0.2423186790218073, "grad_norm": 0.9190603494644165, "learning_rate": 3.6752414895601656e-05, "loss": 0.11309814453125, "step": 1739 }, { "epoch": 0.24245802271302166, "grad_norm": 0.39199456572532654, "learning_rate": 3.674726344150737e-05, "loss": 0.0918426513671875, "step": 1740 }, { "epoch": 0.24259736640423604, "grad_norm": 0.7018279433250427, "learning_rate": 3.67421082665535e-05, "loss": 0.12424945831298828, "step": 1741 }, { "epoch": 0.24273671009545042, "grad_norm": 0.6182534694671631, "learning_rate": 3.6736949371885395e-05, "loss": 0.08600616455078125, "step": 1742 }, { "epoch": 0.2428760537866648, "grad_norm": 0.6947794556617737, "learning_rate": 3.6731786758649255e-05, "loss": 0.11729240417480469, "step": 1743 }, { "epoch": 0.2430153974778792, "grad_norm": 1.1020642518997192, "learning_rate": 3.672662042799209e-05, "loss": 0.15318679809570312, "step": 1744 }, { "epoch": 0.24315474116909358, "grad_norm": 0.41621577739715576, "learning_rate": 3.672145038106174e-05, "loss": 0.06282329559326172, "step": 1745 }, { "epoch": 0.24329408486030796, "grad_norm": 0.9790002107620239, "learning_rate": 3.6716276619006874e-05, "loss": 0.10218238830566406, "step": 1746 }, { "epoch": 0.24343342855152234, "grad_norm": 0.5793771147727966, "learning_rate": 3.671109914297698e-05, "loss": 0.08719062805175781, "step": 1747 }, { "epoch": 0.2435727722427367, "grad_norm": 0.39084383845329285, "learning_rate": 3.670591795412238e-05, "loss": 0.08290958404541016, "step": 1748 }, { "epoch": 0.2437121159339511, "grad_norm": 0.6570208072662354, "learning_rate": 3.670073305359421e-05, "loss": 0.09575653076171875, "step": 1749 }, { "epoch": 0.24385145962516547, "grad_norm": 0.9504016637802124, "learning_rate": 3.669554444254444e-05, "loss": 0.08401107788085938, "step": 1750 }, { "epoch": 0.24399080331637985, "grad_norm": 1.0595790147781372, "learning_rate": 3.6690352122125867e-05, "loss": 0.1108551025390625, "step": 1751 }, { "epoch": 0.24413014700759422, "grad_norm": 0.9910376071929932, "learning_rate": 3.668515609349209e-05, "loss": 0.13036346435546875, "step": 1752 }, { "epoch": 0.2442694906988086, "grad_norm": 0.5814268589019775, "learning_rate": 3.667995635779756e-05, "loss": 0.11112785339355469, "step": 1753 }, { "epoch": 0.244408834390023, "grad_norm": 0.5021295547485352, "learning_rate": 3.667475291619754e-05, "loss": 0.0863189697265625, "step": 1754 }, { "epoch": 0.24454817808123738, "grad_norm": 0.46807432174682617, "learning_rate": 3.6669545769848115e-05, "loss": 0.10615730285644531, "step": 1755 }, { "epoch": 0.24468752177245176, "grad_norm": 0.5841719508171082, "learning_rate": 3.666433491990619e-05, "loss": 0.08770942687988281, "step": 1756 }, { "epoch": 0.24482686546366614, "grad_norm": 0.7530962228775024, "learning_rate": 3.66591203675295e-05, "loss": 0.0893707275390625, "step": 1757 }, { "epoch": 0.24496620915488052, "grad_norm": 0.8663804531097412, "learning_rate": 3.665390211387659e-05, "loss": 0.09127616882324219, "step": 1758 }, { "epoch": 0.2451055528460949, "grad_norm": 0.6627533435821533, "learning_rate": 3.664868016010686e-05, "loss": 0.07779502868652344, "step": 1759 }, { "epoch": 0.24524489653730927, "grad_norm": 0.9758980870246887, "learning_rate": 3.664345450738048e-05, "loss": 0.1236114501953125, "step": 1760 }, { "epoch": 0.24538424022852365, "grad_norm": 0.49459898471832275, "learning_rate": 3.6638225156858494e-05, "loss": 0.11063766479492188, "step": 1761 }, { "epoch": 0.24552358391973803, "grad_norm": 0.4179801046848297, "learning_rate": 3.663299210970273e-05, "loss": 0.10082244873046875, "step": 1762 }, { "epoch": 0.2456629276109524, "grad_norm": 0.9350152611732483, "learning_rate": 3.662775536707586e-05, "loss": 0.1270885467529297, "step": 1763 }, { "epoch": 0.2458022713021668, "grad_norm": 0.6197609305381775, "learning_rate": 3.662251493014137e-05, "loss": 0.07829666137695312, "step": 1764 }, { "epoch": 0.24594161499338119, "grad_norm": 0.8590683341026306, "learning_rate": 3.661727080006356e-05, "loss": 0.09772300720214844, "step": 1765 }, { "epoch": 0.24608095868459556, "grad_norm": 0.7772808074951172, "learning_rate": 3.6612022978007546e-05, "loss": 0.11162948608398438, "step": 1766 }, { "epoch": 0.24622030237580994, "grad_norm": 0.6488801836967468, "learning_rate": 3.66067714651393e-05, "loss": 0.1325855255126953, "step": 1767 }, { "epoch": 0.24635964606702432, "grad_norm": 0.35372066497802734, "learning_rate": 3.6601516262625556e-05, "loss": 0.07141876220703125, "step": 1768 }, { "epoch": 0.2464989897582387, "grad_norm": 0.9762297868728638, "learning_rate": 3.6596257371633926e-05, "loss": 0.10553169250488281, "step": 1769 }, { "epoch": 0.24663833344945307, "grad_norm": 0.7614570260047913, "learning_rate": 3.659099479333281e-05, "loss": 0.11890792846679688, "step": 1770 }, { "epoch": 0.24677767714066745, "grad_norm": 0.699006199836731, "learning_rate": 3.658572852889143e-05, "loss": 0.10899066925048828, "step": 1771 }, { "epoch": 0.24691702083188183, "grad_norm": 0.6220634579658508, "learning_rate": 3.658045857947983e-05, "loss": 0.10459518432617188, "step": 1772 }, { "epoch": 0.2470563645230962, "grad_norm": 1.01050865650177, "learning_rate": 3.657518494626887e-05, "loss": 0.13403701782226562, "step": 1773 }, { "epoch": 0.24719570821431058, "grad_norm": 0.4430351257324219, "learning_rate": 3.6569907630430234e-05, "loss": 0.102386474609375, "step": 1774 }, { "epoch": 0.247335051905525, "grad_norm": 0.29094040393829346, "learning_rate": 3.6564626633136416e-05, "loss": 0.0772705078125, "step": 1775 }, { "epoch": 0.24747439559673937, "grad_norm": 0.4308490455150604, "learning_rate": 3.6559341955560744e-05, "loss": 0.08751487731933594, "step": 1776 }, { "epoch": 0.24761373928795374, "grad_norm": 0.9212446212768555, "learning_rate": 3.655405359887734e-05, "loss": 0.126312255859375, "step": 1777 }, { "epoch": 0.24775308297916812, "grad_norm": 0.8355321288108826, "learning_rate": 3.654876156426116e-05, "loss": 0.11662101745605469, "step": 1778 }, { "epoch": 0.2478924266703825, "grad_norm": 0.4076789617538452, "learning_rate": 3.6543465852887975e-05, "loss": 0.11370086669921875, "step": 1779 }, { "epoch": 0.24803177036159688, "grad_norm": 0.6194537281990051, "learning_rate": 3.6538166465934375e-05, "loss": 0.1217193603515625, "step": 1780 }, { "epoch": 0.24817111405281125, "grad_norm": 0.678620457649231, "learning_rate": 3.653286340457776e-05, "loss": 0.11448097229003906, "step": 1781 }, { "epoch": 0.24831045774402563, "grad_norm": 0.3153510093688965, "learning_rate": 3.6527556669996345e-05, "loss": 0.07822227478027344, "step": 1782 }, { "epoch": 0.24844980143524, "grad_norm": 0.8410484790802002, "learning_rate": 3.6522246263369174e-05, "loss": 0.11531257629394531, "step": 1783 }, { "epoch": 0.24858914512645439, "grad_norm": 0.5270363092422485, "learning_rate": 3.6516932185876085e-05, "loss": 0.08694648742675781, "step": 1784 }, { "epoch": 0.2487284888176688, "grad_norm": 0.7318198084831238, "learning_rate": 3.651161443869776e-05, "loss": 0.10180282592773438, "step": 1785 }, { "epoch": 0.24886783250888317, "grad_norm": 0.5747535228729248, "learning_rate": 3.650629302301567e-05, "loss": 0.11678314208984375, "step": 1786 }, { "epoch": 0.24900717620009755, "grad_norm": 0.7830781936645508, "learning_rate": 3.650096794001211e-05, "loss": 0.14970016479492188, "step": 1787 }, { "epoch": 0.24914651989131192, "grad_norm": 1.100631594657898, "learning_rate": 3.6495639190870204e-05, "loss": 0.15678787231445312, "step": 1788 }, { "epoch": 0.2492858635825263, "grad_norm": 0.4166242182254791, "learning_rate": 3.649030677677387e-05, "loss": 0.08962821960449219, "step": 1789 }, { "epoch": 0.24942520727374068, "grad_norm": 0.4716280400753021, "learning_rate": 3.648497069890785e-05, "loss": 0.09626197814941406, "step": 1790 }, { "epoch": 0.24956455096495506, "grad_norm": 0.5687309503555298, "learning_rate": 3.6479630958457696e-05, "loss": 0.08848762512207031, "step": 1791 }, { "epoch": 0.24970389465616943, "grad_norm": 0.787609338760376, "learning_rate": 3.647428755660978e-05, "loss": 0.11913871765136719, "step": 1792 }, { "epoch": 0.2498432383473838, "grad_norm": 0.6540242433547974, "learning_rate": 3.646894049455129e-05, "loss": 0.10382843017578125, "step": 1793 }, { "epoch": 0.2499825820385982, "grad_norm": 0.22238531708717346, "learning_rate": 3.646358977347021e-05, "loss": 0.06094551086425781, "step": 1794 }, { "epoch": 0.25012192572981257, "grad_norm": 0.48430097103118896, "learning_rate": 3.6458235394555345e-05, "loss": 0.07753944396972656, "step": 1795 }, { "epoch": 0.25026126942102694, "grad_norm": 0.497452974319458, "learning_rate": 3.6452877358996336e-05, "loss": 0.09486007690429688, "step": 1796 }, { "epoch": 0.2504006131122413, "grad_norm": 0.6213709115982056, "learning_rate": 3.64475156679836e-05, "loss": 0.09573936462402344, "step": 1797 }, { "epoch": 0.2505399568034557, "grad_norm": 1.040291428565979, "learning_rate": 3.644215032270838e-05, "loss": 0.1268463134765625, "step": 1798 }, { "epoch": 0.2506793004946701, "grad_norm": 0.47205644845962524, "learning_rate": 3.643678132436274e-05, "loss": 0.09928512573242188, "step": 1799 }, { "epoch": 0.2508186441858845, "grad_norm": 0.32311198115348816, "learning_rate": 3.643140867413956e-05, "loss": 0.08672714233398438, "step": 1800 }, { "epoch": 0.2509579878770989, "grad_norm": 0.2704096734523773, "learning_rate": 3.642603237323249e-05, "loss": 0.08147811889648438, "step": 1801 }, { "epoch": 0.25109733156831326, "grad_norm": 0.5769910216331482, "learning_rate": 3.6420652422836046e-05, "loss": 0.09059906005859375, "step": 1802 }, { "epoch": 0.25123667525952764, "grad_norm": 0.7045993208885193, "learning_rate": 3.641526882414553e-05, "loss": 0.1567707061767578, "step": 1803 }, { "epoch": 0.251376018950742, "grad_norm": 0.27969956398010254, "learning_rate": 3.640988157835704e-05, "loss": 0.06602287292480469, "step": 1804 }, { "epoch": 0.2515153626419564, "grad_norm": 0.4284462034702301, "learning_rate": 3.640449068666751e-05, "loss": 0.09215736389160156, "step": 1805 }, { "epoch": 0.2516547063331708, "grad_norm": 0.5937240123748779, "learning_rate": 3.639909615027468e-05, "loss": 0.09843826293945312, "step": 1806 }, { "epoch": 0.25179405002438515, "grad_norm": 0.28131163120269775, "learning_rate": 3.6393697970377074e-05, "loss": 0.07300758361816406, "step": 1807 }, { "epoch": 0.25193339371559953, "grad_norm": 0.37684229016304016, "learning_rate": 3.638829614817405e-05, "loss": 0.08911418914794922, "step": 1808 }, { "epoch": 0.2520727374068139, "grad_norm": 0.4463822841644287, "learning_rate": 3.638289068486577e-05, "loss": 0.11150932312011719, "step": 1809 }, { "epoch": 0.2522120810980283, "grad_norm": 1.3823834657669067, "learning_rate": 3.6377481581653225e-05, "loss": 0.1859283447265625, "step": 1810 }, { "epoch": 0.25235142478924266, "grad_norm": 0.5966286659240723, "learning_rate": 3.637206883973816e-05, "loss": 0.163330078125, "step": 1811 }, { "epoch": 0.25249076848045704, "grad_norm": 0.4158175587654114, "learning_rate": 3.6366652460323186e-05, "loss": 0.09689521789550781, "step": 1812 }, { "epoch": 0.2526301121716714, "grad_norm": 0.4966145157814026, "learning_rate": 3.6361232444611695e-05, "loss": 0.08305549621582031, "step": 1813 }, { "epoch": 0.2527694558628858, "grad_norm": 0.4486822187900543, "learning_rate": 3.635580879380788e-05, "loss": 0.11853408813476562, "step": 1814 }, { "epoch": 0.25290879955410017, "grad_norm": 0.36528071761131287, "learning_rate": 3.635038150911677e-05, "loss": 0.09031486511230469, "step": 1815 }, { "epoch": 0.25304814324531455, "grad_norm": 0.7639957666397095, "learning_rate": 3.634495059174417e-05, "loss": 0.14404869079589844, "step": 1816 }, { "epoch": 0.2531874869365289, "grad_norm": 0.8938929438591003, "learning_rate": 3.633951604289671e-05, "loss": 0.14026641845703125, "step": 1817 }, { "epoch": 0.2533268306277433, "grad_norm": 0.5533015131950378, "learning_rate": 3.633407786378182e-05, "loss": 0.08431243896484375, "step": 1818 }, { "epoch": 0.2534661743189577, "grad_norm": 0.9017908573150635, "learning_rate": 3.632863605560775e-05, "loss": 0.10787582397460938, "step": 1819 }, { "epoch": 0.2536055180101721, "grad_norm": 0.8543798923492432, "learning_rate": 3.632319061958353e-05, "loss": 0.14708709716796875, "step": 1820 }, { "epoch": 0.2537448617013865, "grad_norm": 0.5239341259002686, "learning_rate": 3.631774155691902e-05, "loss": 0.1273975372314453, "step": 1821 }, { "epoch": 0.25388420539260087, "grad_norm": 0.6726720929145813, "learning_rate": 3.631228886882488e-05, "loss": 0.10326766967773438, "step": 1822 }, { "epoch": 0.25402354908381525, "grad_norm": 0.6107078790664673, "learning_rate": 3.630683255651256e-05, "loss": 0.06628799438476562, "step": 1823 }, { "epoch": 0.2541628927750296, "grad_norm": 0.7050951719284058, "learning_rate": 3.6301372621194354e-05, "loss": 0.09975624084472656, "step": 1824 }, { "epoch": 0.254302236466244, "grad_norm": 0.8883379101753235, "learning_rate": 3.6295909064083305e-05, "loss": 0.08823585510253906, "step": 1825 }, { "epoch": 0.2544415801574584, "grad_norm": 1.1860942840576172, "learning_rate": 3.629044188639331e-05, "loss": 0.12037277221679688, "step": 1826 }, { "epoch": 0.25458092384867276, "grad_norm": 0.3693036735057831, "learning_rate": 3.628497108933904e-05, "loss": 0.09196090698242188, "step": 1827 }, { "epoch": 0.25472026753988714, "grad_norm": 0.6446684002876282, "learning_rate": 3.6279496674135985e-05, "loss": 0.1194305419921875, "step": 1828 }, { "epoch": 0.2548596112311015, "grad_norm": 0.7448377013206482, "learning_rate": 3.6274018642000445e-05, "loss": 0.0910491943359375, "step": 1829 }, { "epoch": 0.2549989549223159, "grad_norm": 0.8398482799530029, "learning_rate": 3.62685369941495e-05, "loss": 0.0888671875, "step": 1830 }, { "epoch": 0.25513829861353027, "grad_norm": 0.4471357464790344, "learning_rate": 3.626305173180105e-05, "loss": 0.1293811798095703, "step": 1831 }, { "epoch": 0.25527764230474465, "grad_norm": 0.5265861749649048, "learning_rate": 3.62575628561738e-05, "loss": 0.09464073181152344, "step": 1832 }, { "epoch": 0.255416985995959, "grad_norm": 0.3279283940792084, "learning_rate": 3.6252070368487246e-05, "loss": 0.0825953483581543, "step": 1833 }, { "epoch": 0.2555563296871734, "grad_norm": 0.40153518319129944, "learning_rate": 3.62465742699617e-05, "loss": 0.09640884399414062, "step": 1834 }, { "epoch": 0.2556956733783878, "grad_norm": 0.6762891411781311, "learning_rate": 3.624107456181826e-05, "loss": 0.13918304443359375, "step": 1835 }, { "epoch": 0.25583501706960216, "grad_norm": 0.3646334707736969, "learning_rate": 3.623557124527885e-05, "loss": 0.09185600280761719, "step": 1836 }, { "epoch": 0.25597436076081653, "grad_norm": 0.38965800404548645, "learning_rate": 3.623006432156618e-05, "loss": 0.09678077697753906, "step": 1837 }, { "epoch": 0.2561137044520309, "grad_norm": 0.4134117364883423, "learning_rate": 3.6224553791903744e-05, "loss": 0.10531234741210938, "step": 1838 }, { "epoch": 0.2562530481432453, "grad_norm": 0.5846555233001709, "learning_rate": 3.621903965751588e-05, "loss": 0.0870981216430664, "step": 1839 }, { "epoch": 0.2563923918344597, "grad_norm": 0.8281081318855286, "learning_rate": 3.6213521919627675e-05, "loss": 0.1316070556640625, "step": 1840 }, { "epoch": 0.2565317355256741, "grad_norm": 0.49357369542121887, "learning_rate": 3.6208000579465075e-05, "loss": 0.08634757995605469, "step": 1841 }, { "epoch": 0.2566710792168885, "grad_norm": 0.4211686849594116, "learning_rate": 3.620247563825477e-05, "loss": 0.09738731384277344, "step": 1842 }, { "epoch": 0.25681042290810285, "grad_norm": 0.4803521931171417, "learning_rate": 3.619694709722429e-05, "loss": 0.06947898864746094, "step": 1843 }, { "epoch": 0.25694976659931723, "grad_norm": 0.5710007548332214, "learning_rate": 3.619141495760196e-05, "loss": 0.08675956726074219, "step": 1844 }, { "epoch": 0.2570891102905316, "grad_norm": 1.355593204498291, "learning_rate": 3.618587922061687e-05, "loss": 0.11847305297851562, "step": 1845 }, { "epoch": 0.257228453981746, "grad_norm": 0.44880548119544983, "learning_rate": 3.6180339887498953e-05, "loss": 0.09994697570800781, "step": 1846 }, { "epoch": 0.25736779767296036, "grad_norm": 0.7390772700309753, "learning_rate": 3.617479695947891e-05, "loss": 0.09039878845214844, "step": 1847 }, { "epoch": 0.25750714136417474, "grad_norm": 0.5073873996734619, "learning_rate": 3.616925043778826e-05, "loss": 0.09923648834228516, "step": 1848 }, { "epoch": 0.2576464850553891, "grad_norm": 0.5397263169288635, "learning_rate": 3.6163700323659327e-05, "loss": 0.10794830322265625, "step": 1849 }, { "epoch": 0.2577858287466035, "grad_norm": 0.4900830388069153, "learning_rate": 3.615814661832519e-05, "loss": 0.1214599609375, "step": 1850 }, { "epoch": 0.2579251724378179, "grad_norm": 0.5768001675605774, "learning_rate": 3.6152589323019775e-05, "loss": 0.0909423828125, "step": 1851 }, { "epoch": 0.25806451612903225, "grad_norm": 0.5526692867279053, "learning_rate": 3.614702843897779e-05, "loss": 0.08661842346191406, "step": 1852 }, { "epoch": 0.25820385982024663, "grad_norm": 0.38397684693336487, "learning_rate": 3.6141463967434715e-05, "loss": 0.08755874633789062, "step": 1853 }, { "epoch": 0.258343203511461, "grad_norm": 0.6625437140464783, "learning_rate": 3.613589590962687e-05, "loss": 0.13588714599609375, "step": 1854 }, { "epoch": 0.2584825472026754, "grad_norm": 0.3794143795967102, "learning_rate": 3.6130324266791344e-05, "loss": 0.07888603210449219, "step": 1855 }, { "epoch": 0.25862189089388976, "grad_norm": 0.544284999370575, "learning_rate": 3.612474904016602e-05, "loss": 0.11255073547363281, "step": 1856 }, { "epoch": 0.25876123458510414, "grad_norm": 0.5632256269454956, "learning_rate": 3.61191702309896e-05, "loss": 0.09134483337402344, "step": 1857 }, { "epoch": 0.2589005782763185, "grad_norm": 0.35321030020713806, "learning_rate": 3.611358784050157e-05, "loss": 0.08018302917480469, "step": 1858 }, { "epoch": 0.2590399219675329, "grad_norm": 0.5794420838356018, "learning_rate": 3.610800186994219e-05, "loss": 0.10342025756835938, "step": 1859 }, { "epoch": 0.2591792656587473, "grad_norm": 0.5234116911888123, "learning_rate": 3.6102412320552546e-05, "loss": 0.11651039123535156, "step": 1860 }, { "epoch": 0.2593186093499617, "grad_norm": 0.6101605892181396, "learning_rate": 3.609681919357451e-05, "loss": 0.09016799926757812, "step": 1861 }, { "epoch": 0.2594579530411761, "grad_norm": 0.46730512380599976, "learning_rate": 3.609122249025075e-05, "loss": 0.09749794006347656, "step": 1862 }, { "epoch": 0.25959729673239046, "grad_norm": 0.2993644177913666, "learning_rate": 3.608562221182472e-05, "loss": 0.08300399780273438, "step": 1863 }, { "epoch": 0.25973664042360484, "grad_norm": 0.32802021503448486, "learning_rate": 3.608001835954067e-05, "loss": 0.09124946594238281, "step": 1864 }, { "epoch": 0.2598759841148192, "grad_norm": 0.4572458565235138, "learning_rate": 3.607441093464366e-05, "loss": 0.08585739135742188, "step": 1865 }, { "epoch": 0.2600153278060336, "grad_norm": 0.411652147769928, "learning_rate": 3.606879993837952e-05, "loss": 0.08230209350585938, "step": 1866 }, { "epoch": 0.26015467149724797, "grad_norm": 0.397482305765152, "learning_rate": 3.60631853719949e-05, "loss": 0.06582069396972656, "step": 1867 }, { "epoch": 0.26029401518846235, "grad_norm": 0.7697508335113525, "learning_rate": 3.6057567236737206e-05, "loss": 0.09256935119628906, "step": 1868 }, { "epoch": 0.2604333588796767, "grad_norm": 0.34800007939338684, "learning_rate": 3.605194553385468e-05, "loss": 0.08139228820800781, "step": 1869 }, { "epoch": 0.2605727025708911, "grad_norm": 0.4215603172779083, "learning_rate": 3.6046320264596324e-05, "loss": 0.08046340942382812, "step": 1870 }, { "epoch": 0.2607120462621055, "grad_norm": 1.389743447303772, "learning_rate": 3.6040691430211955e-05, "loss": 0.1567974090576172, "step": 1871 }, { "epoch": 0.26085138995331986, "grad_norm": 0.303472638130188, "learning_rate": 3.603505903195217e-05, "loss": 0.07074832916259766, "step": 1872 }, { "epoch": 0.26099073364453423, "grad_norm": 0.5202268958091736, "learning_rate": 3.602942307106834e-05, "loss": 0.09238624572753906, "step": 1873 }, { "epoch": 0.2611300773357486, "grad_norm": 0.9400713443756104, "learning_rate": 3.602378354881267e-05, "loss": 0.1450824737548828, "step": 1874 }, { "epoch": 0.261269421026963, "grad_norm": 0.4340181350708008, "learning_rate": 3.601814046643813e-05, "loss": 0.09440040588378906, "step": 1875 }, { "epoch": 0.26140876471817737, "grad_norm": 0.5446973443031311, "learning_rate": 3.6012493825198466e-05, "loss": 0.14310836791992188, "step": 1876 }, { "epoch": 0.26154810840939174, "grad_norm": 0.8607577085494995, "learning_rate": 3.600684362634826e-05, "loss": 0.134735107421875, "step": 1877 }, { "epoch": 0.2616874521006061, "grad_norm": 0.28477218747138977, "learning_rate": 3.600118987114283e-05, "loss": 0.08120393753051758, "step": 1878 }, { "epoch": 0.2618267957918205, "grad_norm": 1.1631417274475098, "learning_rate": 3.599553256083833e-05, "loss": 0.1341075897216797, "step": 1879 }, { "epoch": 0.26196613948303493, "grad_norm": 0.4370209574699402, "learning_rate": 3.598987169669168e-05, "loss": 0.08785724639892578, "step": 1880 }, { "epoch": 0.2621054831742493, "grad_norm": 0.5547659993171692, "learning_rate": 3.598420727996059e-05, "loss": 0.1258544921875, "step": 1881 }, { "epoch": 0.2622448268654637, "grad_norm": 0.395129919052124, "learning_rate": 3.597853931190357e-05, "loss": 0.07674217224121094, "step": 1882 }, { "epoch": 0.26238417055667806, "grad_norm": 0.5151911973953247, "learning_rate": 3.597286779377991e-05, "loss": 0.09795475006103516, "step": 1883 }, { "epoch": 0.26252351424789244, "grad_norm": 0.3010560870170593, "learning_rate": 3.5967192726849694e-05, "loss": 0.07511711120605469, "step": 1884 }, { "epoch": 0.2626628579391068, "grad_norm": 0.26032501459121704, "learning_rate": 3.59615141123738e-05, "loss": 0.06875133514404297, "step": 1885 }, { "epoch": 0.2628022016303212, "grad_norm": 0.3008735179901123, "learning_rate": 3.5955831951613866e-05, "loss": 0.07623672485351562, "step": 1886 }, { "epoch": 0.2629415453215356, "grad_norm": 0.5896360874176025, "learning_rate": 3.595014624583235e-05, "loss": 0.096923828125, "step": 1887 }, { "epoch": 0.26308088901274995, "grad_norm": 0.5309996008872986, "learning_rate": 3.5944456996292486e-05, "loss": 0.08529853820800781, "step": 1888 }, { "epoch": 0.26322023270396433, "grad_norm": 0.2979205548763275, "learning_rate": 3.5938764204258306e-05, "loss": 0.07126903533935547, "step": 1889 }, { "epoch": 0.2633595763951787, "grad_norm": 0.3421195149421692, "learning_rate": 3.59330678709946e-05, "loss": 0.08875370025634766, "step": 1890 }, { "epoch": 0.2634989200863931, "grad_norm": 0.3483293354511261, "learning_rate": 3.5927367997766974e-05, "loss": 0.07441329956054688, "step": 1891 }, { "epoch": 0.26363826377760746, "grad_norm": 0.38319557905197144, "learning_rate": 3.592166458584181e-05, "loss": 0.06830978393554688, "step": 1892 }, { "epoch": 0.26377760746882184, "grad_norm": 0.2799524962902069, "learning_rate": 3.591595763648626e-05, "loss": 0.08103752136230469, "step": 1893 }, { "epoch": 0.2639169511600362, "grad_norm": 0.3626091480255127, "learning_rate": 3.59102471509683e-05, "loss": 0.06842613220214844, "step": 1894 }, { "epoch": 0.2640562948512506, "grad_norm": 0.3763192296028137, "learning_rate": 3.590453313055666e-05, "loss": 0.09151077270507812, "step": 1895 }, { "epoch": 0.264195638542465, "grad_norm": 0.4718570113182068, "learning_rate": 3.589881557652087e-05, "loss": 0.11451530456542969, "step": 1896 }, { "epoch": 0.26433498223367935, "grad_norm": 0.7787143588066101, "learning_rate": 3.5893094490131224e-05, "loss": 0.14797306060791016, "step": 1897 }, { "epoch": 0.2644743259248937, "grad_norm": 0.5604667067527771, "learning_rate": 3.588736987265884e-05, "loss": 0.1374530792236328, "step": 1898 }, { "epoch": 0.2646136696161081, "grad_norm": 0.912446141242981, "learning_rate": 3.588164172537557e-05, "loss": 0.16248512268066406, "step": 1899 }, { "epoch": 0.26475301330732254, "grad_norm": 0.32521966099739075, "learning_rate": 3.58759100495541e-05, "loss": 0.06964683532714844, "step": 1900 }, { "epoch": 0.2648923569985369, "grad_norm": 0.6443588137626648, "learning_rate": 3.587017484646787e-05, "loss": 0.1448040008544922, "step": 1901 }, { "epoch": 0.2650317006897513, "grad_norm": 0.49545955657958984, "learning_rate": 3.586443611739111e-05, "loss": 0.1255340576171875, "step": 1902 }, { "epoch": 0.26517104438096567, "grad_norm": 0.5963937640190125, "learning_rate": 3.585869386359884e-05, "loss": 0.09311866760253906, "step": 1903 }, { "epoch": 0.26531038807218005, "grad_norm": 0.5033314824104309, "learning_rate": 3.5852948086366855e-05, "loss": 0.09543323516845703, "step": 1904 }, { "epoch": 0.2654497317633944, "grad_norm": 0.3215762674808502, "learning_rate": 3.584719878697173e-05, "loss": 0.07210159301757812, "step": 1905 }, { "epoch": 0.2655890754546088, "grad_norm": 0.5398496389389038, "learning_rate": 3.5841445966690834e-05, "loss": 0.10020923614501953, "step": 1906 }, { "epoch": 0.2657284191458232, "grad_norm": 0.4440378248691559, "learning_rate": 3.583568962680231e-05, "loss": 0.10395622253417969, "step": 1907 }, { "epoch": 0.26586776283703756, "grad_norm": 0.36795106530189514, "learning_rate": 3.5829929768585086e-05, "loss": 0.09747314453125, "step": 1908 }, { "epoch": 0.26600710652825194, "grad_norm": 0.3598285913467407, "learning_rate": 3.582416639331886e-05, "loss": 0.09177112579345703, "step": 1909 }, { "epoch": 0.2661464502194663, "grad_norm": 0.639314591884613, "learning_rate": 3.5818399502284154e-05, "loss": 0.14349937438964844, "step": 1910 }, { "epoch": 0.2662857939106807, "grad_norm": 0.6710983514785767, "learning_rate": 3.581262909676221e-05, "loss": 0.12166118621826172, "step": 1911 }, { "epoch": 0.26642513760189507, "grad_norm": 0.3551844656467438, "learning_rate": 3.5806855178035085e-05, "loss": 0.07610893249511719, "step": 1912 }, { "epoch": 0.26656448129310945, "grad_norm": 0.3430061936378479, "learning_rate": 3.580107774738562e-05, "loss": 0.06243133544921875, "step": 1913 }, { "epoch": 0.2667038249843238, "grad_norm": 0.37956351041793823, "learning_rate": 3.579529680609742e-05, "loss": 0.0810842514038086, "step": 1914 }, { "epoch": 0.2668431686755382, "grad_norm": 0.6682817339897156, "learning_rate": 3.578951235545489e-05, "loss": 0.1505908966064453, "step": 1915 }, { "epoch": 0.2669825123667526, "grad_norm": 0.7081629633903503, "learning_rate": 3.578372439674319e-05, "loss": 0.13405227661132812, "step": 1916 }, { "epoch": 0.26712185605796696, "grad_norm": 0.9852317571640015, "learning_rate": 3.577793293124828e-05, "loss": 0.14369964599609375, "step": 1917 }, { "epoch": 0.26726119974918133, "grad_norm": 0.4601384997367859, "learning_rate": 3.577213796025689e-05, "loss": 0.080413818359375, "step": 1918 }, { "epoch": 0.2674005434403957, "grad_norm": 0.3979266285896301, "learning_rate": 3.5766339485056524e-05, "loss": 0.07751178741455078, "step": 1919 }, { "epoch": 0.26753988713161014, "grad_norm": 0.4357164800167084, "learning_rate": 3.5760537506935475e-05, "loss": 0.11382484436035156, "step": 1920 }, { "epoch": 0.2676792308228245, "grad_norm": 0.4433880150318146, "learning_rate": 3.575473202718282e-05, "loss": 0.09043121337890625, "step": 1921 }, { "epoch": 0.2678185745140389, "grad_norm": 0.32676956057548523, "learning_rate": 3.574892304708839e-05, "loss": 0.09177398681640625, "step": 1922 }, { "epoch": 0.2679579182052533, "grad_norm": 0.6375782489776611, "learning_rate": 3.5743110567942815e-05, "loss": 0.10851478576660156, "step": 1923 }, { "epoch": 0.26809726189646765, "grad_norm": 0.8322335481643677, "learning_rate": 3.573729459103749e-05, "loss": 0.12631607055664062, "step": 1924 }, { "epoch": 0.26823660558768203, "grad_norm": 0.286443293094635, "learning_rate": 3.573147511766459e-05, "loss": 0.08636665344238281, "step": 1925 }, { "epoch": 0.2683759492788964, "grad_norm": 0.3363337218761444, "learning_rate": 3.5725652149117085e-05, "loss": 0.08887481689453125, "step": 1926 }, { "epoch": 0.2685152929701108, "grad_norm": 0.41697755455970764, "learning_rate": 3.571982568668869e-05, "loss": 0.08847975730895996, "step": 1927 }, { "epoch": 0.26865463666132516, "grad_norm": 0.47883492708206177, "learning_rate": 3.571399573167392e-05, "loss": 0.09887313842773438, "step": 1928 }, { "epoch": 0.26879398035253954, "grad_norm": 0.33878427743911743, "learning_rate": 3.570816228536806e-05, "loss": 0.07528114318847656, "step": 1929 }, { "epoch": 0.2689333240437539, "grad_norm": 0.5932261943817139, "learning_rate": 3.570232534906716e-05, "loss": 0.0956277847290039, "step": 1930 }, { "epoch": 0.2690726677349683, "grad_norm": 1.261786699295044, "learning_rate": 3.569648492406805e-05, "loss": 0.14262104034423828, "step": 1931 }, { "epoch": 0.2692120114261827, "grad_norm": 0.8415667414665222, "learning_rate": 3.569064101166835e-05, "loss": 0.12322330474853516, "step": 1932 }, { "epoch": 0.26935135511739705, "grad_norm": 0.6066407561302185, "learning_rate": 3.568479361316644e-05, "loss": 0.10774803161621094, "step": 1933 }, { "epoch": 0.26949069880861143, "grad_norm": 0.44882458448410034, "learning_rate": 3.567894272986149e-05, "loss": 0.08070564270019531, "step": 1934 }, { "epoch": 0.2696300424998258, "grad_norm": 0.3789900243282318, "learning_rate": 3.567308836305341e-05, "loss": 0.09707069396972656, "step": 1935 }, { "epoch": 0.2697693861910402, "grad_norm": 0.3694375455379486, "learning_rate": 3.566723051404292e-05, "loss": 0.09214401245117188, "step": 1936 }, { "epoch": 0.26990872988225456, "grad_norm": 0.5905842781066895, "learning_rate": 3.56613691841315e-05, "loss": 0.11587715148925781, "step": 1937 }, { "epoch": 0.27004807357346894, "grad_norm": 0.5479986071586609, "learning_rate": 3.5655504374621404e-05, "loss": 0.11419105529785156, "step": 1938 }, { "epoch": 0.2701874172646833, "grad_norm": 0.28680363297462463, "learning_rate": 3.5649636086815656e-05, "loss": 0.07530784606933594, "step": 1939 }, { "epoch": 0.27032676095589775, "grad_norm": 0.5132629871368408, "learning_rate": 3.5643764322018054e-05, "loss": 0.1015777587890625, "step": 1940 }, { "epoch": 0.2704661046471121, "grad_norm": 0.5591539740562439, "learning_rate": 3.563788908153317e-05, "loss": 0.14066696166992188, "step": 1941 }, { "epoch": 0.2706054483383265, "grad_norm": 0.18093343079090118, "learning_rate": 3.563201036666636e-05, "loss": 0.06077384948730469, "step": 1942 }, { "epoch": 0.2707447920295409, "grad_norm": 0.36041295528411865, "learning_rate": 3.562612817872373e-05, "loss": 0.10089111328125, "step": 1943 }, { "epoch": 0.27088413572075526, "grad_norm": 0.5118692517280579, "learning_rate": 3.5620242519012164e-05, "loss": 0.10852622985839844, "step": 1944 }, { "epoch": 0.27102347941196964, "grad_norm": 0.4373393654823303, "learning_rate": 3.561435338883933e-05, "loss": 0.09810447692871094, "step": 1945 }, { "epoch": 0.271162823103184, "grad_norm": 0.37534528970718384, "learning_rate": 3.560846078951366e-05, "loss": 0.10475921630859375, "step": 1946 }, { "epoch": 0.2713021667943984, "grad_norm": 1.006746530532837, "learning_rate": 3.560256472234434e-05, "loss": 0.12412261962890625, "step": 1947 }, { "epoch": 0.27144151048561277, "grad_norm": 0.4274523854255676, "learning_rate": 3.559666518864136e-05, "loss": 0.101806640625, "step": 1948 }, { "epoch": 0.27158085417682715, "grad_norm": 0.595859944820404, "learning_rate": 3.5590762189715445e-05, "loss": 0.1027994155883789, "step": 1949 }, { "epoch": 0.2717201978680415, "grad_norm": 0.8337340354919434, "learning_rate": 3.558485572687812e-05, "loss": 0.11211013793945312, "step": 1950 }, { "epoch": 0.2718595415592559, "grad_norm": 0.5119706392288208, "learning_rate": 3.557894580144166e-05, "loss": 0.07381820678710938, "step": 1951 }, { "epoch": 0.2719988852504703, "grad_norm": 0.8109081387519836, "learning_rate": 3.5573032414719116e-05, "loss": 0.1616191864013672, "step": 1952 }, { "epoch": 0.27213822894168466, "grad_norm": 0.7485285401344299, "learning_rate": 3.556711556802431e-05, "loss": 0.11441993713378906, "step": 1953 }, { "epoch": 0.27227757263289903, "grad_norm": 0.40780994296073914, "learning_rate": 3.556119526267182e-05, "loss": 0.08874130249023438, "step": 1954 }, { "epoch": 0.2724169163241134, "grad_norm": 0.3430754542350769, "learning_rate": 3.5555271499977015e-05, "loss": 0.07614707946777344, "step": 1955 }, { "epoch": 0.2725562600153278, "grad_norm": 0.28607431054115295, "learning_rate": 3.554934428125602e-05, "loss": 0.08019256591796875, "step": 1956 }, { "epoch": 0.27269560370654217, "grad_norm": 0.4054071307182312, "learning_rate": 3.554341360782572e-05, "loss": 0.10265922546386719, "step": 1957 }, { "epoch": 0.27283494739775654, "grad_norm": 0.5661808848381042, "learning_rate": 3.553747948100378e-05, "loss": 0.1094202995300293, "step": 1958 }, { "epoch": 0.2729742910889709, "grad_norm": 0.40176090598106384, "learning_rate": 3.5531541902108624e-05, "loss": 0.06313610076904297, "step": 1959 }, { "epoch": 0.27311363478018535, "grad_norm": 0.9042211771011353, "learning_rate": 3.5525600872459444e-05, "loss": 0.140472412109375, "step": 1960 }, { "epoch": 0.27325297847139973, "grad_norm": 0.30686596035957336, "learning_rate": 3.551965639337621e-05, "loss": 0.0782928466796875, "step": 1961 }, { "epoch": 0.2733923221626141, "grad_norm": 0.7576156854629517, "learning_rate": 3.5513708466179647e-05, "loss": 0.10603523254394531, "step": 1962 }, { "epoch": 0.2735316658538285, "grad_norm": 0.43598252534866333, "learning_rate": 3.550775709219125e-05, "loss": 0.11345672607421875, "step": 1963 }, { "epoch": 0.27367100954504286, "grad_norm": 0.4188361167907715, "learning_rate": 3.550180227273327e-05, "loss": 0.09899330139160156, "step": 1964 }, { "epoch": 0.27381035323625724, "grad_norm": 1.1931653022766113, "learning_rate": 3.549584400912874e-05, "loss": 0.12101173400878906, "step": 1965 }, { "epoch": 0.2739496969274716, "grad_norm": 0.6831144094467163, "learning_rate": 3.5489882302701445e-05, "loss": 0.10581588745117188, "step": 1966 }, { "epoch": 0.274089040618686, "grad_norm": 0.46209463477134705, "learning_rate": 3.548391715477594e-05, "loss": 0.08888626098632812, "step": 1967 }, { "epoch": 0.2742283843099004, "grad_norm": 0.3034350275993347, "learning_rate": 3.547794856667756e-05, "loss": 0.07871818542480469, "step": 1968 }, { "epoch": 0.27436772800111475, "grad_norm": 0.33257126808166504, "learning_rate": 3.547197653973236e-05, "loss": 0.08785438537597656, "step": 1969 }, { "epoch": 0.27450707169232913, "grad_norm": 0.44992557168006897, "learning_rate": 3.546600107526721e-05, "loss": 0.09186553955078125, "step": 1970 }, { "epoch": 0.2746464153835435, "grad_norm": 0.3259158432483673, "learning_rate": 3.546002217460971e-05, "loss": 0.07746505737304688, "step": 1971 }, { "epoch": 0.2747857590747579, "grad_norm": 0.23460717499256134, "learning_rate": 3.5454039839088256e-05, "loss": 0.07041549682617188, "step": 1972 }, { "epoch": 0.27492510276597226, "grad_norm": 0.6748077869415283, "learning_rate": 3.544805407003196e-05, "loss": 0.1273965835571289, "step": 1973 }, { "epoch": 0.27506444645718664, "grad_norm": 0.49056944251060486, "learning_rate": 3.544206486877073e-05, "loss": 0.07704544067382812, "step": 1974 }, { "epoch": 0.275203790148401, "grad_norm": 0.34405627846717834, "learning_rate": 3.543607223663524e-05, "loss": 0.08992862701416016, "step": 1975 }, { "epoch": 0.2753431338396154, "grad_norm": 0.8867620825767517, "learning_rate": 3.543007617495692e-05, "loss": 0.10330009460449219, "step": 1976 }, { "epoch": 0.2754824775308298, "grad_norm": 0.2595212459564209, "learning_rate": 3.5424076685067935e-05, "loss": 0.07495498657226562, "step": 1977 }, { "epoch": 0.27562182122204415, "grad_norm": 0.5839775204658508, "learning_rate": 3.5418073768301254e-05, "loss": 0.12533950805664062, "step": 1978 }, { "epoch": 0.2757611649132585, "grad_norm": 0.47888192534446716, "learning_rate": 3.5412067425990585e-05, "loss": 0.10244560241699219, "step": 1979 }, { "epoch": 0.2759005086044729, "grad_norm": 0.47099775075912476, "learning_rate": 3.54060576594704e-05, "loss": 0.11858177185058594, "step": 1980 }, { "epoch": 0.27603985229568734, "grad_norm": 1.0311931371688843, "learning_rate": 3.540004447007592e-05, "loss": 0.1495380401611328, "step": 1981 }, { "epoch": 0.2761791959869017, "grad_norm": 0.2896905541419983, "learning_rate": 3.5394027859143154e-05, "loss": 0.07521533966064453, "step": 1982 }, { "epoch": 0.2763185396781161, "grad_norm": 0.7052919268608093, "learning_rate": 3.5388007828008845e-05, "loss": 0.11617851257324219, "step": 1983 }, { "epoch": 0.27645788336933047, "grad_norm": 0.5670589208602905, "learning_rate": 3.5381984378010513e-05, "loss": 0.10597896575927734, "step": 1984 }, { "epoch": 0.27659722706054485, "grad_norm": 0.2592502236366272, "learning_rate": 3.5375957510486426e-05, "loss": 0.07413768768310547, "step": 1985 }, { "epoch": 0.2767365707517592, "grad_norm": 0.3200737535953522, "learning_rate": 3.5369927226775625e-05, "loss": 0.07195186614990234, "step": 1986 }, { "epoch": 0.2768759144429736, "grad_norm": 0.34834355115890503, "learning_rate": 3.536389352821789e-05, "loss": 0.09104728698730469, "step": 1987 }, { "epoch": 0.277015258134188, "grad_norm": 0.4452013671398163, "learning_rate": 3.535785641615378e-05, "loss": 0.10104560852050781, "step": 1988 }, { "epoch": 0.27715460182540236, "grad_norm": 0.33302363753318787, "learning_rate": 3.53518158919246e-05, "loss": 0.09110260009765625, "step": 1989 }, { "epoch": 0.27729394551661674, "grad_norm": 0.19890393316745758, "learning_rate": 3.5345771956872416e-05, "loss": 0.06351852416992188, "step": 1990 }, { "epoch": 0.2774332892078311, "grad_norm": 0.30907997488975525, "learning_rate": 3.5339724612340055e-05, "loss": 0.08650779724121094, "step": 1991 }, { "epoch": 0.2775726328990455, "grad_norm": 0.3389478325843811, "learning_rate": 3.5333673859671095e-05, "loss": 0.08023452758789062, "step": 1992 }, { "epoch": 0.27771197659025987, "grad_norm": 0.5167315006256104, "learning_rate": 3.532761970020987e-05, "loss": 0.1363372802734375, "step": 1993 }, { "epoch": 0.27785132028147425, "grad_norm": 0.27678409218788147, "learning_rate": 3.532156213530149e-05, "loss": 0.07525062561035156, "step": 1994 }, { "epoch": 0.2779906639726886, "grad_norm": 0.9599758982658386, "learning_rate": 3.5315501166291806e-05, "loss": 0.12007713317871094, "step": 1995 }, { "epoch": 0.278130007663903, "grad_norm": 0.33756813406944275, "learning_rate": 3.530943679452742e-05, "loss": 0.07428741455078125, "step": 1996 }, { "epoch": 0.2782693513551174, "grad_norm": 0.38301870226860046, "learning_rate": 3.530336902135569e-05, "loss": 0.09238243103027344, "step": 1997 }, { "epoch": 0.27840869504633176, "grad_norm": 0.36167415976524353, "learning_rate": 3.5297297848124756e-05, "loss": 0.08146858215332031, "step": 1998 }, { "epoch": 0.27854803873754613, "grad_norm": 1.2600332498550415, "learning_rate": 3.5291223276183476e-05, "loss": 0.1834716796875, "step": 1999 }, { "epoch": 0.2786873824287605, "grad_norm": 0.39971303939819336, "learning_rate": 3.528514530688149e-05, "loss": 0.09444046020507812, "step": 2000 }, { "epoch": 0.27882672611997494, "grad_norm": 0.41816550493240356, "learning_rate": 3.527906394156919e-05, "loss": 0.09694671630859375, "step": 2001 }, { "epoch": 0.2789660698111893, "grad_norm": 0.5608137845993042, "learning_rate": 3.52729791815977e-05, "loss": 0.10853004455566406, "step": 2002 }, { "epoch": 0.2791054135024037, "grad_norm": 0.5796910524368286, "learning_rate": 3.526689102831892e-05, "loss": 0.13431644439697266, "step": 2003 }, { "epoch": 0.2792447571936181, "grad_norm": 0.35114586353302, "learning_rate": 3.526079948308551e-05, "loss": 0.1079401969909668, "step": 2004 }, { "epoch": 0.27938410088483245, "grad_norm": 0.39952677488327026, "learning_rate": 3.525470454725087e-05, "loss": 0.06465911865234375, "step": 2005 }, { "epoch": 0.27952344457604683, "grad_norm": 0.36104124784469604, "learning_rate": 3.524860622216914e-05, "loss": 0.09728240966796875, "step": 2006 }, { "epoch": 0.2796627882672612, "grad_norm": 0.7839861512184143, "learning_rate": 3.524250450919524e-05, "loss": 0.1350231170654297, "step": 2007 }, { "epoch": 0.2798021319584756, "grad_norm": 0.3416142165660858, "learning_rate": 3.523639940968484e-05, "loss": 0.07802391052246094, "step": 2008 }, { "epoch": 0.27994147564968996, "grad_norm": 0.2836419641971588, "learning_rate": 3.5230290924994334e-05, "loss": 0.06949806213378906, "step": 2009 }, { "epoch": 0.28008081934090434, "grad_norm": 0.41860708594322205, "learning_rate": 3.5224179056480906e-05, "loss": 0.08610343933105469, "step": 2010 }, { "epoch": 0.2802201630321187, "grad_norm": 0.6027640700340271, "learning_rate": 3.521806380550246e-05, "loss": 0.14846229553222656, "step": 2011 }, { "epoch": 0.2803595067233331, "grad_norm": 0.45950642228126526, "learning_rate": 3.5211945173417674e-05, "loss": 0.10294914245605469, "step": 2012 }, { "epoch": 0.2804988504145475, "grad_norm": 0.34128785133361816, "learning_rate": 3.520582316158596e-05, "loss": 0.08978843688964844, "step": 2013 }, { "epoch": 0.28063819410576185, "grad_norm": 0.29848596453666687, "learning_rate": 3.5199697771367494e-05, "loss": 0.06291389465332031, "step": 2014 }, { "epoch": 0.28077753779697623, "grad_norm": 1.010061264038086, "learning_rate": 3.5193569004123204e-05, "loss": 0.102081298828125, "step": 2015 }, { "epoch": 0.2809168814881906, "grad_norm": 0.3186851441860199, "learning_rate": 3.518743686121475e-05, "loss": 0.07619190216064453, "step": 2016 }, { "epoch": 0.281056225179405, "grad_norm": 0.3428669571876526, "learning_rate": 3.5181301344004574e-05, "loss": 0.07553482055664062, "step": 2017 }, { "epoch": 0.28119556887061936, "grad_norm": 0.35573920607566833, "learning_rate": 3.517516245385582e-05, "loss": 0.09304332733154297, "step": 2018 }, { "epoch": 0.28133491256183374, "grad_norm": 0.576701283454895, "learning_rate": 3.5169020192132425e-05, "loss": 0.1267108917236328, "step": 2019 }, { "epoch": 0.2814742562530481, "grad_norm": 0.4636387825012207, "learning_rate": 3.516287456019907e-05, "loss": 0.09097862243652344, "step": 2020 }, { "epoch": 0.28161359994426255, "grad_norm": 0.5109367966651917, "learning_rate": 3.515672555942115e-05, "loss": 0.12767410278320312, "step": 2021 }, { "epoch": 0.2817529436354769, "grad_norm": 0.8496819138526917, "learning_rate": 3.5150573191164855e-05, "loss": 0.15622329711914062, "step": 2022 }, { "epoch": 0.2818922873266913, "grad_norm": 0.5370823740959167, "learning_rate": 3.514441745679708e-05, "loss": 0.09035301208496094, "step": 2023 }, { "epoch": 0.2820316310179057, "grad_norm": 0.6654703617095947, "learning_rate": 3.5138258357685494e-05, "loss": 0.1130685806274414, "step": 2024 }, { "epoch": 0.28217097470912006, "grad_norm": 0.5739553570747375, "learning_rate": 3.513209589519853e-05, "loss": 0.11667251586914062, "step": 2025 }, { "epoch": 0.28231031840033444, "grad_norm": 0.3623608350753784, "learning_rate": 3.512593007070532e-05, "loss": 0.07126045227050781, "step": 2026 }, { "epoch": 0.2824496620915488, "grad_norm": 0.5074231028556824, "learning_rate": 3.5119760885575785e-05, "loss": 0.10278511047363281, "step": 2027 }, { "epoch": 0.2825890057827632, "grad_norm": 0.5079374313354492, "learning_rate": 3.5113588341180564e-05, "loss": 0.11852073669433594, "step": 2028 }, { "epoch": 0.28272834947397757, "grad_norm": 0.38028639554977417, "learning_rate": 3.510741243889106e-05, "loss": 0.08341026306152344, "step": 2029 }, { "epoch": 0.28286769316519195, "grad_norm": 0.3194384276866913, "learning_rate": 3.510123318007943e-05, "loss": 0.06251907348632812, "step": 2030 }, { "epoch": 0.2830070368564063, "grad_norm": 1.5679850578308105, "learning_rate": 3.509505056611855e-05, "loss": 0.15052032470703125, "step": 2031 }, { "epoch": 0.2831463805476207, "grad_norm": 0.7880901098251343, "learning_rate": 3.508886459838206e-05, "loss": 0.11232566833496094, "step": 2032 }, { "epoch": 0.2832857242388351, "grad_norm": 0.3304464817047119, "learning_rate": 3.508267527824434e-05, "loss": 0.07713890075683594, "step": 2033 }, { "epoch": 0.28342506793004946, "grad_norm": 0.5241314172744751, "learning_rate": 3.5076482607080513e-05, "loss": 0.09955596923828125, "step": 2034 }, { "epoch": 0.28356441162126383, "grad_norm": 0.7181317806243896, "learning_rate": 3.507028658626646e-05, "loss": 0.10213279724121094, "step": 2035 }, { "epoch": 0.2837037553124782, "grad_norm": 0.5204509496688843, "learning_rate": 3.5064087217178787e-05, "loss": 0.08506011962890625, "step": 2036 }, { "epoch": 0.2838430990036926, "grad_norm": 0.30570340156555176, "learning_rate": 3.505788450119485e-05, "loss": 0.07583999633789062, "step": 2037 }, { "epoch": 0.28398244269490697, "grad_norm": 0.4159783124923706, "learning_rate": 3.505167843969276e-05, "loss": 0.08308029174804688, "step": 2038 }, { "epoch": 0.28412178638612134, "grad_norm": 0.4155747592449188, "learning_rate": 3.504546903405135e-05, "loss": 0.09405517578125, "step": 2039 }, { "epoch": 0.2842611300773357, "grad_norm": 0.5757187008857727, "learning_rate": 3.5039256285650214e-05, "loss": 0.14109039306640625, "step": 2040 }, { "epoch": 0.28440047376855015, "grad_norm": 0.3665321469306946, "learning_rate": 3.5033040195869685e-05, "loss": 0.09366226196289062, "step": 2041 }, { "epoch": 0.28453981745976453, "grad_norm": 0.2809184491634369, "learning_rate": 3.502682076609084e-05, "loss": 0.06767559051513672, "step": 2042 }, { "epoch": 0.2846791611509789, "grad_norm": 0.5288949608802795, "learning_rate": 3.5020597997695484e-05, "loss": 0.08463287353515625, "step": 2043 }, { "epoch": 0.2848185048421933, "grad_norm": 0.41560277342796326, "learning_rate": 3.501437189206618e-05, "loss": 0.08261299133300781, "step": 2044 }, { "epoch": 0.28495784853340766, "grad_norm": 1.0148755311965942, "learning_rate": 3.5008142450586226e-05, "loss": 0.17163658142089844, "step": 2045 }, { "epoch": 0.28509719222462204, "grad_norm": 0.539237916469574, "learning_rate": 3.500190967463966e-05, "loss": 0.10205078125, "step": 2046 }, { "epoch": 0.2852365359158364, "grad_norm": 0.5211705565452576, "learning_rate": 3.4995673565611265e-05, "loss": 0.09119796752929688, "step": 2047 }, { "epoch": 0.2853758796070508, "grad_norm": 0.3513891100883484, "learning_rate": 3.498943412488656e-05, "loss": 0.06157684326171875, "step": 2048 }, { "epoch": 0.2855152232982652, "grad_norm": 0.5245388746261597, "learning_rate": 3.4983191353851804e-05, "loss": 0.1049041748046875, "step": 2049 }, { "epoch": 0.28565456698947955, "grad_norm": 0.4318659007549286, "learning_rate": 3.4976945253894e-05, "loss": 0.12069511413574219, "step": 2050 }, { "epoch": 0.28579391068069393, "grad_norm": 0.45237475633621216, "learning_rate": 3.49706958264009e-05, "loss": 0.11353111267089844, "step": 2051 }, { "epoch": 0.2859332543719083, "grad_norm": 0.6031201481819153, "learning_rate": 3.496444307276097e-05, "loss": 0.09246444702148438, "step": 2052 }, { "epoch": 0.2860725980631227, "grad_norm": 0.3734297752380371, "learning_rate": 3.495818699436343e-05, "loss": 0.08231544494628906, "step": 2053 }, { "epoch": 0.28621194175433706, "grad_norm": 0.37651169300079346, "learning_rate": 3.495192759259824e-05, "loss": 0.07795906066894531, "step": 2054 }, { "epoch": 0.28635128544555144, "grad_norm": 0.42590630054473877, "learning_rate": 3.49456648688561e-05, "loss": 0.10091209411621094, "step": 2055 }, { "epoch": 0.2864906291367658, "grad_norm": 0.2602212727069855, "learning_rate": 3.493939882452845e-05, "loss": 0.08231163024902344, "step": 2056 }, { "epoch": 0.2866299728279802, "grad_norm": 0.502697765827179, "learning_rate": 3.493312946100743e-05, "loss": 0.10119915008544922, "step": 2057 }, { "epoch": 0.2867693165191946, "grad_norm": 0.44224539399147034, "learning_rate": 3.4926856779685993e-05, "loss": 0.07082748413085938, "step": 2058 }, { "epoch": 0.28690866021040895, "grad_norm": 1.285374402999878, "learning_rate": 3.492058078195776e-05, "loss": 0.12043190002441406, "step": 2059 }, { "epoch": 0.2870480039016233, "grad_norm": 0.5392195582389832, "learning_rate": 3.491430146921712e-05, "loss": 0.11692142486572266, "step": 2060 }, { "epoch": 0.28718734759283776, "grad_norm": 0.5769364237785339, "learning_rate": 3.49080188428592e-05, "loss": 0.0934600830078125, "step": 2061 }, { "epoch": 0.28732669128405214, "grad_norm": 0.4602513313293457, "learning_rate": 3.490173290427984e-05, "loss": 0.10551166534423828, "step": 2062 }, { "epoch": 0.2874660349752665, "grad_norm": 0.44367870688438416, "learning_rate": 3.489544365487564e-05, "loss": 0.1076812744140625, "step": 2063 }, { "epoch": 0.2876053786664809, "grad_norm": 0.47916075587272644, "learning_rate": 3.488915109604393e-05, "loss": 0.07186508178710938, "step": 2064 }, { "epoch": 0.28774472235769527, "grad_norm": 0.6950048208236694, "learning_rate": 3.488285522918277e-05, "loss": 0.11661529541015625, "step": 2065 }, { "epoch": 0.28788406604890965, "grad_norm": 0.5778775811195374, "learning_rate": 3.487655605569096e-05, "loss": 0.10140800476074219, "step": 2066 }, { "epoch": 0.288023409740124, "grad_norm": 0.7643184065818787, "learning_rate": 3.487025357696804e-05, "loss": 0.11916732788085938, "step": 2067 }, { "epoch": 0.2881627534313384, "grad_norm": 0.5924773216247559, "learning_rate": 3.486394779441426e-05, "loss": 0.0875244140625, "step": 2068 }, { "epoch": 0.2883020971225528, "grad_norm": 0.5425692200660706, "learning_rate": 3.485763870943064e-05, "loss": 0.11858940124511719, "step": 2069 }, { "epoch": 0.28844144081376716, "grad_norm": 0.3038046061992645, "learning_rate": 3.48513263234189e-05, "loss": 0.07624244689941406, "step": 2070 }, { "epoch": 0.28858078450498154, "grad_norm": 0.9170233607292175, "learning_rate": 3.484501063778151e-05, "loss": 0.11957359313964844, "step": 2071 }, { "epoch": 0.2887201281961959, "grad_norm": 0.5894279479980469, "learning_rate": 3.483869165392167e-05, "loss": 0.10427093505859375, "step": 2072 }, { "epoch": 0.2888594718874103, "grad_norm": 0.5294084548950195, "learning_rate": 3.483236937324332e-05, "loss": 0.11343002319335938, "step": 2073 }, { "epoch": 0.28899881557862467, "grad_norm": 0.5838432908058167, "learning_rate": 3.482604379715113e-05, "loss": 0.132110595703125, "step": 2074 }, { "epoch": 0.28913815926983905, "grad_norm": 0.5252535939216614, "learning_rate": 3.481971492705048e-05, "loss": 0.08907890319824219, "step": 2075 }, { "epoch": 0.2892775029610534, "grad_norm": 0.32209885120391846, "learning_rate": 3.481338276434753e-05, "loss": 0.07363319396972656, "step": 2076 }, { "epoch": 0.2894168466522678, "grad_norm": 0.4487036466598511, "learning_rate": 3.480704731044911e-05, "loss": 0.09509468078613281, "step": 2077 }, { "epoch": 0.2895561903434822, "grad_norm": 0.2984406054019928, "learning_rate": 3.480070856676283e-05, "loss": 0.08282279968261719, "step": 2078 }, { "epoch": 0.28969553403469656, "grad_norm": 0.4058975279331207, "learning_rate": 3.479436653469702e-05, "loss": 0.08962631225585938, "step": 2079 }, { "epoch": 0.28983487772591093, "grad_norm": 0.46609628200531006, "learning_rate": 3.478802121566073e-05, "loss": 0.08637428283691406, "step": 2080 }, { "epoch": 0.28997422141712537, "grad_norm": 0.6410079598426819, "learning_rate": 3.478167261106373e-05, "loss": 0.10193061828613281, "step": 2081 }, { "epoch": 0.29011356510833974, "grad_norm": 0.7031494379043579, "learning_rate": 3.4775320722316555e-05, "loss": 0.1073760986328125, "step": 2082 }, { "epoch": 0.2902529087995541, "grad_norm": 0.42779994010925293, "learning_rate": 3.476896555083044e-05, "loss": 0.08971118927001953, "step": 2083 }, { "epoch": 0.2903922524907685, "grad_norm": 0.41394221782684326, "learning_rate": 3.476260709801736e-05, "loss": 0.11905288696289062, "step": 2084 }, { "epoch": 0.2905315961819829, "grad_norm": 0.3321639895439148, "learning_rate": 3.475624536529002e-05, "loss": 0.0707244873046875, "step": 2085 }, { "epoch": 0.29067093987319725, "grad_norm": 0.5003089308738708, "learning_rate": 3.4749880354061855e-05, "loss": 0.08369827270507812, "step": 2086 }, { "epoch": 0.29081028356441163, "grad_norm": 0.3917432129383087, "learning_rate": 3.474351206574701e-05, "loss": 0.10224151611328125, "step": 2087 }, { "epoch": 0.290949627255626, "grad_norm": 0.5467779040336609, "learning_rate": 3.4737140501760396e-05, "loss": 0.10201454162597656, "step": 2088 }, { "epoch": 0.2910889709468404, "grad_norm": 0.1576787531375885, "learning_rate": 3.473076566351761e-05, "loss": 0.056171417236328125, "step": 2089 }, { "epoch": 0.29122831463805476, "grad_norm": 0.31077930331230164, "learning_rate": 3.4724387552435004e-05, "loss": 0.08428478240966797, "step": 2090 }, { "epoch": 0.29136765832926914, "grad_norm": 0.3415067791938782, "learning_rate": 3.471800616992965e-05, "loss": 0.08790397644042969, "step": 2091 }, { "epoch": 0.2915070020204835, "grad_norm": 0.4265667498111725, "learning_rate": 3.471162151741934e-05, "loss": 0.09807395935058594, "step": 2092 }, { "epoch": 0.2916463457116979, "grad_norm": 1.0415289402008057, "learning_rate": 3.47052335963226e-05, "loss": 0.15485382080078125, "step": 2093 }, { "epoch": 0.2917856894029123, "grad_norm": 0.642889678478241, "learning_rate": 3.469884240805869e-05, "loss": 0.13623619079589844, "step": 2094 }, { "epoch": 0.29192503309412665, "grad_norm": 0.4386214315891266, "learning_rate": 3.4692447954047566e-05, "loss": 0.08283615112304688, "step": 2095 }, { "epoch": 0.29206437678534103, "grad_norm": 0.33510154485702515, "learning_rate": 3.468605023570993e-05, "loss": 0.0673208236694336, "step": 2096 }, { "epoch": 0.2922037204765554, "grad_norm": 1.1273136138916016, "learning_rate": 3.4679649254467244e-05, "loss": 0.14962387084960938, "step": 2097 }, { "epoch": 0.2923430641677698, "grad_norm": 0.35053569078445435, "learning_rate": 3.467324501174163e-05, "loss": 0.07761764526367188, "step": 2098 }, { "epoch": 0.29248240785898416, "grad_norm": 0.23390597105026245, "learning_rate": 3.466683750895596e-05, "loss": 0.0808868408203125, "step": 2099 }, { "epoch": 0.29262175155019854, "grad_norm": 0.38983651995658875, "learning_rate": 3.4660426747533846e-05, "loss": 0.0955963134765625, "step": 2100 }, { "epoch": 0.29276109524141297, "grad_norm": 0.4707218408584595, "learning_rate": 3.4654012728899624e-05, "loss": 0.09681129455566406, "step": 2101 }, { "epoch": 0.29290043893262735, "grad_norm": 0.38831308484077454, "learning_rate": 3.464759545447832e-05, "loss": 0.0960540771484375, "step": 2102 }, { "epoch": 0.2930397826238417, "grad_norm": 0.7193843126296997, "learning_rate": 3.4641174925695716e-05, "loss": 0.15797805786132812, "step": 2103 }, { "epoch": 0.2931791263150561, "grad_norm": 0.464728444814682, "learning_rate": 3.4634751143978317e-05, "loss": 0.08670616149902344, "step": 2104 }, { "epoch": 0.2933184700062705, "grad_norm": 0.5166411399841309, "learning_rate": 3.4628324110753326e-05, "loss": 0.10496711730957031, "step": 2105 }, { "epoch": 0.29345781369748486, "grad_norm": 0.28511741757392883, "learning_rate": 3.462189382744869e-05, "loss": 0.078155517578125, "step": 2106 }, { "epoch": 0.29359715738869924, "grad_norm": 0.37454676628112793, "learning_rate": 3.461546029549306e-05, "loss": 0.08242607116699219, "step": 2107 }, { "epoch": 0.2937365010799136, "grad_norm": 0.3371250629425049, "learning_rate": 3.4609023516315834e-05, "loss": 0.08198261260986328, "step": 2108 }, { "epoch": 0.293875844771128, "grad_norm": 0.30086231231689453, "learning_rate": 3.4602583491347116e-05, "loss": 0.08827877044677734, "step": 2109 }, { "epoch": 0.29401518846234237, "grad_norm": 0.37922486662864685, "learning_rate": 3.4596140222017725e-05, "loss": 0.08135032653808594, "step": 2110 }, { "epoch": 0.29415453215355675, "grad_norm": 0.31655359268188477, "learning_rate": 3.4589693709759216e-05, "loss": 0.07038688659667969, "step": 2111 }, { "epoch": 0.2942938758447711, "grad_norm": 0.5995901823043823, "learning_rate": 3.4583243956003847e-05, "loss": 0.08998775482177734, "step": 2112 }, { "epoch": 0.2944332195359855, "grad_norm": 0.35880595445632935, "learning_rate": 3.457679096218461e-05, "loss": 0.09398841857910156, "step": 2113 }, { "epoch": 0.2945725632271999, "grad_norm": 0.32209083437919617, "learning_rate": 3.457033472973523e-05, "loss": 0.0819540023803711, "step": 2114 }, { "epoch": 0.29471190691841426, "grad_norm": 0.47115588188171387, "learning_rate": 3.4563875260090114e-05, "loss": 0.10219383239746094, "step": 2115 }, { "epoch": 0.29485125060962863, "grad_norm": 0.40643271803855896, "learning_rate": 3.45574125546844e-05, "loss": 0.09634780883789062, "step": 2116 }, { "epoch": 0.294990594300843, "grad_norm": 0.7056506872177124, "learning_rate": 3.4550946614953984e-05, "loss": 0.1078338623046875, "step": 2117 }, { "epoch": 0.2951299379920574, "grad_norm": 0.4892171025276184, "learning_rate": 3.454447744233543e-05, "loss": 0.1060495376586914, "step": 2118 }, { "epoch": 0.29526928168327177, "grad_norm": 0.4710906147956848, "learning_rate": 3.453800503826604e-05, "loss": 0.09679031372070312, "step": 2119 }, { "epoch": 0.29540862537448614, "grad_norm": 0.4885799288749695, "learning_rate": 3.453152940418384e-05, "loss": 0.094482421875, "step": 2120 }, { "epoch": 0.2955479690657006, "grad_norm": 0.37788811326026917, "learning_rate": 3.4525050541527566e-05, "loss": 0.0719156265258789, "step": 2121 }, { "epoch": 0.29568731275691496, "grad_norm": 0.2686011493206024, "learning_rate": 3.4518568451736675e-05, "loss": 0.06802749633789062, "step": 2122 }, { "epoch": 0.29582665644812933, "grad_norm": 0.2841027081012726, "learning_rate": 3.4512083136251346e-05, "loss": 0.05787467956542969, "step": 2123 }, { "epoch": 0.2959660001393437, "grad_norm": 0.40339699387550354, "learning_rate": 3.450559459651245e-05, "loss": 0.0903482437133789, "step": 2124 }, { "epoch": 0.2961053438305581, "grad_norm": 0.22828826308250427, "learning_rate": 3.449910283396161e-05, "loss": 0.07097625732421875, "step": 2125 }, { "epoch": 0.29624468752177247, "grad_norm": 0.4794375002384186, "learning_rate": 3.4492607850041136e-05, "loss": 0.1063995361328125, "step": 2126 }, { "epoch": 0.29638403121298684, "grad_norm": 0.37380251288414, "learning_rate": 3.448610964619407e-05, "loss": 0.09469985961914062, "step": 2127 }, { "epoch": 0.2965233749042012, "grad_norm": 0.630882740020752, "learning_rate": 3.447960822386417e-05, "loss": 0.1557788848876953, "step": 2128 }, { "epoch": 0.2966627185954156, "grad_norm": 1.3417961597442627, "learning_rate": 3.4473103584495894e-05, "loss": 0.13213729858398438, "step": 2129 }, { "epoch": 0.29680206228663, "grad_norm": 0.4532499611377716, "learning_rate": 3.446659572953443e-05, "loss": 0.10228919982910156, "step": 2130 }, { "epoch": 0.29694140597784435, "grad_norm": 0.31804201006889343, "learning_rate": 3.446008466042566e-05, "loss": 0.08793830871582031, "step": 2131 }, { "epoch": 0.29708074966905873, "grad_norm": 0.4352450966835022, "learning_rate": 3.445357037861622e-05, "loss": 0.09606552124023438, "step": 2132 }, { "epoch": 0.2972200933602731, "grad_norm": 0.35623499751091003, "learning_rate": 3.4447052885553424e-05, "loss": 0.07125282287597656, "step": 2133 }, { "epoch": 0.2973594370514875, "grad_norm": 0.4456421732902527, "learning_rate": 3.44405321826853e-05, "loss": 0.09111595153808594, "step": 2134 }, { "epoch": 0.29749878074270186, "grad_norm": 0.4499965310096741, "learning_rate": 3.443400827146062e-05, "loss": 0.0946044921875, "step": 2135 }, { "epoch": 0.29763812443391624, "grad_norm": 0.40589338541030884, "learning_rate": 3.442748115332882e-05, "loss": 0.09238433837890625, "step": 2136 }, { "epoch": 0.2977774681251306, "grad_norm": 0.3958226144313812, "learning_rate": 3.442095082974011e-05, "loss": 0.10013580322265625, "step": 2137 }, { "epoch": 0.297916811816345, "grad_norm": 0.5077462792396545, "learning_rate": 3.441441730214535e-05, "loss": 0.08050918579101562, "step": 2138 }, { "epoch": 0.2980561555075594, "grad_norm": 0.3427429795265198, "learning_rate": 3.440788057199616e-05, "loss": 0.08791351318359375, "step": 2139 }, { "epoch": 0.29819549919877375, "grad_norm": 0.63393634557724, "learning_rate": 3.440134064074483e-05, "loss": 0.13384628295898438, "step": 2140 }, { "epoch": 0.2983348428899882, "grad_norm": 0.4252096712589264, "learning_rate": 3.4394797509844415e-05, "loss": 0.08881092071533203, "step": 2141 }, { "epoch": 0.29847418658120256, "grad_norm": 0.30493465065956116, "learning_rate": 3.438825118074863e-05, "loss": 0.06873321533203125, "step": 2142 }, { "epoch": 0.29861353027241694, "grad_norm": 0.608375608921051, "learning_rate": 3.4381701654911915e-05, "loss": 0.1201934814453125, "step": 2143 }, { "epoch": 0.2987528739636313, "grad_norm": 0.49948617815971375, "learning_rate": 3.437514893378943e-05, "loss": 0.09692001342773438, "step": 2144 }, { "epoch": 0.2988922176548457, "grad_norm": 0.30702710151672363, "learning_rate": 3.4368593018837046e-05, "loss": 0.07010269165039062, "step": 2145 }, { "epoch": 0.29903156134606007, "grad_norm": 0.5603824853897095, "learning_rate": 3.4362033911511336e-05, "loss": 0.10648155212402344, "step": 2146 }, { "epoch": 0.29917090503727445, "grad_norm": 0.5419380068778992, "learning_rate": 3.435547161326958e-05, "loss": 0.133544921875, "step": 2147 }, { "epoch": 0.2993102487284888, "grad_norm": 0.9297410249710083, "learning_rate": 3.434890612556977e-05, "loss": 0.13912200927734375, "step": 2148 }, { "epoch": 0.2994495924197032, "grad_norm": 0.6058835983276367, "learning_rate": 3.434233744987061e-05, "loss": 0.13639259338378906, "step": 2149 }, { "epoch": 0.2995889361109176, "grad_norm": 0.32640838623046875, "learning_rate": 3.433576558763151e-05, "loss": 0.07975959777832031, "step": 2150 }, { "epoch": 0.29972827980213196, "grad_norm": 0.3459543287754059, "learning_rate": 3.4329190540312596e-05, "loss": 0.08760643005371094, "step": 2151 }, { "epoch": 0.29986762349334634, "grad_norm": 0.33536621928215027, "learning_rate": 3.432261230937468e-05, "loss": 0.08847427368164062, "step": 2152 }, { "epoch": 0.3000069671845607, "grad_norm": 0.7921039462089539, "learning_rate": 3.431603089627929e-05, "loss": 0.1289386749267578, "step": 2153 }, { "epoch": 0.3001463108757751, "grad_norm": 0.220619335770607, "learning_rate": 3.4309446302488686e-05, "loss": 0.05788612365722656, "step": 2154 }, { "epoch": 0.30028565456698947, "grad_norm": 0.6694440841674805, "learning_rate": 3.4302858529465806e-05, "loss": 0.10042381286621094, "step": 2155 }, { "epoch": 0.30042499825820385, "grad_norm": 0.41608506441116333, "learning_rate": 3.429626757867429e-05, "loss": 0.10307121276855469, "step": 2156 }, { "epoch": 0.3005643419494182, "grad_norm": 0.6776466369628906, "learning_rate": 3.428967345157852e-05, "loss": 0.13116455078125, "step": 2157 }, { "epoch": 0.3007036856406326, "grad_norm": 0.6127967834472656, "learning_rate": 3.428307614964354e-05, "loss": 0.11480522155761719, "step": 2158 }, { "epoch": 0.300843029331847, "grad_norm": 1.661865234375, "learning_rate": 3.427647567433512e-05, "loss": 0.14780902862548828, "step": 2159 }, { "epoch": 0.30098237302306136, "grad_norm": 0.7376072406768799, "learning_rate": 3.426987202711976e-05, "loss": 0.12442779541015625, "step": 2160 }, { "epoch": 0.3011217167142758, "grad_norm": 0.5364863872528076, "learning_rate": 3.4263265209464606e-05, "loss": 0.06836128234863281, "step": 2161 }, { "epoch": 0.30126106040549017, "grad_norm": 0.3374653458595276, "learning_rate": 3.4256655222837574e-05, "loss": 0.08504009246826172, "step": 2162 }, { "epoch": 0.30140040409670454, "grad_norm": 0.3822915256023407, "learning_rate": 3.425004206870723e-05, "loss": 0.07573127746582031, "step": 2163 }, { "epoch": 0.3015397477879189, "grad_norm": 0.9280564188957214, "learning_rate": 3.424342574854286e-05, "loss": 0.124542236328125, "step": 2164 }, { "epoch": 0.3016790914791333, "grad_norm": 0.5022726058959961, "learning_rate": 3.423680626381449e-05, "loss": 0.09351921081542969, "step": 2165 }, { "epoch": 0.3018184351703477, "grad_norm": 0.455067902803421, "learning_rate": 3.423018361599279e-05, "loss": 0.1001739501953125, "step": 2166 }, { "epoch": 0.30195777886156205, "grad_norm": 0.5218072533607483, "learning_rate": 3.4223557806549175e-05, "loss": 0.12045764923095703, "step": 2167 }, { "epoch": 0.30209712255277643, "grad_norm": 0.4877760410308838, "learning_rate": 3.421692883695574e-05, "loss": 0.10284614562988281, "step": 2168 }, { "epoch": 0.3022364662439908, "grad_norm": 0.4854704439640045, "learning_rate": 3.4210296708685303e-05, "loss": 0.0814208984375, "step": 2169 }, { "epoch": 0.3023758099352052, "grad_norm": 0.3876161277294159, "learning_rate": 3.420366142321136e-05, "loss": 0.0862274169921875, "step": 2170 }, { "epoch": 0.30251515362641956, "grad_norm": 0.519382655620575, "learning_rate": 3.419702298200812e-05, "loss": 0.11932945251464844, "step": 2171 }, { "epoch": 0.30265449731763394, "grad_norm": 0.3671526610851288, "learning_rate": 3.41903813865505e-05, "loss": 0.09107780456542969, "step": 2172 }, { "epoch": 0.3027938410088483, "grad_norm": 0.5745139122009277, "learning_rate": 3.418373663831411e-05, "loss": 0.10152053833007812, "step": 2173 }, { "epoch": 0.3029331847000627, "grad_norm": 0.46935635805130005, "learning_rate": 3.4177088738775254e-05, "loss": 0.09948158264160156, "step": 2174 }, { "epoch": 0.3030725283912771, "grad_norm": 1.0329387187957764, "learning_rate": 3.417043768941095e-05, "loss": 0.1117544174194336, "step": 2175 }, { "epoch": 0.30321187208249145, "grad_norm": 1.037937045097351, "learning_rate": 3.416378349169891e-05, "loss": 0.117767333984375, "step": 2176 }, { "epoch": 0.30335121577370583, "grad_norm": 0.49376338720321655, "learning_rate": 3.415712614711755e-05, "loss": 0.10068893432617188, "step": 2177 }, { "epoch": 0.3034905594649202, "grad_norm": 0.3958212733268738, "learning_rate": 3.4150465657145964e-05, "loss": 0.07669639587402344, "step": 2178 }, { "epoch": 0.3036299031561346, "grad_norm": 0.2620695233345032, "learning_rate": 3.414380202326397e-05, "loss": 0.0654592514038086, "step": 2179 }, { "epoch": 0.30376924684734896, "grad_norm": 0.5514135360717773, "learning_rate": 3.413713524695208e-05, "loss": 0.09516525268554688, "step": 2180 }, { "epoch": 0.30390859053856334, "grad_norm": 0.4060947000980377, "learning_rate": 3.413046532969149e-05, "loss": 0.07757759094238281, "step": 2181 }, { "epoch": 0.30404793422977777, "grad_norm": 0.4602896571159363, "learning_rate": 3.412379227296411e-05, "loss": 0.07181739807128906, "step": 2182 }, { "epoch": 0.30418727792099215, "grad_norm": 0.43439313769340515, "learning_rate": 3.411711607825253e-05, "loss": 0.07863235473632812, "step": 2183 }, { "epoch": 0.3043266216122065, "grad_norm": 0.3391636312007904, "learning_rate": 3.411043674704007e-05, "loss": 0.08497810363769531, "step": 2184 }, { "epoch": 0.3044659653034209, "grad_norm": 0.5677062273025513, "learning_rate": 3.4103754280810705e-05, "loss": 0.07610607147216797, "step": 2185 }, { "epoch": 0.3046053089946353, "grad_norm": 0.5425165891647339, "learning_rate": 3.409706868104913e-05, "loss": 0.12258052825927734, "step": 2186 }, { "epoch": 0.30474465268584966, "grad_norm": 0.26035138964653015, "learning_rate": 3.409037994924074e-05, "loss": 0.06361007690429688, "step": 2187 }, { "epoch": 0.30488399637706404, "grad_norm": 0.5197556614875793, "learning_rate": 3.408368808687161e-05, "loss": 0.1010751724243164, "step": 2188 }, { "epoch": 0.3050233400682784, "grad_norm": 0.6934305429458618, "learning_rate": 3.407699309542853e-05, "loss": 0.10816001892089844, "step": 2189 }, { "epoch": 0.3051626837594928, "grad_norm": 0.3629651963710785, "learning_rate": 3.407029497639896e-05, "loss": 0.10110282897949219, "step": 2190 }, { "epoch": 0.30530202745070717, "grad_norm": 0.9198886752128601, "learning_rate": 3.406359373127108e-05, "loss": 0.1447925567626953, "step": 2191 }, { "epoch": 0.30544137114192155, "grad_norm": 0.5217763185501099, "learning_rate": 3.405688936153375e-05, "loss": 0.09319305419921875, "step": 2192 }, { "epoch": 0.3055807148331359, "grad_norm": 0.81162428855896, "learning_rate": 3.405018186867653e-05, "loss": 0.10169029235839844, "step": 2193 }, { "epoch": 0.3057200585243503, "grad_norm": 0.4772144556045532, "learning_rate": 3.404347125418967e-05, "loss": 0.08695220947265625, "step": 2194 }, { "epoch": 0.3058594022155647, "grad_norm": 0.4903084933757782, "learning_rate": 3.4036757519564116e-05, "loss": 0.09914004802703857, "step": 2195 }, { "epoch": 0.30599874590677906, "grad_norm": 0.6063910722732544, "learning_rate": 3.40300406662915e-05, "loss": 0.1074676513671875, "step": 2196 }, { "epoch": 0.30613808959799343, "grad_norm": 1.0479623079299927, "learning_rate": 3.402332069586416e-05, "loss": 0.11800861358642578, "step": 2197 }, { "epoch": 0.3062774332892078, "grad_norm": 0.8403177261352539, "learning_rate": 3.401659760977513e-05, "loss": 0.09145736694335938, "step": 2198 }, { "epoch": 0.3064167769804222, "grad_norm": 0.44769829511642456, "learning_rate": 3.4009871409518104e-05, "loss": 0.08980751037597656, "step": 2199 }, { "epoch": 0.30655612067163657, "grad_norm": 0.21064536273479462, "learning_rate": 3.40031420965875e-05, "loss": 0.06361770629882812, "step": 2200 }, { "epoch": 0.30669546436285094, "grad_norm": 0.5954223275184631, "learning_rate": 3.399640967247843e-05, "loss": 0.08922767639160156, "step": 2201 }, { "epoch": 0.3068348080540654, "grad_norm": 0.3422307074069977, "learning_rate": 3.398967413868666e-05, "loss": 0.08225822448730469, "step": 2202 }, { "epoch": 0.30697415174527976, "grad_norm": 0.5269918441772461, "learning_rate": 3.3982935496708704e-05, "loss": 0.08499908447265625, "step": 2203 }, { "epoch": 0.30711349543649413, "grad_norm": 0.5362560153007507, "learning_rate": 3.397619374804171e-05, "loss": 0.10434722900390625, "step": 2204 }, { "epoch": 0.3072528391277085, "grad_norm": 0.560541570186615, "learning_rate": 3.3969448894183536e-05, "loss": 0.090484619140625, "step": 2205 }, { "epoch": 0.3073921828189229, "grad_norm": 0.6833621263504028, "learning_rate": 3.396270093663276e-05, "loss": 0.08775997161865234, "step": 2206 }, { "epoch": 0.30753152651013727, "grad_norm": 0.6220759153366089, "learning_rate": 3.39559498768886e-05, "loss": 0.10339927673339844, "step": 2207 }, { "epoch": 0.30767087020135164, "grad_norm": 0.5056524276733398, "learning_rate": 3.3949195716451004e-05, "loss": 0.10686302185058594, "step": 2208 }, { "epoch": 0.307810213892566, "grad_norm": 0.5647354125976562, "learning_rate": 3.394243845682058e-05, "loss": 0.11247062683105469, "step": 2209 }, { "epoch": 0.3079495575837804, "grad_norm": 0.32976895570755005, "learning_rate": 3.3935678099498644e-05, "loss": 0.07585620880126953, "step": 2210 }, { "epoch": 0.3080889012749948, "grad_norm": 0.4210070073604584, "learning_rate": 3.392891464598719e-05, "loss": 0.09619712829589844, "step": 2211 }, { "epoch": 0.30822824496620915, "grad_norm": 0.9212906956672668, "learning_rate": 3.3922148097788906e-05, "loss": 0.1073150634765625, "step": 2212 }, { "epoch": 0.30836758865742353, "grad_norm": 0.39670461416244507, "learning_rate": 3.3915378456407167e-05, "loss": 0.09037399291992188, "step": 2213 }, { "epoch": 0.3085069323486379, "grad_norm": 0.5660991668701172, "learning_rate": 3.390860572334602e-05, "loss": 0.07079696655273438, "step": 2214 }, { "epoch": 0.3086462760398523, "grad_norm": 0.406087189912796, "learning_rate": 3.390182990011022e-05, "loss": 0.06858158111572266, "step": 2215 }, { "epoch": 0.30878561973106666, "grad_norm": 0.35389095544815063, "learning_rate": 3.389505098820521e-05, "loss": 0.09491920471191406, "step": 2216 }, { "epoch": 0.30892496342228104, "grad_norm": 0.27242687344551086, "learning_rate": 3.388826898913709e-05, "loss": 0.07175445556640625, "step": 2217 }, { "epoch": 0.3090643071134954, "grad_norm": 0.2788008749485016, "learning_rate": 3.3881483904412685e-05, "loss": 0.08760643005371094, "step": 2218 }, { "epoch": 0.3092036508047098, "grad_norm": 0.5513746738433838, "learning_rate": 3.3874695735539467e-05, "loss": 0.11725044250488281, "step": 2219 }, { "epoch": 0.3093429944959242, "grad_norm": 0.4611223340034485, "learning_rate": 3.3867904484025626e-05, "loss": 0.09033870697021484, "step": 2220 }, { "epoch": 0.30948233818713855, "grad_norm": 0.4473017156124115, "learning_rate": 3.3861110151380015e-05, "loss": 0.10693931579589844, "step": 2221 }, { "epoch": 0.309621681878353, "grad_norm": 0.3503588140010834, "learning_rate": 3.3854312739112186e-05, "loss": 0.07238960266113281, "step": 2222 }, { "epoch": 0.30976102556956736, "grad_norm": 0.31143879890441895, "learning_rate": 3.384751224873237e-05, "loss": 0.07255363464355469, "step": 2223 }, { "epoch": 0.30990036926078174, "grad_norm": 0.5713512897491455, "learning_rate": 3.384070868175146e-05, "loss": 0.10464668273925781, "step": 2224 }, { "epoch": 0.3100397129519961, "grad_norm": 0.411577045917511, "learning_rate": 3.383390203968109e-05, "loss": 0.09222984313964844, "step": 2225 }, { "epoch": 0.3101790566432105, "grad_norm": 0.48561152815818787, "learning_rate": 3.38270923240335e-05, "loss": 0.07882118225097656, "step": 2226 }, { "epoch": 0.31031840033442487, "grad_norm": 0.39045268297195435, "learning_rate": 3.382027953632169e-05, "loss": 0.09436225891113281, "step": 2227 }, { "epoch": 0.31045774402563925, "grad_norm": 0.5065590143203735, "learning_rate": 3.381346367805928e-05, "loss": 0.116546630859375, "step": 2228 }, { "epoch": 0.3105970877168536, "grad_norm": 0.2066865712404251, "learning_rate": 3.3806644750760615e-05, "loss": 0.06245994567871094, "step": 2229 }, { "epoch": 0.310736431408068, "grad_norm": 0.765433132648468, "learning_rate": 3.3799822755940694e-05, "loss": 0.09021663665771484, "step": 2230 }, { "epoch": 0.3108757750992824, "grad_norm": 0.4410562515258789, "learning_rate": 3.379299769511521e-05, "loss": 0.09302711486816406, "step": 2231 }, { "epoch": 0.31101511879049676, "grad_norm": 0.790136456489563, "learning_rate": 3.3786169569800534e-05, "loss": 0.16009521484375, "step": 2232 }, { "epoch": 0.31115446248171114, "grad_norm": 0.6399275660514832, "learning_rate": 3.377933838151374e-05, "loss": 0.10659217834472656, "step": 2233 }, { "epoch": 0.3112938061729255, "grad_norm": 0.451762855052948, "learning_rate": 3.377250413177253e-05, "loss": 0.10004997253417969, "step": 2234 }, { "epoch": 0.3114331498641399, "grad_norm": 0.4026309847831726, "learning_rate": 3.3765666822095336e-05, "loss": 0.09274101257324219, "step": 2235 }, { "epoch": 0.31157249355535427, "grad_norm": 0.48816579580307007, "learning_rate": 3.375882645400125e-05, "loss": 0.10879135131835938, "step": 2236 }, { "epoch": 0.31171183724656865, "grad_norm": 0.8027631640434265, "learning_rate": 3.375198302901004e-05, "loss": 0.1460552215576172, "step": 2237 }, { "epoch": 0.311851180937783, "grad_norm": 0.3675945997238159, "learning_rate": 3.3745136548642175e-05, "loss": 0.11374282836914062, "step": 2238 }, { "epoch": 0.3119905246289974, "grad_norm": 0.4578910171985626, "learning_rate": 3.373828701441877e-05, "loss": 0.11362457275390625, "step": 2239 }, { "epoch": 0.3121298683202118, "grad_norm": 0.5062185525894165, "learning_rate": 3.3731434427861644e-05, "loss": 0.0947265625, "step": 2240 }, { "epoch": 0.31226921201142616, "grad_norm": 0.3537193238735199, "learning_rate": 3.372457879049328e-05, "loss": 0.0774688720703125, "step": 2241 }, { "epoch": 0.3124085557026406, "grad_norm": 0.3560682237148285, "learning_rate": 3.3717720103836846e-05, "loss": 0.0877695083618164, "step": 2242 }, { "epoch": 0.31254789939385497, "grad_norm": 0.628003716468811, "learning_rate": 3.371085836941618e-05, "loss": 0.12603378295898438, "step": 2243 }, { "epoch": 0.31268724308506934, "grad_norm": 0.3049165904521942, "learning_rate": 3.370399358875582e-05, "loss": 0.07451343536376953, "step": 2244 }, { "epoch": 0.3128265867762837, "grad_norm": 0.7593726515769958, "learning_rate": 3.3697125763380944e-05, "loss": 0.12245655059814453, "step": 2245 }, { "epoch": 0.3129659304674981, "grad_norm": 0.5958145260810852, "learning_rate": 3.369025489481744e-05, "loss": 0.11583518981933594, "step": 2246 }, { "epoch": 0.3131052741587125, "grad_norm": 0.4666860103607178, "learning_rate": 3.3683380984591845e-05, "loss": 0.09042549133300781, "step": 2247 }, { "epoch": 0.31324461784992685, "grad_norm": 0.7894067168235779, "learning_rate": 3.36765040342314e-05, "loss": 0.14007186889648438, "step": 2248 }, { "epoch": 0.31338396154114123, "grad_norm": 0.42105093598365784, "learning_rate": 3.3669624045264e-05, "loss": 0.1000518798828125, "step": 2249 }, { "epoch": 0.3135233052323556, "grad_norm": 0.45504456758499146, "learning_rate": 3.3662741019218206e-05, "loss": 0.08559226989746094, "step": 2250 }, { "epoch": 0.31366264892357, "grad_norm": 0.3422819674015045, "learning_rate": 3.3655854957623295e-05, "loss": 0.07419967651367188, "step": 2251 }, { "epoch": 0.31380199261478436, "grad_norm": 0.47475188970565796, "learning_rate": 3.3648965862009174e-05, "loss": 0.0955047607421875, "step": 2252 }, { "epoch": 0.31394133630599874, "grad_norm": 0.5955170392990112, "learning_rate": 3.364207373390645e-05, "loss": 0.13274765014648438, "step": 2253 }, { "epoch": 0.3140806799972131, "grad_norm": 0.37457892298698425, "learning_rate": 3.3635178574846403e-05, "loss": 0.10373687744140625, "step": 2254 }, { "epoch": 0.3142200236884275, "grad_norm": 0.2952471077442169, "learning_rate": 3.362828038636097e-05, "loss": 0.07410907745361328, "step": 2255 }, { "epoch": 0.3143593673796419, "grad_norm": 0.45747804641723633, "learning_rate": 3.3621379169982774e-05, "loss": 0.09686279296875, "step": 2256 }, { "epoch": 0.31449871107085625, "grad_norm": 0.29270270466804504, "learning_rate": 3.361447492724511e-05, "loss": 0.06411361694335938, "step": 2257 }, { "epoch": 0.31463805476207063, "grad_norm": 0.8030925393104553, "learning_rate": 3.3607567659681934e-05, "loss": 0.10518074035644531, "step": 2258 }, { "epoch": 0.314777398453285, "grad_norm": 0.45017898082733154, "learning_rate": 3.3600657368827894e-05, "loss": 0.1027374267578125, "step": 2259 }, { "epoch": 0.3149167421444994, "grad_norm": 0.34578078985214233, "learning_rate": 3.35937440562183e-05, "loss": 0.07234573364257812, "step": 2260 }, { "epoch": 0.31505608583571376, "grad_norm": 0.4659200608730316, "learning_rate": 3.358682772338912e-05, "loss": 0.11041736602783203, "step": 2261 }, { "epoch": 0.3151954295269282, "grad_norm": 0.21286527812480927, "learning_rate": 3.357990837187701e-05, "loss": 0.06775569915771484, "step": 2262 }, { "epoch": 0.31533477321814257, "grad_norm": 0.7375885248184204, "learning_rate": 3.35729860032193e-05, "loss": 0.11047935485839844, "step": 2263 }, { "epoch": 0.31547411690935695, "grad_norm": 0.5221675634384155, "learning_rate": 3.356606061895398e-05, "loss": 0.1099081039428711, "step": 2264 }, { "epoch": 0.3156134606005713, "grad_norm": 0.3933596611022949, "learning_rate": 3.35591322206197e-05, "loss": 0.09245967864990234, "step": 2265 }, { "epoch": 0.3157528042917857, "grad_norm": 0.5408973693847656, "learning_rate": 3.355220080975581e-05, "loss": 0.08696937561035156, "step": 2266 }, { "epoch": 0.3158921479830001, "grad_norm": 0.2773128151893616, "learning_rate": 3.3545266387902295e-05, "loss": 0.06807804107666016, "step": 2267 }, { "epoch": 0.31603149167421446, "grad_norm": 0.30009403824806213, "learning_rate": 3.353832895659984e-05, "loss": 0.08386993408203125, "step": 2268 }, { "epoch": 0.31617083536542884, "grad_norm": 0.47882503271102905, "learning_rate": 3.353138851738976e-05, "loss": 0.11203384399414062, "step": 2269 }, { "epoch": 0.3163101790566432, "grad_norm": 0.4055362045764923, "learning_rate": 3.352444507181409e-05, "loss": 0.09613800048828125, "step": 2270 }, { "epoch": 0.3164495227478576, "grad_norm": 0.4450908899307251, "learning_rate": 3.3517498621415496e-05, "loss": 0.09634971618652344, "step": 2271 }, { "epoch": 0.31658886643907197, "grad_norm": 0.6092128753662109, "learning_rate": 3.3510549167737316e-05, "loss": 0.11800003051757812, "step": 2272 }, { "epoch": 0.31672821013028635, "grad_norm": 0.4656425714492798, "learning_rate": 3.350359671232356e-05, "loss": 0.09119224548339844, "step": 2273 }, { "epoch": 0.3168675538215007, "grad_norm": 0.6673257350921631, "learning_rate": 3.349664125671891e-05, "loss": 0.12740707397460938, "step": 2274 }, { "epoch": 0.3170068975127151, "grad_norm": 0.33994215726852417, "learning_rate": 3.3489682802468704e-05, "loss": 0.07729053497314453, "step": 2275 }, { "epoch": 0.3171462412039295, "grad_norm": 0.34801408648490906, "learning_rate": 3.348272135111895e-05, "loss": 0.09315109252929688, "step": 2276 }, { "epoch": 0.31728558489514386, "grad_norm": 0.41522085666656494, "learning_rate": 3.347575690421633e-05, "loss": 0.08078384399414062, "step": 2277 }, { "epoch": 0.31742492858635823, "grad_norm": 0.608802080154419, "learning_rate": 3.346878946330819e-05, "loss": 0.11621761322021484, "step": 2278 }, { "epoch": 0.3175642722775726, "grad_norm": 0.7342398762702942, "learning_rate": 3.346181902994252e-05, "loss": 0.14013290405273438, "step": 2279 }, { "epoch": 0.317703615968787, "grad_norm": 0.6116031408309937, "learning_rate": 3.3454845605668e-05, "loss": 0.11138534545898438, "step": 2280 }, { "epoch": 0.31784295966000137, "grad_norm": 0.3514629900455475, "learning_rate": 3.3447869192033974e-05, "loss": 0.094879150390625, "step": 2281 }, { "epoch": 0.3179823033512158, "grad_norm": 0.4417176842689514, "learning_rate": 3.344088979059042e-05, "loss": 0.08700275421142578, "step": 2282 }, { "epoch": 0.3181216470424302, "grad_norm": 0.33068954944610596, "learning_rate": 3.343390740288803e-05, "loss": 0.07244300842285156, "step": 2283 }, { "epoch": 0.31826099073364456, "grad_norm": 0.5799704790115356, "learning_rate": 3.3426922030478106e-05, "loss": 0.09161376953125, "step": 2284 }, { "epoch": 0.31840033442485893, "grad_norm": 0.3916057348251343, "learning_rate": 3.341993367491266e-05, "loss": 0.07014656066894531, "step": 2285 }, { "epoch": 0.3185396781160733, "grad_norm": 0.9308236241340637, "learning_rate": 3.3412942337744326e-05, "loss": 0.11835098266601562, "step": 2286 }, { "epoch": 0.3186790218072877, "grad_norm": 0.32238301634788513, "learning_rate": 3.340594802052642e-05, "loss": 0.06805610656738281, "step": 2287 }, { "epoch": 0.31881836549850207, "grad_norm": 0.344722181558609, "learning_rate": 3.339895072481294e-05, "loss": 0.07584762573242188, "step": 2288 }, { "epoch": 0.31895770918971644, "grad_norm": 0.5328919291496277, "learning_rate": 3.3391950452158504e-05, "loss": 0.11554527282714844, "step": 2289 }, { "epoch": 0.3190970528809308, "grad_norm": 0.5167103409767151, "learning_rate": 3.338494720411842e-05, "loss": 0.09307098388671875, "step": 2290 }, { "epoch": 0.3192363965721452, "grad_norm": 0.659233033657074, "learning_rate": 3.337794098224866e-05, "loss": 0.12290191650390625, "step": 2291 }, { "epoch": 0.3193757402633596, "grad_norm": 0.4836517870426178, "learning_rate": 3.337093178810583e-05, "loss": 0.08838462829589844, "step": 2292 }, { "epoch": 0.31951508395457395, "grad_norm": 0.4207834303379059, "learning_rate": 3.336391962324722e-05, "loss": 0.08433914184570312, "step": 2293 }, { "epoch": 0.31965442764578833, "grad_norm": 0.38034912943840027, "learning_rate": 3.3356904489230784e-05, "loss": 0.0902099609375, "step": 2294 }, { "epoch": 0.3197937713370027, "grad_norm": 0.2723650336265564, "learning_rate": 3.3349886387615096e-05, "loss": 0.07352066040039062, "step": 2295 }, { "epoch": 0.3199331150282171, "grad_norm": 0.23615872859954834, "learning_rate": 3.334286531995945e-05, "loss": 0.07040023803710938, "step": 2296 }, { "epoch": 0.32007245871943146, "grad_norm": 0.48382166028022766, "learning_rate": 3.3335841287823746e-05, "loss": 0.10594940185546875, "step": 2297 }, { "epoch": 0.32021180241064584, "grad_norm": 0.2502215504646301, "learning_rate": 3.332881429276857e-05, "loss": 0.08007049560546875, "step": 2298 }, { "epoch": 0.3203511461018602, "grad_norm": 0.2912226915359497, "learning_rate": 3.3321784336355163e-05, "loss": 0.06398391723632812, "step": 2299 }, { "epoch": 0.3204904897930746, "grad_norm": 0.5374675989151001, "learning_rate": 3.331475142014542e-05, "loss": 0.10400581359863281, "step": 2300 }, { "epoch": 0.320629833484289, "grad_norm": 0.3450746238231659, "learning_rate": 3.3307715545701885e-05, "loss": 0.09013557434082031, "step": 2301 }, { "epoch": 0.3207691771755034, "grad_norm": 0.4561043381690979, "learning_rate": 3.3300676714587784e-05, "loss": 0.10864639282226562, "step": 2302 }, { "epoch": 0.3209085208667178, "grad_norm": 0.5234081149101257, "learning_rate": 3.329363492836697e-05, "loss": 0.09229087829589844, "step": 2303 }, { "epoch": 0.32104786455793216, "grad_norm": 0.6799538731575012, "learning_rate": 3.328659018860398e-05, "loss": 0.11582565307617188, "step": 2304 }, { "epoch": 0.32118720824914654, "grad_norm": 0.6993386745452881, "learning_rate": 3.3279542496863984e-05, "loss": 0.1275920867919922, "step": 2305 }, { "epoch": 0.3213265519403609, "grad_norm": 0.32343941926956177, "learning_rate": 3.3272491854712825e-05, "loss": 0.09890556335449219, "step": 2306 }, { "epoch": 0.3214658956315753, "grad_norm": 0.28908371925354004, "learning_rate": 3.326543826371699e-05, "loss": 0.06862640380859375, "step": 2307 }, { "epoch": 0.32160523932278967, "grad_norm": 0.3716498017311096, "learning_rate": 3.3258381725443625e-05, "loss": 0.08921146392822266, "step": 2308 }, { "epoch": 0.32174458301400405, "grad_norm": 0.26163074374198914, "learning_rate": 3.325132224146054e-05, "loss": 0.06203651428222656, "step": 2309 }, { "epoch": 0.3218839267052184, "grad_norm": 0.5224486589431763, "learning_rate": 3.3244259813336185e-05, "loss": 0.11212730407714844, "step": 2310 }, { "epoch": 0.3220232703964328, "grad_norm": 0.5506842136383057, "learning_rate": 3.323719444263967e-05, "loss": 0.1135406494140625, "step": 2311 }, { "epoch": 0.3221626140876472, "grad_norm": 0.5278815627098083, "learning_rate": 3.323012613094075e-05, "loss": 0.10018062591552734, "step": 2312 }, { "epoch": 0.32230195777886156, "grad_norm": 0.5578588247299194, "learning_rate": 3.322305487980987e-05, "loss": 0.10527992248535156, "step": 2313 }, { "epoch": 0.32244130147007594, "grad_norm": 0.34109964966773987, "learning_rate": 3.3215980690818076e-05, "loss": 0.07400131225585938, "step": 2314 }, { "epoch": 0.3225806451612903, "grad_norm": 0.5088338851928711, "learning_rate": 3.32089035655371e-05, "loss": 0.1118011474609375, "step": 2315 }, { "epoch": 0.3227199888525047, "grad_norm": 0.32191866636276245, "learning_rate": 3.320182350553931e-05, "loss": 0.0729684829711914, "step": 2316 }, { "epoch": 0.32285933254371907, "grad_norm": 0.5021439790725708, "learning_rate": 3.319474051239775e-05, "loss": 0.11319923400878906, "step": 2317 }, { "epoch": 0.32299867623493345, "grad_norm": 0.4694070518016815, "learning_rate": 3.318765458768608e-05, "loss": 0.10040473937988281, "step": 2318 }, { "epoch": 0.3231380199261478, "grad_norm": 0.435435950756073, "learning_rate": 3.318056573297864e-05, "loss": 0.09393119812011719, "step": 2319 }, { "epoch": 0.3232773636173622, "grad_norm": 0.5262898206710815, "learning_rate": 3.317347394985042e-05, "loss": 0.1358470916748047, "step": 2320 }, { "epoch": 0.3234167073085766, "grad_norm": 0.4200402796268463, "learning_rate": 3.316637923987704e-05, "loss": 0.08157157897949219, "step": 2321 }, { "epoch": 0.323556050999791, "grad_norm": 0.30181458592414856, "learning_rate": 3.315928160463478e-05, "loss": 0.07543563842773438, "step": 2322 }, { "epoch": 0.3236953946910054, "grad_norm": 0.3008681833744049, "learning_rate": 3.3152181045700584e-05, "loss": 0.08365631103515625, "step": 2323 }, { "epoch": 0.32383473838221977, "grad_norm": 0.30835631489753723, "learning_rate": 3.314507756465202e-05, "loss": 0.09855461120605469, "step": 2324 }, { "epoch": 0.32397408207343414, "grad_norm": 0.5550879240036011, "learning_rate": 3.313797116306734e-05, "loss": 0.14342689514160156, "step": 2325 }, { "epoch": 0.3241134257646485, "grad_norm": 0.35623231530189514, "learning_rate": 3.3130861842525416e-05, "loss": 0.09258079528808594, "step": 2326 }, { "epoch": 0.3242527694558629, "grad_norm": 0.6147512793540955, "learning_rate": 3.3123749604605765e-05, "loss": 0.134246826171875, "step": 2327 }, { "epoch": 0.3243921131470773, "grad_norm": 0.9250198006629944, "learning_rate": 3.311663445088858e-05, "loss": 0.16204833984375, "step": 2328 }, { "epoch": 0.32453145683829165, "grad_norm": 0.4717129170894623, "learning_rate": 3.310951638295467e-05, "loss": 0.09883499145507812, "step": 2329 }, { "epoch": 0.32467080052950603, "grad_norm": 0.24493888020515442, "learning_rate": 3.310239540238552e-05, "loss": 0.07372856140136719, "step": 2330 }, { "epoch": 0.3248101442207204, "grad_norm": 0.624098002910614, "learning_rate": 3.3095271510763234e-05, "loss": 0.13164329528808594, "step": 2331 }, { "epoch": 0.3249494879119348, "grad_norm": 0.22563324868679047, "learning_rate": 3.3088144709670596e-05, "loss": 0.06113624572753906, "step": 2332 }, { "epoch": 0.32508883160314916, "grad_norm": 0.45894870162010193, "learning_rate": 3.3081015000691014e-05, "loss": 0.08589839935302734, "step": 2333 }, { "epoch": 0.32522817529436354, "grad_norm": 0.2636943459510803, "learning_rate": 3.3073882385408535e-05, "loss": 0.0672459602355957, "step": 2334 }, { "epoch": 0.3253675189855779, "grad_norm": 0.6906598806381226, "learning_rate": 3.306674686540788e-05, "loss": 0.10051250457763672, "step": 2335 }, { "epoch": 0.3255068626767923, "grad_norm": 0.9698188900947571, "learning_rate": 3.305960844227439e-05, "loss": 0.15515518188476562, "step": 2336 }, { "epoch": 0.3256462063680067, "grad_norm": 0.2422633022069931, "learning_rate": 3.305246711759406e-05, "loss": 0.0769805908203125, "step": 2337 }, { "epoch": 0.32578555005922105, "grad_norm": 0.493241548538208, "learning_rate": 3.3045322892953524e-05, "loss": 0.1036825180053711, "step": 2338 }, { "epoch": 0.32592489375043543, "grad_norm": 0.36997658014297485, "learning_rate": 3.303817576994008e-05, "loss": 0.0882425308227539, "step": 2339 }, { "epoch": 0.3260642374416498, "grad_norm": 0.5829008221626282, "learning_rate": 3.303102575014164e-05, "loss": 0.09795761108398438, "step": 2340 }, { "epoch": 0.3262035811328642, "grad_norm": 0.7862222194671631, "learning_rate": 3.3023872835146775e-05, "loss": 0.10712337493896484, "step": 2341 }, { "epoch": 0.3263429248240786, "grad_norm": 0.32332322001457214, "learning_rate": 3.301671702654472e-05, "loss": 0.06812667846679688, "step": 2342 }, { "epoch": 0.326482268515293, "grad_norm": 0.2966870665550232, "learning_rate": 3.300955832592531e-05, "loss": 0.0689544677734375, "step": 2343 }, { "epoch": 0.3266216122065074, "grad_norm": 0.3951704204082489, "learning_rate": 3.300239673487905e-05, "loss": 0.10073089599609375, "step": 2344 }, { "epoch": 0.32676095589772175, "grad_norm": 0.573266327381134, "learning_rate": 3.299523225499709e-05, "loss": 0.10152053833007812, "step": 2345 }, { "epoch": 0.3269002995889361, "grad_norm": 0.34922972321510315, "learning_rate": 3.298806488787121e-05, "loss": 0.07849311828613281, "step": 2346 }, { "epoch": 0.3270396432801505, "grad_norm": 0.40301749110221863, "learning_rate": 3.2980894635093837e-05, "loss": 0.09319496154785156, "step": 2347 }, { "epoch": 0.3271789869713649, "grad_norm": 0.7585023045539856, "learning_rate": 3.297372149825803e-05, "loss": 0.1248016357421875, "step": 2348 }, { "epoch": 0.32731833066257926, "grad_norm": 0.3214467465877533, "learning_rate": 3.2966545478957504e-05, "loss": 0.08795547485351562, "step": 2349 }, { "epoch": 0.32745767435379364, "grad_norm": 0.6638066172599792, "learning_rate": 3.29593665787866e-05, "loss": 0.1151885986328125, "step": 2350 }, { "epoch": 0.327597018045008, "grad_norm": 0.4727615714073181, "learning_rate": 3.295218479934032e-05, "loss": 0.105743408203125, "step": 2351 }, { "epoch": 0.3277363617362224, "grad_norm": 0.3728771209716797, "learning_rate": 3.2945000142214274e-05, "loss": 0.0948953628540039, "step": 2352 }, { "epoch": 0.32787570542743677, "grad_norm": 0.2520064413547516, "learning_rate": 3.293781260900473e-05, "loss": 0.07794475555419922, "step": 2353 }, { "epoch": 0.32801504911865115, "grad_norm": 0.3920459449291229, "learning_rate": 3.29306222013086e-05, "loss": 0.084991455078125, "step": 2354 }, { "epoch": 0.3281543928098655, "grad_norm": 0.6644890904426575, "learning_rate": 3.292342892072344e-05, "loss": 0.10717296600341797, "step": 2355 }, { "epoch": 0.3282937365010799, "grad_norm": 0.323684424161911, "learning_rate": 3.2916232768847404e-05, "loss": 0.07300376892089844, "step": 2356 }, { "epoch": 0.3284330801922943, "grad_norm": 0.5622155070304871, "learning_rate": 3.2909033747279344e-05, "loss": 0.11129951477050781, "step": 2357 }, { "epoch": 0.32857242388350866, "grad_norm": 0.4102945625782013, "learning_rate": 3.29018318576187e-05, "loss": 0.09053707122802734, "step": 2358 }, { "epoch": 0.32871176757472303, "grad_norm": 0.29124870896339417, "learning_rate": 3.289462710146557e-05, "loss": 0.06834983825683594, "step": 2359 }, { "epoch": 0.3288511112659374, "grad_norm": 0.35967689752578735, "learning_rate": 3.288741948042069e-05, "loss": 0.09321403503417969, "step": 2360 }, { "epoch": 0.3289904549571518, "grad_norm": 0.39062491059303284, "learning_rate": 3.288020899608542e-05, "loss": 0.08416557312011719, "step": 2361 }, { "epoch": 0.32912979864836617, "grad_norm": 0.40589770674705505, "learning_rate": 3.287299565006177e-05, "loss": 0.07540512084960938, "step": 2362 }, { "epoch": 0.3292691423395806, "grad_norm": 0.45984339714050293, "learning_rate": 3.286577944395239e-05, "loss": 0.12878990173339844, "step": 2363 }, { "epoch": 0.329408486030795, "grad_norm": 0.3910803496837616, "learning_rate": 3.2858560379360546e-05, "loss": 0.09573841094970703, "step": 2364 }, { "epoch": 0.32954782972200936, "grad_norm": 0.34030821919441223, "learning_rate": 3.2851338457890154e-05, "loss": 0.09856033325195312, "step": 2365 }, { "epoch": 0.32968717341322373, "grad_norm": 0.2837493419647217, "learning_rate": 3.284411368114575e-05, "loss": 0.09913063049316406, "step": 2366 }, { "epoch": 0.3298265171044381, "grad_norm": 0.4155622720718384, "learning_rate": 3.283688605073253e-05, "loss": 0.0820608139038086, "step": 2367 }, { "epoch": 0.3299658607956525, "grad_norm": 0.5526661276817322, "learning_rate": 3.282965556825629e-05, "loss": 0.11838626861572266, "step": 2368 }, { "epoch": 0.33010520448686687, "grad_norm": 0.3923546075820923, "learning_rate": 3.282242223532349e-05, "loss": 0.0969076156616211, "step": 2369 }, { "epoch": 0.33024454817808124, "grad_norm": 0.4976341426372528, "learning_rate": 3.281518605354123e-05, "loss": 0.1115884780883789, "step": 2370 }, { "epoch": 0.3303838918692956, "grad_norm": 0.35734495520591736, "learning_rate": 3.280794702451719e-05, "loss": 0.07043647766113281, "step": 2371 }, { "epoch": 0.33052323556051, "grad_norm": 0.3963504731655121, "learning_rate": 3.2800705149859725e-05, "loss": 0.09220218658447266, "step": 2372 }, { "epoch": 0.3306625792517244, "grad_norm": 0.5049615502357483, "learning_rate": 3.2793460431177827e-05, "loss": 0.09704780578613281, "step": 2373 }, { "epoch": 0.33080192294293875, "grad_norm": 0.3017151355743408, "learning_rate": 3.27862128700811e-05, "loss": 0.08580589294433594, "step": 2374 }, { "epoch": 0.33094126663415313, "grad_norm": 0.35772690176963806, "learning_rate": 3.277896246817979e-05, "loss": 0.08547019958496094, "step": 2375 }, { "epoch": 0.3310806103253675, "grad_norm": 0.19267503917217255, "learning_rate": 3.277170922708477e-05, "loss": 0.06628799438476562, "step": 2376 }, { "epoch": 0.3312199540165819, "grad_norm": 0.4267140328884125, "learning_rate": 3.276445314840754e-05, "loss": 0.09536933898925781, "step": 2377 }, { "epoch": 0.33135929770779626, "grad_norm": 0.5670433640480042, "learning_rate": 3.275719423376024e-05, "loss": 0.10722541809082031, "step": 2378 }, { "epoch": 0.33149864139901064, "grad_norm": 0.33942684531211853, "learning_rate": 3.274993248475563e-05, "loss": 0.09595108032226562, "step": 2379 }, { "epoch": 0.331637985090225, "grad_norm": 0.4974816143512726, "learning_rate": 3.274266790300711e-05, "loss": 0.09083080291748047, "step": 2380 }, { "epoch": 0.3317773287814394, "grad_norm": 0.18702788650989532, "learning_rate": 3.2735400490128695e-05, "loss": 0.06667423248291016, "step": 2381 }, { "epoch": 0.3319166724726538, "grad_norm": 0.4869811534881592, "learning_rate": 3.272813024773506e-05, "loss": 0.10177230834960938, "step": 2382 }, { "epoch": 0.3320560161638682, "grad_norm": 0.39756399393081665, "learning_rate": 3.272085717744146e-05, "loss": 0.09592247009277344, "step": 2383 }, { "epoch": 0.3321953598550826, "grad_norm": 0.393979012966156, "learning_rate": 3.271358128086381e-05, "loss": 0.07917022705078125, "step": 2384 }, { "epoch": 0.33233470354629696, "grad_norm": 0.5237410664558411, "learning_rate": 3.270630255961867e-05, "loss": 0.13058853149414062, "step": 2385 }, { "epoch": 0.33247404723751134, "grad_norm": 0.5242125988006592, "learning_rate": 3.269902101532319e-05, "loss": 0.11997604370117188, "step": 2386 }, { "epoch": 0.3326133909287257, "grad_norm": 0.3407009541988373, "learning_rate": 3.269173664959516e-05, "loss": 0.10419654846191406, "step": 2387 }, { "epoch": 0.3327527346199401, "grad_norm": 0.5610882639884949, "learning_rate": 3.2684449464053006e-05, "loss": 0.10400962829589844, "step": 2388 }, { "epoch": 0.33289207831115447, "grad_norm": 0.4991978108882904, "learning_rate": 3.2677159460315766e-05, "loss": 0.1324005126953125, "step": 2389 }, { "epoch": 0.33303142200236885, "grad_norm": 0.7368584871292114, "learning_rate": 3.2669866640003124e-05, "loss": 0.1494121551513672, "step": 2390 }, { "epoch": 0.3331707656935832, "grad_norm": 0.5434150099754333, "learning_rate": 3.266257100473538e-05, "loss": 0.0918588638305664, "step": 2391 }, { "epoch": 0.3333101093847976, "grad_norm": 0.30917656421661377, "learning_rate": 3.2655272556133436e-05, "loss": 0.07354927062988281, "step": 2392 }, { "epoch": 0.333449453076012, "grad_norm": 0.3430536389350891, "learning_rate": 3.264797129581886e-05, "loss": 0.07726478576660156, "step": 2393 }, { "epoch": 0.33358879676722636, "grad_norm": 0.3795054256916046, "learning_rate": 3.264066722541382e-05, "loss": 0.10697364807128906, "step": 2394 }, { "epoch": 0.33372814045844074, "grad_norm": 0.40601861476898193, "learning_rate": 3.263336034654112e-05, "loss": 0.09152030944824219, "step": 2395 }, { "epoch": 0.3338674841496551, "grad_norm": 0.44102704524993896, "learning_rate": 3.262605066082417e-05, "loss": 0.08270454406738281, "step": 2396 }, { "epoch": 0.3340068278408695, "grad_norm": 0.3930211067199707, "learning_rate": 3.261873816988702e-05, "loss": 0.07275009155273438, "step": 2397 }, { "epoch": 0.33414617153208387, "grad_norm": 0.36082959175109863, "learning_rate": 3.261142287535433e-05, "loss": 0.0689554214477539, "step": 2398 }, { "epoch": 0.33428551522329825, "grad_norm": 0.4438636898994446, "learning_rate": 3.2604104778851416e-05, "loss": 0.10985755920410156, "step": 2399 }, { "epoch": 0.3344248589145126, "grad_norm": 0.31746554374694824, "learning_rate": 3.259678388200417e-05, "loss": 0.08141613006591797, "step": 2400 }, { "epoch": 0.334564202605727, "grad_norm": 0.4546755254268646, "learning_rate": 3.258946018643914e-05, "loss": 0.07799720764160156, "step": 2401 }, { "epoch": 0.3347035462969414, "grad_norm": 0.44316938519477844, "learning_rate": 3.2582133693783475e-05, "loss": 0.08976078033447266, "step": 2402 }, { "epoch": 0.3348428899881558, "grad_norm": 0.42000386118888855, "learning_rate": 3.257480440566496e-05, "loss": 0.08169174194335938, "step": 2403 }, { "epoch": 0.3349822336793702, "grad_norm": 0.2285415232181549, "learning_rate": 3.256747232371199e-05, "loss": 0.07017135620117188, "step": 2404 }, { "epoch": 0.33512157737058457, "grad_norm": 0.46919485926628113, "learning_rate": 3.256013744955359e-05, "loss": 0.12347221374511719, "step": 2405 }, { "epoch": 0.33526092106179894, "grad_norm": 0.6146566867828369, "learning_rate": 3.25527997848194e-05, "loss": 0.10839462280273438, "step": 2406 }, { "epoch": 0.3354002647530133, "grad_norm": 0.35578176379203796, "learning_rate": 3.2545459331139694e-05, "loss": 0.09717178344726562, "step": 2407 }, { "epoch": 0.3355396084442277, "grad_norm": 0.5027663111686707, "learning_rate": 3.253811609014533e-05, "loss": 0.11473417282104492, "step": 2408 }, { "epoch": 0.3356789521354421, "grad_norm": 0.2492402046918869, "learning_rate": 3.2530770063467835e-05, "loss": 0.07414054870605469, "step": 2409 }, { "epoch": 0.33581829582665645, "grad_norm": 0.6268411874771118, "learning_rate": 3.2523421252739295e-05, "loss": 0.10320281982421875, "step": 2410 }, { "epoch": 0.33595763951787083, "grad_norm": 0.266360878944397, "learning_rate": 3.2516069659592485e-05, "loss": 0.07423591613769531, "step": 2411 }, { "epoch": 0.3360969832090852, "grad_norm": 0.33042073249816895, "learning_rate": 3.2508715285660734e-05, "loss": 0.09781837463378906, "step": 2412 }, { "epoch": 0.3362363269002996, "grad_norm": 0.39457595348358154, "learning_rate": 3.250135813257803e-05, "loss": 0.11181449890136719, "step": 2413 }, { "epoch": 0.33637567059151396, "grad_norm": 0.46706685423851013, "learning_rate": 3.249399820197895e-05, "loss": 0.06671714782714844, "step": 2414 }, { "epoch": 0.33651501428272834, "grad_norm": 0.5006923675537109, "learning_rate": 3.248663549549872e-05, "loss": 0.09073925018310547, "step": 2415 }, { "epoch": 0.3366543579739427, "grad_norm": 0.2768676280975342, "learning_rate": 3.247927001477316e-05, "loss": 0.07122039794921875, "step": 2416 }, { "epoch": 0.3367937016651571, "grad_norm": 0.4138382375240326, "learning_rate": 3.247190176143871e-05, "loss": 0.08867454528808594, "step": 2417 }, { "epoch": 0.3369330453563715, "grad_norm": 0.4021446406841278, "learning_rate": 3.246453073713242e-05, "loss": 0.09118080139160156, "step": 2418 }, { "epoch": 0.33707238904758585, "grad_norm": 0.4186002314090729, "learning_rate": 3.245715694349197e-05, "loss": 0.08011531829833984, "step": 2419 }, { "epoch": 0.33721173273880023, "grad_norm": 0.44859442114830017, "learning_rate": 3.244978038215566e-05, "loss": 0.09143638610839844, "step": 2420 }, { "epoch": 0.3373510764300146, "grad_norm": 0.33427193760871887, "learning_rate": 3.244240105476237e-05, "loss": 0.08266639709472656, "step": 2421 }, { "epoch": 0.337490420121229, "grad_norm": 0.45313364267349243, "learning_rate": 3.243501896295164e-05, "loss": 0.09927177429199219, "step": 2422 }, { "epoch": 0.3376297638124434, "grad_norm": 0.7248185873031616, "learning_rate": 3.242763410836358e-05, "loss": 0.12139129638671875, "step": 2423 }, { "epoch": 0.3377691075036578, "grad_norm": 0.5738295912742615, "learning_rate": 3.242024649263896e-05, "loss": 0.08926010131835938, "step": 2424 }, { "epoch": 0.3379084511948722, "grad_norm": 0.7763272523880005, "learning_rate": 3.241285611741913e-05, "loss": 0.12429046630859375, "step": 2425 }, { "epoch": 0.33804779488608655, "grad_norm": 0.4029110074043274, "learning_rate": 3.240546298434606e-05, "loss": 0.08815956115722656, "step": 2426 }, { "epoch": 0.3381871385773009, "grad_norm": 0.3251311480998993, "learning_rate": 3.2398067095062325e-05, "loss": 0.09242057800292969, "step": 2427 }, { "epoch": 0.3383264822685153, "grad_norm": 0.41928163170814514, "learning_rate": 3.239066845121114e-05, "loss": 0.07892990112304688, "step": 2428 }, { "epoch": 0.3384658259597297, "grad_norm": 0.675408124923706, "learning_rate": 3.238326705443631e-05, "loss": 0.09620285034179688, "step": 2429 }, { "epoch": 0.33860516965094406, "grad_norm": 0.34462517499923706, "learning_rate": 3.237586290638226e-05, "loss": 0.09120559692382812, "step": 2430 }, { "epoch": 0.33874451334215844, "grad_norm": 0.3990703523159027, "learning_rate": 3.2368456008694014e-05, "loss": 0.09833526611328125, "step": 2431 }, { "epoch": 0.3388838570333728, "grad_norm": 0.7682449221611023, "learning_rate": 3.2361046363017216e-05, "loss": 0.14214134216308594, "step": 2432 }, { "epoch": 0.3390232007245872, "grad_norm": 0.4660167098045349, "learning_rate": 3.2353633970998135e-05, "loss": 0.09596443176269531, "step": 2433 }, { "epoch": 0.33916254441580157, "grad_norm": 0.314060240983963, "learning_rate": 3.2346218834283605e-05, "loss": 0.082183837890625, "step": 2434 }, { "epoch": 0.33930188810701595, "grad_norm": 0.5374533534049988, "learning_rate": 3.233880095452113e-05, "loss": 0.12430191040039062, "step": 2435 }, { "epoch": 0.3394412317982303, "grad_norm": 0.5047711133956909, "learning_rate": 3.2331380333358794e-05, "loss": 0.12836074829101562, "step": 2436 }, { "epoch": 0.3395805754894447, "grad_norm": 0.44736090302467346, "learning_rate": 3.232395697244526e-05, "loss": 0.10450935363769531, "step": 2437 }, { "epoch": 0.3397199191806591, "grad_norm": 0.31245410442352295, "learning_rate": 3.231653087342986e-05, "loss": 0.08687973022460938, "step": 2438 }, { "epoch": 0.33985926287187346, "grad_norm": 0.3404836058616638, "learning_rate": 3.230910203796248e-05, "loss": 0.08744049072265625, "step": 2439 }, { "epoch": 0.33999860656308784, "grad_norm": 0.21726205945014954, "learning_rate": 3.2301670467693654e-05, "loss": 0.06679916381835938, "step": 2440 }, { "epoch": 0.3401379502543022, "grad_norm": 0.5237830877304077, "learning_rate": 3.22942361642745e-05, "loss": 0.08211040496826172, "step": 2441 }, { "epoch": 0.3402772939455166, "grad_norm": 0.2824972867965698, "learning_rate": 3.228679912935675e-05, "loss": 0.08375740051269531, "step": 2442 }, { "epoch": 0.340416637636731, "grad_norm": 0.3641866147518158, "learning_rate": 3.227935936459276e-05, "loss": 0.11381912231445312, "step": 2443 }, { "epoch": 0.3405559813279454, "grad_norm": 0.6694666743278503, "learning_rate": 3.2271916871635455e-05, "loss": 0.14427757263183594, "step": 2444 }, { "epoch": 0.3406953250191598, "grad_norm": 0.541516125202179, "learning_rate": 3.226447165213839e-05, "loss": 0.11024284362792969, "step": 2445 }, { "epoch": 0.34083466871037416, "grad_norm": 0.3573431372642517, "learning_rate": 3.225702370775572e-05, "loss": 0.10333061218261719, "step": 2446 }, { "epoch": 0.34097401240158853, "grad_norm": 0.48090577125549316, "learning_rate": 3.224957304014223e-05, "loss": 0.1178741455078125, "step": 2447 }, { "epoch": 0.3411133560928029, "grad_norm": 0.4571234881877899, "learning_rate": 3.224211965095326e-05, "loss": 0.10483551025390625, "step": 2448 }, { "epoch": 0.3412526997840173, "grad_norm": 0.5651015043258667, "learning_rate": 3.2234663541844805e-05, "loss": 0.12268638610839844, "step": 2449 }, { "epoch": 0.34139204347523167, "grad_norm": 0.8033178448677063, "learning_rate": 3.222720471447343e-05, "loss": 0.10239124298095703, "step": 2450 }, { "epoch": 0.34153138716644604, "grad_norm": 0.5018488168716431, "learning_rate": 3.221974317049632e-05, "loss": 0.08542060852050781, "step": 2451 }, { "epoch": 0.3416707308576604, "grad_norm": 0.8319666981697083, "learning_rate": 3.221227891157125e-05, "loss": 0.1290435791015625, "step": 2452 }, { "epoch": 0.3418100745488748, "grad_norm": 0.5570886731147766, "learning_rate": 3.220481193935663e-05, "loss": 0.09189414978027344, "step": 2453 }, { "epoch": 0.3419494182400892, "grad_norm": 0.4300248324871063, "learning_rate": 3.219734225551143e-05, "loss": 0.1008768081665039, "step": 2454 }, { "epoch": 0.34208876193130355, "grad_norm": 0.4105485677719116, "learning_rate": 3.2189869861695254e-05, "loss": 0.08240699768066406, "step": 2455 }, { "epoch": 0.34222810562251793, "grad_norm": 0.44879260659217834, "learning_rate": 3.218239475956829e-05, "loss": 0.10956954956054688, "step": 2456 }, { "epoch": 0.3423674493137323, "grad_norm": 0.3781580626964569, "learning_rate": 3.217491695079134e-05, "loss": 0.09499740600585938, "step": 2457 }, { "epoch": 0.3425067930049467, "grad_norm": 0.20924383401870728, "learning_rate": 3.216743643702581e-05, "loss": 0.05468177795410156, "step": 2458 }, { "epoch": 0.34264613669616106, "grad_norm": 0.5202205777168274, "learning_rate": 3.215995321993368e-05, "loss": 0.10908985137939453, "step": 2459 }, { "epoch": 0.34278548038737544, "grad_norm": 0.7814775705337524, "learning_rate": 3.215246730117757e-05, "loss": 0.12062263488769531, "step": 2460 }, { "epoch": 0.3429248240785898, "grad_norm": 0.291515052318573, "learning_rate": 3.2144978682420664e-05, "loss": 0.09375190734863281, "step": 2461 }, { "epoch": 0.3430641677698042, "grad_norm": 0.5268447995185852, "learning_rate": 3.2137487365326773e-05, "loss": 0.1308135986328125, "step": 2462 }, { "epoch": 0.34320351146101863, "grad_norm": 0.5495997071266174, "learning_rate": 3.212999335156029e-05, "loss": 0.10853767395019531, "step": 2463 }, { "epoch": 0.343342855152233, "grad_norm": 0.4850027561187744, "learning_rate": 3.212249664278622e-05, "loss": 0.13413524627685547, "step": 2464 }, { "epoch": 0.3434821988434474, "grad_norm": 0.36481475830078125, "learning_rate": 3.211499724067016e-05, "loss": 0.08443450927734375, "step": 2465 }, { "epoch": 0.34362154253466176, "grad_norm": 0.2388271987438202, "learning_rate": 3.2107495146878295e-05, "loss": 0.07675743103027344, "step": 2466 }, { "epoch": 0.34376088622587614, "grad_norm": 0.2634911835193634, "learning_rate": 3.2099990363077434e-05, "loss": 0.08017158508300781, "step": 2467 }, { "epoch": 0.3439002299170905, "grad_norm": 1.1354248523712158, "learning_rate": 3.209248289093496e-05, "loss": 0.1427021026611328, "step": 2468 }, { "epoch": 0.3440395736083049, "grad_norm": 0.40939679741859436, "learning_rate": 3.208497273211886e-05, "loss": 0.0929250717163086, "step": 2469 }, { "epoch": 0.34417891729951927, "grad_norm": 0.24878805875778198, "learning_rate": 3.207745988829773e-05, "loss": 0.07772636413574219, "step": 2470 }, { "epoch": 0.34431826099073365, "grad_norm": 0.4284425675868988, "learning_rate": 3.206994436114074e-05, "loss": 0.07416725158691406, "step": 2471 }, { "epoch": 0.344457604681948, "grad_norm": 0.46328625082969666, "learning_rate": 3.206242615231768e-05, "loss": 0.07968711853027344, "step": 2472 }, { "epoch": 0.3445969483731624, "grad_norm": 0.5142204761505127, "learning_rate": 3.2054905263498916e-05, "loss": 0.08838224411010742, "step": 2473 }, { "epoch": 0.3447362920643768, "grad_norm": 0.40713605284690857, "learning_rate": 3.2047381696355424e-05, "loss": 0.09913825988769531, "step": 2474 }, { "epoch": 0.34487563575559116, "grad_norm": 0.512003481388092, "learning_rate": 3.2039855452558755e-05, "loss": 0.08614349365234375, "step": 2475 }, { "epoch": 0.34501497944680554, "grad_norm": 0.6679466366767883, "learning_rate": 3.203232653378109e-05, "loss": 0.12686920166015625, "step": 2476 }, { "epoch": 0.3451543231380199, "grad_norm": 0.25272807478904724, "learning_rate": 3.202479494169516e-05, "loss": 0.06745147705078125, "step": 2477 }, { "epoch": 0.3452936668292343, "grad_norm": 0.47477859258651733, "learning_rate": 3.2017260677974346e-05, "loss": 0.0842275619506836, "step": 2478 }, { "epoch": 0.34543301052044867, "grad_norm": 0.40169757604599, "learning_rate": 3.200972374429255e-05, "loss": 0.07072257995605469, "step": 2479 }, { "epoch": 0.34557235421166305, "grad_norm": 0.5866200923919678, "learning_rate": 3.200218414232433e-05, "loss": 0.1068878173828125, "step": 2480 }, { "epoch": 0.3457116979028774, "grad_norm": 0.3350193202495575, "learning_rate": 3.199464187374481e-05, "loss": 0.08732795715332031, "step": 2481 }, { "epoch": 0.3458510415940918, "grad_norm": 0.639800488948822, "learning_rate": 3.19870969402297e-05, "loss": 0.11146354675292969, "step": 2482 }, { "epoch": 0.34599038528530623, "grad_norm": 0.47244054079055786, "learning_rate": 3.197954934345533e-05, "loss": 0.07835960388183594, "step": 2483 }, { "epoch": 0.3461297289765206, "grad_norm": 0.3806304633617401, "learning_rate": 3.1971999085098583e-05, "loss": 0.08087348937988281, "step": 2484 }, { "epoch": 0.346269072667735, "grad_norm": 0.3921177387237549, "learning_rate": 3.196444616683698e-05, "loss": 0.08104801177978516, "step": 2485 }, { "epoch": 0.34640841635894937, "grad_norm": 0.7224194407463074, "learning_rate": 3.195689059034858e-05, "loss": 0.09578323364257812, "step": 2486 }, { "epoch": 0.34654776005016374, "grad_norm": 0.7315101623535156, "learning_rate": 3.194933235731207e-05, "loss": 0.10746383666992188, "step": 2487 }, { "epoch": 0.3466871037413781, "grad_norm": 0.3492971658706665, "learning_rate": 3.194177146940673e-05, "loss": 0.07104015350341797, "step": 2488 }, { "epoch": 0.3468264474325925, "grad_norm": 0.3480431139469147, "learning_rate": 3.193420792831239e-05, "loss": 0.08890533447265625, "step": 2489 }, { "epoch": 0.3469657911238069, "grad_norm": 0.37840065360069275, "learning_rate": 3.192664173570952e-05, "loss": 0.087982177734375, "step": 2490 }, { "epoch": 0.34710513481502125, "grad_norm": 0.44156667590141296, "learning_rate": 3.1919072893279144e-05, "loss": 0.10117149353027344, "step": 2491 }, { "epoch": 0.34724447850623563, "grad_norm": 0.3404620587825775, "learning_rate": 3.1911501402702886e-05, "loss": 0.07656288146972656, "step": 2492 }, { "epoch": 0.34738382219745, "grad_norm": 0.6152765154838562, "learning_rate": 3.1903927265662965e-05, "loss": 0.10150146484375, "step": 2493 }, { "epoch": 0.3475231658886644, "grad_norm": 0.6122232675552368, "learning_rate": 3.189635048384217e-05, "loss": 0.11285972595214844, "step": 2494 }, { "epoch": 0.34766250957987876, "grad_norm": 1.2048684358596802, "learning_rate": 3.18887710589239e-05, "loss": 0.1577892303466797, "step": 2495 }, { "epoch": 0.34780185327109314, "grad_norm": 0.5289834141731262, "learning_rate": 3.188118899259213e-05, "loss": 0.12551307678222656, "step": 2496 }, { "epoch": 0.3479411969623075, "grad_norm": 0.3810412287712097, "learning_rate": 3.1873604286531415e-05, "loss": 0.07194805145263672, "step": 2497 }, { "epoch": 0.3480805406535219, "grad_norm": 0.384438157081604, "learning_rate": 3.18660169424269e-05, "loss": 0.10820770263671875, "step": 2498 }, { "epoch": 0.3482198843447363, "grad_norm": 0.31853631138801575, "learning_rate": 3.185842696196434e-05, "loss": 0.0894460678100586, "step": 2499 }, { "epoch": 0.34835922803595065, "grad_norm": 0.30965960025787354, "learning_rate": 3.185083434683003e-05, "loss": 0.07777786254882812, "step": 2500 }, { "epoch": 0.34849857172716503, "grad_norm": 0.45966655015945435, "learning_rate": 3.184323909871089e-05, "loss": 0.10226249694824219, "step": 2501 }, { "epoch": 0.3486379154183794, "grad_norm": 0.49824199080467224, "learning_rate": 3.1835641219294414e-05, "loss": 0.10141277313232422, "step": 2502 }, { "epoch": 0.34877725910959384, "grad_norm": 0.4209989905357361, "learning_rate": 3.182804071026867e-05, "loss": 0.10384941101074219, "step": 2503 }, { "epoch": 0.3489166028008082, "grad_norm": 0.4403812885284424, "learning_rate": 3.1820437573322306e-05, "loss": 0.09401512145996094, "step": 2504 }, { "epoch": 0.3490559464920226, "grad_norm": 0.4102729260921478, "learning_rate": 3.181283181014459e-05, "loss": 0.09067916870117188, "step": 2505 }, { "epoch": 0.349195290183237, "grad_norm": 0.2255939096212387, "learning_rate": 3.1805223422425334e-05, "loss": 0.06485366821289062, "step": 2506 }, { "epoch": 0.34933463387445135, "grad_norm": 0.4002390503883362, "learning_rate": 3.179761241185495e-05, "loss": 0.11719512939453125, "step": 2507 }, { "epoch": 0.3494739775656657, "grad_norm": 0.2519642114639282, "learning_rate": 3.178999878012443e-05, "loss": 0.07923126220703125, "step": 2508 }, { "epoch": 0.3496133212568801, "grad_norm": 0.3779487609863281, "learning_rate": 3.178238252892536e-05, "loss": 0.08405208587646484, "step": 2509 }, { "epoch": 0.3497526649480945, "grad_norm": 0.4608590304851532, "learning_rate": 3.177476365994989e-05, "loss": 0.08600425720214844, "step": 2510 }, { "epoch": 0.34989200863930886, "grad_norm": 0.513778805732727, "learning_rate": 3.1767142174890746e-05, "loss": 0.11716270446777344, "step": 2511 }, { "epoch": 0.35003135233052324, "grad_norm": 0.4361875057220459, "learning_rate": 3.175951807544126e-05, "loss": 0.0967559814453125, "step": 2512 }, { "epoch": 0.3501706960217376, "grad_norm": 0.5316446423530579, "learning_rate": 3.1751891363295344e-05, "loss": 0.10214042663574219, "step": 2513 }, { "epoch": 0.350310039712952, "grad_norm": 0.6804463267326355, "learning_rate": 3.1744262040147454e-05, "loss": 0.1665191650390625, "step": 2514 }, { "epoch": 0.35044938340416637, "grad_norm": 0.40050479769706726, "learning_rate": 3.173663010769267e-05, "loss": 0.08290290832519531, "step": 2515 }, { "epoch": 0.35058872709538075, "grad_norm": 0.22831140458583832, "learning_rate": 3.172899556762663e-05, "loss": 0.0614471435546875, "step": 2516 }, { "epoch": 0.3507280707865951, "grad_norm": 0.5256701707839966, "learning_rate": 3.172135842164555e-05, "loss": 0.1012868881225586, "step": 2517 }, { "epoch": 0.3508674144778095, "grad_norm": 0.87656170129776, "learning_rate": 3.171371867144624e-05, "loss": 0.11284303665161133, "step": 2518 }, { "epoch": 0.3510067581690239, "grad_norm": 0.5709128379821777, "learning_rate": 3.1706076318726056e-05, "loss": 0.08726882934570312, "step": 2519 }, { "epoch": 0.35114610186023826, "grad_norm": 0.908566415309906, "learning_rate": 3.1698431365182974e-05, "loss": 0.14369583129882812, "step": 2520 }, { "epoch": 0.35128544555145264, "grad_norm": 0.35034847259521484, "learning_rate": 3.169078381251552e-05, "loss": 0.06421661376953125, "step": 2521 }, { "epoch": 0.351424789242667, "grad_norm": 0.6176842451095581, "learning_rate": 3.168313366242281e-05, "loss": 0.09510040283203125, "step": 2522 }, { "epoch": 0.35156413293388145, "grad_norm": 0.4559093713760376, "learning_rate": 3.167548091660454e-05, "loss": 0.09984970092773438, "step": 2523 }, { "epoch": 0.3517034766250958, "grad_norm": 0.3195243179798126, "learning_rate": 3.166782557676095e-05, "loss": 0.08394241333007812, "step": 2524 }, { "epoch": 0.3518428203163102, "grad_norm": 0.2382056564092636, "learning_rate": 3.1660167644592915e-05, "loss": 0.07370567321777344, "step": 2525 }, { "epoch": 0.3519821640075246, "grad_norm": 0.3201528787612915, "learning_rate": 3.165250712180182e-05, "loss": 0.08929443359375, "step": 2526 }, { "epoch": 0.35212150769873896, "grad_norm": 0.6946732997894287, "learning_rate": 3.1644844010089686e-05, "loss": 0.09323501586914062, "step": 2527 }, { "epoch": 0.35226085138995333, "grad_norm": 0.3719169795513153, "learning_rate": 3.163717831115906e-05, "loss": 0.07489013671875, "step": 2528 }, { "epoch": 0.3524001950811677, "grad_norm": 0.157009094953537, "learning_rate": 3.1629510026713095e-05, "loss": 0.05513572692871094, "step": 2529 }, { "epoch": 0.3525395387723821, "grad_norm": 0.5100787281990051, "learning_rate": 3.162183915845551e-05, "loss": 0.09361648559570312, "step": 2530 }, { "epoch": 0.35267888246359647, "grad_norm": 0.6863251328468323, "learning_rate": 3.16141657080906e-05, "loss": 0.09431695938110352, "step": 2531 }, { "epoch": 0.35281822615481084, "grad_norm": 0.4269714057445526, "learning_rate": 3.160648967732322e-05, "loss": 0.08947372436523438, "step": 2532 }, { "epoch": 0.3529575698460252, "grad_norm": 0.2729628384113312, "learning_rate": 3.159881106785882e-05, "loss": 0.07842445373535156, "step": 2533 }, { "epoch": 0.3530969135372396, "grad_norm": 0.4875258505344391, "learning_rate": 3.15911298814034e-05, "loss": 0.09406280517578125, "step": 2534 }, { "epoch": 0.353236257228454, "grad_norm": 0.445156067609787, "learning_rate": 3.1583446119663555e-05, "loss": 0.09652328491210938, "step": 2535 }, { "epoch": 0.35337560091966835, "grad_norm": 0.2816588878631592, "learning_rate": 3.1575759784346436e-05, "loss": 0.0845489501953125, "step": 2536 }, { "epoch": 0.35351494461088273, "grad_norm": 0.3294815123081207, "learning_rate": 3.1568070877159766e-05, "loss": 0.07814645767211914, "step": 2537 }, { "epoch": 0.3536542883020971, "grad_norm": 0.2936975955963135, "learning_rate": 3.1560379399811856e-05, "loss": 0.08336257934570312, "step": 2538 }, { "epoch": 0.3537936319933115, "grad_norm": 0.4594877362251282, "learning_rate": 3.155268535401157e-05, "loss": 0.10181427001953125, "step": 2539 }, { "epoch": 0.35393297568452586, "grad_norm": 0.4611753523349762, "learning_rate": 3.1544988741468353e-05, "loss": 0.11417961120605469, "step": 2540 }, { "epoch": 0.35407231937574024, "grad_norm": 0.2853682339191437, "learning_rate": 3.153728956389221e-05, "loss": 0.07401466369628906, "step": 2541 }, { "epoch": 0.3542116630669546, "grad_norm": 0.4520014524459839, "learning_rate": 3.152958782299373e-05, "loss": 0.08025074005126953, "step": 2542 }, { "epoch": 0.35435100675816905, "grad_norm": 0.330563485622406, "learning_rate": 3.152188352048406e-05, "loss": 0.07129669189453125, "step": 2543 }, { "epoch": 0.35449035044938343, "grad_norm": 0.4054713547229767, "learning_rate": 3.1514176658074925e-05, "loss": 0.09200286865234375, "step": 2544 }, { "epoch": 0.3546296941405978, "grad_norm": 0.33665603399276733, "learning_rate": 3.15064672374786e-05, "loss": 0.0760507583618164, "step": 2545 }, { "epoch": 0.3547690378318122, "grad_norm": 0.49538248777389526, "learning_rate": 3.149875526040796e-05, "loss": 0.10960197448730469, "step": 2546 }, { "epoch": 0.35490838152302656, "grad_norm": 0.7118152976036072, "learning_rate": 3.1491040728576416e-05, "loss": 0.10572242736816406, "step": 2547 }, { "epoch": 0.35504772521424094, "grad_norm": 0.3852025866508484, "learning_rate": 3.1483323643697965e-05, "loss": 0.07859420776367188, "step": 2548 }, { "epoch": 0.3551870689054553, "grad_norm": 0.4282594323158264, "learning_rate": 3.147560400748719e-05, "loss": 0.09489822387695312, "step": 2549 }, { "epoch": 0.3553264125966697, "grad_norm": 0.36423978209495544, "learning_rate": 3.146788182165917e-05, "loss": 0.10332298278808594, "step": 2550 }, { "epoch": 0.35546575628788407, "grad_norm": 0.20002001523971558, "learning_rate": 3.146015708792964e-05, "loss": 0.06299591064453125, "step": 2551 }, { "epoch": 0.35560509997909845, "grad_norm": 0.3775453269481659, "learning_rate": 3.1452429808014845e-05, "loss": 0.08882904052734375, "step": 2552 }, { "epoch": 0.3557444436703128, "grad_norm": 0.5813329219818115, "learning_rate": 3.1444699983631604e-05, "loss": 0.10185813903808594, "step": 2553 }, { "epoch": 0.3558837873615272, "grad_norm": 0.6250063180923462, "learning_rate": 3.143696761649732e-05, "loss": 0.10751152038574219, "step": 2554 }, { "epoch": 0.3560231310527416, "grad_norm": 1.0059397220611572, "learning_rate": 3.1429232708329935e-05, "loss": 0.1645793914794922, "step": 2555 }, { "epoch": 0.35616247474395596, "grad_norm": 0.2661990821361542, "learning_rate": 3.142149526084798e-05, "loss": 0.06608724594116211, "step": 2556 }, { "epoch": 0.35630181843517034, "grad_norm": 0.32851940393447876, "learning_rate": 3.1413755275770533e-05, "loss": 0.06689929962158203, "step": 2557 }, { "epoch": 0.3564411621263847, "grad_norm": 0.3427017033100128, "learning_rate": 3.1406012754817246e-05, "loss": 0.11832952499389648, "step": 2558 }, { "epoch": 0.3565805058175991, "grad_norm": 0.45496290922164917, "learning_rate": 3.139826769970833e-05, "loss": 0.11744880676269531, "step": 2559 }, { "epoch": 0.35671984950881347, "grad_norm": 0.38608700037002563, "learning_rate": 3.139052011216456e-05, "loss": 0.08851432800292969, "step": 2560 }, { "epoch": 0.35685919320002785, "grad_norm": 0.6037548184394836, "learning_rate": 3.138276999390726e-05, "loss": 0.10453224182128906, "step": 2561 }, { "epoch": 0.3569985368912422, "grad_norm": 0.3369615972042084, "learning_rate": 3.1375017346658354e-05, "loss": 0.09618949890136719, "step": 2562 }, { "epoch": 0.3571378805824566, "grad_norm": 0.386778861284256, "learning_rate": 3.136726217214028e-05, "loss": 0.0967249870300293, "step": 2563 }, { "epoch": 0.35727722427367103, "grad_norm": 0.6766453981399536, "learning_rate": 3.1359504472076074e-05, "loss": 0.11552047729492188, "step": 2564 }, { "epoch": 0.3574165679648854, "grad_norm": 0.21613693237304688, "learning_rate": 3.1351744248189314e-05, "loss": 0.07839202880859375, "step": 2565 }, { "epoch": 0.3575559116560998, "grad_norm": 0.4077081084251404, "learning_rate": 3.134398150220415e-05, "loss": 0.1036834716796875, "step": 2566 }, { "epoch": 0.35769525534731417, "grad_norm": 0.4320823550224304, "learning_rate": 3.133621623584528e-05, "loss": 0.08541488647460938, "step": 2567 }, { "epoch": 0.35783459903852854, "grad_norm": 0.23426011204719543, "learning_rate": 3.132844845083798e-05, "loss": 0.07692337036132812, "step": 2568 }, { "epoch": 0.3579739427297429, "grad_norm": 0.19030626118183136, "learning_rate": 3.132067814890806e-05, "loss": 0.05902862548828125, "step": 2569 }, { "epoch": 0.3581132864209573, "grad_norm": 0.40281978249549866, "learning_rate": 3.1312905331781914e-05, "loss": 0.10067176818847656, "step": 2570 }, { "epoch": 0.3582526301121717, "grad_norm": 0.529802143573761, "learning_rate": 3.130513000118648e-05, "loss": 0.12243175506591797, "step": 2571 }, { "epoch": 0.35839197380338605, "grad_norm": 0.47500455379486084, "learning_rate": 3.1297352158849264e-05, "loss": 0.1415557861328125, "step": 2572 }, { "epoch": 0.35853131749460043, "grad_norm": 0.392120897769928, "learning_rate": 3.128957180649832e-05, "loss": 0.08834648132324219, "step": 2573 }, { "epoch": 0.3586706611858148, "grad_norm": 0.75846266746521, "learning_rate": 3.128178894586226e-05, "loss": 0.13341522216796875, "step": 2574 }, { "epoch": 0.3588100048770292, "grad_norm": 0.6151486039161682, "learning_rate": 3.1274003578670264e-05, "loss": 0.13898277282714844, "step": 2575 }, { "epoch": 0.35894934856824356, "grad_norm": 0.3831477761268616, "learning_rate": 3.126621570665207e-05, "loss": 0.09893417358398438, "step": 2576 }, { "epoch": 0.35908869225945794, "grad_norm": 0.4112849235534668, "learning_rate": 3.125842533153796e-05, "loss": 0.08327484130859375, "step": 2577 }, { "epoch": 0.3592280359506723, "grad_norm": 0.6142802238464355, "learning_rate": 3.1250632455058764e-05, "loss": 0.10808181762695312, "step": 2578 }, { "epoch": 0.3593673796418867, "grad_norm": 0.5233549475669861, "learning_rate": 3.12428370789459e-05, "loss": 0.0998220443725586, "step": 2579 }, { "epoch": 0.3595067233331011, "grad_norm": 0.7534226775169373, "learning_rate": 3.1235039204931316e-05, "loss": 0.10317039489746094, "step": 2580 }, { "epoch": 0.35964606702431545, "grad_norm": 0.3940846621990204, "learning_rate": 3.122723883474752e-05, "loss": 0.08102226257324219, "step": 2581 }, { "epoch": 0.35978541071552983, "grad_norm": 1.2365977764129639, "learning_rate": 3.1219435970127574e-05, "loss": 0.1277332305908203, "step": 2582 }, { "epoch": 0.3599247544067442, "grad_norm": 0.5809354782104492, "learning_rate": 3.12116306128051e-05, "loss": 0.0838613510131836, "step": 2583 }, { "epoch": 0.36006409809795864, "grad_norm": 0.3610229194164276, "learning_rate": 3.1203822764514274e-05, "loss": 0.08929634094238281, "step": 2584 }, { "epoch": 0.360203441789173, "grad_norm": 0.7103058695793152, "learning_rate": 3.1196012426989814e-05, "loss": 0.1682300567626953, "step": 2585 }, { "epoch": 0.3603427854803874, "grad_norm": 0.43314695358276367, "learning_rate": 3.1188199601967e-05, "loss": 0.10078811645507812, "step": 2586 }, { "epoch": 0.3604821291716018, "grad_norm": 0.32209959626197815, "learning_rate": 3.118038429118167e-05, "loss": 0.07481765747070312, "step": 2587 }, { "epoch": 0.36062147286281615, "grad_norm": 0.3271544277667999, "learning_rate": 3.1172566496370205e-05, "loss": 0.0765218734741211, "step": 2588 }, { "epoch": 0.36076081655403053, "grad_norm": 0.3193660080432892, "learning_rate": 3.116474621926953e-05, "loss": 0.09214591979980469, "step": 2589 }, { "epoch": 0.3609001602452449, "grad_norm": 0.3044019937515259, "learning_rate": 3.115692346161715e-05, "loss": 0.07052040100097656, "step": 2590 }, { "epoch": 0.3610395039364593, "grad_norm": 0.2673049867153168, "learning_rate": 3.1149098225151086e-05, "loss": 0.07297134399414062, "step": 2591 }, { "epoch": 0.36117884762767366, "grad_norm": 0.46576112508773804, "learning_rate": 3.114127051160994e-05, "loss": 0.10613155364990234, "step": 2592 }, { "epoch": 0.36131819131888804, "grad_norm": 0.2673509120941162, "learning_rate": 3.1133440322732846e-05, "loss": 0.06754302978515625, "step": 2593 }, { "epoch": 0.3614575350101024, "grad_norm": 0.44114530086517334, "learning_rate": 3.112560766025949e-05, "loss": 0.08633804321289062, "step": 2594 }, { "epoch": 0.3615968787013168, "grad_norm": 0.3208615481853485, "learning_rate": 3.1117772525930115e-05, "loss": 0.09144401550292969, "step": 2595 }, { "epoch": 0.36173622239253117, "grad_norm": 0.4067465364933014, "learning_rate": 3.11099349214855e-05, "loss": 0.09580802917480469, "step": 2596 }, { "epoch": 0.36187556608374555, "grad_norm": 0.4733952581882477, "learning_rate": 3.1102094848667e-05, "loss": 0.09858512878417969, "step": 2597 }, { "epoch": 0.3620149097749599, "grad_norm": 0.473481148481369, "learning_rate": 3.109425230921649e-05, "loss": 0.07828235626220703, "step": 2598 }, { "epoch": 0.3621542534661743, "grad_norm": 0.21305640041828156, "learning_rate": 3.10864073048764e-05, "loss": 0.05056571960449219, "step": 2599 }, { "epoch": 0.3622935971573887, "grad_norm": 0.8415384888648987, "learning_rate": 3.107855983738971e-05, "loss": 0.12586402893066406, "step": 2600 }, { "epoch": 0.36243294084860306, "grad_norm": 0.3478691875934601, "learning_rate": 3.107070990849995e-05, "loss": 0.08661651611328125, "step": 2601 }, { "epoch": 0.36257228453981744, "grad_norm": 0.6279574036598206, "learning_rate": 3.1062857519951215e-05, "loss": 0.11527538299560547, "step": 2602 }, { "epoch": 0.3627116282310318, "grad_norm": 0.7989532947540283, "learning_rate": 3.105500267348809e-05, "loss": 0.12757110595703125, "step": 2603 }, { "epoch": 0.36285097192224625, "grad_norm": 0.39449018239974976, "learning_rate": 3.1047145370855764e-05, "loss": 0.08160972595214844, "step": 2604 }, { "epoch": 0.3629903156134606, "grad_norm": 0.5415140986442566, "learning_rate": 3.103928561379996e-05, "loss": 0.1280841827392578, "step": 2605 }, { "epoch": 0.363129659304675, "grad_norm": 0.35626840591430664, "learning_rate": 3.103142340406691e-05, "loss": 0.07424545288085938, "step": 2606 }, { "epoch": 0.3632690029958894, "grad_norm": 0.6165229678153992, "learning_rate": 3.102355874340343e-05, "loss": 0.07974624633789062, "step": 2607 }, { "epoch": 0.36340834668710376, "grad_norm": 0.5115894079208374, "learning_rate": 3.101569163355688e-05, "loss": 0.11807632446289062, "step": 2608 }, { "epoch": 0.36354769037831813, "grad_norm": 0.48784202337265015, "learning_rate": 3.100782207627513e-05, "loss": 0.09202194213867188, "step": 2609 }, { "epoch": 0.3636870340695325, "grad_norm": 0.5993459820747375, "learning_rate": 3.099995007330664e-05, "loss": 0.09099578857421875, "step": 2610 }, { "epoch": 0.3638263777607469, "grad_norm": 0.5753583908081055, "learning_rate": 3.099207562640037e-05, "loss": 0.10274505615234375, "step": 2611 }, { "epoch": 0.36396572145196127, "grad_norm": 0.7421638369560242, "learning_rate": 3.098419873730585e-05, "loss": 0.10706329345703125, "step": 2612 }, { "epoch": 0.36410506514317564, "grad_norm": 0.3109113872051239, "learning_rate": 3.097631940777314e-05, "loss": 0.08759498596191406, "step": 2613 }, { "epoch": 0.36424440883439, "grad_norm": 0.42095738649368286, "learning_rate": 3.096843763955285e-05, "loss": 0.08941078186035156, "step": 2614 }, { "epoch": 0.3643837525256044, "grad_norm": 0.23949962854385376, "learning_rate": 3.096055343439614e-05, "loss": 0.07265281677246094, "step": 2615 }, { "epoch": 0.3645230962168188, "grad_norm": 0.4515247941017151, "learning_rate": 3.095266679405468e-05, "loss": 0.09041976928710938, "step": 2616 }, { "epoch": 0.36466243990803315, "grad_norm": 0.5558367371559143, "learning_rate": 3.094477772028072e-05, "loss": 0.09982490539550781, "step": 2617 }, { "epoch": 0.36480178359924753, "grad_norm": 0.338630348443985, "learning_rate": 3.0936886214827024e-05, "loss": 0.08219337463378906, "step": 2618 }, { "epoch": 0.3649411272904619, "grad_norm": 0.3557131886482239, "learning_rate": 3.09289922794469e-05, "loss": 0.07383155822753906, "step": 2619 }, { "epoch": 0.3650804709816763, "grad_norm": 0.43100714683532715, "learning_rate": 3.092109591589421e-05, "loss": 0.09056472778320312, "step": 2620 }, { "epoch": 0.36521981467289066, "grad_norm": 0.709992527961731, "learning_rate": 3.091319712592333e-05, "loss": 0.10838508605957031, "step": 2621 }, { "epoch": 0.36535915836410504, "grad_norm": 0.7200655341148376, "learning_rate": 3.0905295911289216e-05, "loss": 0.11471366882324219, "step": 2622 }, { "epoch": 0.3654985020553194, "grad_norm": 0.27600622177124023, "learning_rate": 3.089739227374732e-05, "loss": 0.06329917907714844, "step": 2623 }, { "epoch": 0.36563784574653385, "grad_norm": 0.35377442836761475, "learning_rate": 3.088948621505364e-05, "loss": 0.0729217529296875, "step": 2624 }, { "epoch": 0.36577718943774823, "grad_norm": 0.6577832698822021, "learning_rate": 3.088157773696474e-05, "loss": 0.10921859741210938, "step": 2625 }, { "epoch": 0.3659165331289626, "grad_norm": 0.40780776739120483, "learning_rate": 3.08736668412377e-05, "loss": 0.09307670593261719, "step": 2626 }, { "epoch": 0.366055876820177, "grad_norm": 0.6203763484954834, "learning_rate": 3.0865753529630135e-05, "loss": 0.12179088592529297, "step": 2627 }, { "epoch": 0.36619522051139136, "grad_norm": 0.8463844656944275, "learning_rate": 3.085783780390021e-05, "loss": 0.14350128173828125, "step": 2628 }, { "epoch": 0.36633456420260574, "grad_norm": 0.4993295669555664, "learning_rate": 3.0849919665806605e-05, "loss": 0.08677291870117188, "step": 2629 }, { "epoch": 0.3664739078938201, "grad_norm": 0.6247550249099731, "learning_rate": 3.0841999117108564e-05, "loss": 0.09033966064453125, "step": 2630 }, { "epoch": 0.3666132515850345, "grad_norm": 0.3499354422092438, "learning_rate": 3.0834076159565847e-05, "loss": 0.082977294921875, "step": 2631 }, { "epoch": 0.36675259527624887, "grad_norm": 0.3901529312133789, "learning_rate": 3.082615079493876e-05, "loss": 0.09166908264160156, "step": 2632 }, { "epoch": 0.36689193896746325, "grad_norm": 0.5176382660865784, "learning_rate": 3.081822302498812e-05, "loss": 0.10700225830078125, "step": 2633 }, { "epoch": 0.3670312826586776, "grad_norm": 0.37949952483177185, "learning_rate": 3.081029285147531e-05, "loss": 0.12396240234375, "step": 2634 }, { "epoch": 0.367170626349892, "grad_norm": 0.16940274834632874, "learning_rate": 3.080236027616224e-05, "loss": 0.05377006530761719, "step": 2635 }, { "epoch": 0.3673099700411064, "grad_norm": 1.1633319854736328, "learning_rate": 3.079442530081133e-05, "loss": 0.13844871520996094, "step": 2636 }, { "epoch": 0.36744931373232076, "grad_norm": 0.5803998112678528, "learning_rate": 3.078648792718556e-05, "loss": 0.10044097900390625, "step": 2637 }, { "epoch": 0.36758865742353514, "grad_norm": 0.5642547011375427, "learning_rate": 3.0778548157048434e-05, "loss": 0.11301994323730469, "step": 2638 }, { "epoch": 0.3677280011147495, "grad_norm": 0.32669633626937866, "learning_rate": 3.0770605992163986e-05, "loss": 0.07831001281738281, "step": 2639 }, { "epoch": 0.3678673448059639, "grad_norm": 0.35218387842178345, "learning_rate": 3.076266143429679e-05, "loss": 0.093170166015625, "step": 2640 }, { "epoch": 0.36800668849717827, "grad_norm": 0.4158170819282532, "learning_rate": 3.0754714485211925e-05, "loss": 0.0768594741821289, "step": 2641 }, { "epoch": 0.36814603218839265, "grad_norm": 0.45826539397239685, "learning_rate": 3.0746765146675043e-05, "loss": 0.08993148803710938, "step": 2642 }, { "epoch": 0.368285375879607, "grad_norm": 0.5935287475585938, "learning_rate": 3.0738813420452295e-05, "loss": 0.1072225570678711, "step": 2643 }, { "epoch": 0.36842471957082146, "grad_norm": 0.22847943007946014, "learning_rate": 3.073085930831038e-05, "loss": 0.06132316589355469, "step": 2644 }, { "epoch": 0.36856406326203583, "grad_norm": 0.31108322739601135, "learning_rate": 3.072290281201652e-05, "loss": 0.08017349243164062, "step": 2645 }, { "epoch": 0.3687034069532502, "grad_norm": 0.4615246653556824, "learning_rate": 3.071494393333846e-05, "loss": 0.0951690673828125, "step": 2646 }, { "epoch": 0.3688427506444646, "grad_norm": 0.32167354226112366, "learning_rate": 3.0706982674044486e-05, "loss": 0.087005615234375, "step": 2647 }, { "epoch": 0.36898209433567897, "grad_norm": 0.4453098475933075, "learning_rate": 3.06990190359034e-05, "loss": 0.09072494506835938, "step": 2648 }, { "epoch": 0.36912143802689334, "grad_norm": 0.41377750039100647, "learning_rate": 3.069105302068455e-05, "loss": 0.09400367736816406, "step": 2649 }, { "epoch": 0.3692607817181077, "grad_norm": 0.42259132862091064, "learning_rate": 3.06830846301578e-05, "loss": 0.07926177978515625, "step": 2650 }, { "epoch": 0.3694001254093221, "grad_norm": 0.4374361038208008, "learning_rate": 3.067511386609354e-05, "loss": 0.08720970153808594, "step": 2651 }, { "epoch": 0.3695394691005365, "grad_norm": 0.353947252035141, "learning_rate": 3.0667140730262706e-05, "loss": 0.0829019546508789, "step": 2652 }, { "epoch": 0.36967881279175085, "grad_norm": 0.43192946910858154, "learning_rate": 3.065916522443673e-05, "loss": 0.09763145446777344, "step": 2653 }, { "epoch": 0.36981815648296523, "grad_norm": 0.4580514430999756, "learning_rate": 3.06511873503876e-05, "loss": 0.10834121704101562, "step": 2654 }, { "epoch": 0.3699575001741796, "grad_norm": 0.7476041316986084, "learning_rate": 3.0643207109887804e-05, "loss": 0.1402416229248047, "step": 2655 }, { "epoch": 0.370096843865394, "grad_norm": 0.31842339038848877, "learning_rate": 3.063522450471038e-05, "loss": 0.07881927490234375, "step": 2656 }, { "epoch": 0.37023618755660836, "grad_norm": 0.41811904311180115, "learning_rate": 3.062723953662888e-05, "loss": 0.10394477844238281, "step": 2657 }, { "epoch": 0.37037553124782274, "grad_norm": 0.48975223302841187, "learning_rate": 3.061925220741738e-05, "loss": 0.06064033508300781, "step": 2658 }, { "epoch": 0.3705148749390371, "grad_norm": 1.1146239042282104, "learning_rate": 3.0611262518850464e-05, "loss": 0.11457061767578125, "step": 2659 }, { "epoch": 0.3706542186302515, "grad_norm": 0.5867258906364441, "learning_rate": 3.0603270472703294e-05, "loss": 0.11674690246582031, "step": 2660 }, { "epoch": 0.3707935623214659, "grad_norm": 0.2562991976737976, "learning_rate": 3.05952760707515e-05, "loss": 0.07279300689697266, "step": 2661 }, { "epoch": 0.37093290601268025, "grad_norm": 0.674834132194519, "learning_rate": 3.0587279314771253e-05, "loss": 0.08330154418945312, "step": 2662 }, { "epoch": 0.37107224970389463, "grad_norm": 0.49679428339004517, "learning_rate": 3.057928020653925e-05, "loss": 0.08529090881347656, "step": 2663 }, { "epoch": 0.37121159339510906, "grad_norm": 0.7925440669059753, "learning_rate": 3.057127874783272e-05, "loss": 0.11136245727539062, "step": 2664 }, { "epoch": 0.37135093708632344, "grad_norm": 0.24290738999843597, "learning_rate": 3.0563274940429404e-05, "loss": 0.07623291015625, "step": 2665 }, { "epoch": 0.3714902807775378, "grad_norm": 0.7929752469062805, "learning_rate": 3.055526878610755e-05, "loss": 0.13171768188476562, "step": 2666 }, { "epoch": 0.3716296244687522, "grad_norm": 0.9059690237045288, "learning_rate": 3.054726028664595e-05, "loss": 0.1026153564453125, "step": 2667 }, { "epoch": 0.3717689681599666, "grad_norm": 0.32353541254997253, "learning_rate": 3.053924944382393e-05, "loss": 0.08931732177734375, "step": 2668 }, { "epoch": 0.37190831185118095, "grad_norm": 0.6103008985519409, "learning_rate": 3.053123625942128e-05, "loss": 0.10427093505859375, "step": 2669 }, { "epoch": 0.37204765554239533, "grad_norm": 0.599092423915863, "learning_rate": 3.052322073521837e-05, "loss": 0.08529090881347656, "step": 2670 }, { "epoch": 0.3721869992336097, "grad_norm": 0.6153716444969177, "learning_rate": 3.0515202872996067e-05, "loss": 0.09033584594726562, "step": 2671 }, { "epoch": 0.3723263429248241, "grad_norm": 0.24148611724376678, "learning_rate": 3.050718267453575e-05, "loss": 0.061034202575683594, "step": 2672 }, { "epoch": 0.37246568661603846, "grad_norm": 1.1671512126922607, "learning_rate": 3.0499160141619323e-05, "loss": 0.10204887390136719, "step": 2673 }, { "epoch": 0.37260503030725284, "grad_norm": 0.2623699903488159, "learning_rate": 3.049113527602922e-05, "loss": 0.07540130615234375, "step": 2674 }, { "epoch": 0.3727443739984672, "grad_norm": 0.2784218192100525, "learning_rate": 3.0483108079548366e-05, "loss": 0.09188270568847656, "step": 2675 }, { "epoch": 0.3728837176896816, "grad_norm": 0.3810289204120636, "learning_rate": 3.0475078553960234e-05, "loss": 0.09445762634277344, "step": 2676 }, { "epoch": 0.37302306138089597, "grad_norm": 0.7802804708480835, "learning_rate": 3.0467046701048795e-05, "loss": 0.13504409790039062, "step": 2677 }, { "epoch": 0.37316240507211035, "grad_norm": 0.5049695372581482, "learning_rate": 3.0459012522598553e-05, "loss": 0.1236572265625, "step": 2678 }, { "epoch": 0.3733017487633247, "grad_norm": 0.208942249417305, "learning_rate": 3.04509760203945e-05, "loss": 0.07639312744140625, "step": 2679 }, { "epoch": 0.3734410924545391, "grad_norm": 0.36450180411338806, "learning_rate": 3.0442937196222186e-05, "loss": 0.06351470947265625, "step": 2680 }, { "epoch": 0.3735804361457535, "grad_norm": 0.5124438405036926, "learning_rate": 3.043489605186764e-05, "loss": 0.06998443603515625, "step": 2681 }, { "epoch": 0.37371977983696786, "grad_norm": 0.7344912886619568, "learning_rate": 3.0426852589117422e-05, "loss": 0.11066246032714844, "step": 2682 }, { "epoch": 0.37385912352818224, "grad_norm": 0.4974328279495239, "learning_rate": 3.041880680975861e-05, "loss": 0.12248420715332031, "step": 2683 }, { "epoch": 0.37399846721939667, "grad_norm": 0.3984052836894989, "learning_rate": 3.041075871557879e-05, "loss": 0.08704471588134766, "step": 2684 }, { "epoch": 0.37413781091061105, "grad_norm": 0.29928821325302124, "learning_rate": 3.0402708308366066e-05, "loss": 0.07342529296875, "step": 2685 }, { "epoch": 0.3742771546018254, "grad_norm": 0.3556903898715973, "learning_rate": 3.039465558990905e-05, "loss": 0.08797454833984375, "step": 2686 }, { "epoch": 0.3744164982930398, "grad_norm": 0.5514016151428223, "learning_rate": 3.038660056199688e-05, "loss": 0.11268043518066406, "step": 2687 }, { "epoch": 0.3745558419842542, "grad_norm": 0.7045251131057739, "learning_rate": 3.037854322641919e-05, "loss": 0.11430931091308594, "step": 2688 }, { "epoch": 0.37469518567546856, "grad_norm": 0.35879555344581604, "learning_rate": 3.0370483584966144e-05, "loss": 0.07147026062011719, "step": 2689 }, { "epoch": 0.37483452936668293, "grad_norm": 0.6388649344444275, "learning_rate": 3.036242163942841e-05, "loss": 0.10208892822265625, "step": 2690 }, { "epoch": 0.3749738730578973, "grad_norm": 0.3103763163089752, "learning_rate": 3.035435739159716e-05, "loss": 0.09404563903808594, "step": 2691 }, { "epoch": 0.3751132167491117, "grad_norm": 0.6099969148635864, "learning_rate": 3.03462908432641e-05, "loss": 0.11745262145996094, "step": 2692 }, { "epoch": 0.37525256044032607, "grad_norm": 0.5519262552261353, "learning_rate": 3.033822199622142e-05, "loss": 0.10070419311523438, "step": 2693 }, { "epoch": 0.37539190413154044, "grad_norm": 0.3458658456802368, "learning_rate": 3.033015085226184e-05, "loss": 0.09826374053955078, "step": 2694 }, { "epoch": 0.3755312478227548, "grad_norm": 0.345923513174057, "learning_rate": 3.0322077413178578e-05, "loss": 0.07012939453125, "step": 2695 }, { "epoch": 0.3756705915139692, "grad_norm": 0.8151650428771973, "learning_rate": 3.0314001680765375e-05, "loss": 0.1175537109375, "step": 2696 }, { "epoch": 0.3758099352051836, "grad_norm": 0.33849337697029114, "learning_rate": 3.0305923656816473e-05, "loss": 0.07731914520263672, "step": 2697 }, { "epoch": 0.37594927889639795, "grad_norm": 0.714036226272583, "learning_rate": 3.0297843343126617e-05, "loss": 0.09450340270996094, "step": 2698 }, { "epoch": 0.37608862258761233, "grad_norm": 0.3599410653114319, "learning_rate": 3.0289760741491077e-05, "loss": 0.080169677734375, "step": 2699 }, { "epoch": 0.3762279662788267, "grad_norm": 0.2828015685081482, "learning_rate": 3.028167585370562e-05, "loss": 0.08306121826171875, "step": 2700 }, { "epoch": 0.3763673099700411, "grad_norm": 0.8141335248947144, "learning_rate": 3.0273588681566523e-05, "loss": 0.16182994842529297, "step": 2701 }, { "epoch": 0.37650665366125546, "grad_norm": 0.2689749598503113, "learning_rate": 3.026549922687057e-05, "loss": 0.08111095428466797, "step": 2702 }, { "epoch": 0.37664599735246984, "grad_norm": 0.9530807137489319, "learning_rate": 3.0257407491415053e-05, "loss": 0.10832405090332031, "step": 2703 }, { "epoch": 0.3767853410436843, "grad_norm": 0.4666097164154053, "learning_rate": 3.0249313476997772e-05, "loss": 0.08621978759765625, "step": 2704 }, { "epoch": 0.37692468473489865, "grad_norm": 0.5367114543914795, "learning_rate": 3.0241217185417034e-05, "loss": 0.10029792785644531, "step": 2705 }, { "epoch": 0.37706402842611303, "grad_norm": 0.3195343017578125, "learning_rate": 3.023311861847165e-05, "loss": 0.09476089477539062, "step": 2706 }, { "epoch": 0.3772033721173274, "grad_norm": 0.4547435939311981, "learning_rate": 3.0225017777960927e-05, "loss": 0.08100509643554688, "step": 2707 }, { "epoch": 0.3773427158085418, "grad_norm": 0.45441633462905884, "learning_rate": 3.0216914665684705e-05, "loss": 0.09466934204101562, "step": 2708 }, { "epoch": 0.37748205949975616, "grad_norm": 0.3753572702407837, "learning_rate": 3.020880928344329e-05, "loss": 0.07953548431396484, "step": 2709 }, { "epoch": 0.37762140319097054, "grad_norm": 0.8422170877456665, "learning_rate": 3.0200701633037534e-05, "loss": 0.1198434829711914, "step": 2710 }, { "epoch": 0.3777607468821849, "grad_norm": 0.4495704174041748, "learning_rate": 3.0192591716268755e-05, "loss": 0.09206199645996094, "step": 2711 }, { "epoch": 0.3779000905733993, "grad_norm": 0.38908272981643677, "learning_rate": 3.0184479534938797e-05, "loss": 0.10435104370117188, "step": 2712 }, { "epoch": 0.37803943426461367, "grad_norm": 1.0148074626922607, "learning_rate": 3.0176365090850005e-05, "loss": 0.15790557861328125, "step": 2713 }, { "epoch": 0.37817877795582805, "grad_norm": 0.3954848051071167, "learning_rate": 3.0168248385805223e-05, "loss": 0.11053276062011719, "step": 2714 }, { "epoch": 0.3783181216470424, "grad_norm": 0.2946760654449463, "learning_rate": 3.0160129421607792e-05, "loss": 0.07213020324707031, "step": 2715 }, { "epoch": 0.3784574653382568, "grad_norm": 0.5702301263809204, "learning_rate": 3.015200820006156e-05, "loss": 0.09881401062011719, "step": 2716 }, { "epoch": 0.3785968090294712, "grad_norm": 0.6966204643249512, "learning_rate": 3.014388472297088e-05, "loss": 0.09740638732910156, "step": 2717 }, { "epoch": 0.37873615272068556, "grad_norm": 0.45229408144950867, "learning_rate": 3.013575899214061e-05, "loss": 0.10313606262207031, "step": 2718 }, { "epoch": 0.37887549641189994, "grad_norm": 0.5059126019477844, "learning_rate": 3.0127631009376093e-05, "loss": 0.11496162414550781, "step": 2719 }, { "epoch": 0.3790148401031143, "grad_norm": 0.665940523147583, "learning_rate": 3.011950077648318e-05, "loss": 0.13916778564453125, "step": 2720 }, { "epoch": 0.3791541837943287, "grad_norm": 0.506413996219635, "learning_rate": 3.0111368295268225e-05, "loss": 0.08782577514648438, "step": 2721 }, { "epoch": 0.37929352748554307, "grad_norm": 0.2189047634601593, "learning_rate": 3.0103233567538086e-05, "loss": 0.0667104721069336, "step": 2722 }, { "epoch": 0.37943287117675745, "grad_norm": 0.3517168462276459, "learning_rate": 3.009509659510011e-05, "loss": 0.08195877075195312, "step": 2723 }, { "epoch": 0.3795722148679719, "grad_norm": 0.23586399853229523, "learning_rate": 3.008695737976214e-05, "loss": 0.06647872924804688, "step": 2724 }, { "epoch": 0.37971155855918626, "grad_norm": 0.6002417802810669, "learning_rate": 3.0078815923332532e-05, "loss": 0.09085273742675781, "step": 2725 }, { "epoch": 0.37985090225040063, "grad_norm": 0.5633906126022339, "learning_rate": 3.007067222762013e-05, "loss": 0.10212516784667969, "step": 2726 }, { "epoch": 0.379990245941615, "grad_norm": 0.5210935473442078, "learning_rate": 3.0062526294434273e-05, "loss": 0.10518836975097656, "step": 2727 }, { "epoch": 0.3801295896328294, "grad_norm": 0.37298083305358887, "learning_rate": 3.005437812558481e-05, "loss": 0.10016632080078125, "step": 2728 }, { "epoch": 0.38026893332404377, "grad_norm": 0.6972285509109497, "learning_rate": 3.004622772288207e-05, "loss": 0.10857200622558594, "step": 2729 }, { "epoch": 0.38040827701525814, "grad_norm": 0.4334034323692322, "learning_rate": 3.003807508813689e-05, "loss": 0.07750511169433594, "step": 2730 }, { "epoch": 0.3805476207064725, "grad_norm": 0.6945555806159973, "learning_rate": 3.00299202231606e-05, "loss": 0.09650707244873047, "step": 2731 }, { "epoch": 0.3806869643976869, "grad_norm": 0.41155219078063965, "learning_rate": 3.0021763129765024e-05, "loss": 0.08482933044433594, "step": 2732 }, { "epoch": 0.3808263080889013, "grad_norm": 0.5360231995582581, "learning_rate": 3.0013603809762473e-05, "loss": 0.10396766662597656, "step": 2733 }, { "epoch": 0.38096565178011566, "grad_norm": 0.49952879548072815, "learning_rate": 3.0005442264965778e-05, "loss": 0.10853004455566406, "step": 2734 }, { "epoch": 0.38110499547133003, "grad_norm": 0.31619298458099365, "learning_rate": 2.9997278497188236e-05, "loss": 0.09268379211425781, "step": 2735 }, { "epoch": 0.3812443391625444, "grad_norm": 0.3898692727088928, "learning_rate": 2.9989112508243655e-05, "loss": 0.08002281188964844, "step": 2736 }, { "epoch": 0.3813836828537588, "grad_norm": 0.5013592839241028, "learning_rate": 2.998094429994633e-05, "loss": 0.12183380126953125, "step": 2737 }, { "epoch": 0.38152302654497317, "grad_norm": 0.6989168524742126, "learning_rate": 2.9972773874111057e-05, "loss": 0.12021255493164062, "step": 2738 }, { "epoch": 0.38166237023618754, "grad_norm": 0.568914532661438, "learning_rate": 2.99646012325531e-05, "loss": 0.12770462036132812, "step": 2739 }, { "epoch": 0.3818017139274019, "grad_norm": 0.612546443939209, "learning_rate": 2.995642637708825e-05, "loss": 0.12451553344726562, "step": 2740 }, { "epoch": 0.3819410576186163, "grad_norm": 0.5529236793518066, "learning_rate": 2.9948249309532768e-05, "loss": 0.07091617584228516, "step": 2741 }, { "epoch": 0.3820804013098307, "grad_norm": 0.4931420087814331, "learning_rate": 2.9940070031703413e-05, "loss": 0.09982109069824219, "step": 2742 }, { "epoch": 0.38221974500104505, "grad_norm": 0.45883163809776306, "learning_rate": 2.9931888545417435e-05, "loss": 0.10249900817871094, "step": 2743 }, { "epoch": 0.3823590886922595, "grad_norm": 0.41469842195510864, "learning_rate": 2.9923704852492566e-05, "loss": 0.09606361389160156, "step": 2744 }, { "epoch": 0.38249843238347386, "grad_norm": 0.21494698524475098, "learning_rate": 2.9915518954747038e-05, "loss": 0.065521240234375, "step": 2745 }, { "epoch": 0.38263777607468824, "grad_norm": 0.35185182094573975, "learning_rate": 2.9907330853999583e-05, "loss": 0.10042285919189453, "step": 2746 }, { "epoch": 0.3827771197659026, "grad_norm": 0.39386314153671265, "learning_rate": 2.9899140552069396e-05, "loss": 0.09450912475585938, "step": 2747 }, { "epoch": 0.382916463457117, "grad_norm": 0.5368359088897705, "learning_rate": 2.989094805077618e-05, "loss": 0.1170501708984375, "step": 2748 }, { "epoch": 0.3830558071483314, "grad_norm": 0.5673030018806458, "learning_rate": 2.9882753351940115e-05, "loss": 0.09240961074829102, "step": 2749 }, { "epoch": 0.38319515083954575, "grad_norm": 0.4494551122188568, "learning_rate": 2.987455645738189e-05, "loss": 0.10066032409667969, "step": 2750 }, { "epoch": 0.38333449453076013, "grad_norm": 0.41963085532188416, "learning_rate": 2.9866357368922657e-05, "loss": 0.10503387451171875, "step": 2751 }, { "epoch": 0.3834738382219745, "grad_norm": 0.20522433519363403, "learning_rate": 2.985815608838407e-05, "loss": 0.060642242431640625, "step": 2752 }, { "epoch": 0.3836131819131889, "grad_norm": 0.7208806872367859, "learning_rate": 2.984995261758827e-05, "loss": 0.10602951049804688, "step": 2753 }, { "epoch": 0.38375252560440326, "grad_norm": 0.5711843371391296, "learning_rate": 2.984174695835787e-05, "loss": 0.12057304382324219, "step": 2754 }, { "epoch": 0.38389186929561764, "grad_norm": 0.278260201215744, "learning_rate": 2.983353911251599e-05, "loss": 0.069183349609375, "step": 2755 }, { "epoch": 0.384031212986832, "grad_norm": 0.6637197136878967, "learning_rate": 2.9825329081886222e-05, "loss": 0.11963844299316406, "step": 2756 }, { "epoch": 0.3841705566780464, "grad_norm": 0.25681716203689575, "learning_rate": 2.981711686829264e-05, "loss": 0.07241439819335938, "step": 2757 }, { "epoch": 0.38430990036926077, "grad_norm": 0.7481124997138977, "learning_rate": 2.9808902473559835e-05, "loss": 0.12314891815185547, "step": 2758 }, { "epoch": 0.38444924406047515, "grad_norm": 0.24926702678203583, "learning_rate": 2.9800685899512828e-05, "loss": 0.07191658020019531, "step": 2759 }, { "epoch": 0.3845885877516895, "grad_norm": 0.45428547263145447, "learning_rate": 2.9792467147977174e-05, "loss": 0.11787223815917969, "step": 2760 }, { "epoch": 0.3847279314429039, "grad_norm": 0.32125124335289, "learning_rate": 2.9784246220778885e-05, "loss": 0.08127880096435547, "step": 2761 }, { "epoch": 0.3848672751341183, "grad_norm": 0.2578955590724945, "learning_rate": 2.9776023119744462e-05, "loss": 0.07527732849121094, "step": 2762 }, { "epoch": 0.38500661882533266, "grad_norm": 0.4422241747379303, "learning_rate": 2.976779784670089e-05, "loss": 0.09022903442382812, "step": 2763 }, { "epoch": 0.38514596251654704, "grad_norm": 0.5517653822898865, "learning_rate": 2.9759570403475644e-05, "loss": 0.12624740600585938, "step": 2764 }, { "epoch": 0.38528530620776147, "grad_norm": 0.23249341547489166, "learning_rate": 2.975134079189667e-05, "loss": 0.07310295104980469, "step": 2765 }, { "epoch": 0.38542464989897585, "grad_norm": 0.6443906426429749, "learning_rate": 2.9743109013792395e-05, "loss": 0.11773872375488281, "step": 2766 }, { "epoch": 0.3855639935901902, "grad_norm": 0.2953103184700012, "learning_rate": 2.9734875070991736e-05, "loss": 0.08864402770996094, "step": 2767 }, { "epoch": 0.3857033372814046, "grad_norm": 0.32889044284820557, "learning_rate": 2.9726638965324088e-05, "loss": 0.07188940048217773, "step": 2768 }, { "epoch": 0.385842680972619, "grad_norm": 0.522597074508667, "learning_rate": 2.9718400698619327e-05, "loss": 0.1144256591796875, "step": 2769 }, { "epoch": 0.38598202466383336, "grad_norm": 0.421981543302536, "learning_rate": 2.9710160272707803e-05, "loss": 0.0855865478515625, "step": 2770 }, { "epoch": 0.38612136835504773, "grad_norm": 0.5474709868431091, "learning_rate": 2.9701917689420354e-05, "loss": 0.0909576416015625, "step": 2771 }, { "epoch": 0.3862607120462621, "grad_norm": 0.5345871448516846, "learning_rate": 2.9693672950588292e-05, "loss": 0.11491012573242188, "step": 2772 }, { "epoch": 0.3864000557374765, "grad_norm": 0.2521311342716217, "learning_rate": 2.9685426058043414e-05, "loss": 0.07110023498535156, "step": 2773 }, { "epoch": 0.38653939942869087, "grad_norm": 0.3329651355743408, "learning_rate": 2.9677177013617987e-05, "loss": 0.0724954605102539, "step": 2774 }, { "epoch": 0.38667874311990524, "grad_norm": 0.4630734324455261, "learning_rate": 2.9668925819144755e-05, "loss": 0.09348487854003906, "step": 2775 }, { "epoch": 0.3868180868111196, "grad_norm": 0.39918097853660583, "learning_rate": 2.966067247645696e-05, "loss": 0.1049041748046875, "step": 2776 }, { "epoch": 0.386957430502334, "grad_norm": 0.5957585573196411, "learning_rate": 2.965241698738829e-05, "loss": 0.11333656311035156, "step": 2777 }, { "epoch": 0.3870967741935484, "grad_norm": 0.4458919167518616, "learning_rate": 2.9644159353772937e-05, "loss": 0.09021186828613281, "step": 2778 }, { "epoch": 0.38723611788476275, "grad_norm": 0.3416711091995239, "learning_rate": 2.9635899577445558e-05, "loss": 0.0747528076171875, "step": 2779 }, { "epoch": 0.38737546157597713, "grad_norm": 0.4731020927429199, "learning_rate": 2.9627637660241283e-05, "loss": 0.114105224609375, "step": 2780 }, { "epoch": 0.3875148052671915, "grad_norm": 0.5346294045448303, "learning_rate": 2.9619373603995724e-05, "loss": 0.10131072998046875, "step": 2781 }, { "epoch": 0.3876541489584059, "grad_norm": 0.5147583484649658, "learning_rate": 2.9611107410544958e-05, "loss": 0.10749149322509766, "step": 2782 }, { "epoch": 0.38779349264962026, "grad_norm": 0.3791317343711853, "learning_rate": 2.9602839081725558e-05, "loss": 0.10463905334472656, "step": 2783 }, { "epoch": 0.38793283634083464, "grad_norm": 0.5833604335784912, "learning_rate": 2.959456861937455e-05, "loss": 0.0988922119140625, "step": 2784 }, { "epoch": 0.3880721800320491, "grad_norm": 0.3031501770019531, "learning_rate": 2.958629602532944e-05, "loss": 0.0731353759765625, "step": 2785 }, { "epoch": 0.38821152372326345, "grad_norm": 0.27346330881118774, "learning_rate": 2.9578021301428212e-05, "loss": 0.08133506774902344, "step": 2786 }, { "epoch": 0.38835086741447783, "grad_norm": 0.3339281678199768, "learning_rate": 2.9569744449509322e-05, "loss": 0.08997344970703125, "step": 2787 }, { "epoch": 0.3884902111056922, "grad_norm": 0.7204552292823792, "learning_rate": 2.9561465471411693e-05, "loss": 0.09783935546875, "step": 2788 }, { "epoch": 0.3886295547969066, "grad_norm": 0.2614377439022064, "learning_rate": 2.955318436897473e-05, "loss": 0.06814956665039062, "step": 2789 }, { "epoch": 0.38876889848812096, "grad_norm": 0.20632733404636383, "learning_rate": 2.9544901144038303e-05, "loss": 0.063232421875, "step": 2790 }, { "epoch": 0.38890824217933534, "grad_norm": 0.3239797353744507, "learning_rate": 2.9536615798442755e-05, "loss": 0.07658576965332031, "step": 2791 }, { "epoch": 0.3890475858705497, "grad_norm": 0.46518293023109436, "learning_rate": 2.9528328334028903e-05, "loss": 0.07776355743408203, "step": 2792 }, { "epoch": 0.3891869295617641, "grad_norm": 0.4190099835395813, "learning_rate": 2.952003875263803e-05, "loss": 0.09036064147949219, "step": 2793 }, { "epoch": 0.38932627325297847, "grad_norm": 0.5311266779899597, "learning_rate": 2.9511747056111893e-05, "loss": 0.11085891723632812, "step": 2794 }, { "epoch": 0.38946561694419285, "grad_norm": 0.37859615683555603, "learning_rate": 2.9503453246292716e-05, "loss": 0.09800529479980469, "step": 2795 }, { "epoch": 0.3896049606354072, "grad_norm": 0.6319118142127991, "learning_rate": 2.9495157325023195e-05, "loss": 0.11688613891601562, "step": 2796 }, { "epoch": 0.3897443043266216, "grad_norm": 0.9914302229881287, "learning_rate": 2.9486859294146497e-05, "loss": 0.1352405548095703, "step": 2797 }, { "epoch": 0.389883648017836, "grad_norm": 0.7224761843681335, "learning_rate": 2.9478559155506244e-05, "loss": 0.1156768798828125, "step": 2798 }, { "epoch": 0.39002299170905036, "grad_norm": 0.41691628098487854, "learning_rate": 2.9470256910946555e-05, "loss": 0.11707878112792969, "step": 2799 }, { "epoch": 0.39016233540026474, "grad_norm": 0.2999376058578491, "learning_rate": 2.946195256231199e-05, "loss": 0.0853729248046875, "step": 2800 }, { "epoch": 0.3903016790914791, "grad_norm": 0.9876853227615356, "learning_rate": 2.9453646111447582e-05, "loss": 0.17195415496826172, "step": 2801 }, { "epoch": 0.3904410227826935, "grad_norm": 0.5433264374732971, "learning_rate": 2.944533756019884e-05, "loss": 0.1074676513671875, "step": 2802 }, { "epoch": 0.39058036647390787, "grad_norm": 0.6145113110542297, "learning_rate": 2.9437026910411734e-05, "loss": 0.11833953857421875, "step": 2803 }, { "epoch": 0.39071971016512225, "grad_norm": 0.3740767538547516, "learning_rate": 2.9428714163932697e-05, "loss": 0.07789039611816406, "step": 2804 }, { "epoch": 0.3908590538563367, "grad_norm": 0.4510919153690338, "learning_rate": 2.9420399322608637e-05, "loss": 0.10042953491210938, "step": 2805 }, { "epoch": 0.39099839754755106, "grad_norm": 0.49457767605781555, "learning_rate": 2.9412082388286916e-05, "loss": 0.08388710021972656, "step": 2806 }, { "epoch": 0.39113774123876544, "grad_norm": 0.6645947694778442, "learning_rate": 2.940376336281537e-05, "loss": 0.11479473114013672, "step": 2807 }, { "epoch": 0.3912770849299798, "grad_norm": 0.3037877082824707, "learning_rate": 2.9395442248042297e-05, "loss": 0.08470916748046875, "step": 2808 }, { "epoch": 0.3914164286211942, "grad_norm": 0.632085382938385, "learning_rate": 2.9387119045816453e-05, "loss": 0.1334667205810547, "step": 2809 }, { "epoch": 0.39155577231240857, "grad_norm": 0.7185274958610535, "learning_rate": 2.9378793757987082e-05, "loss": 0.13877487182617188, "step": 2810 }, { "epoch": 0.39169511600362295, "grad_norm": 0.46159228682518005, "learning_rate": 2.9370466386403843e-05, "loss": 0.1102447509765625, "step": 2811 }, { "epoch": 0.3918344596948373, "grad_norm": 0.31972458958625793, "learning_rate": 2.9362136932916914e-05, "loss": 0.08866310119628906, "step": 2812 }, { "epoch": 0.3919738033860517, "grad_norm": 0.3936508595943451, "learning_rate": 2.93538053993769e-05, "loss": 0.09870052337646484, "step": 2813 }, { "epoch": 0.3921131470772661, "grad_norm": 0.4467742145061493, "learning_rate": 2.9345471787634873e-05, "loss": 0.10474395751953125, "step": 2814 }, { "epoch": 0.39225249076848046, "grad_norm": 0.43785348534584045, "learning_rate": 2.933713609954238e-05, "loss": 0.09344482421875, "step": 2815 }, { "epoch": 0.39239183445969483, "grad_norm": 0.6319786310195923, "learning_rate": 2.9328798336951415e-05, "loss": 0.09176921844482422, "step": 2816 }, { "epoch": 0.3925311781509092, "grad_norm": 0.3951207995414734, "learning_rate": 2.9320458501714437e-05, "loss": 0.08819580078125, "step": 2817 }, { "epoch": 0.3926705218421236, "grad_norm": 0.2947540283203125, "learning_rate": 2.931211659568437e-05, "loss": 0.0671548843383789, "step": 2818 }, { "epoch": 0.39280986553333797, "grad_norm": 0.4707702696323395, "learning_rate": 2.93037726207146e-05, "loss": 0.08539962768554688, "step": 2819 }, { "epoch": 0.39294920922455234, "grad_norm": 0.4581524431705475, "learning_rate": 2.929542657865896e-05, "loss": 0.09482574462890625, "step": 2820 }, { "epoch": 0.3930885529157667, "grad_norm": 0.31296730041503906, "learning_rate": 2.9287078471371747e-05, "loss": 0.07076263427734375, "step": 2821 }, { "epoch": 0.3932278966069811, "grad_norm": 0.5241219401359558, "learning_rate": 2.927872830070773e-05, "loss": 0.12018966674804688, "step": 2822 }, { "epoch": 0.3933672402981955, "grad_norm": 0.5782728791236877, "learning_rate": 2.9270376068522117e-05, "loss": 0.10172748565673828, "step": 2823 }, { "epoch": 0.39350658398940985, "grad_norm": 0.4213505685329437, "learning_rate": 2.9262021776670594e-05, "loss": 0.08958244323730469, "step": 2824 }, { "epoch": 0.3936459276806243, "grad_norm": 0.4802245795726776, "learning_rate": 2.9253665427009283e-05, "loss": 0.08833694458007812, "step": 2825 }, { "epoch": 0.39378527137183866, "grad_norm": 0.6023840308189392, "learning_rate": 2.9245307021394787e-05, "loss": 0.16649627685546875, "step": 2826 }, { "epoch": 0.39392461506305304, "grad_norm": 0.8773688673973083, "learning_rate": 2.9236946561684133e-05, "loss": 0.142608642578125, "step": 2827 }, { "epoch": 0.3940639587542674, "grad_norm": 0.45546507835388184, "learning_rate": 2.922858404973484e-05, "loss": 0.11491584777832031, "step": 2828 }, { "epoch": 0.3942033024454818, "grad_norm": 0.7287598252296448, "learning_rate": 2.922021948740487e-05, "loss": 0.12036895751953125, "step": 2829 }, { "epoch": 0.3943426461366962, "grad_norm": 0.4068584740161896, "learning_rate": 2.9211852876552624e-05, "loss": 0.10361480712890625, "step": 2830 }, { "epoch": 0.39448198982791055, "grad_norm": 0.8591437935829163, "learning_rate": 2.9203484219036986e-05, "loss": 0.1234292984008789, "step": 2831 }, { "epoch": 0.39462133351912493, "grad_norm": 0.41472795605659485, "learning_rate": 2.9195113516717267e-05, "loss": 0.08765792846679688, "step": 2832 }, { "epoch": 0.3947606772103393, "grad_norm": 0.33983391523361206, "learning_rate": 2.9186740771453253e-05, "loss": 0.08751392364501953, "step": 2833 }, { "epoch": 0.3949000209015537, "grad_norm": 0.4313623905181885, "learning_rate": 2.9178365985105182e-05, "loss": 0.08644580841064453, "step": 2834 }, { "epoch": 0.39503936459276806, "grad_norm": 0.4976264238357544, "learning_rate": 2.916998915953373e-05, "loss": 0.11478614807128906, "step": 2835 }, { "epoch": 0.39517870828398244, "grad_norm": 0.4120500981807709, "learning_rate": 2.916161029660004e-05, "loss": 0.09641838073730469, "step": 2836 }, { "epoch": 0.3953180519751968, "grad_norm": 0.5672963857650757, "learning_rate": 2.915322939816571e-05, "loss": 0.08379411697387695, "step": 2837 }, { "epoch": 0.3954573956664112, "grad_norm": 0.334119975566864, "learning_rate": 2.9144846466092773e-05, "loss": 0.06439781188964844, "step": 2838 }, { "epoch": 0.39559673935762557, "grad_norm": 0.6263145208358765, "learning_rate": 2.9136461502243735e-05, "loss": 0.11399173736572266, "step": 2839 }, { "epoch": 0.39573608304883995, "grad_norm": 0.3615053594112396, "learning_rate": 2.9128074508481544e-05, "loss": 0.0775289535522461, "step": 2840 }, { "epoch": 0.3958754267400543, "grad_norm": 0.4296785593032837, "learning_rate": 2.9119685486669587e-05, "loss": 0.08209228515625, "step": 2841 }, { "epoch": 0.3960147704312687, "grad_norm": 0.3805120587348938, "learning_rate": 2.911129443867173e-05, "loss": 0.09244251251220703, "step": 2842 }, { "epoch": 0.3961541141224831, "grad_norm": 0.6638845801353455, "learning_rate": 2.9102901366352254e-05, "loss": 0.10248088836669922, "step": 2843 }, { "epoch": 0.39629345781369746, "grad_norm": 0.23928716778755188, "learning_rate": 2.909450627157592e-05, "loss": 0.06439399719238281, "step": 2844 }, { "epoch": 0.3964328015049119, "grad_norm": 0.5579345226287842, "learning_rate": 2.9086109156207926e-05, "loss": 0.09394168853759766, "step": 2845 }, { "epoch": 0.39657214519612627, "grad_norm": 0.6000582575798035, "learning_rate": 2.9077710022113918e-05, "loss": 0.12530136108398438, "step": 2846 }, { "epoch": 0.39671148888734065, "grad_norm": 0.6729063987731934, "learning_rate": 2.906930887115999e-05, "loss": 0.11494636535644531, "step": 2847 }, { "epoch": 0.396850832578555, "grad_norm": 0.4611850678920746, "learning_rate": 2.906090570521268e-05, "loss": 0.09486770629882812, "step": 2848 }, { "epoch": 0.3969901762697694, "grad_norm": 0.3406074345111847, "learning_rate": 2.9052500526138994e-05, "loss": 0.07277584075927734, "step": 2849 }, { "epoch": 0.3971295199609838, "grad_norm": 0.4217374324798584, "learning_rate": 2.904409333580636e-05, "loss": 0.08868026733398438, "step": 2850 }, { "epoch": 0.39726886365219816, "grad_norm": 0.28599727153778076, "learning_rate": 2.903568413608267e-05, "loss": 0.07407951354980469, "step": 2851 }, { "epoch": 0.39740820734341253, "grad_norm": 0.31757208704948425, "learning_rate": 2.9027272928836248e-05, "loss": 0.09232902526855469, "step": 2852 }, { "epoch": 0.3975475510346269, "grad_norm": 0.4805624186992645, "learning_rate": 2.901885971593588e-05, "loss": 0.08654594421386719, "step": 2853 }, { "epoch": 0.3976868947258413, "grad_norm": 0.594313383102417, "learning_rate": 2.901044449925079e-05, "loss": 0.13433456420898438, "step": 2854 }, { "epoch": 0.39782623841705567, "grad_norm": 0.4714030623435974, "learning_rate": 2.9002027280650643e-05, "loss": 0.11092185974121094, "step": 2855 }, { "epoch": 0.39796558210827004, "grad_norm": 0.4146770238876343, "learning_rate": 2.899360806200555e-05, "loss": 0.09079456329345703, "step": 2856 }, { "epoch": 0.3981049257994844, "grad_norm": 0.3807482123374939, "learning_rate": 2.8985186845186077e-05, "loss": 0.09280967712402344, "step": 2857 }, { "epoch": 0.3982442694906988, "grad_norm": 0.4483656585216522, "learning_rate": 2.897676363206322e-05, "loss": 0.08497238159179688, "step": 2858 }, { "epoch": 0.3983836131819132, "grad_norm": 0.6151071190834045, "learning_rate": 2.8968338424508426e-05, "loss": 0.1251373291015625, "step": 2859 }, { "epoch": 0.39852295687312755, "grad_norm": 0.38683465123176575, "learning_rate": 2.895991122439359e-05, "loss": 0.06981563568115234, "step": 2860 }, { "epoch": 0.39866230056434193, "grad_norm": 0.539776086807251, "learning_rate": 2.895148203359103e-05, "loss": 0.10857963562011719, "step": 2861 }, { "epoch": 0.3988016442555563, "grad_norm": 0.7045628428459167, "learning_rate": 2.8943050853973536e-05, "loss": 0.0862579345703125, "step": 2862 }, { "epoch": 0.3989409879467707, "grad_norm": 0.5766880512237549, "learning_rate": 2.893461768741431e-05, "loss": 0.09726905822753906, "step": 2863 }, { "epoch": 0.39908033163798506, "grad_norm": 0.4177318811416626, "learning_rate": 2.892618253578702e-05, "loss": 0.10309982299804688, "step": 2864 }, { "epoch": 0.3992196753291995, "grad_norm": 1.0094282627105713, "learning_rate": 2.8917745400965755e-05, "loss": 0.11057281494140625, "step": 2865 }, { "epoch": 0.3993590190204139, "grad_norm": 0.39825648069381714, "learning_rate": 2.8909306284825058e-05, "loss": 0.07055854797363281, "step": 2866 }, { "epoch": 0.39949836271162825, "grad_norm": 0.3099052906036377, "learning_rate": 2.8900865189239907e-05, "loss": 0.07972526550292969, "step": 2867 }, { "epoch": 0.39963770640284263, "grad_norm": 0.3715667426586151, "learning_rate": 2.889242211608572e-05, "loss": 0.08117103576660156, "step": 2868 }, { "epoch": 0.399777050094057, "grad_norm": 0.3709195554256439, "learning_rate": 2.8883977067238363e-05, "loss": 0.07629013061523438, "step": 2869 }, { "epoch": 0.3999163937852714, "grad_norm": 0.451784610748291, "learning_rate": 2.887553004457412e-05, "loss": 0.07976913452148438, "step": 2870 }, { "epoch": 0.40005573747648576, "grad_norm": 0.4687831699848175, "learning_rate": 2.8867081049969738e-05, "loss": 0.10159111022949219, "step": 2871 }, { "epoch": 0.40019508116770014, "grad_norm": 0.5947216153144836, "learning_rate": 2.8858630085302378e-05, "loss": 0.1293649673461914, "step": 2872 }, { "epoch": 0.4003344248589145, "grad_norm": 0.2705707848072052, "learning_rate": 2.885017715244966e-05, "loss": 0.07787513732910156, "step": 2873 }, { "epoch": 0.4004737685501289, "grad_norm": 0.26179739832878113, "learning_rate": 2.884172225328964e-05, "loss": 0.06071281433105469, "step": 2874 }, { "epoch": 0.40061311224134327, "grad_norm": 0.7979472279548645, "learning_rate": 2.883326538970079e-05, "loss": 0.13092422485351562, "step": 2875 }, { "epoch": 0.40075245593255765, "grad_norm": 0.6518993973731995, "learning_rate": 2.8824806563562037e-05, "loss": 0.11884307861328125, "step": 2876 }, { "epoch": 0.400891799623772, "grad_norm": 0.3480386435985565, "learning_rate": 2.8816345776752737e-05, "loss": 0.08157920837402344, "step": 2877 }, { "epoch": 0.4010311433149864, "grad_norm": 0.2972913384437561, "learning_rate": 2.880788303115269e-05, "loss": 0.08409690856933594, "step": 2878 }, { "epoch": 0.4011704870062008, "grad_norm": 0.5967562794685364, "learning_rate": 2.8799418328642116e-05, "loss": 0.11619377136230469, "step": 2879 }, { "epoch": 0.40130983069741516, "grad_norm": 0.5714161992073059, "learning_rate": 2.879095167110169e-05, "loss": 0.09496879577636719, "step": 2880 }, { "epoch": 0.40144917438862954, "grad_norm": 0.634818971157074, "learning_rate": 2.8782483060412502e-05, "loss": 0.10003471374511719, "step": 2881 }, { "epoch": 0.4015885180798439, "grad_norm": 0.36395058035850525, "learning_rate": 2.8774012498456083e-05, "loss": 0.09754180908203125, "step": 2882 }, { "epoch": 0.4017278617710583, "grad_norm": 0.8031824827194214, "learning_rate": 2.8765539987114403e-05, "loss": 0.11302757263183594, "step": 2883 }, { "epoch": 0.40186720546227267, "grad_norm": 0.39306899905204773, "learning_rate": 2.8757065528269855e-05, "loss": 0.10762405395507812, "step": 2884 }, { "epoch": 0.4020065491534871, "grad_norm": 0.2913625240325928, "learning_rate": 2.8748589123805274e-05, "loss": 0.07906723022460938, "step": 2885 }, { "epoch": 0.4021458928447015, "grad_norm": 0.47080209851264954, "learning_rate": 2.874011077560393e-05, "loss": 0.11349868774414062, "step": 2886 }, { "epoch": 0.40228523653591586, "grad_norm": 0.5210425853729248, "learning_rate": 2.8731630485549504e-05, "loss": 0.11547279357910156, "step": 2887 }, { "epoch": 0.40242458022713024, "grad_norm": 0.22618699073791504, "learning_rate": 2.8723148255526138e-05, "loss": 0.06984329223632812, "step": 2888 }, { "epoch": 0.4025639239183446, "grad_norm": 0.44981807470321655, "learning_rate": 2.8714664087418374e-05, "loss": 0.09727668762207031, "step": 2889 }, { "epoch": 0.402703267609559, "grad_norm": 0.3494621813297272, "learning_rate": 2.8706177983111216e-05, "loss": 0.08778190612792969, "step": 2890 }, { "epoch": 0.40284261130077337, "grad_norm": 0.5396576523780823, "learning_rate": 2.869768994449007e-05, "loss": 0.09456253051757812, "step": 2891 }, { "epoch": 0.40298195499198775, "grad_norm": 0.2663780748844147, "learning_rate": 2.86891999734408e-05, "loss": 0.07070732116699219, "step": 2892 }, { "epoch": 0.4031212986832021, "grad_norm": 0.758431077003479, "learning_rate": 2.868070807184966e-05, "loss": 0.11065864562988281, "step": 2893 }, { "epoch": 0.4032606423744165, "grad_norm": 0.3624112904071808, "learning_rate": 2.867221424160338e-05, "loss": 0.08304405212402344, "step": 2894 }, { "epoch": 0.4033999860656309, "grad_norm": 0.8406776189804077, "learning_rate": 2.866371848458908e-05, "loss": 0.15859413146972656, "step": 2895 }, { "epoch": 0.40353932975684526, "grad_norm": 0.3445568084716797, "learning_rate": 2.8655220802694334e-05, "loss": 0.09676933288574219, "step": 2896 }, { "epoch": 0.40367867344805963, "grad_norm": 0.37975454330444336, "learning_rate": 2.864672119780713e-05, "loss": 0.10293006896972656, "step": 2897 }, { "epoch": 0.403818017139274, "grad_norm": 0.5747414827346802, "learning_rate": 2.8638219671815873e-05, "loss": 0.07542800903320312, "step": 2898 }, { "epoch": 0.4039573608304884, "grad_norm": 0.7423135042190552, "learning_rate": 2.8629716226609427e-05, "loss": 0.11152076721191406, "step": 2899 }, { "epoch": 0.40409670452170277, "grad_norm": 0.18259790539741516, "learning_rate": 2.8621210864077053e-05, "loss": 0.061981201171875, "step": 2900 }, { "epoch": 0.40423604821291714, "grad_norm": 0.42934221029281616, "learning_rate": 2.861270358610845e-05, "loss": 0.0935516357421875, "step": 2901 }, { "epoch": 0.4043753919041315, "grad_norm": 0.2880236506462097, "learning_rate": 2.8604194394593744e-05, "loss": 0.07050514221191406, "step": 2902 }, { "epoch": 0.4045147355953459, "grad_norm": 0.49966609477996826, "learning_rate": 2.8595683291423476e-05, "loss": 0.10454559326171875, "step": 2903 }, { "epoch": 0.4046540792865603, "grad_norm": 0.4908045530319214, "learning_rate": 2.858717027848863e-05, "loss": 0.09884488582611084, "step": 2904 }, { "epoch": 0.4047934229777747, "grad_norm": 0.5836314558982849, "learning_rate": 2.857865535768059e-05, "loss": 0.11692428588867188, "step": 2905 }, { "epoch": 0.4049327666689891, "grad_norm": 0.37020042538642883, "learning_rate": 2.8570138530891188e-05, "loss": 0.09495162963867188, "step": 2906 }, { "epoch": 0.40507211036020346, "grad_norm": 0.47730156779289246, "learning_rate": 2.8561619800012657e-05, "loss": 0.09744071960449219, "step": 2907 }, { "epoch": 0.40521145405141784, "grad_norm": 0.831049919128418, "learning_rate": 2.8553099166937685e-05, "loss": 0.1297168731689453, "step": 2908 }, { "epoch": 0.4053507977426322, "grad_norm": 0.45376503467559814, "learning_rate": 2.8544576633559335e-05, "loss": 0.08504104614257812, "step": 2909 }, { "epoch": 0.4054901414338466, "grad_norm": 0.28864362835884094, "learning_rate": 2.853605220177114e-05, "loss": 0.07505607604980469, "step": 2910 }, { "epoch": 0.405629485125061, "grad_norm": 0.5174407362937927, "learning_rate": 2.8527525873467022e-05, "loss": 0.1446208953857422, "step": 2911 }, { "epoch": 0.40576882881627535, "grad_norm": 0.31805428862571716, "learning_rate": 2.851899765054135e-05, "loss": 0.07036590576171875, "step": 2912 }, { "epoch": 0.40590817250748973, "grad_norm": 0.29066160321235657, "learning_rate": 2.8510467534888886e-05, "loss": 0.06470203399658203, "step": 2913 }, { "epoch": 0.4060475161987041, "grad_norm": 0.4190223515033722, "learning_rate": 2.8501935528404833e-05, "loss": 0.08709335327148438, "step": 2914 }, { "epoch": 0.4061868598899185, "grad_norm": 0.3024798035621643, "learning_rate": 2.849340163298481e-05, "loss": 0.09853553771972656, "step": 2915 }, { "epoch": 0.40632620358113286, "grad_norm": 0.30848273634910583, "learning_rate": 2.848486585052485e-05, "loss": 0.08180713653564453, "step": 2916 }, { "epoch": 0.40646554727234724, "grad_norm": 0.19469375908374786, "learning_rate": 2.8476328182921414e-05, "loss": 0.059627532958984375, "step": 2917 }, { "epoch": 0.4066048909635616, "grad_norm": 0.47342467308044434, "learning_rate": 2.8467788632071367e-05, "loss": 0.11958694458007812, "step": 2918 }, { "epoch": 0.406744234654776, "grad_norm": 0.5535681247711182, "learning_rate": 2.845924719987202e-05, "loss": 0.11155509948730469, "step": 2919 }, { "epoch": 0.40688357834599037, "grad_norm": 0.6099625825881958, "learning_rate": 2.8450703888221066e-05, "loss": 0.08853816986083984, "step": 2920 }, { "epoch": 0.40702292203720475, "grad_norm": 0.30032074451446533, "learning_rate": 2.844215869901664e-05, "loss": 0.0713491439819336, "step": 2921 }, { "epoch": 0.4071622657284191, "grad_norm": 0.6499759554862976, "learning_rate": 2.8433611634157293e-05, "loss": 0.10539436340332031, "step": 2922 }, { "epoch": 0.4073016094196335, "grad_norm": 0.35065481066703796, "learning_rate": 2.8425062695541975e-05, "loss": 0.08057212829589844, "step": 2923 }, { "epoch": 0.4074409531108479, "grad_norm": 0.38863053917884827, "learning_rate": 2.8416511885070085e-05, "loss": 0.07405853271484375, "step": 2924 }, { "epoch": 0.4075802968020623, "grad_norm": 0.3987928032875061, "learning_rate": 2.84079592046414e-05, "loss": 0.08418464660644531, "step": 2925 }, { "epoch": 0.4077196404932767, "grad_norm": 0.3448917865753174, "learning_rate": 2.839940465615614e-05, "loss": 0.08751296997070312, "step": 2926 }, { "epoch": 0.40785898418449107, "grad_norm": 0.31232085824012756, "learning_rate": 2.8390848241514918e-05, "loss": 0.08133506774902344, "step": 2927 }, { "epoch": 0.40799832787570545, "grad_norm": 0.8299886584281921, "learning_rate": 2.8382289962618793e-05, "loss": 0.13628768920898438, "step": 2928 }, { "epoch": 0.4081376715669198, "grad_norm": 0.5269566774368286, "learning_rate": 2.8373729821369206e-05, "loss": 0.093505859375, "step": 2929 }, { "epoch": 0.4082770152581342, "grad_norm": 0.5619516968727112, "learning_rate": 2.8365167819668027e-05, "loss": 0.11731243133544922, "step": 2930 }, { "epoch": 0.4084163589493486, "grad_norm": 0.7887362837791443, "learning_rate": 2.835660395941754e-05, "loss": 0.13588333129882812, "step": 2931 }, { "epoch": 0.40855570264056296, "grad_norm": 0.2972719967365265, "learning_rate": 2.8348038242520438e-05, "loss": 0.07999229431152344, "step": 2932 }, { "epoch": 0.40869504633177733, "grad_norm": 0.5917282700538635, "learning_rate": 2.833947067087983e-05, "loss": 0.10748863220214844, "step": 2933 }, { "epoch": 0.4088343900229917, "grad_norm": 0.5834023356437683, "learning_rate": 2.833090124639923e-05, "loss": 0.11511421203613281, "step": 2934 }, { "epoch": 0.4089737337142061, "grad_norm": 0.2627822458744049, "learning_rate": 2.832232997098257e-05, "loss": 0.08635711669921875, "step": 2935 }, { "epoch": 0.40911307740542047, "grad_norm": 0.4111623764038086, "learning_rate": 2.831375684653419e-05, "loss": 0.1255779266357422, "step": 2936 }, { "epoch": 0.40925242109663484, "grad_norm": 0.5223656892776489, "learning_rate": 2.8305181874958844e-05, "loss": 0.115966796875, "step": 2937 }, { "epoch": 0.4093917647878492, "grad_norm": 0.6312443614006042, "learning_rate": 2.82966050581617e-05, "loss": 0.12738037109375, "step": 2938 }, { "epoch": 0.4095311084790636, "grad_norm": 0.39587321877479553, "learning_rate": 2.8288026398048326e-05, "loss": 0.09307670593261719, "step": 2939 }, { "epoch": 0.409670452170278, "grad_norm": 0.5742647647857666, "learning_rate": 2.8279445896524705e-05, "loss": 0.09099006652832031, "step": 2940 }, { "epoch": 0.40980979586149235, "grad_norm": 0.4007696211338043, "learning_rate": 2.8270863555497227e-05, "loss": 0.08545875549316406, "step": 2941 }, { "epoch": 0.40994913955270673, "grad_norm": 0.47694236040115356, "learning_rate": 2.82622793768727e-05, "loss": 0.0868072509765625, "step": 2942 }, { "epoch": 0.4100884832439211, "grad_norm": 0.31636810302734375, "learning_rate": 2.8253693362558322e-05, "loss": 0.07649517059326172, "step": 2943 }, { "epoch": 0.4102278269351355, "grad_norm": 0.49565309286117554, "learning_rate": 2.8245105514461712e-05, "loss": 0.10487842559814453, "step": 2944 }, { "epoch": 0.4103671706263499, "grad_norm": 0.3699641227722168, "learning_rate": 2.82365158344909e-05, "loss": 0.10815048217773438, "step": 2945 }, { "epoch": 0.4105065143175643, "grad_norm": 0.5241638422012329, "learning_rate": 2.822792432455431e-05, "loss": 0.11094474792480469, "step": 2946 }, { "epoch": 0.4106458580087787, "grad_norm": 0.8141152262687683, "learning_rate": 2.8219330986560783e-05, "loss": 0.12200164794921875, "step": 2947 }, { "epoch": 0.41078520169999305, "grad_norm": 0.426407128572464, "learning_rate": 2.821073582241956e-05, "loss": 0.10487747192382812, "step": 2948 }, { "epoch": 0.41092454539120743, "grad_norm": 0.45884084701538086, "learning_rate": 2.820213883404029e-05, "loss": 0.0910797119140625, "step": 2949 }, { "epoch": 0.4110638890824218, "grad_norm": 0.4297236204147339, "learning_rate": 2.8193540023333033e-05, "loss": 0.07059669494628906, "step": 2950 }, { "epoch": 0.4112032327736362, "grad_norm": 0.5169134736061096, "learning_rate": 2.818493939220824e-05, "loss": 0.08985328674316406, "step": 2951 }, { "epoch": 0.41134257646485056, "grad_norm": 0.3720305562019348, "learning_rate": 2.8176336942576785e-05, "loss": 0.07719039916992188, "step": 2952 }, { "epoch": 0.41148192015606494, "grad_norm": 0.41053661704063416, "learning_rate": 2.816773267634993e-05, "loss": 0.08061790466308594, "step": 2953 }, { "epoch": 0.4116212638472793, "grad_norm": 0.5452696681022644, "learning_rate": 2.8159126595439344e-05, "loss": 0.126678466796875, "step": 2954 }, { "epoch": 0.4117606075384937, "grad_norm": 0.2549055218696594, "learning_rate": 2.8150518701757104e-05, "loss": 0.0647134780883789, "step": 2955 }, { "epoch": 0.4118999512297081, "grad_norm": 0.4326753318309784, "learning_rate": 2.814190899721569e-05, "loss": 0.09583473205566406, "step": 2956 }, { "epoch": 0.41203929492092245, "grad_norm": 0.6350720524787903, "learning_rate": 2.8133297483727972e-05, "loss": 0.08076858520507812, "step": 2957 }, { "epoch": 0.4121786386121368, "grad_norm": 0.5242518186569214, "learning_rate": 2.8124684163207252e-05, "loss": 0.08086252212524414, "step": 2958 }, { "epoch": 0.4123179823033512, "grad_norm": 0.4909093379974365, "learning_rate": 2.8116069037567187e-05, "loss": 0.09307193756103516, "step": 2959 }, { "epoch": 0.4124573259945656, "grad_norm": 0.7606944441795349, "learning_rate": 2.8107452108721887e-05, "loss": 0.12036895751953125, "step": 2960 }, { "epoch": 0.41259666968577996, "grad_norm": 0.479745477437973, "learning_rate": 2.809883337858582e-05, "loss": 0.10840225219726562, "step": 2961 }, { "epoch": 0.41273601337699434, "grad_norm": 0.4662981331348419, "learning_rate": 2.8090212849073877e-05, "loss": 0.10615825653076172, "step": 2962 }, { "epoch": 0.4128753570682087, "grad_norm": 0.5484395027160645, "learning_rate": 2.8081590522101342e-05, "loss": 0.1029977798461914, "step": 2963 }, { "epoch": 0.4130147007594231, "grad_norm": 0.38715115189552307, "learning_rate": 2.8072966399583897e-05, "loss": 0.08767318725585938, "step": 2964 }, { "epoch": 0.41315404445063747, "grad_norm": 0.4839401841163635, "learning_rate": 2.8064340483437625e-05, "loss": 0.10988616943359375, "step": 2965 }, { "epoch": 0.4132933881418519, "grad_norm": 0.3486270010471344, "learning_rate": 2.8055712775579012e-05, "loss": 0.07895278930664062, "step": 2966 }, { "epoch": 0.4134327318330663, "grad_norm": 0.3146027624607086, "learning_rate": 2.8047083277924935e-05, "loss": 0.08570671081542969, "step": 2967 }, { "epoch": 0.41357207552428066, "grad_norm": 0.41636112332344055, "learning_rate": 2.803845199239267e-05, "loss": 0.0882568359375, "step": 2968 }, { "epoch": 0.41371141921549504, "grad_norm": 0.4933621287345886, "learning_rate": 2.8029818920899902e-05, "loss": 0.10334396362304688, "step": 2969 }, { "epoch": 0.4138507629067094, "grad_norm": 0.18043652176856995, "learning_rate": 2.8021184065364684e-05, "loss": 0.06083488464355469, "step": 2970 }, { "epoch": 0.4139901065979238, "grad_norm": 0.2700120508670807, "learning_rate": 2.8012547427705497e-05, "loss": 0.08107757568359375, "step": 2971 }, { "epoch": 0.41412945028913817, "grad_norm": 0.3799881041049957, "learning_rate": 2.80039090098412e-05, "loss": 0.08291244506835938, "step": 2972 }, { "epoch": 0.41426879398035255, "grad_norm": 0.4240100681781769, "learning_rate": 2.7995268813691052e-05, "loss": 0.08331871032714844, "step": 2973 }, { "epoch": 0.4144081376715669, "grad_norm": 0.4528537392616272, "learning_rate": 2.7986626841174717e-05, "loss": 0.09042549133300781, "step": 2974 }, { "epoch": 0.4145474813627813, "grad_norm": 0.4231869876384735, "learning_rate": 2.7977983094212224e-05, "loss": 0.09463691711425781, "step": 2975 }, { "epoch": 0.4146868250539957, "grad_norm": 0.42183446884155273, "learning_rate": 2.7969337574724033e-05, "loss": 0.08738327026367188, "step": 2976 }, { "epoch": 0.41482616874521006, "grad_norm": 0.3546229898929596, "learning_rate": 2.7960690284630976e-05, "loss": 0.07401657104492188, "step": 2977 }, { "epoch": 0.41496551243642443, "grad_norm": 0.6682326197624207, "learning_rate": 2.7952041225854283e-05, "loss": 0.12703514099121094, "step": 2978 }, { "epoch": 0.4151048561276388, "grad_norm": 0.6465019583702087, "learning_rate": 2.7943390400315577e-05, "loss": 0.11440467834472656, "step": 2979 }, { "epoch": 0.4152441998188532, "grad_norm": 0.5679382681846619, "learning_rate": 2.793473780993688e-05, "loss": 0.1015777587890625, "step": 2980 }, { "epoch": 0.41538354351006757, "grad_norm": 0.3499954044818878, "learning_rate": 2.792608345664059e-05, "loss": 0.07579612731933594, "step": 2981 }, { "epoch": 0.41552288720128194, "grad_norm": 0.4513980746269226, "learning_rate": 2.791742734234951e-05, "loss": 0.09237957000732422, "step": 2982 }, { "epoch": 0.4156622308924963, "grad_norm": 0.43855834007263184, "learning_rate": 2.7908769468986837e-05, "loss": 0.08110666275024414, "step": 2983 }, { "epoch": 0.4158015745837107, "grad_norm": 0.29996269941329956, "learning_rate": 2.7900109838476138e-05, "loss": 0.0807342529296875, "step": 2984 }, { "epoch": 0.4159409182749251, "grad_norm": 0.4602603316307068, "learning_rate": 2.789144845274141e-05, "loss": 0.09967613220214844, "step": 2985 }, { "epoch": 0.4160802619661395, "grad_norm": 0.4237877130508423, "learning_rate": 2.7882785313706996e-05, "loss": 0.09588241577148438, "step": 2986 }, { "epoch": 0.4162196056573539, "grad_norm": 0.42571425437927246, "learning_rate": 2.787412042329765e-05, "loss": 0.08127212524414062, "step": 2987 }, { "epoch": 0.41635894934856826, "grad_norm": 0.248041570186615, "learning_rate": 2.7865453783438517e-05, "loss": 0.06004047393798828, "step": 2988 }, { "epoch": 0.41649829303978264, "grad_norm": 0.32979148626327515, "learning_rate": 2.785678539605512e-05, "loss": 0.07060050964355469, "step": 2989 }, { "epoch": 0.416637636730997, "grad_norm": 0.5158145427703857, "learning_rate": 2.7848115263073386e-05, "loss": 0.09709358215332031, "step": 2990 }, { "epoch": 0.4167769804222114, "grad_norm": 0.361479789018631, "learning_rate": 2.7839443386419613e-05, "loss": 0.09724617004394531, "step": 2991 }, { "epoch": 0.4169163241134258, "grad_norm": 0.5252524614334106, "learning_rate": 2.7830769768020504e-05, "loss": 0.10721015930175781, "step": 2992 }, { "epoch": 0.41705566780464015, "grad_norm": 0.4230610728263855, "learning_rate": 2.782209440980312e-05, "loss": 0.10601234436035156, "step": 2993 }, { "epoch": 0.41719501149585453, "grad_norm": 0.1995987594127655, "learning_rate": 2.781341731369495e-05, "loss": 0.05877208709716797, "step": 2994 }, { "epoch": 0.4173343551870689, "grad_norm": 0.469099760055542, "learning_rate": 2.780473848162383e-05, "loss": 0.09636878967285156, "step": 2995 }, { "epoch": 0.4174736988782833, "grad_norm": 0.41076505184173584, "learning_rate": 2.779605791551801e-05, "loss": 0.0984344482421875, "step": 2996 }, { "epoch": 0.41761304256949766, "grad_norm": 0.691967248916626, "learning_rate": 2.778737561730611e-05, "loss": 0.11032295227050781, "step": 2997 }, { "epoch": 0.41775238626071204, "grad_norm": 0.4118465781211853, "learning_rate": 2.7778691588917127e-05, "loss": 0.08633232116699219, "step": 2998 }, { "epoch": 0.4178917299519264, "grad_norm": 0.44758427143096924, "learning_rate": 2.777000583228047e-05, "loss": 0.09740543365478516, "step": 2999 }, { "epoch": 0.4180310736431408, "grad_norm": 0.3800640404224396, "learning_rate": 2.776131834932591e-05, "loss": 0.07234764099121094, "step": 3000 }, { "epoch": 0.41817041733435517, "grad_norm": 0.2727917432785034, "learning_rate": 2.7752629141983605e-05, "loss": 0.07259941101074219, "step": 3001 }, { "epoch": 0.41830976102556955, "grad_norm": 0.17896799743175507, "learning_rate": 2.77439382121841e-05, "loss": 0.05730438232421875, "step": 3002 }, { "epoch": 0.4184491047167839, "grad_norm": 0.3604126274585724, "learning_rate": 2.773524556185832e-05, "loss": 0.08320236206054688, "step": 3003 }, { "epoch": 0.4185884484079983, "grad_norm": 0.15601606667041779, "learning_rate": 2.7726551192937577e-05, "loss": 0.057816505432128906, "step": 3004 }, { "epoch": 0.4187277920992127, "grad_norm": 0.564013659954071, "learning_rate": 2.7717855107353557e-05, "loss": 0.09427070617675781, "step": 3005 }, { "epoch": 0.4188671357904271, "grad_norm": 0.2955789566040039, "learning_rate": 2.770915730703834e-05, "loss": 0.09270095825195312, "step": 3006 }, { "epoch": 0.4190064794816415, "grad_norm": 0.3839729428291321, "learning_rate": 2.7700457793924357e-05, "loss": 0.09511184692382812, "step": 3007 }, { "epoch": 0.41914582317285587, "grad_norm": 0.5673214793205261, "learning_rate": 2.7691756569944473e-05, "loss": 0.10733795166015625, "step": 3008 }, { "epoch": 0.41928516686407025, "grad_norm": 0.29565975069999695, "learning_rate": 2.7683053637031874e-05, "loss": 0.09211921691894531, "step": 3009 }, { "epoch": 0.4194245105552846, "grad_norm": 0.43969300389289856, "learning_rate": 2.7674348997120174e-05, "loss": 0.08995914459228516, "step": 3010 }, { "epoch": 0.419563854246499, "grad_norm": 0.38074493408203125, "learning_rate": 2.7665642652143327e-05, "loss": 0.10424518585205078, "step": 3011 }, { "epoch": 0.4197031979377134, "grad_norm": 0.3844943940639496, "learning_rate": 2.7656934604035694e-05, "loss": 0.09274864196777344, "step": 3012 }, { "epoch": 0.41984254162892776, "grad_norm": 0.4631916582584381, "learning_rate": 2.7648224854732005e-05, "loss": 0.09597206115722656, "step": 3013 }, { "epoch": 0.41998188532014213, "grad_norm": 0.2502036690711975, "learning_rate": 2.7639513406167363e-05, "loss": 0.0676126480102539, "step": 3014 }, { "epoch": 0.4201212290113565, "grad_norm": 0.2362333983182907, "learning_rate": 2.763080026027726e-05, "loss": 0.07489967346191406, "step": 3015 }, { "epoch": 0.4202605727025709, "grad_norm": 0.3751072287559509, "learning_rate": 2.762208541899755e-05, "loss": 0.093963623046875, "step": 3016 }, { "epoch": 0.42039991639378527, "grad_norm": 0.3404616415500641, "learning_rate": 2.761336888426448e-05, "loss": 0.06876182556152344, "step": 3017 }, { "epoch": 0.42053926008499964, "grad_norm": 0.45011523365974426, "learning_rate": 2.7604650658014648e-05, "loss": 0.09289169311523438, "step": 3018 }, { "epoch": 0.420678603776214, "grad_norm": 0.26344069838523865, "learning_rate": 2.7595930742185068e-05, "loss": 0.07769203186035156, "step": 3019 }, { "epoch": 0.4208179474674284, "grad_norm": 0.9897775650024414, "learning_rate": 2.758720913871309e-05, "loss": 0.12956809997558594, "step": 3020 }, { "epoch": 0.4209572911586428, "grad_norm": 0.9581022262573242, "learning_rate": 2.7578485849536464e-05, "loss": 0.11178398132324219, "step": 3021 }, { "epoch": 0.42109663484985715, "grad_norm": 0.31706711649894714, "learning_rate": 2.7569760876593298e-05, "loss": 0.0904083251953125, "step": 3022 }, { "epoch": 0.42123597854107153, "grad_norm": 0.41639766097068787, "learning_rate": 2.7561034221822085e-05, "loss": 0.09426307678222656, "step": 3023 }, { "epoch": 0.4213753222322859, "grad_norm": 0.6080905795097351, "learning_rate": 2.7552305887161693e-05, "loss": 0.16414642333984375, "step": 3024 }, { "epoch": 0.4215146659235003, "grad_norm": 0.8979246020317078, "learning_rate": 2.754357587455135e-05, "loss": 0.11898994445800781, "step": 3025 }, { "epoch": 0.4216540096147147, "grad_norm": 0.6686037182807922, "learning_rate": 2.7534844185930674e-05, "loss": 0.106475830078125, "step": 3026 }, { "epoch": 0.4217933533059291, "grad_norm": 0.4539884924888611, "learning_rate": 2.7526110823239647e-05, "loss": 0.10874176025390625, "step": 3027 }, { "epoch": 0.4219326969971435, "grad_norm": 0.4164336323738098, "learning_rate": 2.7517375788418613e-05, "loss": 0.11487579345703125, "step": 3028 }, { "epoch": 0.42207204068835785, "grad_norm": 0.3385312259197235, "learning_rate": 2.7508639083408306e-05, "loss": 0.08850669860839844, "step": 3029 }, { "epoch": 0.42221138437957223, "grad_norm": 0.3954416513442993, "learning_rate": 2.749990071014982e-05, "loss": 0.09598731994628906, "step": 3030 }, { "epoch": 0.4223507280707866, "grad_norm": 0.6479269862174988, "learning_rate": 2.749116067058462e-05, "loss": 0.1256732940673828, "step": 3031 }, { "epoch": 0.422490071762001, "grad_norm": 0.7110391855239868, "learning_rate": 2.748241896665455e-05, "loss": 0.12057876586914062, "step": 3032 }, { "epoch": 0.42262941545321536, "grad_norm": 0.5215420722961426, "learning_rate": 2.7473675600301807e-05, "loss": 0.08901786804199219, "step": 3033 }, { "epoch": 0.42276875914442974, "grad_norm": 0.5117837190628052, "learning_rate": 2.7464930573468973e-05, "loss": 0.07907485961914062, "step": 3034 }, { "epoch": 0.4229081028356441, "grad_norm": 0.29275521636009216, "learning_rate": 2.7456183888098995e-05, "loss": 0.06429100036621094, "step": 3035 }, { "epoch": 0.4230474465268585, "grad_norm": 0.5966756343841553, "learning_rate": 2.7447435546135186e-05, "loss": 0.08964920043945312, "step": 3036 }, { "epoch": 0.4231867902180729, "grad_norm": 0.7810686826705933, "learning_rate": 2.7438685549521228e-05, "loss": 0.12935447692871094, "step": 3037 }, { "epoch": 0.42332613390928725, "grad_norm": 0.30725204944610596, "learning_rate": 2.742993390020116e-05, "loss": 0.07145309448242188, "step": 3038 }, { "epoch": 0.4234654776005016, "grad_norm": 0.5059618949890137, "learning_rate": 2.742118060011941e-05, "loss": 0.0894937515258789, "step": 3039 }, { "epoch": 0.423604821291716, "grad_norm": 0.2828565835952759, "learning_rate": 2.7412425651220767e-05, "loss": 0.07688045501708984, "step": 3040 }, { "epoch": 0.4237441649829304, "grad_norm": 0.6171338558197021, "learning_rate": 2.7403669055450363e-05, "loss": 0.11657905578613281, "step": 3041 }, { "epoch": 0.42388350867414476, "grad_norm": 0.305751234292984, "learning_rate": 2.739491081475373e-05, "loss": 0.0838155746459961, "step": 3042 }, { "epoch": 0.42402285236535914, "grad_norm": 0.577755868434906, "learning_rate": 2.738615093107674e-05, "loss": 0.10146141052246094, "step": 3043 }, { "epoch": 0.4241621960565735, "grad_norm": 0.6004830598831177, "learning_rate": 2.7377389406365642e-05, "loss": 0.11208343505859375, "step": 3044 }, { "epoch": 0.4243015397477879, "grad_norm": 0.32550421357154846, "learning_rate": 2.7368626242567046e-05, "loss": 0.08430862426757812, "step": 3045 }, { "epoch": 0.4244408834390023, "grad_norm": 0.2881087064743042, "learning_rate": 2.735986144162793e-05, "loss": 0.0760040283203125, "step": 3046 }, { "epoch": 0.4245802271302167, "grad_norm": 0.4742453694343567, "learning_rate": 2.735109500549563e-05, "loss": 0.08654022216796875, "step": 3047 }, { "epoch": 0.4247195708214311, "grad_norm": 0.435769259929657, "learning_rate": 2.7342326936117847e-05, "loss": 0.10020637512207031, "step": 3048 }, { "epoch": 0.42485891451264546, "grad_norm": 0.34583523869514465, "learning_rate": 2.7333557235442648e-05, "loss": 0.08666181564331055, "step": 3049 }, { "epoch": 0.42499825820385984, "grad_norm": 0.4487597346305847, "learning_rate": 2.732478590541846e-05, "loss": 0.07819366455078125, "step": 3050 }, { "epoch": 0.4251376018950742, "grad_norm": 0.5147772431373596, "learning_rate": 2.7316012947994067e-05, "loss": 0.12531471252441406, "step": 3051 }, { "epoch": 0.4252769455862886, "grad_norm": 0.2830198407173157, "learning_rate": 2.730723836511863e-05, "loss": 0.06543350219726562, "step": 3052 }, { "epoch": 0.42541628927750297, "grad_norm": 0.2761100232601166, "learning_rate": 2.729846215874165e-05, "loss": 0.07118797302246094, "step": 3053 }, { "epoch": 0.42555563296871735, "grad_norm": 0.46774086356163025, "learning_rate": 2.728968433081301e-05, "loss": 0.11004447937011719, "step": 3054 }, { "epoch": 0.4256949766599317, "grad_norm": 0.40086862444877625, "learning_rate": 2.728090488328293e-05, "loss": 0.0838165283203125, "step": 3055 }, { "epoch": 0.4258343203511461, "grad_norm": 0.5478135347366333, "learning_rate": 2.727212381810202e-05, "loss": 0.09226226806640625, "step": 3056 }, { "epoch": 0.4259736640423605, "grad_norm": 0.44372865557670593, "learning_rate": 2.7263341137221217e-05, "loss": 0.09789466857910156, "step": 3057 }, { "epoch": 0.42611300773357486, "grad_norm": 0.6014997363090515, "learning_rate": 2.725455684259185e-05, "loss": 0.10030174255371094, "step": 3058 }, { "epoch": 0.42625235142478923, "grad_norm": 0.25951048731803894, "learning_rate": 2.724577093616556e-05, "loss": 0.07194232940673828, "step": 3059 }, { "epoch": 0.4263916951160036, "grad_norm": 0.3270566761493683, "learning_rate": 2.72369834198944e-05, "loss": 0.078399658203125, "step": 3060 }, { "epoch": 0.426531038807218, "grad_norm": 0.31611230969429016, "learning_rate": 2.7228194295730747e-05, "loss": 0.08364486694335938, "step": 3061 }, { "epoch": 0.42667038249843237, "grad_norm": 0.5268136262893677, "learning_rate": 2.7219403565627342e-05, "loss": 0.11836051940917969, "step": 3062 }, { "epoch": 0.42680972618964674, "grad_norm": 0.4463173449039459, "learning_rate": 2.721061123153729e-05, "loss": 0.11009025573730469, "step": 3063 }, { "epoch": 0.4269490698808611, "grad_norm": 0.4011792242527008, "learning_rate": 2.720181729541404e-05, "loss": 0.08667755126953125, "step": 3064 }, { "epoch": 0.4270884135720755, "grad_norm": 0.4120916426181793, "learning_rate": 2.719302175921141e-05, "loss": 0.08592605590820312, "step": 3065 }, { "epoch": 0.42722775726328993, "grad_norm": 0.35560086369514465, "learning_rate": 2.7184224624883566e-05, "loss": 0.08373451232910156, "step": 3066 }, { "epoch": 0.4273671009545043, "grad_norm": 0.37799519300460815, "learning_rate": 2.7175425894385026e-05, "loss": 0.08482170104980469, "step": 3067 }, { "epoch": 0.4275064446457187, "grad_norm": 0.4046212136745453, "learning_rate": 2.7166625569670664e-05, "loss": 0.08334159851074219, "step": 3068 }, { "epoch": 0.42764578833693306, "grad_norm": 0.23191151022911072, "learning_rate": 2.715782365269573e-05, "loss": 0.05487632751464844, "step": 3069 }, { "epoch": 0.42778513202814744, "grad_norm": 0.40622830390930176, "learning_rate": 2.714902014541579e-05, "loss": 0.09069061279296875, "step": 3070 }, { "epoch": 0.4279244757193618, "grad_norm": 0.7446821331977844, "learning_rate": 2.7140215049786783e-05, "loss": 0.105987548828125, "step": 3071 }, { "epoch": 0.4280638194105762, "grad_norm": 0.3094369173049927, "learning_rate": 2.7131408367765017e-05, "loss": 0.08099365234375, "step": 3072 }, { "epoch": 0.4282031631017906, "grad_norm": 0.538990318775177, "learning_rate": 2.7122600101307113e-05, "loss": 0.10638427734375, "step": 3073 }, { "epoch": 0.42834250679300495, "grad_norm": 0.35945823788642883, "learning_rate": 2.7113790252370093e-05, "loss": 0.0973052978515625, "step": 3074 }, { "epoch": 0.42848185048421933, "grad_norm": 0.5654674768447876, "learning_rate": 2.710497882291127e-05, "loss": 0.11312675476074219, "step": 3075 }, { "epoch": 0.4286211941754337, "grad_norm": 0.6413313150405884, "learning_rate": 2.7096165814888373e-05, "loss": 0.1388416290283203, "step": 3076 }, { "epoch": 0.4287605378666481, "grad_norm": 0.3120116591453552, "learning_rate": 2.7087351230259442e-05, "loss": 0.09258651733398438, "step": 3077 }, { "epoch": 0.42889988155786246, "grad_norm": 0.4430004358291626, "learning_rate": 2.7078535070982873e-05, "loss": 0.08355331420898438, "step": 3078 }, { "epoch": 0.42903922524907684, "grad_norm": 0.4878310561180115, "learning_rate": 2.7069717339017415e-05, "loss": 0.10353469848632812, "step": 3079 }, { "epoch": 0.4291785689402912, "grad_norm": 0.6105650663375854, "learning_rate": 2.706089803632217e-05, "loss": 0.10670280456542969, "step": 3080 }, { "epoch": 0.4293179126315056, "grad_norm": 0.6068555116653442, "learning_rate": 2.7052077164856584e-05, "loss": 0.0952911376953125, "step": 3081 }, { "epoch": 0.42945725632271997, "grad_norm": 0.4432108700275421, "learning_rate": 2.7043254726580457e-05, "loss": 0.10673332214355469, "step": 3082 }, { "epoch": 0.42959660001393435, "grad_norm": 0.45219582319259644, "learning_rate": 2.7034430723453925e-05, "loss": 0.09998226165771484, "step": 3083 }, { "epoch": 0.4297359437051487, "grad_norm": 0.7268244624137878, "learning_rate": 2.7025605157437483e-05, "loss": 0.10045242309570312, "step": 3084 }, { "epoch": 0.4298752873963631, "grad_norm": 0.5471758842468262, "learning_rate": 2.701677803049198e-05, "loss": 0.09918689727783203, "step": 3085 }, { "epoch": 0.43001463108757754, "grad_norm": 0.4069291651248932, "learning_rate": 2.700794934457859e-05, "loss": 0.0905914306640625, "step": 3086 }, { "epoch": 0.4301539747787919, "grad_norm": 0.43674585223197937, "learning_rate": 2.6999119101658854e-05, "loss": 0.08358478546142578, "step": 3087 }, { "epoch": 0.4302933184700063, "grad_norm": 0.41119179129600525, "learning_rate": 2.699028730369464e-05, "loss": 0.07599544525146484, "step": 3088 }, { "epoch": 0.43043266216122067, "grad_norm": 0.5414245128631592, "learning_rate": 2.6981453952648178e-05, "loss": 0.10011863708496094, "step": 3089 }, { "epoch": 0.43057200585243505, "grad_norm": 0.29360273480415344, "learning_rate": 2.6972619050482044e-05, "loss": 0.08612823486328125, "step": 3090 }, { "epoch": 0.4307113495436494, "grad_norm": 0.6129921078681946, "learning_rate": 2.6963782599159135e-05, "loss": 0.08502769470214844, "step": 3091 }, { "epoch": 0.4308506932348638, "grad_norm": 0.49789607524871826, "learning_rate": 2.6954944600642724e-05, "loss": 0.11270904541015625, "step": 3092 }, { "epoch": 0.4309900369260782, "grad_norm": 0.4156376123428345, "learning_rate": 2.6946105056896406e-05, "loss": 0.0885624885559082, "step": 3093 }, { "epoch": 0.43112938061729256, "grad_norm": 0.28965625166893005, "learning_rate": 2.693726396988413e-05, "loss": 0.06447792053222656, "step": 3094 }, { "epoch": 0.43126872430850693, "grad_norm": 0.40528684854507446, "learning_rate": 2.6928421341570178e-05, "loss": 0.09171295166015625, "step": 3095 }, { "epoch": 0.4314080679997213, "grad_norm": 0.6176245212554932, "learning_rate": 2.691957717391918e-05, "loss": 0.09287834167480469, "step": 3096 }, { "epoch": 0.4315474116909357, "grad_norm": 0.3871535658836365, "learning_rate": 2.6910731468896112e-05, "loss": 0.07746601104736328, "step": 3097 }, { "epoch": 0.43168675538215007, "grad_norm": 0.502102792263031, "learning_rate": 2.690188422846629e-05, "loss": 0.11438941955566406, "step": 3098 }, { "epoch": 0.43182609907336444, "grad_norm": 0.5268672108650208, "learning_rate": 2.6893035454595363e-05, "loss": 0.12081146240234375, "step": 3099 }, { "epoch": 0.4319654427645788, "grad_norm": 0.8232094049453735, "learning_rate": 2.688418514924932e-05, "loss": 0.11641883850097656, "step": 3100 }, { "epoch": 0.4321047864557932, "grad_norm": 0.41472020745277405, "learning_rate": 2.6875333314394517e-05, "loss": 0.09449195861816406, "step": 3101 }, { "epoch": 0.4322441301470076, "grad_norm": 0.5742473006248474, "learning_rate": 2.6866479951997616e-05, "loss": 0.08580589294433594, "step": 3102 }, { "epoch": 0.43238347383822195, "grad_norm": 0.3602655529975891, "learning_rate": 2.685762506402563e-05, "loss": 0.08642578125, "step": 3103 }, { "epoch": 0.43252281752943633, "grad_norm": 0.8047970533370972, "learning_rate": 2.6848768652445924e-05, "loss": 0.14673709869384766, "step": 3104 }, { "epoch": 0.4326621612206507, "grad_norm": 0.42502206563949585, "learning_rate": 2.6839910719226173e-05, "loss": 0.09339523315429688, "step": 3105 }, { "epoch": 0.43280150491186514, "grad_norm": 0.4490775167942047, "learning_rate": 2.683105126633443e-05, "loss": 0.11094474792480469, "step": 3106 }, { "epoch": 0.4329408486030795, "grad_norm": 0.4816376566886902, "learning_rate": 2.6822190295739038e-05, "loss": 0.09279060363769531, "step": 3107 }, { "epoch": 0.4330801922942939, "grad_norm": 0.6091939806938171, "learning_rate": 2.6813327809408723e-05, "loss": 0.10823249816894531, "step": 3108 }, { "epoch": 0.4332195359855083, "grad_norm": 0.5936306715011597, "learning_rate": 2.680446380931252e-05, "loss": 0.10489463806152344, "step": 3109 }, { "epoch": 0.43335887967672265, "grad_norm": 0.5704661011695862, "learning_rate": 2.6795598297419806e-05, "loss": 0.10129547119140625, "step": 3110 }, { "epoch": 0.43349822336793703, "grad_norm": 0.28526198863983154, "learning_rate": 2.6786731275700297e-05, "loss": 0.07546138763427734, "step": 3111 }, { "epoch": 0.4336375670591514, "grad_norm": 0.40714725852012634, "learning_rate": 2.6777862746124045e-05, "loss": 0.08675289154052734, "step": 3112 }, { "epoch": 0.4337769107503658, "grad_norm": 0.6405243873596191, "learning_rate": 2.6768992710661428e-05, "loss": 0.11686038970947266, "step": 3113 }, { "epoch": 0.43391625444158016, "grad_norm": 0.249418243765831, "learning_rate": 2.676012117128317e-05, "loss": 0.06254196166992188, "step": 3114 }, { "epoch": 0.43405559813279454, "grad_norm": 0.5908002853393555, "learning_rate": 2.6751248129960323e-05, "loss": 0.10704803466796875, "step": 3115 }, { "epoch": 0.4341949418240089, "grad_norm": 0.3314914405345917, "learning_rate": 2.6742373588664276e-05, "loss": 0.09360408782958984, "step": 3116 }, { "epoch": 0.4343342855152233, "grad_norm": 0.24369199573993683, "learning_rate": 2.673349754936675e-05, "loss": 0.06453514099121094, "step": 3117 }, { "epoch": 0.4344736292064377, "grad_norm": 0.3088749945163727, "learning_rate": 2.6724620014039794e-05, "loss": 0.0769195556640625, "step": 3118 }, { "epoch": 0.43461297289765205, "grad_norm": 0.607825517654419, "learning_rate": 2.67157409846558e-05, "loss": 0.13092994689941406, "step": 3119 }, { "epoch": 0.4347523165888664, "grad_norm": 0.5324974060058594, "learning_rate": 2.670686046318748e-05, "loss": 0.09222602844238281, "step": 3120 }, { "epoch": 0.4348916602800808, "grad_norm": 0.48396164178848267, "learning_rate": 2.669797845160788e-05, "loss": 0.10568428039550781, "step": 3121 }, { "epoch": 0.4350310039712952, "grad_norm": 0.2298593819141388, "learning_rate": 2.66890949518904e-05, "loss": 0.06252098083496094, "step": 3122 }, { "epoch": 0.43517034766250956, "grad_norm": 0.4656989574432373, "learning_rate": 2.6680209966008727e-05, "loss": 0.10078048706054688, "step": 3123 }, { "epoch": 0.43530969135372394, "grad_norm": 0.7908186316490173, "learning_rate": 2.6671323495936913e-05, "loss": 0.11881446838378906, "step": 3124 }, { "epoch": 0.4354490350449383, "grad_norm": 0.6851418018341064, "learning_rate": 2.666243554364932e-05, "loss": 0.09810638427734375, "step": 3125 }, { "epoch": 0.43558837873615275, "grad_norm": 0.5078226327896118, "learning_rate": 2.6653546111120664e-05, "loss": 0.1016073226928711, "step": 3126 }, { "epoch": 0.4357277224273671, "grad_norm": 0.38181978464126587, "learning_rate": 2.664465520032596e-05, "loss": 0.08620929718017578, "step": 3127 }, { "epoch": 0.4358670661185815, "grad_norm": 0.8450789451599121, "learning_rate": 2.6635762813240574e-05, "loss": 0.13358211517333984, "step": 3128 }, { "epoch": 0.4360064098097959, "grad_norm": 0.36622416973114014, "learning_rate": 2.662686895184019e-05, "loss": 0.06759262084960938, "step": 3129 }, { "epoch": 0.43614575350101026, "grad_norm": 0.389064222574234, "learning_rate": 2.6617973618100817e-05, "loss": 0.08028411865234375, "step": 3130 }, { "epoch": 0.43628509719222464, "grad_norm": 0.6169992685317993, "learning_rate": 2.6609076813998795e-05, "loss": 0.0865325927734375, "step": 3131 }, { "epoch": 0.436424440883439, "grad_norm": 0.5708268880844116, "learning_rate": 2.6600178541510792e-05, "loss": 0.08453083038330078, "step": 3132 }, { "epoch": 0.4365637845746534, "grad_norm": 0.5644346475601196, "learning_rate": 2.65912788026138e-05, "loss": 0.08947372436523438, "step": 3133 }, { "epoch": 0.43670312826586777, "grad_norm": 0.18803328275680542, "learning_rate": 2.6582377599285143e-05, "loss": 0.0657968521118164, "step": 3134 }, { "epoch": 0.43684247195708215, "grad_norm": 0.30600976943969727, "learning_rate": 2.6573474933502466e-05, "loss": 0.06857681274414062, "step": 3135 }, { "epoch": 0.4369818156482965, "grad_norm": 0.45744991302490234, "learning_rate": 2.6564570807243728e-05, "loss": 0.06744575500488281, "step": 3136 }, { "epoch": 0.4371211593395109, "grad_norm": 0.5566093325614929, "learning_rate": 2.655566522248723e-05, "loss": 0.09020137786865234, "step": 3137 }, { "epoch": 0.4372605030307253, "grad_norm": 0.5168794989585876, "learning_rate": 2.6546758181211593e-05, "loss": 0.10427665710449219, "step": 3138 }, { "epoch": 0.43739984672193966, "grad_norm": 0.6565899848937988, "learning_rate": 2.653784968539574e-05, "loss": 0.10200309753417969, "step": 3139 }, { "epoch": 0.43753919041315403, "grad_norm": 0.5680868625640869, "learning_rate": 2.652893973701896e-05, "loss": 0.1434001922607422, "step": 3140 }, { "epoch": 0.4376785341043684, "grad_norm": 0.4077380299568176, "learning_rate": 2.652002833806082e-05, "loss": 0.08700942993164062, "step": 3141 }, { "epoch": 0.4378178777955828, "grad_norm": 0.27713924646377563, "learning_rate": 2.6511115490501244e-05, "loss": 0.07441902160644531, "step": 3142 }, { "epoch": 0.43795722148679717, "grad_norm": 0.5776276588439941, "learning_rate": 2.650220119632046e-05, "loss": 0.1021881103515625, "step": 3143 }, { "epoch": 0.43809656517801154, "grad_norm": 0.2088954895734787, "learning_rate": 2.649328545749901e-05, "loss": 0.05626487731933594, "step": 3144 }, { "epoch": 0.4382359088692259, "grad_norm": 0.4736892580986023, "learning_rate": 2.648436827601778e-05, "loss": 0.09764480590820312, "step": 3145 }, { "epoch": 0.4383752525604403, "grad_norm": 0.275655597448349, "learning_rate": 2.6475449653857964e-05, "loss": 0.07464981079101562, "step": 3146 }, { "epoch": 0.43851459625165473, "grad_norm": 0.5600424408912659, "learning_rate": 2.6466529593001065e-05, "loss": 0.11087799072265625, "step": 3147 }, { "epoch": 0.4386539399428691, "grad_norm": 0.44646841287612915, "learning_rate": 2.6457608095428925e-05, "loss": 0.08103561401367188, "step": 3148 }, { "epoch": 0.4387932836340835, "grad_norm": 0.37932389974594116, "learning_rate": 2.64486851631237e-05, "loss": 0.07647895812988281, "step": 3149 }, { "epoch": 0.43893262732529786, "grad_norm": 0.5100890398025513, "learning_rate": 2.6439760798067854e-05, "loss": 0.09775733947753906, "step": 3150 }, { "epoch": 0.43907197101651224, "grad_norm": 0.6459035277366638, "learning_rate": 2.6430835002244183e-05, "loss": 0.10758018493652344, "step": 3151 }, { "epoch": 0.4392113147077266, "grad_norm": 0.3352036774158478, "learning_rate": 2.6421907777635793e-05, "loss": 0.08616065979003906, "step": 3152 }, { "epoch": 0.439350658398941, "grad_norm": 0.6230624914169312, "learning_rate": 2.641297912622611e-05, "loss": 0.11764907836914062, "step": 3153 }, { "epoch": 0.4394900020901554, "grad_norm": 0.4858156442642212, "learning_rate": 2.640404904999887e-05, "loss": 0.11335945129394531, "step": 3154 }, { "epoch": 0.43962934578136975, "grad_norm": 0.5678015947341919, "learning_rate": 2.639511755093814e-05, "loss": 0.1326580047607422, "step": 3155 }, { "epoch": 0.43976868947258413, "grad_norm": 0.3392687439918518, "learning_rate": 2.63861846310283e-05, "loss": 0.09939765930175781, "step": 3156 }, { "epoch": 0.4399080331637985, "grad_norm": 0.4600037634372711, "learning_rate": 2.6377250292254023e-05, "loss": 0.09518909454345703, "step": 3157 }, { "epoch": 0.4400473768550129, "grad_norm": 0.4338976740837097, "learning_rate": 2.6368314536600337e-05, "loss": 0.10296249389648438, "step": 3158 }, { "epoch": 0.44018672054622726, "grad_norm": 0.3142938017845154, "learning_rate": 2.6359377366052546e-05, "loss": 0.07747268676757812, "step": 3159 }, { "epoch": 0.44032606423744164, "grad_norm": 0.35865795612335205, "learning_rate": 2.6350438782596293e-05, "loss": 0.06481075286865234, "step": 3160 }, { "epoch": 0.440465407928656, "grad_norm": 0.7369528412818909, "learning_rate": 2.6341498788217527e-05, "loss": 0.13164520263671875, "step": 3161 }, { "epoch": 0.4406047516198704, "grad_norm": 0.2851244807243347, "learning_rate": 2.6332557384902506e-05, "loss": 0.07604408264160156, "step": 3162 }, { "epoch": 0.44074409531108477, "grad_norm": 0.2298494130373001, "learning_rate": 2.6323614574637812e-05, "loss": 0.07416200637817383, "step": 3163 }, { "epoch": 0.44088343900229915, "grad_norm": 0.4025443494319916, "learning_rate": 2.6314670359410332e-05, "loss": 0.08670997619628906, "step": 3164 }, { "epoch": 0.4410227826935135, "grad_norm": 0.515621542930603, "learning_rate": 2.630572474120726e-05, "loss": 0.10008668899536133, "step": 3165 }, { "epoch": 0.4411621263847279, "grad_norm": 0.28302887082099915, "learning_rate": 2.6296777722016108e-05, "loss": 0.07881355285644531, "step": 3166 }, { "epoch": 0.44130147007594234, "grad_norm": 0.509435772895813, "learning_rate": 2.6287829303824713e-05, "loss": 0.13525009155273438, "step": 3167 }, { "epoch": 0.4414408137671567, "grad_norm": 0.6929137110710144, "learning_rate": 2.6278879488621197e-05, "loss": 0.126373291015625, "step": 3168 }, { "epoch": 0.4415801574583711, "grad_norm": 0.42747148871421814, "learning_rate": 2.626992827839401e-05, "loss": 0.09135627746582031, "step": 3169 }, { "epoch": 0.44171950114958547, "grad_norm": 0.470749169588089, "learning_rate": 2.62609756751319e-05, "loss": 0.11327552795410156, "step": 3170 }, { "epoch": 0.44185884484079985, "grad_norm": 0.1951466202735901, "learning_rate": 2.6252021680823937e-05, "loss": 0.06481456756591797, "step": 3171 }, { "epoch": 0.4419981885320142, "grad_norm": 0.2759275436401367, "learning_rate": 2.6243066297459495e-05, "loss": 0.08458518981933594, "step": 3172 }, { "epoch": 0.4421375322232286, "grad_norm": 0.4868941903114319, "learning_rate": 2.623410952702825e-05, "loss": 0.11844444274902344, "step": 3173 }, { "epoch": 0.442276875914443, "grad_norm": 0.3588199019432068, "learning_rate": 2.62251513715202e-05, "loss": 0.10197257995605469, "step": 3174 }, { "epoch": 0.44241621960565736, "grad_norm": 0.4048108160495758, "learning_rate": 2.6216191832925634e-05, "loss": 0.09224319458007812, "step": 3175 }, { "epoch": 0.44255556329687173, "grad_norm": 0.21726645529270172, "learning_rate": 2.620723091323516e-05, "loss": 0.061919212341308594, "step": 3176 }, { "epoch": 0.4426949069880861, "grad_norm": 0.2929801344871521, "learning_rate": 2.6198268614439694e-05, "loss": 0.07763862609863281, "step": 3177 }, { "epoch": 0.4428342506793005, "grad_norm": 0.35297369956970215, "learning_rate": 2.618930493853045e-05, "loss": 0.094024658203125, "step": 3178 }, { "epoch": 0.44297359437051487, "grad_norm": 0.41006776690483093, "learning_rate": 2.618033988749895e-05, "loss": 0.10381317138671875, "step": 3179 }, { "epoch": 0.44311293806172924, "grad_norm": 0.5403784513473511, "learning_rate": 2.6171373463337028e-05, "loss": 0.1233210563659668, "step": 3180 }, { "epoch": 0.4432522817529436, "grad_norm": 0.2832668423652649, "learning_rate": 2.616240566803682e-05, "loss": 0.06217765808105469, "step": 3181 }, { "epoch": 0.443391625444158, "grad_norm": 0.39890509843826294, "learning_rate": 2.6153436503590765e-05, "loss": 0.11121845245361328, "step": 3182 }, { "epoch": 0.4435309691353724, "grad_norm": 0.4453290104866028, "learning_rate": 2.6144465971991596e-05, "loss": 0.1016693115234375, "step": 3183 }, { "epoch": 0.44367031282658675, "grad_norm": 0.3747658133506775, "learning_rate": 2.6135494075232366e-05, "loss": 0.11945152282714844, "step": 3184 }, { "epoch": 0.44380965651780113, "grad_norm": 0.3644007444381714, "learning_rate": 2.612652081530644e-05, "loss": 0.09680938720703125, "step": 3185 }, { "epoch": 0.4439490002090155, "grad_norm": 0.3593016564846039, "learning_rate": 2.6117546194207454e-05, "loss": 0.091400146484375, "step": 3186 }, { "epoch": 0.44408834390022994, "grad_norm": 0.37312301993370056, "learning_rate": 2.6108570213929366e-05, "loss": 0.07148265838623047, "step": 3187 }, { "epoch": 0.4442276875914443, "grad_norm": 0.18219980597496033, "learning_rate": 2.609959287646645e-05, "loss": 0.065521240234375, "step": 3188 }, { "epoch": 0.4443670312826587, "grad_norm": 0.4620741307735443, "learning_rate": 2.6090614183813236e-05, "loss": 0.08932113647460938, "step": 3189 }, { "epoch": 0.4445063749738731, "grad_norm": 0.27734750509262085, "learning_rate": 2.6081634137964615e-05, "loss": 0.07478904724121094, "step": 3190 }, { "epoch": 0.44464571866508745, "grad_norm": 0.47322383522987366, "learning_rate": 2.607265274091573e-05, "loss": 0.10181236267089844, "step": 3191 }, { "epoch": 0.44478506235630183, "grad_norm": 0.8202535510063171, "learning_rate": 2.6063669994662043e-05, "loss": 0.1680927276611328, "step": 3192 }, { "epoch": 0.4449244060475162, "grad_norm": 0.35152265429496765, "learning_rate": 2.605468590119932e-05, "loss": 0.0787191390991211, "step": 3193 }, { "epoch": 0.4450637497387306, "grad_norm": 0.2865127623081207, "learning_rate": 2.6045700462523625e-05, "loss": 0.08188438415527344, "step": 3194 }, { "epoch": 0.44520309342994496, "grad_norm": 0.3493317663669586, "learning_rate": 2.6036713680631312e-05, "loss": 0.07248115539550781, "step": 3195 }, { "epoch": 0.44534243712115934, "grad_norm": 0.34856924414634705, "learning_rate": 2.6027725557519037e-05, "loss": 0.08799171447753906, "step": 3196 }, { "epoch": 0.4454817808123737, "grad_norm": 0.3275778293609619, "learning_rate": 2.601873609518376e-05, "loss": 0.09891700744628906, "step": 3197 }, { "epoch": 0.4456211245035881, "grad_norm": 0.6363255381584167, "learning_rate": 2.600974529562273e-05, "loss": 0.11921310424804688, "step": 3198 }, { "epoch": 0.4457604681948025, "grad_norm": 0.2859232723712921, "learning_rate": 2.6000753160833506e-05, "loss": 0.07242870330810547, "step": 3199 }, { "epoch": 0.44589981188601685, "grad_norm": 0.30385327339172363, "learning_rate": 2.599175969281392e-05, "loss": 0.08919525146484375, "step": 3200 }, { "epoch": 0.44603915557723123, "grad_norm": 0.4143965542316437, "learning_rate": 2.5982764893562137e-05, "loss": 0.10561561584472656, "step": 3201 }, { "epoch": 0.4461784992684456, "grad_norm": 0.35189366340637207, "learning_rate": 2.5973768765076578e-05, "loss": 0.08381462097167969, "step": 3202 }, { "epoch": 0.44631784295966, "grad_norm": 0.4816943109035492, "learning_rate": 2.5964771309355978e-05, "loss": 0.13361740112304688, "step": 3203 }, { "epoch": 0.44645718665087436, "grad_norm": 0.23214365541934967, "learning_rate": 2.595577252839938e-05, "loss": 0.07532501220703125, "step": 3204 }, { "epoch": 0.44659653034208874, "grad_norm": 0.40004539489746094, "learning_rate": 2.594677242420609e-05, "loss": 0.11186981201171875, "step": 3205 }, { "epoch": 0.4467358740333031, "grad_norm": 0.36163726449012756, "learning_rate": 2.593777099877574e-05, "loss": 0.08881187438964844, "step": 3206 }, { "epoch": 0.44687521772451755, "grad_norm": 0.5212134718894958, "learning_rate": 2.592876825410823e-05, "loss": 0.10515213012695312, "step": 3207 }, { "epoch": 0.4470145614157319, "grad_norm": 0.511530876159668, "learning_rate": 2.5919764192203777e-05, "loss": 0.08733177185058594, "step": 3208 }, { "epoch": 0.4471539051069463, "grad_norm": 0.3775254786014557, "learning_rate": 2.591075881506287e-05, "loss": 0.09482669830322266, "step": 3209 }, { "epoch": 0.4472932487981607, "grad_norm": 0.2528819441795349, "learning_rate": 2.5901752124686294e-05, "loss": 0.06436920166015625, "step": 3210 }, { "epoch": 0.44743259248937506, "grad_norm": 0.38951048254966736, "learning_rate": 2.5892744123075138e-05, "loss": 0.090911865234375, "step": 3211 }, { "epoch": 0.44757193618058944, "grad_norm": 0.4155295491218567, "learning_rate": 2.5883734812230773e-05, "loss": 0.09823989868164062, "step": 3212 }, { "epoch": 0.4477112798718038, "grad_norm": 0.27602076530456543, "learning_rate": 2.587472419415486e-05, "loss": 0.08160400390625, "step": 3213 }, { "epoch": 0.4478506235630182, "grad_norm": 0.5807915329933167, "learning_rate": 2.5865712270849354e-05, "loss": 0.1274738311767578, "step": 3214 }, { "epoch": 0.44798996725423257, "grad_norm": 0.31890591979026794, "learning_rate": 2.5856699044316496e-05, "loss": 0.08365440368652344, "step": 3215 }, { "epoch": 0.44812931094544695, "grad_norm": 0.6423806548118591, "learning_rate": 2.5847684516558817e-05, "loss": 0.11694145202636719, "step": 3216 }, { "epoch": 0.4482686546366613, "grad_norm": 0.5192328095436096, "learning_rate": 2.583866868957915e-05, "loss": 0.0835113525390625, "step": 3217 }, { "epoch": 0.4484079983278757, "grad_norm": 0.4879690408706665, "learning_rate": 2.5829651565380598e-05, "loss": 0.13028335571289062, "step": 3218 }, { "epoch": 0.4485473420190901, "grad_norm": 0.9135570526123047, "learning_rate": 2.5820633145966564e-05, "loss": 0.15550994873046875, "step": 3219 }, { "epoch": 0.44868668571030446, "grad_norm": 0.4413624703884125, "learning_rate": 2.581161343334073e-05, "loss": 0.10725593566894531, "step": 3220 }, { "epoch": 0.44882602940151883, "grad_norm": 0.8122366070747375, "learning_rate": 2.5802592429507067e-05, "loss": 0.1569499969482422, "step": 3221 }, { "epoch": 0.4489653730927332, "grad_norm": 0.38626131415367126, "learning_rate": 2.579357013646985e-05, "loss": 0.08245468139648438, "step": 3222 }, { "epoch": 0.4491047167839476, "grad_norm": 0.4608629047870636, "learning_rate": 2.578454655623361e-05, "loss": 0.1023550033569336, "step": 3223 }, { "epoch": 0.44924406047516197, "grad_norm": 0.40798383951187134, "learning_rate": 2.5775521690803197e-05, "loss": 0.08950042724609375, "step": 3224 }, { "epoch": 0.44938340416637634, "grad_norm": 0.28237906098365784, "learning_rate": 2.5766495542183717e-05, "loss": 0.06744194030761719, "step": 3225 }, { "epoch": 0.4495227478575907, "grad_norm": 0.5146475434303284, "learning_rate": 2.575746811238058e-05, "loss": 0.10599708557128906, "step": 3226 }, { "epoch": 0.44966209154880515, "grad_norm": 0.4172583520412445, "learning_rate": 2.574843940339947e-05, "loss": 0.08574104309082031, "step": 3227 }, { "epoch": 0.44980143524001953, "grad_norm": 0.4712950885295868, "learning_rate": 2.5739409417246367e-05, "loss": 0.11024093627929688, "step": 3228 }, { "epoch": 0.4499407789312339, "grad_norm": 0.5308732986450195, "learning_rate": 2.5730378155927524e-05, "loss": 0.10855722427368164, "step": 3229 }, { "epoch": 0.4500801226224483, "grad_norm": 0.7482237219810486, "learning_rate": 2.5721345621449483e-05, "loss": 0.14079952239990234, "step": 3230 }, { "epoch": 0.45021946631366266, "grad_norm": 0.496581107378006, "learning_rate": 2.5712311815819063e-05, "loss": 0.08372879028320312, "step": 3231 }, { "epoch": 0.45035881000487704, "grad_norm": 0.4173150360584259, "learning_rate": 2.570327674104337e-05, "loss": 0.0843057632446289, "step": 3232 }, { "epoch": 0.4504981536960914, "grad_norm": 0.7348178029060364, "learning_rate": 2.56942403991298e-05, "loss": 0.1483745574951172, "step": 3233 }, { "epoch": 0.4506374973873058, "grad_norm": 0.3747888207435608, "learning_rate": 2.568520279208601e-05, "loss": 0.08870506286621094, "step": 3234 }, { "epoch": 0.4507768410785202, "grad_norm": 0.39361846446990967, "learning_rate": 2.5676163921919955e-05, "loss": 0.08873748779296875, "step": 3235 }, { "epoch": 0.45091618476973455, "grad_norm": 0.4448181986808777, "learning_rate": 2.566712379063987e-05, "loss": 0.11276054382324219, "step": 3236 }, { "epoch": 0.45105552846094893, "grad_norm": 0.5968166589736938, "learning_rate": 2.565808240025425e-05, "loss": 0.13665008544921875, "step": 3237 }, { "epoch": 0.4511948721521633, "grad_norm": 0.27689939737319946, "learning_rate": 2.5649039752771914e-05, "loss": 0.06540107727050781, "step": 3238 }, { "epoch": 0.4513342158433777, "grad_norm": 0.5549759864807129, "learning_rate": 2.5639995850201902e-05, "loss": 0.11178779602050781, "step": 3239 }, { "epoch": 0.45147355953459206, "grad_norm": 0.2442561835050583, "learning_rate": 2.5630950694553582e-05, "loss": 0.0769805908203125, "step": 3240 }, { "epoch": 0.45161290322580644, "grad_norm": 0.4681416451931, "learning_rate": 2.5621904287836568e-05, "loss": 0.11967658996582031, "step": 3241 }, { "epoch": 0.4517522469170208, "grad_norm": 0.3008041977882385, "learning_rate": 2.5612856632060776e-05, "loss": 0.07324504852294922, "step": 3242 }, { "epoch": 0.4518915906082352, "grad_norm": 0.5438188910484314, "learning_rate": 2.5603807729236387e-05, "loss": 0.11035728454589844, "step": 3243 }, { "epoch": 0.45203093429944957, "grad_norm": 0.485194593667984, "learning_rate": 2.559475758137385e-05, "loss": 0.0882568359375, "step": 3244 }, { "epoch": 0.45217027799066395, "grad_norm": 0.5395824909210205, "learning_rate": 2.5585706190483914e-05, "loss": 0.10652351379394531, "step": 3245 }, { "epoch": 0.4523096216818783, "grad_norm": 0.4361737370491028, "learning_rate": 2.5576653558577588e-05, "loss": 0.10880851745605469, "step": 3246 }, { "epoch": 0.45244896537309276, "grad_norm": 0.31356266140937805, "learning_rate": 2.556759968766615e-05, "loss": 0.09412574768066406, "step": 3247 }, { "epoch": 0.45258830906430714, "grad_norm": 0.3259497582912445, "learning_rate": 2.5558544579761177e-05, "loss": 0.08284187316894531, "step": 3248 }, { "epoch": 0.4527276527555215, "grad_norm": 0.4353976547718048, "learning_rate": 2.5549488236874506e-05, "loss": 0.07864761352539062, "step": 3249 }, { "epoch": 0.4528669964467359, "grad_norm": 0.7550287246704102, "learning_rate": 2.554043066101824e-05, "loss": 0.10595560073852539, "step": 3250 }, { "epoch": 0.45300634013795027, "grad_norm": 0.4789970815181732, "learning_rate": 2.5531371854204773e-05, "loss": 0.08000564575195312, "step": 3251 }, { "epoch": 0.45314568382916465, "grad_norm": 0.4191497564315796, "learning_rate": 2.5522311818446762e-05, "loss": 0.08799362182617188, "step": 3252 }, { "epoch": 0.453285027520379, "grad_norm": 0.6698973178863525, "learning_rate": 2.5513250555757143e-05, "loss": 0.10775947570800781, "step": 3253 }, { "epoch": 0.4534243712115934, "grad_norm": 0.8357587456703186, "learning_rate": 2.5504188068149126e-05, "loss": 0.1771373748779297, "step": 3254 }, { "epoch": 0.4535637149028078, "grad_norm": 0.3086981177330017, "learning_rate": 2.5495124357636174e-05, "loss": 0.07584953308105469, "step": 3255 }, { "epoch": 0.45370305859402216, "grad_norm": 0.37314149737358093, "learning_rate": 2.5486059426232052e-05, "loss": 0.0926198959350586, "step": 3256 }, { "epoch": 0.45384240228523653, "grad_norm": 0.5016579031944275, "learning_rate": 2.547699327595077e-05, "loss": 0.12589454650878906, "step": 3257 }, { "epoch": 0.4539817459764509, "grad_norm": 0.3545801043510437, "learning_rate": 2.5467925908806622e-05, "loss": 0.10574722290039062, "step": 3258 }, { "epoch": 0.4541210896676653, "grad_norm": 0.24271726608276367, "learning_rate": 2.5458857326814178e-05, "loss": 0.07344865798950195, "step": 3259 }, { "epoch": 0.45426043335887967, "grad_norm": 0.3427760899066925, "learning_rate": 2.5449787531988258e-05, "loss": 0.10113716125488281, "step": 3260 }, { "epoch": 0.45439977705009404, "grad_norm": 0.33136647939682007, "learning_rate": 2.5440716526343972e-05, "loss": 0.07993078231811523, "step": 3261 }, { "epoch": 0.4545391207413084, "grad_norm": 0.45315349102020264, "learning_rate": 2.543164431189669e-05, "loss": 0.10439491271972656, "step": 3262 }, { "epoch": 0.4546784644325228, "grad_norm": 0.43442025780677795, "learning_rate": 2.5422570890662046e-05, "loss": 0.10935020446777344, "step": 3263 }, { "epoch": 0.4548178081237372, "grad_norm": 0.4548247158527374, "learning_rate": 2.541349626465595e-05, "loss": 0.1125335693359375, "step": 3264 }, { "epoch": 0.45495715181495155, "grad_norm": 0.5460521578788757, "learning_rate": 2.5404420435894578e-05, "loss": 0.14475631713867188, "step": 3265 }, { "epoch": 0.45509649550616593, "grad_norm": 0.3531234860420227, "learning_rate": 2.539534340639436e-05, "loss": 0.08663749694824219, "step": 3266 }, { "epoch": 0.45523583919738037, "grad_norm": 0.5236177444458008, "learning_rate": 2.538626517817203e-05, "loss": 0.08452033996582031, "step": 3267 }, { "epoch": 0.45537518288859474, "grad_norm": 0.7299370169639587, "learning_rate": 2.5377185753244537e-05, "loss": 0.13871002197265625, "step": 3268 }, { "epoch": 0.4555145265798091, "grad_norm": 0.4558258056640625, "learning_rate": 2.5368105133629143e-05, "loss": 0.10540771484375, "step": 3269 }, { "epoch": 0.4556538702710235, "grad_norm": 0.3420353829860687, "learning_rate": 2.5359023321343336e-05, "loss": 0.07732009887695312, "step": 3270 }, { "epoch": 0.4557932139622379, "grad_norm": 0.4567525088787079, "learning_rate": 2.5349940318404895e-05, "loss": 0.060706138610839844, "step": 3271 }, { "epoch": 0.45593255765345225, "grad_norm": 0.4598923623561859, "learning_rate": 2.5340856126831864e-05, "loss": 0.10048866271972656, "step": 3272 }, { "epoch": 0.45607190134466663, "grad_norm": 0.3988206386566162, "learning_rate": 2.5331770748642527e-05, "loss": 0.09199714660644531, "step": 3273 }, { "epoch": 0.456211245035881, "grad_norm": 0.7231907844543457, "learning_rate": 2.5322684185855458e-05, "loss": 0.1343231201171875, "step": 3274 }, { "epoch": 0.4563505887270954, "grad_norm": 0.4518740773200989, "learning_rate": 2.5313596440489483e-05, "loss": 0.09477806091308594, "step": 3275 }, { "epoch": 0.45648993241830976, "grad_norm": 0.5364910364151001, "learning_rate": 2.530450751456369e-05, "loss": 0.08585739135742188, "step": 3276 }, { "epoch": 0.45662927610952414, "grad_norm": 0.36743879318237305, "learning_rate": 2.5295417410097437e-05, "loss": 0.07013893127441406, "step": 3277 }, { "epoch": 0.4567686198007385, "grad_norm": 0.6568399667739868, "learning_rate": 2.5286326129110325e-05, "loss": 0.11915779113769531, "step": 3278 }, { "epoch": 0.4569079634919529, "grad_norm": 0.49057039618492126, "learning_rate": 2.5277233673622236e-05, "loss": 0.10801506042480469, "step": 3279 }, { "epoch": 0.4570473071831673, "grad_norm": 0.5719925761222839, "learning_rate": 2.526814004565331e-05, "loss": 0.09646224975585938, "step": 3280 }, { "epoch": 0.45718665087438165, "grad_norm": 0.2587944269180298, "learning_rate": 2.5259045247223933e-05, "loss": 0.07669258117675781, "step": 3281 }, { "epoch": 0.45732599456559603, "grad_norm": 0.5176771879196167, "learning_rate": 2.524994928035477e-05, "loss": 0.09971141815185547, "step": 3282 }, { "epoch": 0.4574653382568104, "grad_norm": 0.4955536723136902, "learning_rate": 2.5240852147066742e-05, "loss": 0.10239791870117188, "step": 3283 }, { "epoch": 0.4576046819480248, "grad_norm": 0.5372061729431152, "learning_rate": 2.5231753849381013e-05, "loss": 0.13330459594726562, "step": 3284 }, { "epoch": 0.45774402563923916, "grad_norm": 0.43841353058815, "learning_rate": 2.5222654389319025e-05, "loss": 0.11880302429199219, "step": 3285 }, { "epoch": 0.45788336933045354, "grad_norm": 0.42088252305984497, "learning_rate": 2.5213553768902466e-05, "loss": 0.10557174682617188, "step": 3286 }, { "epoch": 0.45802271302166797, "grad_norm": 0.3761953115463257, "learning_rate": 2.520445199015328e-05, "loss": 0.08219718933105469, "step": 3287 }, { "epoch": 0.45816205671288235, "grad_norm": 0.31508657336235046, "learning_rate": 2.5195349055093693e-05, "loss": 0.06741714477539062, "step": 3288 }, { "epoch": 0.4583014004040967, "grad_norm": 0.4324892461299896, "learning_rate": 2.5186244965746146e-05, "loss": 0.07382488250732422, "step": 3289 }, { "epoch": 0.4584407440953111, "grad_norm": 0.20893768966197968, "learning_rate": 2.5177139724133376e-05, "loss": 0.05457782745361328, "step": 3290 }, { "epoch": 0.4585800877865255, "grad_norm": 0.4441015422344208, "learning_rate": 2.5168033332278358e-05, "loss": 0.10797500610351562, "step": 3291 }, { "epoch": 0.45871943147773986, "grad_norm": 0.31421732902526855, "learning_rate": 2.5158925792204317e-05, "loss": 0.07995796203613281, "step": 3292 }, { "epoch": 0.45885877516895424, "grad_norm": 0.3070082664489746, "learning_rate": 2.514981710593475e-05, "loss": 0.08426856994628906, "step": 3293 }, { "epoch": 0.4589981188601686, "grad_norm": 0.3476617634296417, "learning_rate": 2.5140707275493394e-05, "loss": 0.11507225036621094, "step": 3294 }, { "epoch": 0.459137462551383, "grad_norm": 0.33254557847976685, "learning_rate": 2.5131596302904245e-05, "loss": 0.08539772033691406, "step": 3295 }, { "epoch": 0.45927680624259737, "grad_norm": 0.2930243909358978, "learning_rate": 2.5122484190191553e-05, "loss": 0.07838249206542969, "step": 3296 }, { "epoch": 0.45941614993381175, "grad_norm": 0.4825838804244995, "learning_rate": 2.511337093937982e-05, "loss": 0.10203170776367188, "step": 3297 }, { "epoch": 0.4595554936250261, "grad_norm": 0.3848988711833954, "learning_rate": 2.510425655249381e-05, "loss": 0.08478736877441406, "step": 3298 }, { "epoch": 0.4596948373162405, "grad_norm": 0.4111996293067932, "learning_rate": 2.509514103155852e-05, "loss": 0.0934610366821289, "step": 3299 }, { "epoch": 0.4598341810074549, "grad_norm": 0.6232131123542786, "learning_rate": 2.5086024378599217e-05, "loss": 0.11145782470703125, "step": 3300 }, { "epoch": 0.45997352469866926, "grad_norm": 0.2702008783817291, "learning_rate": 2.5076906595641422e-05, "loss": 0.06834125518798828, "step": 3301 }, { "epoch": 0.46011286838988363, "grad_norm": 0.43404415249824524, "learning_rate": 2.5067787684710886e-05, "loss": 0.10466384887695312, "step": 3302 }, { "epoch": 0.460252212081098, "grad_norm": 0.47960561513900757, "learning_rate": 2.5058667647833615e-05, "loss": 0.09465217590332031, "step": 3303 }, { "epoch": 0.4603915557723124, "grad_norm": 0.2939611077308655, "learning_rate": 2.50495464870359e-05, "loss": 0.07997322082519531, "step": 3304 }, { "epoch": 0.46053089946352677, "grad_norm": 0.4728347063064575, "learning_rate": 2.5040424204344226e-05, "loss": 0.09991073608398438, "step": 3305 }, { "epoch": 0.46067024315474114, "grad_norm": 0.44260019063949585, "learning_rate": 2.5031300801785374e-05, "loss": 0.09527587890625, "step": 3306 }, { "epoch": 0.4608095868459556, "grad_norm": 0.2764643430709839, "learning_rate": 2.502217628138635e-05, "loss": 0.07491493225097656, "step": 3307 }, { "epoch": 0.46094893053716995, "grad_norm": 0.30959954857826233, "learning_rate": 2.5013050645174414e-05, "loss": 0.08568954467773438, "step": 3308 }, { "epoch": 0.46108827422838433, "grad_norm": 0.383931964635849, "learning_rate": 2.5003923895177073e-05, "loss": 0.10343742370605469, "step": 3309 }, { "epoch": 0.4612276179195987, "grad_norm": 0.6403789520263672, "learning_rate": 2.499479603342209e-05, "loss": 0.10701179504394531, "step": 3310 }, { "epoch": 0.4613669616108131, "grad_norm": 0.33281171321868896, "learning_rate": 2.4985667061937458e-05, "loss": 0.07886123657226562, "step": 3311 }, { "epoch": 0.46150630530202746, "grad_norm": 0.5058579444885254, "learning_rate": 2.4976536982751426e-05, "loss": 0.12371253967285156, "step": 3312 }, { "epoch": 0.46164564899324184, "grad_norm": 0.365314245223999, "learning_rate": 2.4967405797892498e-05, "loss": 0.07683372497558594, "step": 3313 }, { "epoch": 0.4617849926844562, "grad_norm": 0.5600185990333557, "learning_rate": 2.4958273509389406e-05, "loss": 0.1122598648071289, "step": 3314 }, { "epoch": 0.4619243363756706, "grad_norm": 0.3521411418914795, "learning_rate": 2.4949140119271144e-05, "loss": 0.09858131408691406, "step": 3315 }, { "epoch": 0.462063680066885, "grad_norm": 0.3245527744293213, "learning_rate": 2.4940005629566927e-05, "loss": 0.08791160583496094, "step": 3316 }, { "epoch": 0.46220302375809935, "grad_norm": 0.4042893052101135, "learning_rate": 2.4930870042306253e-05, "loss": 0.08312606811523438, "step": 3317 }, { "epoch": 0.46234236744931373, "grad_norm": 0.4990641176700592, "learning_rate": 2.4921733359518824e-05, "loss": 0.11503791809082031, "step": 3318 }, { "epoch": 0.4624817111405281, "grad_norm": 0.33785480260849, "learning_rate": 2.4912595583234608e-05, "loss": 0.08199119567871094, "step": 3319 }, { "epoch": 0.4626210548317425, "grad_norm": 0.48627999424934387, "learning_rate": 2.4903456715483817e-05, "loss": 0.10497856140136719, "step": 3320 }, { "epoch": 0.46276039852295686, "grad_norm": 0.2803114354610443, "learning_rate": 2.489431675829688e-05, "loss": 0.06209278106689453, "step": 3321 }, { "epoch": 0.46289974221417124, "grad_norm": 0.3343244194984436, "learning_rate": 2.488517571370451e-05, "loss": 0.08159255981445312, "step": 3322 }, { "epoch": 0.4630390859053856, "grad_norm": 0.5613617300987244, "learning_rate": 2.487603358373762e-05, "loss": 0.12671852111816406, "step": 3323 }, { "epoch": 0.4631784295966, "grad_norm": 0.4465530812740326, "learning_rate": 2.486689037042739e-05, "loss": 0.10179710388183594, "step": 3324 }, { "epoch": 0.46331777328781437, "grad_norm": 0.17726410925388336, "learning_rate": 2.485774607580523e-05, "loss": 0.05734825134277344, "step": 3325 }, { "epoch": 0.46345711697902875, "grad_norm": 0.46700364351272583, "learning_rate": 2.4848600701902804e-05, "loss": 0.12024307250976562, "step": 3326 }, { "epoch": 0.4635964606702432, "grad_norm": 0.685096025466919, "learning_rate": 2.483945425075199e-05, "loss": 0.09981346130371094, "step": 3327 }, { "epoch": 0.46373580436145756, "grad_norm": 0.32713958621025085, "learning_rate": 2.4830306724384933e-05, "loss": 0.0828104019165039, "step": 3328 }, { "epoch": 0.46387514805267194, "grad_norm": 0.4482177793979645, "learning_rate": 2.4821158124834e-05, "loss": 0.10051441192626953, "step": 3329 }, { "epoch": 0.4640144917438863, "grad_norm": 0.26099684834480286, "learning_rate": 2.4812008454131796e-05, "loss": 0.07974624633789062, "step": 3330 }, { "epoch": 0.4641538354351007, "grad_norm": 0.2743476629257202, "learning_rate": 2.4802857714311177e-05, "loss": 0.07523345947265625, "step": 3331 }, { "epoch": 0.46429317912631507, "grad_norm": 0.576599657535553, "learning_rate": 2.479370590740522e-05, "loss": 0.12535667419433594, "step": 3332 }, { "epoch": 0.46443252281752945, "grad_norm": 0.23570410907268524, "learning_rate": 2.478455303544726e-05, "loss": 0.0752716064453125, "step": 3333 }, { "epoch": 0.4645718665087438, "grad_norm": 0.2721812427043915, "learning_rate": 2.4775399100470837e-05, "loss": 0.07195281982421875, "step": 3334 }, { "epoch": 0.4647112101999582, "grad_norm": 0.28208863735198975, "learning_rate": 2.4766244104509775e-05, "loss": 0.08131122589111328, "step": 3335 }, { "epoch": 0.4648505538911726, "grad_norm": 0.47009608149528503, "learning_rate": 2.475708804959808e-05, "loss": 0.11071634292602539, "step": 3336 }, { "epoch": 0.46498989758238696, "grad_norm": 0.423000305891037, "learning_rate": 2.474793093777002e-05, "loss": 0.08918476104736328, "step": 3337 }, { "epoch": 0.46512924127360133, "grad_norm": 0.4123391807079315, "learning_rate": 2.473877277106011e-05, "loss": 0.11799430847167969, "step": 3338 }, { "epoch": 0.4652685849648157, "grad_norm": 0.42980530858039856, "learning_rate": 2.4729613551503074e-05, "loss": 0.0882415771484375, "step": 3339 }, { "epoch": 0.4654079286560301, "grad_norm": 0.8886896967887878, "learning_rate": 2.472045328113389e-05, "loss": 0.11425971984863281, "step": 3340 }, { "epoch": 0.46554727234724447, "grad_norm": 0.33905407786369324, "learning_rate": 2.4711291961987756e-05, "loss": 0.11098670959472656, "step": 3341 }, { "epoch": 0.46568661603845884, "grad_norm": 0.4354941248893738, "learning_rate": 2.470212959610011e-05, "loss": 0.09681320190429688, "step": 3342 }, { "epoch": 0.4658259597296732, "grad_norm": 0.34834063053131104, "learning_rate": 2.4692966185506615e-05, "loss": 0.10825729370117188, "step": 3343 }, { "epoch": 0.4659653034208876, "grad_norm": 0.6264306902885437, "learning_rate": 2.468380173224318e-05, "loss": 0.14245986938476562, "step": 3344 }, { "epoch": 0.466104647112102, "grad_norm": 0.3684813380241394, "learning_rate": 2.467463623834593e-05, "loss": 0.08417892456054688, "step": 3345 }, { "epoch": 0.46624399080331635, "grad_norm": 0.39931419491767883, "learning_rate": 2.4665469705851232e-05, "loss": 0.09123802185058594, "step": 3346 }, { "epoch": 0.46638333449453073, "grad_norm": 0.21815811097621918, "learning_rate": 2.465630213679568e-05, "loss": 0.06405448913574219, "step": 3347 }, { "epoch": 0.46652267818574517, "grad_norm": 0.5298275947570801, "learning_rate": 2.4647133533216097e-05, "loss": 0.11026382446289062, "step": 3348 }, { "epoch": 0.46666202187695954, "grad_norm": 0.4270552098751068, "learning_rate": 2.4637963897149545e-05, "loss": 0.08842086791992188, "step": 3349 }, { "epoch": 0.4668013655681739, "grad_norm": 0.37219056487083435, "learning_rate": 2.4628793230633293e-05, "loss": 0.08679008483886719, "step": 3350 }, { "epoch": 0.4669407092593883, "grad_norm": 0.43733301758766174, "learning_rate": 2.461962153570487e-05, "loss": 0.09819269180297852, "step": 3351 }, { "epoch": 0.4670800529506027, "grad_norm": 0.5456830263137817, "learning_rate": 2.4610448814402008e-05, "loss": 0.12476158142089844, "step": 3352 }, { "epoch": 0.46721939664181705, "grad_norm": 0.2934306263923645, "learning_rate": 2.4601275068762673e-05, "loss": 0.06586647033691406, "step": 3353 }, { "epoch": 0.46735874033303143, "grad_norm": 0.38268908858299255, "learning_rate": 2.459210030082507e-05, "loss": 0.09622001647949219, "step": 3354 }, { "epoch": 0.4674980840242458, "grad_norm": 0.39008960127830505, "learning_rate": 2.4582924512627616e-05, "loss": 0.083526611328125, "step": 3355 }, { "epoch": 0.4676374277154602, "grad_norm": 0.3850474953651428, "learning_rate": 2.4573747706208966e-05, "loss": 0.08421707153320312, "step": 3356 }, { "epoch": 0.46777677140667456, "grad_norm": 0.475953072309494, "learning_rate": 2.4564569883608003e-05, "loss": 0.09879684448242188, "step": 3357 }, { "epoch": 0.46791611509788894, "grad_norm": 0.4203357696533203, "learning_rate": 2.455539104686382e-05, "loss": 0.07872486114501953, "step": 3358 }, { "epoch": 0.4680554587891033, "grad_norm": 0.34013116359710693, "learning_rate": 2.4546211198015746e-05, "loss": 0.09143447875976562, "step": 3359 }, { "epoch": 0.4681948024803177, "grad_norm": 0.19097289443016052, "learning_rate": 2.4537030339103333e-05, "loss": 0.058040618896484375, "step": 3360 }, { "epoch": 0.4683341461715321, "grad_norm": 0.5390450954437256, "learning_rate": 2.4527848472166364e-05, "loss": 0.09769821166992188, "step": 3361 }, { "epoch": 0.46847348986274645, "grad_norm": 0.36776474118232727, "learning_rate": 2.4518665599244836e-05, "loss": 0.07756280899047852, "step": 3362 }, { "epoch": 0.46861283355396083, "grad_norm": 0.4971564710140228, "learning_rate": 2.450948172237898e-05, "loss": 0.1048574447631836, "step": 3363 }, { "epoch": 0.4687521772451752, "grad_norm": 0.2971155643463135, "learning_rate": 2.450029684360923e-05, "loss": 0.08499717712402344, "step": 3364 }, { "epoch": 0.4688915209363896, "grad_norm": 0.4844661355018616, "learning_rate": 2.449111096497627e-05, "loss": 0.1285991668701172, "step": 3365 }, { "epoch": 0.46903086462760396, "grad_norm": 0.6175223588943481, "learning_rate": 2.448192408852098e-05, "loss": 0.11851310729980469, "step": 3366 }, { "epoch": 0.46917020831881834, "grad_norm": 0.26451897621154785, "learning_rate": 2.4472736216284495e-05, "loss": 0.06792259216308594, "step": 3367 }, { "epoch": 0.46930955201003277, "grad_norm": 0.26346156001091003, "learning_rate": 2.4463547350308123e-05, "loss": 0.069915771484375, "step": 3368 }, { "epoch": 0.46944889570124715, "grad_norm": 0.3335268497467041, "learning_rate": 2.4454357492633444e-05, "loss": 0.09439277648925781, "step": 3369 }, { "epoch": 0.4695882393924615, "grad_norm": 0.6052480340003967, "learning_rate": 2.4445166645302223e-05, "loss": 0.11198806762695312, "step": 3370 }, { "epoch": 0.4697275830836759, "grad_norm": 0.4253769814968109, "learning_rate": 2.4435974810356455e-05, "loss": 0.09080696105957031, "step": 3371 }, { "epoch": 0.4698669267748903, "grad_norm": 0.5820679068565369, "learning_rate": 2.4426781989838365e-05, "loss": 0.08974742889404297, "step": 3372 }, { "epoch": 0.47000627046610466, "grad_norm": 0.3703373074531555, "learning_rate": 2.4417588185790374e-05, "loss": 0.08946990966796875, "step": 3373 }, { "epoch": 0.47014561415731904, "grad_norm": 0.3728737533092499, "learning_rate": 2.4408393400255146e-05, "loss": 0.07477188110351562, "step": 3374 }, { "epoch": 0.4702849578485334, "grad_norm": 0.4379877746105194, "learning_rate": 2.4399197635275554e-05, "loss": 0.09031295776367188, "step": 3375 }, { "epoch": 0.4704243015397478, "grad_norm": 0.46233823895454407, "learning_rate": 2.4390000892894677e-05, "loss": 0.09814929962158203, "step": 3376 }, { "epoch": 0.47056364523096217, "grad_norm": 0.39544522762298584, "learning_rate": 2.4380803175155833e-05, "loss": 0.087890625, "step": 3377 }, { "epoch": 0.47070298892217655, "grad_norm": 0.31529501080513, "learning_rate": 2.4371604484102535e-05, "loss": 0.0864248275756836, "step": 3378 }, { "epoch": 0.4708423326133909, "grad_norm": 0.497875839471817, "learning_rate": 2.4362404821778528e-05, "loss": 0.1032094955444336, "step": 3379 }, { "epoch": 0.4709816763046053, "grad_norm": 0.517224907875061, "learning_rate": 2.4353204190227767e-05, "loss": 0.1325225830078125, "step": 3380 }, { "epoch": 0.4711210199958197, "grad_norm": 0.4223402738571167, "learning_rate": 2.4344002591494415e-05, "loss": 0.09404182434082031, "step": 3381 }, { "epoch": 0.47126036368703406, "grad_norm": 0.24303072690963745, "learning_rate": 2.433480002762286e-05, "loss": 0.07674026489257812, "step": 3382 }, { "epoch": 0.47139970737824843, "grad_norm": 0.265598863363266, "learning_rate": 2.4325596500657714e-05, "loss": 0.06712627410888672, "step": 3383 }, { "epoch": 0.4715390510694628, "grad_norm": 0.33699771761894226, "learning_rate": 2.431639201264377e-05, "loss": 0.09324264526367188, "step": 3384 }, { "epoch": 0.4716783947606772, "grad_norm": 0.20662163197994232, "learning_rate": 2.4307186565626073e-05, "loss": 0.07404327392578125, "step": 3385 }, { "epoch": 0.47181773845189157, "grad_norm": 0.9201899766921997, "learning_rate": 2.429798016164986e-05, "loss": 0.1257781982421875, "step": 3386 }, { "epoch": 0.47195708214310594, "grad_norm": 0.3018714487552643, "learning_rate": 2.428877280276057e-05, "loss": 0.08494949340820312, "step": 3387 }, { "epoch": 0.4720964258343204, "grad_norm": 0.3003727197647095, "learning_rate": 2.4279564491003883e-05, "loss": 0.0684213638305664, "step": 3388 }, { "epoch": 0.47223576952553475, "grad_norm": 0.6570353507995605, "learning_rate": 2.4270355228425664e-05, "loss": 0.10650253295898438, "step": 3389 }, { "epoch": 0.47237511321674913, "grad_norm": 0.4527633786201477, "learning_rate": 2.4261145017072014e-05, "loss": 0.10768508911132812, "step": 3390 }, { "epoch": 0.4725144569079635, "grad_norm": 0.6783212423324585, "learning_rate": 2.425193385898922e-05, "loss": 0.12325477600097656, "step": 3391 }, { "epoch": 0.4726538005991779, "grad_norm": 0.2917686998844147, "learning_rate": 2.42427217562238e-05, "loss": 0.06637763977050781, "step": 3392 }, { "epoch": 0.47279314429039226, "grad_norm": 0.38302546739578247, "learning_rate": 2.4233508710822466e-05, "loss": 0.10655784606933594, "step": 3393 }, { "epoch": 0.47293248798160664, "grad_norm": 0.5236696600914001, "learning_rate": 2.4224294724832152e-05, "loss": 0.0955648422241211, "step": 3394 }, { "epoch": 0.473071831672821, "grad_norm": 0.41216036677360535, "learning_rate": 2.421507980029999e-05, "loss": 0.09142303466796875, "step": 3395 }, { "epoch": 0.4732111753640354, "grad_norm": 0.32322224974632263, "learning_rate": 2.4205863939273328e-05, "loss": 0.0830078125, "step": 3396 }, { "epoch": 0.4733505190552498, "grad_norm": 0.2783767282962799, "learning_rate": 2.4196647143799723e-05, "loss": 0.08205986022949219, "step": 3397 }, { "epoch": 0.47348986274646415, "grad_norm": 0.505244791507721, "learning_rate": 2.4187429415926927e-05, "loss": 0.08932113647460938, "step": 3398 }, { "epoch": 0.47362920643767853, "grad_norm": 0.4565367102622986, "learning_rate": 2.4178210757702924e-05, "loss": 0.10303115844726562, "step": 3399 }, { "epoch": 0.4737685501288929, "grad_norm": 0.6338425278663635, "learning_rate": 2.4168991171175872e-05, "loss": 0.15367507934570312, "step": 3400 }, { "epoch": 0.4739078938201073, "grad_norm": 0.36610090732574463, "learning_rate": 2.415977065839417e-05, "loss": 0.07883644104003906, "step": 3401 }, { "epoch": 0.47404723751132166, "grad_norm": 0.5593715310096741, "learning_rate": 2.4150549221406395e-05, "loss": 0.10924339294433594, "step": 3402 }, { "epoch": 0.47418658120253604, "grad_norm": 1.0157338380813599, "learning_rate": 2.4141326862261332e-05, "loss": 0.16391944885253906, "step": 3403 }, { "epoch": 0.4743259248937504, "grad_norm": 0.3761114180088043, "learning_rate": 2.4132103583008008e-05, "loss": 0.09770870208740234, "step": 3404 }, { "epoch": 0.4744652685849648, "grad_norm": 0.26143428683280945, "learning_rate": 2.4122879385695587e-05, "loss": 0.07155418395996094, "step": 3405 }, { "epoch": 0.47460461227617917, "grad_norm": 0.4475577175617218, "learning_rate": 2.41136542723735e-05, "loss": 0.08676719665527344, "step": 3406 }, { "epoch": 0.47474395596739355, "grad_norm": 1.1987353563308716, "learning_rate": 2.410442824509135e-05, "loss": 0.13416099548339844, "step": 3407 }, { "epoch": 0.474883299658608, "grad_norm": 0.32180583477020264, "learning_rate": 2.409520130589895e-05, "loss": 0.08172798156738281, "step": 3408 }, { "epoch": 0.47502264334982236, "grad_norm": 0.2820810377597809, "learning_rate": 2.4085973456846318e-05, "loss": 0.07158565521240234, "step": 3409 }, { "epoch": 0.47516198704103674, "grad_norm": 0.20304766297340393, "learning_rate": 2.4076744699983663e-05, "loss": 0.06934547424316406, "step": 3410 }, { "epoch": 0.4753013307322511, "grad_norm": 0.4546390175819397, "learning_rate": 2.4067515037361408e-05, "loss": 0.10280418395996094, "step": 3411 }, { "epoch": 0.4754406744234655, "grad_norm": 0.35618364810943604, "learning_rate": 2.405828447103018e-05, "loss": 0.07711029052734375, "step": 3412 }, { "epoch": 0.47558001811467987, "grad_norm": 0.7602665424346924, "learning_rate": 2.4049053003040795e-05, "loss": 0.12159347534179688, "step": 3413 }, { "epoch": 0.47571936180589425, "grad_norm": 0.5211936831474304, "learning_rate": 2.4039820635444264e-05, "loss": 0.1286754608154297, "step": 3414 }, { "epoch": 0.4758587054971086, "grad_norm": 0.35267195105552673, "learning_rate": 2.4030587370291835e-05, "loss": 0.0835113525390625, "step": 3415 }, { "epoch": 0.475998049188323, "grad_norm": 0.40632766485214233, "learning_rate": 2.4021353209634896e-05, "loss": 0.08858489990234375, "step": 3416 }, { "epoch": 0.4761373928795374, "grad_norm": 0.3793032765388489, "learning_rate": 2.4012118155525094e-05, "loss": 0.10012435913085938, "step": 3417 }, { "epoch": 0.47627673657075176, "grad_norm": 0.36834463477134705, "learning_rate": 2.4002882210014227e-05, "loss": 0.07397651672363281, "step": 3418 }, { "epoch": 0.47641608026196614, "grad_norm": 0.4297516644001007, "learning_rate": 2.3993645375154328e-05, "loss": 0.10202217102050781, "step": 3419 }, { "epoch": 0.4765554239531805, "grad_norm": 0.2647695243358612, "learning_rate": 2.3984407652997607e-05, "loss": 0.06284141540527344, "step": 3420 }, { "epoch": 0.4766947676443949, "grad_norm": 0.21692104637622833, "learning_rate": 2.397516904559646e-05, "loss": 0.05802297592163086, "step": 3421 }, { "epoch": 0.47683411133560927, "grad_norm": 0.3594118654727936, "learning_rate": 2.3965929555003512e-05, "loss": 0.08323097229003906, "step": 3422 }, { "epoch": 0.47697345502682365, "grad_norm": 0.4874275326728821, "learning_rate": 2.3956689183271557e-05, "loss": 0.09844779968261719, "step": 3423 }, { "epoch": 0.477112798718038, "grad_norm": 0.5953719615936279, "learning_rate": 2.39474479324536e-05, "loss": 0.11598968505859375, "step": 3424 }, { "epoch": 0.4772521424092524, "grad_norm": 0.47271066904067993, "learning_rate": 2.3938205804602835e-05, "loss": 0.0986328125, "step": 3425 }, { "epoch": 0.4773914861004668, "grad_norm": 0.47366422414779663, "learning_rate": 2.392896280177265e-05, "loss": 0.08501434326171875, "step": 3426 }, { "epoch": 0.47753082979168116, "grad_norm": 0.482148140668869, "learning_rate": 2.391971892601663e-05, "loss": 0.11348724365234375, "step": 3427 }, { "epoch": 0.4776701734828956, "grad_norm": 0.8253277540206909, "learning_rate": 2.3910474179388557e-05, "loss": 0.18464088439941406, "step": 3428 }, { "epoch": 0.47780951717410997, "grad_norm": 0.31773489713668823, "learning_rate": 2.3901228563942397e-05, "loss": 0.0797109603881836, "step": 3429 }, { "epoch": 0.47794886086532434, "grad_norm": 0.5292791128158569, "learning_rate": 2.389198208173231e-05, "loss": 0.10355854034423828, "step": 3430 }, { "epoch": 0.4780882045565387, "grad_norm": 0.4860965311527252, "learning_rate": 2.3882734734812673e-05, "loss": 0.09836196899414062, "step": 3431 }, { "epoch": 0.4782275482477531, "grad_norm": 0.1627788096666336, "learning_rate": 2.3873486525238008e-05, "loss": 0.06105327606201172, "step": 3432 }, { "epoch": 0.4783668919389675, "grad_norm": 0.4386437237262726, "learning_rate": 2.3864237455063083e-05, "loss": 0.129913330078125, "step": 3433 }, { "epoch": 0.47850623563018185, "grad_norm": 0.30330219864845276, "learning_rate": 2.3854987526342806e-05, "loss": 0.084808349609375, "step": 3434 }, { "epoch": 0.47864557932139623, "grad_norm": 0.3240766227245331, "learning_rate": 2.3845736741132317e-05, "loss": 0.0643014907836914, "step": 3435 }, { "epoch": 0.4787849230126106, "grad_norm": 0.6141373515129089, "learning_rate": 2.3836485101486928e-05, "loss": 0.09175682067871094, "step": 3436 }, { "epoch": 0.478924266703825, "grad_norm": 0.5576227903366089, "learning_rate": 2.382723260946213e-05, "loss": 0.12470054626464844, "step": 3437 }, { "epoch": 0.47906361039503936, "grad_norm": 0.32891637086868286, "learning_rate": 2.3817979267113633e-05, "loss": 0.10100364685058594, "step": 3438 }, { "epoch": 0.47920295408625374, "grad_norm": 0.6603897213935852, "learning_rate": 2.3808725076497297e-05, "loss": 0.10672760009765625, "step": 3439 }, { "epoch": 0.4793422977774681, "grad_norm": 0.5592063069343567, "learning_rate": 2.3799470039669212e-05, "loss": 0.121490478515625, "step": 3440 }, { "epoch": 0.4794816414686825, "grad_norm": 0.6322094798088074, "learning_rate": 2.3790214158685622e-05, "loss": 0.09412765502929688, "step": 3441 }, { "epoch": 0.4796209851598969, "grad_norm": 0.26640185713768005, "learning_rate": 2.3780957435602984e-05, "loss": 0.08119964599609375, "step": 3442 }, { "epoch": 0.47976032885111125, "grad_norm": 0.3336607813835144, "learning_rate": 2.377169987247792e-05, "loss": 0.10359430313110352, "step": 3443 }, { "epoch": 0.47989967254232563, "grad_norm": 0.4002796411514282, "learning_rate": 2.376244147136726e-05, "loss": 0.08101606369018555, "step": 3444 }, { "epoch": 0.48003901623354, "grad_norm": 0.42045366764068604, "learning_rate": 2.3753182234327994e-05, "loss": 0.09898185729980469, "step": 3445 }, { "epoch": 0.4801783599247544, "grad_norm": 0.5751110315322876, "learning_rate": 2.374392216341733e-05, "loss": 0.13056564331054688, "step": 3446 }, { "epoch": 0.48031770361596876, "grad_norm": 0.27729788422584534, "learning_rate": 2.3734661260692633e-05, "loss": 0.07246589660644531, "step": 3447 }, { "epoch": 0.4804570473071832, "grad_norm": 0.33470696210861206, "learning_rate": 2.3725399528211462e-05, "loss": 0.07543277740478516, "step": 3448 }, { "epoch": 0.48059639099839757, "grad_norm": 0.3665543794631958, "learning_rate": 2.371613696803158e-05, "loss": 0.09302711486816406, "step": 3449 }, { "epoch": 0.48073573468961195, "grad_norm": 0.5296663045883179, "learning_rate": 2.3706873582210893e-05, "loss": 0.10642623901367188, "step": 3450 }, { "epoch": 0.4808750783808263, "grad_norm": 0.36532270908355713, "learning_rate": 2.369760937280753e-05, "loss": 0.08485984802246094, "step": 3451 }, { "epoch": 0.4810144220720407, "grad_norm": 0.3713747560977936, "learning_rate": 2.368834434187979e-05, "loss": 0.08110427856445312, "step": 3452 }, { "epoch": 0.4811537657632551, "grad_norm": 0.41377294063568115, "learning_rate": 2.3679078491486133e-05, "loss": 0.08055686950683594, "step": 3453 }, { "epoch": 0.48129310945446946, "grad_norm": 0.6762192845344543, "learning_rate": 2.3669811823685235e-05, "loss": 0.09663581848144531, "step": 3454 }, { "epoch": 0.48143245314568384, "grad_norm": 0.6754793524742126, "learning_rate": 2.366054434053593e-05, "loss": 0.13373947143554688, "step": 3455 }, { "epoch": 0.4815717968368982, "grad_norm": 0.4549417197704315, "learning_rate": 2.3651276044097246e-05, "loss": 0.10376548767089844, "step": 3456 }, { "epoch": 0.4817111405281126, "grad_norm": 0.22388340532779694, "learning_rate": 2.364200693642839e-05, "loss": 0.0695199966430664, "step": 3457 }, { "epoch": 0.48185048421932697, "grad_norm": 0.47678929567337036, "learning_rate": 2.363273701958873e-05, "loss": 0.08212566375732422, "step": 3458 }, { "epoch": 0.48198982791054135, "grad_norm": 0.6933940649032593, "learning_rate": 2.3623466295637848e-05, "loss": 0.11297225952148438, "step": 3459 }, { "epoch": 0.4821291716017557, "grad_norm": 0.8080404996871948, "learning_rate": 2.3614194766635482e-05, "loss": 0.0964670181274414, "step": 3460 }, { "epoch": 0.4822685152929701, "grad_norm": 0.4244355261325836, "learning_rate": 2.3604922434641545e-05, "loss": 0.10590934753417969, "step": 3461 }, { "epoch": 0.4824078589841845, "grad_norm": 0.35458946228027344, "learning_rate": 2.3595649301716154e-05, "loss": 0.08524417877197266, "step": 3462 }, { "epoch": 0.48254720267539886, "grad_norm": 0.44528841972351074, "learning_rate": 2.3586375369919573e-05, "loss": 0.0905294418334961, "step": 3463 }, { "epoch": 0.48268654636661323, "grad_norm": 0.4322637617588043, "learning_rate": 2.3577100641312258e-05, "loss": 0.10003852844238281, "step": 3464 }, { "epoch": 0.4828258900578276, "grad_norm": 0.32373663783073425, "learning_rate": 2.356782511795486e-05, "loss": 0.08164787292480469, "step": 3465 }, { "epoch": 0.482965233749042, "grad_norm": 0.3465843200683594, "learning_rate": 2.3558548801908164e-05, "loss": 0.0884552001953125, "step": 3466 }, { "epoch": 0.48310457744025637, "grad_norm": 0.6706493496894836, "learning_rate": 2.3549271695233177e-05, "loss": 0.1169748306274414, "step": 3467 }, { "epoch": 0.4832439211314708, "grad_norm": 0.4178025722503662, "learning_rate": 2.353999379999104e-05, "loss": 0.08529376983642578, "step": 3468 }, { "epoch": 0.4833832648226852, "grad_norm": 0.42493778467178345, "learning_rate": 2.3530715118243105e-05, "loss": 0.10468292236328125, "step": 3469 }, { "epoch": 0.48352260851389955, "grad_norm": 0.4895673394203186, "learning_rate": 2.3521435652050886e-05, "loss": 0.09110450744628906, "step": 3470 }, { "epoch": 0.48366195220511393, "grad_norm": 0.44503864645957947, "learning_rate": 2.351215540347605e-05, "loss": 0.0815887451171875, "step": 3471 }, { "epoch": 0.4838012958963283, "grad_norm": 0.6322842240333557, "learning_rate": 2.350287437458047e-05, "loss": 0.07789993286132812, "step": 3472 }, { "epoch": 0.4839406395875427, "grad_norm": 0.3704487979412079, "learning_rate": 2.349359256742618e-05, "loss": 0.07791328430175781, "step": 3473 }, { "epoch": 0.48407998327875706, "grad_norm": 0.4088389575481415, "learning_rate": 2.3484309984075376e-05, "loss": 0.08663749694824219, "step": 3474 }, { "epoch": 0.48421932696997144, "grad_norm": 0.2975269854068756, "learning_rate": 2.3475026626590443e-05, "loss": 0.0869293212890625, "step": 3475 }, { "epoch": 0.4843586706611858, "grad_norm": 0.4452313482761383, "learning_rate": 2.3465742497033932e-05, "loss": 0.10309028625488281, "step": 3476 }, { "epoch": 0.4844980143524002, "grad_norm": 0.36391445994377136, "learning_rate": 2.345645759746856e-05, "loss": 0.07858943939208984, "step": 3477 }, { "epoch": 0.4846373580436146, "grad_norm": 0.31398671865463257, "learning_rate": 2.3447171929957224e-05, "loss": 0.07616901397705078, "step": 3478 }, { "epoch": 0.48477670173482895, "grad_norm": 0.6435450911521912, "learning_rate": 2.3437885496562986e-05, "loss": 0.11063575744628906, "step": 3479 }, { "epoch": 0.48491604542604333, "grad_norm": 0.501363217830658, "learning_rate": 2.3428598299349076e-05, "loss": 0.10598373413085938, "step": 3480 }, { "epoch": 0.4850553891172577, "grad_norm": 0.4656845033168793, "learning_rate": 2.34193103403789e-05, "loss": 0.10491943359375, "step": 3481 }, { "epoch": 0.4851947328084721, "grad_norm": 0.341849148273468, "learning_rate": 2.341002162171603e-05, "loss": 0.08456230163574219, "step": 3482 }, { "epoch": 0.48533407649968646, "grad_norm": 0.41346633434295654, "learning_rate": 2.3400732145424216e-05, "loss": 0.08855342864990234, "step": 3483 }, { "epoch": 0.48547342019090084, "grad_norm": 0.7435234785079956, "learning_rate": 2.339144191356735e-05, "loss": 0.11557865142822266, "step": 3484 }, { "epoch": 0.4856127638821152, "grad_norm": 0.24933378398418427, "learning_rate": 2.3382150928209523e-05, "loss": 0.06495094299316406, "step": 3485 }, { "epoch": 0.4857521075733296, "grad_norm": 1.3599504232406616, "learning_rate": 2.3372859191414978e-05, "loss": 0.201904296875, "step": 3486 }, { "epoch": 0.48589145126454397, "grad_norm": 0.3012869358062744, "learning_rate": 2.3363566705248117e-05, "loss": 0.08104515075683594, "step": 3487 }, { "epoch": 0.4860307949557584, "grad_norm": 0.33018165826797485, "learning_rate": 2.3354273471773534e-05, "loss": 0.09753227233886719, "step": 3488 }, { "epoch": 0.4861701386469728, "grad_norm": 0.3006400763988495, "learning_rate": 2.3344979493055958e-05, "loss": 0.07224655151367188, "step": 3489 }, { "epoch": 0.48630948233818716, "grad_norm": 0.34737178683280945, "learning_rate": 2.333568477116031e-05, "loss": 0.07097816467285156, "step": 3490 }, { "epoch": 0.48644882602940154, "grad_norm": 0.4151892066001892, "learning_rate": 2.3326389308151658e-05, "loss": 0.09715557098388672, "step": 3491 }, { "epoch": 0.4865881697206159, "grad_norm": 0.8376125693321228, "learning_rate": 2.3317093106095246e-05, "loss": 0.13702011108398438, "step": 3492 }, { "epoch": 0.4867275134118303, "grad_norm": 0.3436349928379059, "learning_rate": 2.330779616705648e-05, "loss": 0.072509765625, "step": 3493 }, { "epoch": 0.48686685710304467, "grad_norm": 0.43082335591316223, "learning_rate": 2.329849849310092e-05, "loss": 0.08440685272216797, "step": 3494 }, { "epoch": 0.48700620079425905, "grad_norm": 0.4261794984340668, "learning_rate": 2.3289200086294298e-05, "loss": 0.11366844177246094, "step": 3495 }, { "epoch": 0.4871455444854734, "grad_norm": 0.4665292203426361, "learning_rate": 2.3279900948702516e-05, "loss": 0.0989370346069336, "step": 3496 }, { "epoch": 0.4872848881766878, "grad_norm": 0.3691820204257965, "learning_rate": 2.3270601082391623e-05, "loss": 0.08914947509765625, "step": 3497 }, { "epoch": 0.4874242318679022, "grad_norm": 0.6231489181518555, "learning_rate": 2.3261300489427835e-05, "loss": 0.10580062866210938, "step": 3498 }, { "epoch": 0.48756357555911656, "grad_norm": 1.090087890625, "learning_rate": 2.3251999171877538e-05, "loss": 0.14092636108398438, "step": 3499 }, { "epoch": 0.48770291925033094, "grad_norm": 0.32475176453590393, "learning_rate": 2.3242697131807267e-05, "loss": 0.07478141784667969, "step": 3500 }, { "epoch": 0.4878422629415453, "grad_norm": 0.35325801372528076, "learning_rate": 2.3233394371283727e-05, "loss": 0.09547233581542969, "step": 3501 }, { "epoch": 0.4879816066327597, "grad_norm": 0.4399093687534332, "learning_rate": 2.322409089237378e-05, "loss": 0.10532188415527344, "step": 3502 }, { "epoch": 0.48812095032397407, "grad_norm": 0.7503423690795898, "learning_rate": 2.321478669714444e-05, "loss": 0.08897781372070312, "step": 3503 }, { "epoch": 0.48826029401518845, "grad_norm": 0.6290386915206909, "learning_rate": 2.3205481787662895e-05, "loss": 0.12149810791015625, "step": 3504 }, { "epoch": 0.4883996377064028, "grad_norm": 0.3666436970233917, "learning_rate": 2.3196176165996476e-05, "loss": 0.076629638671875, "step": 3505 }, { "epoch": 0.4885389813976172, "grad_norm": 0.597728431224823, "learning_rate": 2.3186869834212682e-05, "loss": 0.0934906005859375, "step": 3506 }, { "epoch": 0.4886783250888316, "grad_norm": 0.2929260730743408, "learning_rate": 2.3177562794379173e-05, "loss": 0.09285354614257812, "step": 3507 }, { "epoch": 0.488817668780046, "grad_norm": 0.4453027844429016, "learning_rate": 2.3168255048563753e-05, "loss": 0.08423042297363281, "step": 3508 }, { "epoch": 0.4889570124712604, "grad_norm": 0.5629048347473145, "learning_rate": 2.3158946598834393e-05, "loss": 0.1123809814453125, "step": 3509 }, { "epoch": 0.48909635616247477, "grad_norm": 0.561160683631897, "learning_rate": 2.314963744725922e-05, "loss": 0.10912704467773438, "step": 3510 }, { "epoch": 0.48923569985368914, "grad_norm": 0.3977614939212799, "learning_rate": 2.314032759590651e-05, "loss": 0.08147430419921875, "step": 3511 }, { "epoch": 0.4893750435449035, "grad_norm": 0.40343931317329407, "learning_rate": 2.313101704684471e-05, "loss": 0.09473228454589844, "step": 3512 }, { "epoch": 0.4895143872361179, "grad_norm": 0.5881628394126892, "learning_rate": 2.31217058021424e-05, "loss": 0.10203838348388672, "step": 3513 }, { "epoch": 0.4896537309273323, "grad_norm": 0.497330904006958, "learning_rate": 2.3112393863868327e-05, "loss": 0.10740089416503906, "step": 3514 }, { "epoch": 0.48979307461854665, "grad_norm": 0.556941568851471, "learning_rate": 2.3103081234091406e-05, "loss": 0.09459686279296875, "step": 3515 }, { "epoch": 0.48993241830976103, "grad_norm": 0.3576592803001404, "learning_rate": 2.3093767914880668e-05, "loss": 0.08832359313964844, "step": 3516 }, { "epoch": 0.4900717620009754, "grad_norm": 0.33244749903678894, "learning_rate": 2.308445390830534e-05, "loss": 0.09192752838134766, "step": 3517 }, { "epoch": 0.4902111056921898, "grad_norm": 0.714246928691864, "learning_rate": 2.3075139216434762e-05, "loss": 0.14908981323242188, "step": 3518 }, { "epoch": 0.49035044938340416, "grad_norm": 0.3638019561767578, "learning_rate": 2.3065823841338465e-05, "loss": 0.08140945434570312, "step": 3519 }, { "epoch": 0.49048979307461854, "grad_norm": 0.30502814054489136, "learning_rate": 2.3056507785086105e-05, "loss": 0.06790924072265625, "step": 3520 }, { "epoch": 0.4906291367658329, "grad_norm": 0.24218115210533142, "learning_rate": 2.304719104974749e-05, "loss": 0.06303119659423828, "step": 3521 }, { "epoch": 0.4907684804570473, "grad_norm": 0.2330855280160904, "learning_rate": 2.3037873637392596e-05, "loss": 0.06697511672973633, "step": 3522 }, { "epoch": 0.4909078241482617, "grad_norm": 0.47906139492988586, "learning_rate": 2.3028555550091536e-05, "loss": 0.07579421997070312, "step": 3523 }, { "epoch": 0.49104716783947605, "grad_norm": 0.3973087966442108, "learning_rate": 2.3019236789914575e-05, "loss": 0.08883953094482422, "step": 3524 }, { "epoch": 0.49118651153069043, "grad_norm": 0.6008855104446411, "learning_rate": 2.300991735893213e-05, "loss": 0.1333789825439453, "step": 3525 }, { "epoch": 0.4913258552219048, "grad_norm": 0.5370943546295166, "learning_rate": 2.3000597259214765e-05, "loss": 0.13360977172851562, "step": 3526 }, { "epoch": 0.4914651989131192, "grad_norm": 0.49261510372161865, "learning_rate": 2.2991276492833197e-05, "loss": 0.11439323425292969, "step": 3527 }, { "epoch": 0.4916045426043336, "grad_norm": 0.5018017888069153, "learning_rate": 2.2981955061858282e-05, "loss": 0.11037635803222656, "step": 3528 }, { "epoch": 0.491743886295548, "grad_norm": 0.5720459818840027, "learning_rate": 2.297263296836103e-05, "loss": 0.11130332946777344, "step": 3529 }, { "epoch": 0.49188322998676237, "grad_norm": 0.4461495876312256, "learning_rate": 2.2963310214412596e-05, "loss": 0.10687637329101562, "step": 3530 }, { "epoch": 0.49202257367797675, "grad_norm": 0.4036763608455658, "learning_rate": 2.2953986802084293e-05, "loss": 0.08428955078125, "step": 3531 }, { "epoch": 0.4921619173691911, "grad_norm": 0.4538286626338959, "learning_rate": 2.2944662733447557e-05, "loss": 0.0961151123046875, "step": 3532 }, { "epoch": 0.4923012610604055, "grad_norm": 0.3409772515296936, "learning_rate": 2.2935338010573998e-05, "loss": 0.08807182312011719, "step": 3533 }, { "epoch": 0.4924406047516199, "grad_norm": 0.49719616770744324, "learning_rate": 2.292601263553534e-05, "loss": 0.09952449798583984, "step": 3534 }, { "epoch": 0.49257994844283426, "grad_norm": 0.5508214235305786, "learning_rate": 2.2916686610403477e-05, "loss": 0.11870574951171875, "step": 3535 }, { "epoch": 0.49271929213404864, "grad_norm": 0.36145704984664917, "learning_rate": 2.2907359937250445e-05, "loss": 0.08869743347167969, "step": 3536 }, { "epoch": 0.492858635825263, "grad_norm": 0.3766576051712036, "learning_rate": 2.2898032618148403e-05, "loss": 0.07381629943847656, "step": 3537 }, { "epoch": 0.4929979795164774, "grad_norm": 0.6557148694992065, "learning_rate": 2.288870465516968e-05, "loss": 0.10042762756347656, "step": 3538 }, { "epoch": 0.49313732320769177, "grad_norm": 0.18732362985610962, "learning_rate": 2.287937605038673e-05, "loss": 0.05557823181152344, "step": 3539 }, { "epoch": 0.49327666689890615, "grad_norm": 0.38100650906562805, "learning_rate": 2.2870046805872166e-05, "loss": 0.09485626220703125, "step": 3540 }, { "epoch": 0.4934160105901205, "grad_norm": 0.6137387752532959, "learning_rate": 2.286071692369872e-05, "loss": 0.13443374633789062, "step": 3541 }, { "epoch": 0.4935553542813349, "grad_norm": 0.817206859588623, "learning_rate": 2.2851386405939288e-05, "loss": 0.14458274841308594, "step": 3542 }, { "epoch": 0.4936946979725493, "grad_norm": 0.47277554869651794, "learning_rate": 2.284205525466689e-05, "loss": 0.10109138488769531, "step": 3543 }, { "epoch": 0.49383404166376366, "grad_norm": 0.5495854020118713, "learning_rate": 2.2832723471954705e-05, "loss": 0.09332656860351562, "step": 3544 }, { "epoch": 0.49397338535497803, "grad_norm": 0.29470640420913696, "learning_rate": 2.2823391059876032e-05, "loss": 0.08363723754882812, "step": 3545 }, { "epoch": 0.4941127290461924, "grad_norm": 0.5468565821647644, "learning_rate": 2.2814058020504324e-05, "loss": 0.12225341796875, "step": 3546 }, { "epoch": 0.4942520727374068, "grad_norm": 0.4509151875972748, "learning_rate": 2.280472435591318e-05, "loss": 0.10212516784667969, "step": 3547 }, { "epoch": 0.49439141642862117, "grad_norm": 0.6681642532348633, "learning_rate": 2.2795390068176304e-05, "loss": 0.10919952392578125, "step": 3548 }, { "epoch": 0.4945307601198356, "grad_norm": 0.4352334439754486, "learning_rate": 2.2786055159367588e-05, "loss": 0.11079978942871094, "step": 3549 }, { "epoch": 0.49467010381105, "grad_norm": 0.28221672773361206, "learning_rate": 2.277671963156101e-05, "loss": 0.08351325988769531, "step": 3550 }, { "epoch": 0.49480944750226435, "grad_norm": 0.4246078431606293, "learning_rate": 2.2767383486830728e-05, "loss": 0.1057891845703125, "step": 3551 }, { "epoch": 0.49494879119347873, "grad_norm": 0.2910420000553131, "learning_rate": 2.275804672725102e-05, "loss": 0.07705116271972656, "step": 3552 }, { "epoch": 0.4950881348846931, "grad_norm": 0.31359702348709106, "learning_rate": 2.274870935489629e-05, "loss": 0.07721710205078125, "step": 3553 }, { "epoch": 0.4952274785759075, "grad_norm": 1.4858375787734985, "learning_rate": 2.2739371371841103e-05, "loss": 0.17850112915039062, "step": 3554 }, { "epoch": 0.49536682226712186, "grad_norm": 0.8760536313056946, "learning_rate": 2.2730032780160128e-05, "loss": 0.1501445770263672, "step": 3555 }, { "epoch": 0.49550616595833624, "grad_norm": 0.5283787846565247, "learning_rate": 2.27206935819282e-05, "loss": 0.1293659210205078, "step": 3556 }, { "epoch": 0.4956455096495506, "grad_norm": 0.6187781691551208, "learning_rate": 2.2711353779220278e-05, "loss": 0.10190963745117188, "step": 3557 }, { "epoch": 0.495784853340765, "grad_norm": 0.49607914686203003, "learning_rate": 2.2702013374111443e-05, "loss": 0.10712909698486328, "step": 3558 }, { "epoch": 0.4959241970319794, "grad_norm": 0.42386361956596375, "learning_rate": 2.2692672368676925e-05, "loss": 0.07897567749023438, "step": 3559 }, { "epoch": 0.49606354072319375, "grad_norm": 0.3044247031211853, "learning_rate": 2.2683330764992083e-05, "loss": 0.0748286247253418, "step": 3560 }, { "epoch": 0.49620288441440813, "grad_norm": 0.37392935156822205, "learning_rate": 2.2673988565132404e-05, "loss": 0.06544876098632812, "step": 3561 }, { "epoch": 0.4963422281056225, "grad_norm": 0.49476325511932373, "learning_rate": 2.266464577117352e-05, "loss": 0.09716796875, "step": 3562 }, { "epoch": 0.4964815717968369, "grad_norm": 0.6164884567260742, "learning_rate": 2.2655302385191176e-05, "loss": 0.11668205261230469, "step": 3563 }, { "epoch": 0.49662091548805126, "grad_norm": 0.4746073782444, "learning_rate": 2.2645958409261256e-05, "loss": 0.11136817932128906, "step": 3564 }, { "epoch": 0.49676025917926564, "grad_norm": 0.36123624444007874, "learning_rate": 2.2636613845459802e-05, "loss": 0.07408714294433594, "step": 3565 }, { "epoch": 0.49689960287048, "grad_norm": 0.29045364260673523, "learning_rate": 2.262726869586293e-05, "loss": 0.080230712890625, "step": 3566 }, { "epoch": 0.4970389465616944, "grad_norm": 0.3141564130783081, "learning_rate": 2.2617922962546946e-05, "loss": 0.06623554229736328, "step": 3567 }, { "epoch": 0.49717829025290877, "grad_norm": 0.4020007848739624, "learning_rate": 2.2608576647588242e-05, "loss": 0.08577537536621094, "step": 3568 }, { "epoch": 0.4973176339441232, "grad_norm": 0.43270909786224365, "learning_rate": 2.2599229753063368e-05, "loss": 0.10297966003417969, "step": 3569 }, { "epoch": 0.4974569776353376, "grad_norm": 0.4862155616283417, "learning_rate": 2.2589882281048984e-05, "loss": 0.09780311584472656, "step": 3570 }, { "epoch": 0.49759632132655196, "grad_norm": 0.3469257354736328, "learning_rate": 2.258053423362188e-05, "loss": 0.08203411102294922, "step": 3571 }, { "epoch": 0.49773566501776634, "grad_norm": 0.27453097701072693, "learning_rate": 2.2571185612858987e-05, "loss": 0.07065439224243164, "step": 3572 }, { "epoch": 0.4978750087089807, "grad_norm": 0.40304479002952576, "learning_rate": 2.256183642083735e-05, "loss": 0.07018470764160156, "step": 3573 }, { "epoch": 0.4980143524001951, "grad_norm": 0.4901616871356964, "learning_rate": 2.2552486659634148e-05, "loss": 0.11204910278320312, "step": 3574 }, { "epoch": 0.49815369609140947, "grad_norm": 0.29293447732925415, "learning_rate": 2.2543136331326684e-05, "loss": 0.08166694641113281, "step": 3575 }, { "epoch": 0.49829303978262385, "grad_norm": 0.3444250524044037, "learning_rate": 2.2533785437992392e-05, "loss": 0.08587265014648438, "step": 3576 }, { "epoch": 0.4984323834738382, "grad_norm": 0.581868588924408, "learning_rate": 2.2524433981708822e-05, "loss": 0.11177444458007812, "step": 3577 }, { "epoch": 0.4985717271650526, "grad_norm": 0.3277111351490021, "learning_rate": 2.2515081964553655e-05, "loss": 0.08993244171142578, "step": 3578 }, { "epoch": 0.498711070856267, "grad_norm": 0.45504358410835266, "learning_rate": 2.2505729388604692e-05, "loss": 0.09328460693359375, "step": 3579 }, { "epoch": 0.49885041454748136, "grad_norm": 0.4179115295410156, "learning_rate": 2.2496376255939866e-05, "loss": 0.09133720397949219, "step": 3580 }, { "epoch": 0.49898975823869574, "grad_norm": 0.3786357641220093, "learning_rate": 2.2487022568637236e-05, "loss": 0.08354568481445312, "step": 3581 }, { "epoch": 0.4991291019299101, "grad_norm": 0.37199994921684265, "learning_rate": 2.247766832877496e-05, "loss": 0.09311962127685547, "step": 3582 }, { "epoch": 0.4992684456211245, "grad_norm": 0.3573143184185028, "learning_rate": 2.2468313538431355e-05, "loss": 0.085205078125, "step": 3583 }, { "epoch": 0.49940778931233887, "grad_norm": 0.5203757882118225, "learning_rate": 2.245895819968483e-05, "loss": 0.11702156066894531, "step": 3584 }, { "epoch": 0.49954713300355325, "grad_norm": 0.36471644043922424, "learning_rate": 2.2449602314613937e-05, "loss": 0.08889389038085938, "step": 3585 }, { "epoch": 0.4996864766947676, "grad_norm": 0.47943583130836487, "learning_rate": 2.244024588529734e-05, "loss": 0.10224151611328125, "step": 3586 }, { "epoch": 0.499825820385982, "grad_norm": 0.346510112285614, "learning_rate": 2.2430888913813807e-05, "loss": 0.08899497985839844, "step": 3587 }, { "epoch": 0.4999651640771964, "grad_norm": 0.34265241026878357, "learning_rate": 2.242153140224226e-05, "loss": 0.08121681213378906, "step": 3588 }, { "epoch": 0.5001045077684108, "grad_norm": 0.4229574501514435, "learning_rate": 2.2412173352661722e-05, "loss": 0.09548187255859375, "step": 3589 }, { "epoch": 0.5002438514596251, "grad_norm": 0.49202293157577515, "learning_rate": 2.2402814767151333e-05, "loss": 0.10205841064453125, "step": 3590 }, { "epoch": 0.5003831951508395, "grad_norm": 0.7047402262687683, "learning_rate": 2.2393455647790363e-05, "loss": 0.13703346252441406, "step": 3591 }, { "epoch": 0.5005225388420539, "grad_norm": 0.4008718729019165, "learning_rate": 2.2384095996658188e-05, "loss": 0.08972358703613281, "step": 3592 }, { "epoch": 0.5006618825332683, "grad_norm": 0.28691425919532776, "learning_rate": 2.2374735815834315e-05, "loss": 0.08510208129882812, "step": 3593 }, { "epoch": 0.5008012262244826, "grad_norm": 0.3459254801273346, "learning_rate": 2.2365375107398363e-05, "loss": 0.09442138671875, "step": 3594 }, { "epoch": 0.500940569915697, "grad_norm": 0.313025563955307, "learning_rate": 2.2356013873430058e-05, "loss": 0.08616447448730469, "step": 3595 }, { "epoch": 0.5010799136069114, "grad_norm": 0.4326810836791992, "learning_rate": 2.2346652116009256e-05, "loss": 0.08737373352050781, "step": 3596 }, { "epoch": 0.5012192572981258, "grad_norm": 0.32725492119789124, "learning_rate": 2.2337289837215937e-05, "loss": 0.0787811279296875, "step": 3597 }, { "epoch": 0.5013586009893402, "grad_norm": 0.16523022949695587, "learning_rate": 2.232792703913017e-05, "loss": 0.05519676208496094, "step": 3598 }, { "epoch": 0.5014979446805546, "grad_norm": 0.387765109539032, "learning_rate": 2.2318563723832173e-05, "loss": 0.07978439331054688, "step": 3599 }, { "epoch": 0.501637288371769, "grad_norm": 0.24549956619739532, "learning_rate": 2.230919989340224e-05, "loss": 0.07455253601074219, "step": 3600 }, { "epoch": 0.5017766320629834, "grad_norm": 0.6037759184837341, "learning_rate": 2.2299835549920822e-05, "loss": 0.157470703125, "step": 3601 }, { "epoch": 0.5019159757541978, "grad_norm": 0.5356497764587402, "learning_rate": 2.2290470695468443e-05, "loss": 0.0875091552734375, "step": 3602 }, { "epoch": 0.5020553194454122, "grad_norm": 0.3111777901649475, "learning_rate": 2.2281105332125765e-05, "loss": 0.07269287109375, "step": 3603 }, { "epoch": 0.5021946631366265, "grad_norm": 0.28498589992523193, "learning_rate": 2.2271739461973567e-05, "loss": 0.08382415771484375, "step": 3604 }, { "epoch": 0.5023340068278409, "grad_norm": 0.4115041196346283, "learning_rate": 2.2262373087092722e-05, "loss": 0.10698318481445312, "step": 3605 }, { "epoch": 0.5024733505190553, "grad_norm": 0.42271724343299866, "learning_rate": 2.2253006209564233e-05, "loss": 0.106201171875, "step": 3606 }, { "epoch": 0.5026126942102697, "grad_norm": 0.6989468336105347, "learning_rate": 2.2243638831469197e-05, "loss": 0.10185718536376953, "step": 3607 }, { "epoch": 0.502752037901484, "grad_norm": 0.4073905348777771, "learning_rate": 2.2234270954888833e-05, "loss": 0.094024658203125, "step": 3608 }, { "epoch": 0.5028913815926984, "grad_norm": 0.2625499367713928, "learning_rate": 2.2224902581904476e-05, "loss": 0.08005332946777344, "step": 3609 }, { "epoch": 0.5030307252839128, "grad_norm": 0.6455435752868652, "learning_rate": 2.221553371459756e-05, "loss": 0.11818695068359375, "step": 3610 }, { "epoch": 0.5031700689751272, "grad_norm": 0.27758702635765076, "learning_rate": 2.2206164355049634e-05, "loss": 0.07831954956054688, "step": 3611 }, { "epoch": 0.5033094126663415, "grad_norm": 0.4879065155982971, "learning_rate": 2.2196794505342358e-05, "loss": 0.09527969360351562, "step": 3612 }, { "epoch": 0.5034487563575559, "grad_norm": 0.37370091676712036, "learning_rate": 2.2187424167557496e-05, "loss": 0.10657215118408203, "step": 3613 }, { "epoch": 0.5035881000487703, "grad_norm": 0.4137735664844513, "learning_rate": 2.2178053343776912e-05, "loss": 0.10835838317871094, "step": 3614 }, { "epoch": 0.5037274437399847, "grad_norm": 0.5408356785774231, "learning_rate": 2.216868203608262e-05, "loss": 0.11116600036621094, "step": 3615 }, { "epoch": 0.5038667874311991, "grad_norm": 0.35009750723838806, "learning_rate": 2.2159310246556675e-05, "loss": 0.08928298950195312, "step": 3616 }, { "epoch": 0.5040061311224134, "grad_norm": 0.40930449962615967, "learning_rate": 2.2149937977281296e-05, "loss": 0.09708404541015625, "step": 3617 }, { "epoch": 0.5041454748136278, "grad_norm": 0.21403788030147552, "learning_rate": 2.214056523033879e-05, "loss": 0.07234525680541992, "step": 3618 }, { "epoch": 0.5042848185048422, "grad_norm": 0.5752096772193909, "learning_rate": 2.2131192007811552e-05, "loss": 0.07707881927490234, "step": 3619 }, { "epoch": 0.5044241621960566, "grad_norm": 0.5906797051429749, "learning_rate": 2.2121818311782116e-05, "loss": 0.08550453186035156, "step": 3620 }, { "epoch": 0.504563505887271, "grad_norm": 0.3406551480293274, "learning_rate": 2.211244414433308e-05, "loss": 0.08963584899902344, "step": 3621 }, { "epoch": 0.5047028495784853, "grad_norm": 0.24212178587913513, "learning_rate": 2.2103069507547187e-05, "loss": 0.0862722396850586, "step": 3622 }, { "epoch": 0.5048421932696997, "grad_norm": 0.5994324684143066, "learning_rate": 2.2093694403507264e-05, "loss": 0.12993812561035156, "step": 3623 }, { "epoch": 0.5049815369609141, "grad_norm": 0.3297486901283264, "learning_rate": 2.208431883429625e-05, "loss": 0.08048820495605469, "step": 3624 }, { "epoch": 0.5051208806521285, "grad_norm": 0.33278051018714905, "learning_rate": 2.207494280199717e-05, "loss": 0.08412647247314453, "step": 3625 }, { "epoch": 0.5052602243433428, "grad_norm": 0.4243289530277252, "learning_rate": 2.2065566308693173e-05, "loss": 0.08528900146484375, "step": 3626 }, { "epoch": 0.5053995680345572, "grad_norm": 0.37780314683914185, "learning_rate": 2.2056189356467498e-05, "loss": 0.07941246032714844, "step": 3627 }, { "epoch": 0.5055389117257716, "grad_norm": 0.39797845482826233, "learning_rate": 2.2046811947403492e-05, "loss": 0.07242012023925781, "step": 3628 }, { "epoch": 0.505678255416986, "grad_norm": 0.6362996697425842, "learning_rate": 2.2037434083584605e-05, "loss": 0.11664772033691406, "step": 3629 }, { "epoch": 0.5058175991082003, "grad_norm": 0.34501734375953674, "learning_rate": 2.2028055767094372e-05, "loss": 0.08011245727539062, "step": 3630 }, { "epoch": 0.5059569427994147, "grad_norm": 0.4625336825847626, "learning_rate": 2.2018677000016463e-05, "loss": 0.09065055847167969, "step": 3631 }, { "epoch": 0.5060962864906291, "grad_norm": 0.5450413823127747, "learning_rate": 2.2009297784434595e-05, "loss": 0.09954833984375, "step": 3632 }, { "epoch": 0.5062356301818435, "grad_norm": 0.5648554563522339, "learning_rate": 2.199991812243264e-05, "loss": 0.08428955078125, "step": 3633 }, { "epoch": 0.5063749738730579, "grad_norm": 0.4863109588623047, "learning_rate": 2.1990538016094537e-05, "loss": 0.07711029052734375, "step": 3634 }, { "epoch": 0.5065143175642722, "grad_norm": 0.4763026833534241, "learning_rate": 2.1981157467504332e-05, "loss": 0.10681724548339844, "step": 3635 }, { "epoch": 0.5066536612554866, "grad_norm": 0.6573106050491333, "learning_rate": 2.1971776478746176e-05, "loss": 0.11796283721923828, "step": 3636 }, { "epoch": 0.506793004946701, "grad_norm": 0.3796146512031555, "learning_rate": 2.196239505190429e-05, "loss": 0.08820915222167969, "step": 3637 }, { "epoch": 0.5069323486379154, "grad_norm": 0.3239118754863739, "learning_rate": 2.195301318906303e-05, "loss": 0.06958198547363281, "step": 3638 }, { "epoch": 0.5070716923291299, "grad_norm": 0.4484650492668152, "learning_rate": 2.194363089230683e-05, "loss": 0.07629203796386719, "step": 3639 }, { "epoch": 0.5072110360203442, "grad_norm": 0.38783514499664307, "learning_rate": 2.193424816372022e-05, "loss": 0.09799766540527344, "step": 3640 }, { "epoch": 0.5073503797115586, "grad_norm": 0.31916338205337524, "learning_rate": 2.1924865005387822e-05, "loss": 0.07931709289550781, "step": 3641 }, { "epoch": 0.507489723402773, "grad_norm": 0.4265303909778595, "learning_rate": 2.1915481419394373e-05, "loss": 0.12880897521972656, "step": 3642 }, { "epoch": 0.5076290670939874, "grad_norm": 0.7687326073646545, "learning_rate": 2.190609740782468e-05, "loss": 0.10861682891845703, "step": 3643 }, { "epoch": 0.5077684107852017, "grad_norm": 0.2703242897987366, "learning_rate": 2.1896712972763658e-05, "loss": 0.07837295532226562, "step": 3644 }, { "epoch": 0.5079077544764161, "grad_norm": 0.5065028667449951, "learning_rate": 2.1887328116296315e-05, "loss": 0.10987091064453125, "step": 3645 }, { "epoch": 0.5080470981676305, "grad_norm": 0.2101999968290329, "learning_rate": 2.1877942840507752e-05, "loss": 0.0772552490234375, "step": 3646 }, { "epoch": 0.5081864418588449, "grad_norm": 0.3729335367679596, "learning_rate": 2.1868557147483176e-05, "loss": 0.10075759887695312, "step": 3647 }, { "epoch": 0.5083257855500593, "grad_norm": 0.5413200855255127, "learning_rate": 2.1859171039307848e-05, "loss": 0.1278533935546875, "step": 3648 }, { "epoch": 0.5084651292412736, "grad_norm": 0.7528550028800964, "learning_rate": 2.1849784518067172e-05, "loss": 0.12228012084960938, "step": 3649 }, { "epoch": 0.508604472932488, "grad_norm": 0.18242529034614563, "learning_rate": 2.1840397585846594e-05, "loss": 0.06589698791503906, "step": 3650 }, { "epoch": 0.5087438166237024, "grad_norm": 0.4570610523223877, "learning_rate": 2.1831010244731697e-05, "loss": 0.10050392150878906, "step": 3651 }, { "epoch": 0.5088831603149168, "grad_norm": 0.22792956233024597, "learning_rate": 2.182162249680813e-05, "loss": 0.06468677520751953, "step": 3652 }, { "epoch": 0.5090225040061311, "grad_norm": 0.5405930876731873, "learning_rate": 2.1812234344161623e-05, "loss": 0.11087608337402344, "step": 3653 }, { "epoch": 0.5091618476973455, "grad_norm": 0.4114237427711487, "learning_rate": 2.1802845788878027e-05, "loss": 0.09141349792480469, "step": 3654 }, { "epoch": 0.5093011913885599, "grad_norm": 0.2723290026187897, "learning_rate": 2.1793456833043253e-05, "loss": 0.06774520874023438, "step": 3655 }, { "epoch": 0.5094405350797743, "grad_norm": 0.26907384395599365, "learning_rate": 2.1784067478743317e-05, "loss": 0.0638885498046875, "step": 3656 }, { "epoch": 0.5095798787709886, "grad_norm": 0.3670291304588318, "learning_rate": 2.177467772806432e-05, "loss": 0.08334732055664062, "step": 3657 }, { "epoch": 0.509719222462203, "grad_norm": 0.291133850812912, "learning_rate": 2.1765287583092447e-05, "loss": 0.0897064208984375, "step": 3658 }, { "epoch": 0.5098585661534174, "grad_norm": 0.27694907784461975, "learning_rate": 2.1755897045913975e-05, "loss": 0.08148193359375, "step": 3659 }, { "epoch": 0.5099979098446318, "grad_norm": 0.34353968501091003, "learning_rate": 2.1746506118615267e-05, "loss": 0.07778263092041016, "step": 3660 }, { "epoch": 0.5101372535358462, "grad_norm": 0.6711382865905762, "learning_rate": 2.173711480328277e-05, "loss": 0.10190963745117188, "step": 3661 }, { "epoch": 0.5102765972270605, "grad_norm": 0.41960564255714417, "learning_rate": 2.1727723102003023e-05, "loss": 0.10080718994140625, "step": 3662 }, { "epoch": 0.5104159409182749, "grad_norm": 0.3519643247127533, "learning_rate": 2.1718331016862657e-05, "loss": 0.08876800537109375, "step": 3663 }, { "epoch": 0.5105552846094893, "grad_norm": 0.6020705103874207, "learning_rate": 2.1708938549948354e-05, "loss": 0.12206268310546875, "step": 3664 }, { "epoch": 0.5106946283007037, "grad_norm": 0.27140310406684875, "learning_rate": 2.1699545703346934e-05, "loss": 0.08456611633300781, "step": 3665 }, { "epoch": 0.510833971991918, "grad_norm": 0.27216362953186035, "learning_rate": 2.1690152479145254e-05, "loss": 0.07582759857177734, "step": 3666 }, { "epoch": 0.5109733156831324, "grad_norm": 0.47954046726226807, "learning_rate": 2.1680758879430283e-05, "loss": 0.08698272705078125, "step": 3667 }, { "epoch": 0.5111126593743468, "grad_norm": 0.32324761152267456, "learning_rate": 2.1671364906289053e-05, "loss": 0.08398056030273438, "step": 3668 }, { "epoch": 0.5112520030655612, "grad_norm": 0.3007957935333252, "learning_rate": 2.166197056180871e-05, "loss": 0.07563591003417969, "step": 3669 }, { "epoch": 0.5113913467567756, "grad_norm": 0.214572474360466, "learning_rate": 2.1652575848076446e-05, "loss": 0.06020927429199219, "step": 3670 }, { "epoch": 0.5115306904479899, "grad_norm": 1.1594676971435547, "learning_rate": 2.1643180767179558e-05, "loss": 0.15116310119628906, "step": 3671 }, { "epoch": 0.5116700341392043, "grad_norm": 0.5094106197357178, "learning_rate": 2.163378532120542e-05, "loss": 0.09107780456542969, "step": 3672 }, { "epoch": 0.5118093778304187, "grad_norm": 0.2819201648235321, "learning_rate": 2.162438951224148e-05, "loss": 0.06962776184082031, "step": 3673 }, { "epoch": 0.5119487215216331, "grad_norm": 0.43773022294044495, "learning_rate": 2.1614993342375277e-05, "loss": 0.08666706085205078, "step": 3674 }, { "epoch": 0.5120880652128474, "grad_norm": 0.4743880331516266, "learning_rate": 2.1605596813694426e-05, "loss": 0.07237815856933594, "step": 3675 }, { "epoch": 0.5122274089040618, "grad_norm": 0.3021768033504486, "learning_rate": 2.1596199928286618e-05, "loss": 0.08869361877441406, "step": 3676 }, { "epoch": 0.5123667525952762, "grad_norm": 0.8729799389839172, "learning_rate": 2.1586802688239627e-05, "loss": 0.1299877166748047, "step": 3677 }, { "epoch": 0.5125060962864906, "grad_norm": 0.3595752716064453, "learning_rate": 2.1577405095641307e-05, "loss": 0.080841064453125, "step": 3678 }, { "epoch": 0.5126454399777051, "grad_norm": 0.5820650458335876, "learning_rate": 2.156800715257959e-05, "loss": 0.11102962493896484, "step": 3679 }, { "epoch": 0.5127847836689194, "grad_norm": 0.4247981905937195, "learning_rate": 2.1558608861142472e-05, "loss": 0.10100936889648438, "step": 3680 }, { "epoch": 0.5129241273601338, "grad_norm": 0.37328967452049255, "learning_rate": 2.1549210223418063e-05, "loss": 0.09741592407226562, "step": 3681 }, { "epoch": 0.5130634710513482, "grad_norm": 0.29361581802368164, "learning_rate": 2.15398112414945e-05, "loss": 0.08204269409179688, "step": 3682 }, { "epoch": 0.5132028147425626, "grad_norm": 0.5099249482154846, "learning_rate": 2.1530411917460037e-05, "loss": 0.07933902740478516, "step": 3683 }, { "epoch": 0.513342158433777, "grad_norm": 0.42604923248291016, "learning_rate": 2.1521012253402987e-05, "loss": 0.07947540283203125, "step": 3684 }, { "epoch": 0.5134815021249913, "grad_norm": 0.31628602743148804, "learning_rate": 2.151161225141174e-05, "loss": 0.08016777038574219, "step": 3685 }, { "epoch": 0.5136208458162057, "grad_norm": 0.41677600145339966, "learning_rate": 2.1502211913574764e-05, "loss": 0.09615898132324219, "step": 3686 }, { "epoch": 0.5137601895074201, "grad_norm": 0.4476262032985687, "learning_rate": 2.1492811241980595e-05, "loss": 0.11306190490722656, "step": 3687 }, { "epoch": 0.5138995331986345, "grad_norm": 0.19930614531040192, "learning_rate": 2.1483410238717844e-05, "loss": 0.06281852722167969, "step": 3688 }, { "epoch": 0.5140388768898488, "grad_norm": 0.486012727022171, "learning_rate": 2.147400890587521e-05, "loss": 0.08540534973144531, "step": 3689 }, { "epoch": 0.5141782205810632, "grad_norm": 0.3799509108066559, "learning_rate": 2.146460724554145e-05, "loss": 0.09944915771484375, "step": 3690 }, { "epoch": 0.5143175642722776, "grad_norm": 0.6501029133796692, "learning_rate": 2.1455205259805396e-05, "loss": 0.11091423034667969, "step": 3691 }, { "epoch": 0.514456907963492, "grad_norm": 0.3774292767047882, "learning_rate": 2.1445802950755956e-05, "loss": 0.09331130981445312, "step": 3692 }, { "epoch": 0.5145962516547063, "grad_norm": 0.41339847445487976, "learning_rate": 2.143640032048211e-05, "loss": 0.07103729248046875, "step": 3693 }, { "epoch": 0.5147355953459207, "grad_norm": 0.5329321622848511, "learning_rate": 2.1426997371072905e-05, "loss": 0.11150932312011719, "step": 3694 }, { "epoch": 0.5148749390371351, "grad_norm": 0.4090617597103119, "learning_rate": 2.141759410461746e-05, "loss": 0.08116626739501953, "step": 3695 }, { "epoch": 0.5150142827283495, "grad_norm": 0.3484518527984619, "learning_rate": 2.140819052320497e-05, "loss": 0.09588623046875, "step": 3696 }, { "epoch": 0.5151536264195639, "grad_norm": 0.44340309500694275, "learning_rate": 2.1398786628924705e-05, "loss": 0.10256767272949219, "step": 3697 }, { "epoch": 0.5152929701107782, "grad_norm": 0.8933324813842773, "learning_rate": 2.1389382423865973e-05, "loss": 0.13751602172851562, "step": 3698 }, { "epoch": 0.5154323138019926, "grad_norm": 0.2327093929052353, "learning_rate": 2.137997791011819e-05, "loss": 0.06946754455566406, "step": 3699 }, { "epoch": 0.515571657493207, "grad_norm": 0.4385296702384949, "learning_rate": 2.1370573089770823e-05, "loss": 0.10528945922851562, "step": 3700 }, { "epoch": 0.5157110011844214, "grad_norm": 0.43449580669403076, "learning_rate": 2.136116796491341e-05, "loss": 0.11512565612792969, "step": 3701 }, { "epoch": 0.5158503448756357, "grad_norm": 0.1449153572320938, "learning_rate": 2.1351762537635553e-05, "loss": 0.05810356140136719, "step": 3702 }, { "epoch": 0.5159896885668501, "grad_norm": 0.31845197081565857, "learning_rate": 2.134235681002691e-05, "loss": 0.08333396911621094, "step": 3703 }, { "epoch": 0.5161290322580645, "grad_norm": 0.8715630173683167, "learning_rate": 2.1332950784177235e-05, "loss": 0.11542701721191406, "step": 3704 }, { "epoch": 0.5162683759492789, "grad_norm": 0.3828117847442627, "learning_rate": 2.1323544462176325e-05, "loss": 0.0932464599609375, "step": 3705 }, { "epoch": 0.5164077196404933, "grad_norm": 0.2595888674259186, "learning_rate": 2.131413784611406e-05, "loss": 0.07486724853515625, "step": 3706 }, { "epoch": 0.5165470633317076, "grad_norm": 0.48626959323883057, "learning_rate": 2.1304730938080364e-05, "loss": 0.11182975769042969, "step": 3707 }, { "epoch": 0.516686407022922, "grad_norm": 0.5025616884231567, "learning_rate": 2.129532374016524e-05, "loss": 0.1324024200439453, "step": 3708 }, { "epoch": 0.5168257507141364, "grad_norm": 0.5887278914451599, "learning_rate": 2.128591625445876e-05, "loss": 0.11343955993652344, "step": 3709 }, { "epoch": 0.5169650944053508, "grad_norm": 0.669082760810852, "learning_rate": 2.127650848305104e-05, "loss": 0.10241508483886719, "step": 3710 }, { "epoch": 0.5171044380965651, "grad_norm": 0.38598114252090454, "learning_rate": 2.1267100428032276e-05, "loss": 0.08838272094726562, "step": 3711 }, { "epoch": 0.5172437817877795, "grad_norm": 0.2648531198501587, "learning_rate": 2.1257692091492724e-05, "loss": 0.08539199829101562, "step": 3712 }, { "epoch": 0.5173831254789939, "grad_norm": 0.27213671803474426, "learning_rate": 2.1248283475522712e-05, "loss": 0.08123111724853516, "step": 3713 }, { "epoch": 0.5175224691702083, "grad_norm": 0.4318782687187195, "learning_rate": 2.1238874582212602e-05, "loss": 0.09605121612548828, "step": 3714 }, { "epoch": 0.5176618128614227, "grad_norm": 0.3763803541660309, "learning_rate": 2.1229465413652854e-05, "loss": 0.08822441101074219, "step": 3715 }, { "epoch": 0.517801156552637, "grad_norm": 0.6045380234718323, "learning_rate": 2.122005597193395e-05, "loss": 0.09681987762451172, "step": 3716 }, { "epoch": 0.5179405002438514, "grad_norm": 0.27451291680336, "learning_rate": 2.1210646259146466e-05, "loss": 0.08495330810546875, "step": 3717 }, { "epoch": 0.5180798439350658, "grad_norm": 0.269501268863678, "learning_rate": 2.1201236277381028e-05, "loss": 0.07447242736816406, "step": 3718 }, { "epoch": 0.5182191876262803, "grad_norm": 0.26348036527633667, "learning_rate": 2.119182602872831e-05, "loss": 0.07519245147705078, "step": 3719 }, { "epoch": 0.5183585313174947, "grad_norm": 0.502983570098877, "learning_rate": 2.1182415515279056e-05, "loss": 0.11710357666015625, "step": 3720 }, { "epoch": 0.518497875008709, "grad_norm": 0.3534432053565979, "learning_rate": 2.117300473912407e-05, "loss": 0.0880136489868164, "step": 3721 }, { "epoch": 0.5186372186999234, "grad_norm": 0.3735477030277252, "learning_rate": 2.1163593702354213e-05, "loss": 0.082489013671875, "step": 3722 }, { "epoch": 0.5187765623911378, "grad_norm": 0.762271523475647, "learning_rate": 2.11541824070604e-05, "loss": 0.10651016235351562, "step": 3723 }, { "epoch": 0.5189159060823522, "grad_norm": 0.5533306002616882, "learning_rate": 2.114477085533361e-05, "loss": 0.12658309936523438, "step": 3724 }, { "epoch": 0.5190552497735665, "grad_norm": 0.3389163911342621, "learning_rate": 2.1135359049264868e-05, "loss": 0.0807638168334961, "step": 3725 }, { "epoch": 0.5191945934647809, "grad_norm": 0.4042077362537384, "learning_rate": 2.1125946990945264e-05, "loss": 0.08956146240234375, "step": 3726 }, { "epoch": 0.5193339371559953, "grad_norm": 0.6003642082214355, "learning_rate": 2.111653468246595e-05, "loss": 0.11186027526855469, "step": 3727 }, { "epoch": 0.5194732808472097, "grad_norm": 0.5273147821426392, "learning_rate": 2.1107122125918112e-05, "loss": 0.10215902328491211, "step": 3728 }, { "epoch": 0.519612624538424, "grad_norm": 0.4350491166114807, "learning_rate": 2.1097709323393026e-05, "loss": 0.11403083801269531, "step": 3729 }, { "epoch": 0.5197519682296384, "grad_norm": 0.4470135569572449, "learning_rate": 2.1088296276981978e-05, "loss": 0.08587646484375, "step": 3730 }, { "epoch": 0.5198913119208528, "grad_norm": 0.5227817296981812, "learning_rate": 2.1078882988776352e-05, "loss": 0.11548233032226562, "step": 3731 }, { "epoch": 0.5200306556120672, "grad_norm": 0.573297917842865, "learning_rate": 2.1069469460867547e-05, "loss": 0.13646888732910156, "step": 3732 }, { "epoch": 0.5201699993032816, "grad_norm": 0.32459381222724915, "learning_rate": 2.106005569534705e-05, "loss": 0.09409523010253906, "step": 3733 }, { "epoch": 0.5203093429944959, "grad_norm": 0.38113951683044434, "learning_rate": 2.105064169430638e-05, "loss": 0.09031295776367188, "step": 3734 }, { "epoch": 0.5204486866857103, "grad_norm": 0.28881099820137024, "learning_rate": 2.1041227459837112e-05, "loss": 0.07215499877929688, "step": 3735 }, { "epoch": 0.5205880303769247, "grad_norm": 0.48555436730384827, "learning_rate": 2.103181299403088e-05, "loss": 0.08647918701171875, "step": 3736 }, { "epoch": 0.5207273740681391, "grad_norm": 0.46159493923187256, "learning_rate": 2.1022398298979345e-05, "loss": 0.1125640869140625, "step": 3737 }, { "epoch": 0.5208667177593534, "grad_norm": 0.685918927192688, "learning_rate": 2.1012983376774255e-05, "loss": 0.11599922180175781, "step": 3738 }, { "epoch": 0.5210060614505678, "grad_norm": 0.21120722591876984, "learning_rate": 2.100356822950739e-05, "loss": 0.06370735168457031, "step": 3739 }, { "epoch": 0.5211454051417822, "grad_norm": 0.43901756405830383, "learning_rate": 2.099415285927057e-05, "loss": 0.09376716613769531, "step": 3740 }, { "epoch": 0.5212847488329966, "grad_norm": 0.3358260691165924, "learning_rate": 2.0984737268155686e-05, "loss": 0.08922576904296875, "step": 3741 }, { "epoch": 0.521424092524211, "grad_norm": 0.5238087177276611, "learning_rate": 2.097532145825466e-05, "loss": 0.09090042114257812, "step": 3742 }, { "epoch": 0.5215634362154253, "grad_norm": 0.42208331823349, "learning_rate": 2.0965905431659475e-05, "loss": 0.08283042907714844, "step": 3743 }, { "epoch": 0.5217027799066397, "grad_norm": 0.28850752115249634, "learning_rate": 2.0956489190462156e-05, "loss": 0.08185482025146484, "step": 3744 }, { "epoch": 0.5218421235978541, "grad_norm": 0.7066154479980469, "learning_rate": 2.094707273675477e-05, "loss": 0.1384868621826172, "step": 3745 }, { "epoch": 0.5219814672890685, "grad_norm": 0.43988296389579773, "learning_rate": 2.0937656072629444e-05, "loss": 0.09201431274414062, "step": 3746 }, { "epoch": 0.5221208109802828, "grad_norm": 0.42135465145111084, "learning_rate": 2.0928239200178355e-05, "loss": 0.08837127685546875, "step": 3747 }, { "epoch": 0.5222601546714972, "grad_norm": 0.7186415195465088, "learning_rate": 2.0918822121493697e-05, "loss": 0.1274890899658203, "step": 3748 }, { "epoch": 0.5223994983627116, "grad_norm": 0.3887670338153839, "learning_rate": 2.0909404838667746e-05, "loss": 0.08230972290039062, "step": 3749 }, { "epoch": 0.522538842053926, "grad_norm": 0.30863457918167114, "learning_rate": 2.08999873537928e-05, "loss": 0.08733367919921875, "step": 3750 }, { "epoch": 0.5226781857451404, "grad_norm": 0.3497994840145111, "learning_rate": 2.089056966896122e-05, "loss": 0.09687423706054688, "step": 3751 }, { "epoch": 0.5228175294363547, "grad_norm": 0.5708939433097839, "learning_rate": 2.088115178626539e-05, "loss": 0.09667587280273438, "step": 3752 }, { "epoch": 0.5229568731275691, "grad_norm": 0.2872249186038971, "learning_rate": 2.0871733707797738e-05, "loss": 0.06650352478027344, "step": 3753 }, { "epoch": 0.5230962168187835, "grad_norm": 0.6846075654029846, "learning_rate": 2.0862315435650766e-05, "loss": 0.1037435531616211, "step": 3754 }, { "epoch": 0.5232355605099979, "grad_norm": 0.32103070616722107, "learning_rate": 2.085289697191699e-05, "loss": 0.08530998229980469, "step": 3755 }, { "epoch": 0.5233749042012122, "grad_norm": 0.35416701436042786, "learning_rate": 2.0843478318688978e-05, "loss": 0.08759307861328125, "step": 3756 }, { "epoch": 0.5235142478924266, "grad_norm": 0.3687765896320343, "learning_rate": 2.083405947805934e-05, "loss": 0.08040428161621094, "step": 3757 }, { "epoch": 0.523653591583641, "grad_norm": 0.7897037267684937, "learning_rate": 2.082464045212073e-05, "loss": 0.10051631927490234, "step": 3758 }, { "epoch": 0.5237929352748554, "grad_norm": 0.4318329095840454, "learning_rate": 2.0815221242965835e-05, "loss": 0.09160995483398438, "step": 3759 }, { "epoch": 0.5239322789660699, "grad_norm": 0.4009858965873718, "learning_rate": 2.0805801852687396e-05, "loss": 0.09589576721191406, "step": 3760 }, { "epoch": 0.5240716226572842, "grad_norm": 0.308749258518219, "learning_rate": 2.0796382283378183e-05, "loss": 0.07955551147460938, "step": 3761 }, { "epoch": 0.5242109663484986, "grad_norm": 0.6224801540374756, "learning_rate": 2.0786962537131e-05, "loss": 0.08612060546875, "step": 3762 }, { "epoch": 0.524350310039713, "grad_norm": 0.5004847645759583, "learning_rate": 2.0777542616038718e-05, "loss": 0.08172130584716797, "step": 3763 }, { "epoch": 0.5244896537309274, "grad_norm": 0.7863150835037231, "learning_rate": 2.0768122522194208e-05, "loss": 0.12912559509277344, "step": 3764 }, { "epoch": 0.5246289974221418, "grad_norm": 0.2799774706363678, "learning_rate": 2.0758702257690418e-05, "loss": 0.07049560546875, "step": 3765 }, { "epoch": 0.5247683411133561, "grad_norm": 0.25755569338798523, "learning_rate": 2.0749281824620306e-05, "loss": 0.06817436218261719, "step": 3766 }, { "epoch": 0.5249076848045705, "grad_norm": 0.3820091784000397, "learning_rate": 2.073986122507688e-05, "loss": 0.08180046081542969, "step": 3767 }, { "epoch": 0.5250470284957849, "grad_norm": 0.39923346042633057, "learning_rate": 2.0730440461153183e-05, "loss": 0.09477615356445312, "step": 3768 }, { "epoch": 0.5251863721869993, "grad_norm": 0.4792860746383667, "learning_rate": 2.0721019534942285e-05, "loss": 0.09557151794433594, "step": 3769 }, { "epoch": 0.5253257158782136, "grad_norm": 0.32124465703964233, "learning_rate": 2.071159844853731e-05, "loss": 0.07466602325439453, "step": 3770 }, { "epoch": 0.525465059569428, "grad_norm": 0.5365555286407471, "learning_rate": 2.070217720403141e-05, "loss": 0.11181354522705078, "step": 3771 }, { "epoch": 0.5256044032606424, "grad_norm": 0.623781681060791, "learning_rate": 2.0692755803517764e-05, "loss": 0.12084197998046875, "step": 3772 }, { "epoch": 0.5257437469518568, "grad_norm": 0.25623396039009094, "learning_rate": 2.0683334249089593e-05, "loss": 0.07038688659667969, "step": 3773 }, { "epoch": 0.5258830906430711, "grad_norm": 0.43338853120803833, "learning_rate": 2.067391254284015e-05, "loss": 0.11022758483886719, "step": 3774 }, { "epoch": 0.5260224343342855, "grad_norm": 0.9088030457496643, "learning_rate": 2.066449068686273e-05, "loss": 0.1260051727294922, "step": 3775 }, { "epoch": 0.5261617780254999, "grad_norm": 0.3546534478664398, "learning_rate": 2.065506868325065e-05, "loss": 0.09360957145690918, "step": 3776 }, { "epoch": 0.5263011217167143, "grad_norm": 0.41577398777008057, "learning_rate": 2.0645646534097262e-05, "loss": 0.10062408447265625, "step": 3777 }, { "epoch": 0.5264404654079287, "grad_norm": 0.5167436003684998, "learning_rate": 2.0636224241495954e-05, "loss": 0.08769798278808594, "step": 3778 }, { "epoch": 0.526579809099143, "grad_norm": 0.4334775507450104, "learning_rate": 2.0626801807540148e-05, "loss": 0.09807777404785156, "step": 3779 }, { "epoch": 0.5267191527903574, "grad_norm": 0.2959149181842804, "learning_rate": 2.0617379234323285e-05, "loss": 0.07808113098144531, "step": 3780 }, { "epoch": 0.5268584964815718, "grad_norm": 0.7268750071525574, "learning_rate": 2.060795652393886e-05, "loss": 0.13753890991210938, "step": 3781 }, { "epoch": 0.5269978401727862, "grad_norm": 0.40258875489234924, "learning_rate": 2.0598533678480367e-05, "loss": 0.10054969787597656, "step": 3782 }, { "epoch": 0.5271371838640005, "grad_norm": 0.2940709590911865, "learning_rate": 2.0589110700041357e-05, "loss": 0.08047103881835938, "step": 3783 }, { "epoch": 0.5272765275552149, "grad_norm": 0.3144650161266327, "learning_rate": 2.0579687590715404e-05, "loss": 0.07663726806640625, "step": 3784 }, { "epoch": 0.5274158712464293, "grad_norm": 0.5522154569625854, "learning_rate": 2.0570264352596096e-05, "loss": 0.12514114379882812, "step": 3785 }, { "epoch": 0.5275552149376437, "grad_norm": 0.41386714577674866, "learning_rate": 2.0560840987777074e-05, "loss": 0.09080696105957031, "step": 3786 }, { "epoch": 0.5276945586288581, "grad_norm": 0.3696684241294861, "learning_rate": 2.0551417498351985e-05, "loss": 0.09041595458984375, "step": 3787 }, { "epoch": 0.5278339023200724, "grad_norm": 0.2963676452636719, "learning_rate": 2.0541993886414516e-05, "loss": 0.07682418823242188, "step": 3788 }, { "epoch": 0.5279732460112868, "grad_norm": 0.6746159195899963, "learning_rate": 2.0532570154058385e-05, "loss": 0.10757827758789062, "step": 3789 }, { "epoch": 0.5281125897025012, "grad_norm": 0.5066646933555603, "learning_rate": 2.0523146303377318e-05, "loss": 0.11199569702148438, "step": 3790 }, { "epoch": 0.5282519333937156, "grad_norm": 0.2815934419631958, "learning_rate": 2.0513722336465092e-05, "loss": 0.07396507263183594, "step": 3791 }, { "epoch": 0.52839127708493, "grad_norm": 0.26682940125465393, "learning_rate": 2.0504298255415488e-05, "loss": 0.07218170166015625, "step": 3792 }, { "epoch": 0.5285306207761443, "grad_norm": 0.416168212890625, "learning_rate": 2.0494874062322324e-05, "loss": 0.07485580444335938, "step": 3793 }, { "epoch": 0.5286699644673587, "grad_norm": 0.25325894355773926, "learning_rate": 2.0485449759279442e-05, "loss": 0.08135509490966797, "step": 3794 }, { "epoch": 0.5288093081585731, "grad_norm": 0.25182971358299255, "learning_rate": 2.047602534838071e-05, "loss": 0.07892608642578125, "step": 3795 }, { "epoch": 0.5289486518497875, "grad_norm": 0.6438229084014893, "learning_rate": 2.0466600831720006e-05, "loss": 0.1346454620361328, "step": 3796 }, { "epoch": 0.5290879955410018, "grad_norm": 0.7453251481056213, "learning_rate": 2.0457176211391257e-05, "loss": 0.13410663604736328, "step": 3797 }, { "epoch": 0.5292273392322162, "grad_norm": 0.7843419313430786, "learning_rate": 2.0447751489488387e-05, "loss": 0.1317424774169922, "step": 3798 }, { "epoch": 0.5293666829234306, "grad_norm": 0.4977480471134186, "learning_rate": 2.0438326668105364e-05, "loss": 0.11980056762695312, "step": 3799 }, { "epoch": 0.5295060266146451, "grad_norm": 0.4485436677932739, "learning_rate": 2.0428901749336157e-05, "loss": 0.12494277954101562, "step": 3800 }, { "epoch": 0.5296453703058595, "grad_norm": 0.5202787518501282, "learning_rate": 2.0419476735274774e-05, "loss": 0.09427452087402344, "step": 3801 }, { "epoch": 0.5297847139970738, "grad_norm": 0.4344961643218994, "learning_rate": 2.0410051628015247e-05, "loss": 0.09791183471679688, "step": 3802 }, { "epoch": 0.5299240576882882, "grad_norm": 0.29683443903923035, "learning_rate": 2.0400626429651595e-05, "loss": 0.07895755767822266, "step": 3803 }, { "epoch": 0.5300634013795026, "grad_norm": 0.46187350153923035, "learning_rate": 2.0391201142277905e-05, "loss": 0.1009368896484375, "step": 3804 }, { "epoch": 0.530202745070717, "grad_norm": 0.2928805351257324, "learning_rate": 2.038177576798825e-05, "loss": 0.07193374633789062, "step": 3805 }, { "epoch": 0.5303420887619313, "grad_norm": 0.2958899140357971, "learning_rate": 2.0372350308876732e-05, "loss": 0.07990264892578125, "step": 3806 }, { "epoch": 0.5304814324531457, "grad_norm": 0.22885552048683167, "learning_rate": 2.0362924767037485e-05, "loss": 0.06717300415039062, "step": 3807 }, { "epoch": 0.5306207761443601, "grad_norm": 0.27310827374458313, "learning_rate": 2.0353499144564636e-05, "loss": 0.06908607482910156, "step": 3808 }, { "epoch": 0.5307601198355745, "grad_norm": 0.3984753489494324, "learning_rate": 2.0344073443552347e-05, "loss": 0.07847404479980469, "step": 3809 }, { "epoch": 0.5308994635267889, "grad_norm": 0.5487156510353088, "learning_rate": 2.0334647666094796e-05, "loss": 0.10645866394042969, "step": 3810 }, { "epoch": 0.5310388072180032, "grad_norm": 0.4339144229888916, "learning_rate": 2.0325221814286173e-05, "loss": 0.0990753173828125, "step": 3811 }, { "epoch": 0.5311781509092176, "grad_norm": 0.4616285264492035, "learning_rate": 2.031579589022068e-05, "loss": 0.08770751953125, "step": 3812 }, { "epoch": 0.531317494600432, "grad_norm": 0.3664989173412323, "learning_rate": 2.0306369895992564e-05, "loss": 0.07766151428222656, "step": 3813 }, { "epoch": 0.5314568382916464, "grad_norm": 0.2466147243976593, "learning_rate": 2.029694383369604e-05, "loss": 0.07488059997558594, "step": 3814 }, { "epoch": 0.5315961819828607, "grad_norm": 0.23618072271347046, "learning_rate": 2.028751770542538e-05, "loss": 0.0716238021850586, "step": 3815 }, { "epoch": 0.5317355256740751, "grad_norm": 0.5335721373558044, "learning_rate": 2.0278091513274848e-05, "loss": 0.11327552795410156, "step": 3816 }, { "epoch": 0.5318748693652895, "grad_norm": 0.558652400970459, "learning_rate": 2.0268665259338736e-05, "loss": 0.11819648742675781, "step": 3817 }, { "epoch": 0.5320142130565039, "grad_norm": 0.8747042417526245, "learning_rate": 2.025923894571134e-05, "loss": 0.16401100158691406, "step": 3818 }, { "epoch": 0.5321535567477182, "grad_norm": 0.5491674542427063, "learning_rate": 2.0249812574486957e-05, "loss": 0.1113128662109375, "step": 3819 }, { "epoch": 0.5322929004389326, "grad_norm": 0.5198338627815247, "learning_rate": 2.024038614775993e-05, "loss": 0.09024429321289062, "step": 3820 }, { "epoch": 0.532432244130147, "grad_norm": 0.41959327459335327, "learning_rate": 2.0230959667624587e-05, "loss": 0.09379768371582031, "step": 3821 }, { "epoch": 0.5325715878213614, "grad_norm": 0.3910626769065857, "learning_rate": 2.022153313617528e-05, "loss": 0.09554386138916016, "step": 3822 }, { "epoch": 0.5327109315125758, "grad_norm": 0.3817790746688843, "learning_rate": 2.0212106555506364e-05, "loss": 0.09573745727539062, "step": 3823 }, { "epoch": 0.5328502752037901, "grad_norm": 0.7284618616104126, "learning_rate": 2.0202679927712224e-05, "loss": 0.14071369171142578, "step": 3824 }, { "epoch": 0.5329896188950045, "grad_norm": 0.7249674797058105, "learning_rate": 2.0193253254887223e-05, "loss": 0.09522438049316406, "step": 3825 }, { "epoch": 0.5331289625862189, "grad_norm": 0.5234107375144958, "learning_rate": 2.018382653912576e-05, "loss": 0.1364154815673828, "step": 3826 }, { "epoch": 0.5332683062774333, "grad_norm": 0.3435443639755249, "learning_rate": 2.0174399782522242e-05, "loss": 0.08286666870117188, "step": 3827 }, { "epoch": 0.5334076499686476, "grad_norm": 0.6361182332038879, "learning_rate": 2.016497298717107e-05, "loss": 0.09104156494140625, "step": 3828 }, { "epoch": 0.533546993659862, "grad_norm": 0.27825477719306946, "learning_rate": 2.015554615516667e-05, "loss": 0.06898307800292969, "step": 3829 }, { "epoch": 0.5336863373510764, "grad_norm": 0.5545371770858765, "learning_rate": 2.014611928860346e-05, "loss": 0.09286117553710938, "step": 3830 }, { "epoch": 0.5338256810422908, "grad_norm": 0.2747955918312073, "learning_rate": 2.0136692389575892e-05, "loss": 0.060009002685546875, "step": 3831 }, { "epoch": 0.5339650247335052, "grad_norm": 0.4900788962841034, "learning_rate": 2.012726546017838e-05, "loss": 0.07957267761230469, "step": 3832 }, { "epoch": 0.5341043684247195, "grad_norm": 0.3292944133281708, "learning_rate": 2.01178385025054e-05, "loss": 0.0825042724609375, "step": 3833 }, { "epoch": 0.5342437121159339, "grad_norm": 0.30794966220855713, "learning_rate": 2.0108411518651388e-05, "loss": 0.07288932800292969, "step": 3834 }, { "epoch": 0.5343830558071483, "grad_norm": 0.4722290337085724, "learning_rate": 2.0098984510710812e-05, "loss": 0.11651802062988281, "step": 3835 }, { "epoch": 0.5345223994983627, "grad_norm": 0.39826294779777527, "learning_rate": 2.0089557480778144e-05, "loss": 0.11958885192871094, "step": 3836 }, { "epoch": 0.534661743189577, "grad_norm": 0.29221001267433167, "learning_rate": 2.0080130430947842e-05, "loss": 0.07336187362670898, "step": 3837 }, { "epoch": 0.5348010868807914, "grad_norm": 0.38710153102874756, "learning_rate": 2.007070336331439e-05, "loss": 0.07342910766601562, "step": 3838 }, { "epoch": 0.5349404305720058, "grad_norm": 0.482023686170578, "learning_rate": 2.0061276279972265e-05, "loss": 0.11854743957519531, "step": 3839 }, { "epoch": 0.5350797742632203, "grad_norm": 0.38023141026496887, "learning_rate": 2.0051849183015953e-05, "loss": 0.0763702392578125, "step": 3840 }, { "epoch": 0.5352191179544347, "grad_norm": 0.39840298891067505, "learning_rate": 2.004242207453993e-05, "loss": 0.0769805908203125, "step": 3841 }, { "epoch": 0.535358461645649, "grad_norm": 0.5028513073921204, "learning_rate": 2.0032994956638695e-05, "loss": 0.13070106506347656, "step": 3842 }, { "epoch": 0.5354978053368634, "grad_norm": 0.5282137393951416, "learning_rate": 2.0023567831406733e-05, "loss": 0.09972667694091797, "step": 3843 }, { "epoch": 0.5356371490280778, "grad_norm": 0.49454590678215027, "learning_rate": 2.0014140700938532e-05, "loss": 0.08092117309570312, "step": 3844 }, { "epoch": 0.5357764927192922, "grad_norm": 0.5855965614318848, "learning_rate": 2.0004713567328594e-05, "loss": 0.07767486572265625, "step": 3845 }, { "epoch": 0.5359158364105066, "grad_norm": 0.5014634728431702, "learning_rate": 1.9995286432671412e-05, "loss": 0.08607864379882812, "step": 3846 }, { "epoch": 0.5360551801017209, "grad_norm": 0.482040137052536, "learning_rate": 1.9985859299061474e-05, "loss": 0.08632278442382812, "step": 3847 }, { "epoch": 0.5361945237929353, "grad_norm": 0.8831118941307068, "learning_rate": 1.9976432168593273e-05, "loss": 0.1425323486328125, "step": 3848 }, { "epoch": 0.5363338674841497, "grad_norm": 0.4596536159515381, "learning_rate": 1.996700504336131e-05, "loss": 0.07222938537597656, "step": 3849 }, { "epoch": 0.5364732111753641, "grad_norm": 0.35978060960769653, "learning_rate": 1.9957577925460074e-05, "loss": 0.08877420425415039, "step": 3850 }, { "epoch": 0.5366125548665784, "grad_norm": 0.22182075679302216, "learning_rate": 1.994815081698406e-05, "loss": 0.07085037231445312, "step": 3851 }, { "epoch": 0.5367518985577928, "grad_norm": 0.38891637325286865, "learning_rate": 1.9938723720027745e-05, "loss": 0.07542800903320312, "step": 3852 }, { "epoch": 0.5368912422490072, "grad_norm": 0.33675673604011536, "learning_rate": 1.9929296636685615e-05, "loss": 0.07526779174804688, "step": 3853 }, { "epoch": 0.5370305859402216, "grad_norm": 0.43933358788490295, "learning_rate": 1.9919869569052164e-05, "loss": 0.09370613098144531, "step": 3854 }, { "epoch": 0.537169929631436, "grad_norm": 0.35994234681129456, "learning_rate": 1.991044251922186e-05, "loss": 0.08220672607421875, "step": 3855 }, { "epoch": 0.5373092733226503, "grad_norm": 0.6342858672142029, "learning_rate": 1.9901015489289188e-05, "loss": 0.12670516967773438, "step": 3856 }, { "epoch": 0.5374486170138647, "grad_norm": 0.4870028495788574, "learning_rate": 1.989158848134862e-05, "loss": 0.11120033264160156, "step": 3857 }, { "epoch": 0.5375879607050791, "grad_norm": 0.5184480547904968, "learning_rate": 1.988216149749461e-05, "loss": 0.10367012023925781, "step": 3858 }, { "epoch": 0.5377273043962935, "grad_norm": 0.6856151819229126, "learning_rate": 1.9872734539821626e-05, "loss": 0.1280965805053711, "step": 3859 }, { "epoch": 0.5378666480875078, "grad_norm": 0.22074289619922638, "learning_rate": 1.9863307610424115e-05, "loss": 0.0680685043334961, "step": 3860 }, { "epoch": 0.5380059917787222, "grad_norm": 0.35646674036979675, "learning_rate": 1.985388071139654e-05, "loss": 0.09196853637695312, "step": 3861 }, { "epoch": 0.5381453354699366, "grad_norm": 0.46178749203681946, "learning_rate": 1.984445384483334e-05, "loss": 0.10436630249023438, "step": 3862 }, { "epoch": 0.538284679161151, "grad_norm": 0.5857861042022705, "learning_rate": 1.9835027012828937e-05, "loss": 0.11117744445800781, "step": 3863 }, { "epoch": 0.5384240228523653, "grad_norm": 0.22374816238880157, "learning_rate": 1.9825600217477765e-05, "loss": 0.0763092041015625, "step": 3864 }, { "epoch": 0.5385633665435797, "grad_norm": 0.2297087162733078, "learning_rate": 1.9816173460874243e-05, "loss": 0.056969642639160156, "step": 3865 }, { "epoch": 0.5387027102347941, "grad_norm": 0.7309514284133911, "learning_rate": 1.980674674511278e-05, "loss": 0.1277790069580078, "step": 3866 }, { "epoch": 0.5388420539260085, "grad_norm": 0.3819792866706848, "learning_rate": 1.9797320072287786e-05, "loss": 0.0969696044921875, "step": 3867 }, { "epoch": 0.5389813976172229, "grad_norm": 0.24344579875469208, "learning_rate": 1.9787893444493643e-05, "loss": 0.08148956298828125, "step": 3868 }, { "epoch": 0.5391207413084372, "grad_norm": 0.3158126473426819, "learning_rate": 1.9778466863824726e-05, "loss": 0.0901031494140625, "step": 3869 }, { "epoch": 0.5392600849996516, "grad_norm": 0.2835071384906769, "learning_rate": 1.9769040332375416e-05, "loss": 0.09578514099121094, "step": 3870 }, { "epoch": 0.539399428690866, "grad_norm": 0.42323511838912964, "learning_rate": 1.975961385224007e-05, "loss": 0.09077262878417969, "step": 3871 }, { "epoch": 0.5395387723820804, "grad_norm": 0.27880725264549255, "learning_rate": 1.9750187425513053e-05, "loss": 0.08073568344116211, "step": 3872 }, { "epoch": 0.5396781160732947, "grad_norm": 0.39048072695732117, "learning_rate": 1.9740761054288672e-05, "loss": 0.0747222900390625, "step": 3873 }, { "epoch": 0.5398174597645091, "grad_norm": 0.4747856557369232, "learning_rate": 1.973133474066127e-05, "loss": 0.11139297485351562, "step": 3874 }, { "epoch": 0.5399568034557235, "grad_norm": 0.28921470046043396, "learning_rate": 1.9721908486725156e-05, "loss": 0.07567119598388672, "step": 3875 }, { "epoch": 0.5400961471469379, "grad_norm": 0.2134292870759964, "learning_rate": 1.9712482294574622e-05, "loss": 0.061011314392089844, "step": 3876 }, { "epoch": 0.5402354908381523, "grad_norm": 0.4929518401622772, "learning_rate": 1.9703056166303963e-05, "loss": 0.10542106628417969, "step": 3877 }, { "epoch": 0.5403748345293666, "grad_norm": 0.37527820467948914, "learning_rate": 1.9693630104007446e-05, "loss": 0.09271430969238281, "step": 3878 }, { "epoch": 0.540514178220581, "grad_norm": 0.6134698390960693, "learning_rate": 1.9684204109779324e-05, "loss": 0.10982704162597656, "step": 3879 }, { "epoch": 0.5406535219117955, "grad_norm": 0.372386634349823, "learning_rate": 1.9674778185713834e-05, "loss": 0.1056985855102539, "step": 3880 }, { "epoch": 0.5407928656030099, "grad_norm": 0.3977491557598114, "learning_rate": 1.966535233390521e-05, "loss": 0.0801534652709961, "step": 3881 }, { "epoch": 0.5409322092942243, "grad_norm": 0.33028313517570496, "learning_rate": 1.9655926556447656e-05, "loss": 0.08061408996582031, "step": 3882 }, { "epoch": 0.5410715529854386, "grad_norm": 0.6627756953239441, "learning_rate": 1.9646500855435374e-05, "loss": 0.11822700500488281, "step": 3883 }, { "epoch": 0.541210896676653, "grad_norm": 0.6197908520698547, "learning_rate": 1.963707523296252e-05, "loss": 0.13428688049316406, "step": 3884 }, { "epoch": 0.5413502403678674, "grad_norm": 0.5019155740737915, "learning_rate": 1.962764969112327e-05, "loss": 0.09509849548339844, "step": 3885 }, { "epoch": 0.5414895840590818, "grad_norm": 0.6304894089698792, "learning_rate": 1.9618224232011757e-05, "loss": 0.12030029296875, "step": 3886 }, { "epoch": 0.5416289277502961, "grad_norm": 0.31857267022132874, "learning_rate": 1.96087988577221e-05, "loss": 0.0790557861328125, "step": 3887 }, { "epoch": 0.5417682714415105, "grad_norm": 0.4645424485206604, "learning_rate": 1.9599373570348416e-05, "loss": 0.0848236083984375, "step": 3888 }, { "epoch": 0.5419076151327249, "grad_norm": 0.3319401144981384, "learning_rate": 1.9589948371984766e-05, "loss": 0.086517333984375, "step": 3889 }, { "epoch": 0.5420469588239393, "grad_norm": 0.6384466886520386, "learning_rate": 1.958052326472523e-05, "loss": 0.10201644897460938, "step": 3890 }, { "epoch": 0.5421863025151537, "grad_norm": 0.8067552447319031, "learning_rate": 1.957109825066385e-05, "loss": 0.09109306335449219, "step": 3891 }, { "epoch": 0.542325646206368, "grad_norm": 0.5014331936836243, "learning_rate": 1.956167333189464e-05, "loss": 0.08418655395507812, "step": 3892 }, { "epoch": 0.5424649898975824, "grad_norm": 0.5645598769187927, "learning_rate": 1.9552248510511616e-05, "loss": 0.10551166534423828, "step": 3893 }, { "epoch": 0.5426043335887968, "grad_norm": 0.447004497051239, "learning_rate": 1.954282378860875e-05, "loss": 0.10762977600097656, "step": 3894 }, { "epoch": 0.5427436772800112, "grad_norm": 0.2882435917854309, "learning_rate": 1.9533399168279997e-05, "loss": 0.085235595703125, "step": 3895 }, { "epoch": 0.5428830209712255, "grad_norm": 0.2437857985496521, "learning_rate": 1.9523974651619296e-05, "loss": 0.07326650619506836, "step": 3896 }, { "epoch": 0.5430223646624399, "grad_norm": 0.4714573919773102, "learning_rate": 1.951455024072056e-05, "loss": 0.10591697692871094, "step": 3897 }, { "epoch": 0.5431617083536543, "grad_norm": 0.4197983741760254, "learning_rate": 1.950512593767768e-05, "loss": 0.1025247573852539, "step": 3898 }, { "epoch": 0.5433010520448687, "grad_norm": 0.3259736895561218, "learning_rate": 1.9495701744584522e-05, "loss": 0.06965160369873047, "step": 3899 }, { "epoch": 0.543440395736083, "grad_norm": 0.5673220753669739, "learning_rate": 1.9486277663534915e-05, "loss": 0.11825752258300781, "step": 3900 }, { "epoch": 0.5435797394272974, "grad_norm": 0.3076384961605072, "learning_rate": 1.9476853696622686e-05, "loss": 0.08717727661132812, "step": 3901 }, { "epoch": 0.5437190831185118, "grad_norm": 0.3325883746147156, "learning_rate": 1.9467429845941622e-05, "loss": 0.0806121826171875, "step": 3902 }, { "epoch": 0.5438584268097262, "grad_norm": 0.3728259205818176, "learning_rate": 1.9458006113585484e-05, "loss": 0.08054351806640625, "step": 3903 }, { "epoch": 0.5439977705009406, "grad_norm": 0.32385241985321045, "learning_rate": 1.9448582501648025e-05, "loss": 0.06166362762451172, "step": 3904 }, { "epoch": 0.5441371141921549, "grad_norm": 0.35998374223709106, "learning_rate": 1.9439159012222936e-05, "loss": 0.07708930969238281, "step": 3905 }, { "epoch": 0.5442764578833693, "grad_norm": 0.42142197489738464, "learning_rate": 1.9429735647403908e-05, "loss": 0.096710205078125, "step": 3906 }, { "epoch": 0.5444158015745837, "grad_norm": 0.7051739692687988, "learning_rate": 1.9420312409284606e-05, "loss": 0.14265060424804688, "step": 3907 }, { "epoch": 0.5445551452657981, "grad_norm": 0.5354011058807373, "learning_rate": 1.9410889299958643e-05, "loss": 0.11473846435546875, "step": 3908 }, { "epoch": 0.5446944889570124, "grad_norm": 0.6049163937568665, "learning_rate": 1.940146632151964e-05, "loss": 0.09719657897949219, "step": 3909 }, { "epoch": 0.5448338326482268, "grad_norm": 0.5465180277824402, "learning_rate": 1.939204347606115e-05, "loss": 0.09403038024902344, "step": 3910 }, { "epoch": 0.5449731763394412, "grad_norm": 0.3624514937400818, "learning_rate": 1.938262076567672e-05, "loss": 0.0856170654296875, "step": 3911 }, { "epoch": 0.5451125200306556, "grad_norm": 0.5709749460220337, "learning_rate": 1.9373198192459856e-05, "loss": 0.09744834899902344, "step": 3912 }, { "epoch": 0.54525186372187, "grad_norm": 0.5682340860366821, "learning_rate": 1.936377575850405e-05, "loss": 0.08945083618164062, "step": 3913 }, { "epoch": 0.5453912074130843, "grad_norm": 0.6351409554481506, "learning_rate": 1.935435346590274e-05, "loss": 0.10314083099365234, "step": 3914 }, { "epoch": 0.5455305511042987, "grad_norm": 0.4473152160644531, "learning_rate": 1.934493131674936e-05, "loss": 0.06962394714355469, "step": 3915 }, { "epoch": 0.5456698947955131, "grad_norm": 0.49571606516838074, "learning_rate": 1.9335509313137275e-05, "loss": 0.10107421875, "step": 3916 }, { "epoch": 0.5458092384867275, "grad_norm": 0.7396365404129028, "learning_rate": 1.9326087457159856e-05, "loss": 0.1080322265625, "step": 3917 }, { "epoch": 0.5459485821779418, "grad_norm": 0.47407665848731995, "learning_rate": 1.9316665750910414e-05, "loss": 0.10187339782714844, "step": 3918 }, { "epoch": 0.5460879258691562, "grad_norm": 0.5741749405860901, "learning_rate": 1.930724419648224e-05, "loss": 0.11918449401855469, "step": 3919 }, { "epoch": 0.5462272695603707, "grad_norm": 0.5995572209358215, "learning_rate": 1.92978227959686e-05, "loss": 0.10688400268554688, "step": 3920 }, { "epoch": 0.5463666132515851, "grad_norm": 0.3789908289909363, "learning_rate": 1.9288401551462694e-05, "loss": 0.08160686492919922, "step": 3921 }, { "epoch": 0.5465059569427995, "grad_norm": 0.3713533878326416, "learning_rate": 1.9278980465057722e-05, "loss": 0.0953073501586914, "step": 3922 }, { "epoch": 0.5466453006340138, "grad_norm": 0.3042924702167511, "learning_rate": 1.9269559538846823e-05, "loss": 0.07583045959472656, "step": 3923 }, { "epoch": 0.5467846443252282, "grad_norm": 0.5760781168937683, "learning_rate": 1.9260138774923124e-05, "loss": 0.11906814575195312, "step": 3924 }, { "epoch": 0.5469239880164426, "grad_norm": 0.9579076170921326, "learning_rate": 1.9250718175379697e-05, "loss": 0.10641288757324219, "step": 3925 }, { "epoch": 0.547063331707657, "grad_norm": 0.449503093957901, "learning_rate": 1.924129774230959e-05, "loss": 0.0883941650390625, "step": 3926 }, { "epoch": 0.5472026753988714, "grad_norm": 0.3463016748428345, "learning_rate": 1.9231877477805795e-05, "loss": 0.0735321044921875, "step": 3927 }, { "epoch": 0.5473420190900857, "grad_norm": 0.6760953664779663, "learning_rate": 1.922245738396129e-05, "loss": 0.14137649536132812, "step": 3928 }, { "epoch": 0.5474813627813001, "grad_norm": 0.3450724184513092, "learning_rate": 1.9213037462869003e-05, "loss": 0.09736061096191406, "step": 3929 }, { "epoch": 0.5476207064725145, "grad_norm": 0.19251063466072083, "learning_rate": 1.920361771662183e-05, "loss": 0.05637550354003906, "step": 3930 }, { "epoch": 0.5477600501637289, "grad_norm": 0.25633877515792847, "learning_rate": 1.9194198147312614e-05, "loss": 0.0731649398803711, "step": 3931 }, { "epoch": 0.5478993938549432, "grad_norm": 1.1468250751495361, "learning_rate": 1.9184778757034168e-05, "loss": 0.11553764343261719, "step": 3932 }, { "epoch": 0.5480387375461576, "grad_norm": 0.416185587644577, "learning_rate": 1.9175359547879275e-05, "loss": 0.0832529067993164, "step": 3933 }, { "epoch": 0.548178081237372, "grad_norm": 0.569829523563385, "learning_rate": 1.9165940521940667e-05, "loss": 0.1107940673828125, "step": 3934 }, { "epoch": 0.5483174249285864, "grad_norm": 0.4175695478916168, "learning_rate": 1.9156521681311025e-05, "loss": 0.08311843872070312, "step": 3935 }, { "epoch": 0.5484567686198007, "grad_norm": 0.41599366068840027, "learning_rate": 1.914710302808302e-05, "loss": 0.08492660522460938, "step": 3936 }, { "epoch": 0.5485961123110151, "grad_norm": 0.46967798471450806, "learning_rate": 1.9137684564349244e-05, "loss": 0.11506271362304688, "step": 3937 }, { "epoch": 0.5487354560022295, "grad_norm": 0.3801932632923126, "learning_rate": 1.912826629220227e-05, "loss": 0.07425689697265625, "step": 3938 }, { "epoch": 0.5488747996934439, "grad_norm": 0.23948411643505096, "learning_rate": 1.911884821373462e-05, "loss": 0.06833267211914062, "step": 3939 }, { "epoch": 0.5490141433846583, "grad_norm": 0.34247496724128723, "learning_rate": 1.9109430331038784e-05, "loss": 0.0769052505493164, "step": 3940 }, { "epoch": 0.5491534870758726, "grad_norm": 0.2993677854537964, "learning_rate": 1.91000126462072e-05, "loss": 0.08601188659667969, "step": 3941 }, { "epoch": 0.549292830767087, "grad_norm": 0.42230039834976196, "learning_rate": 1.909059516133226e-05, "loss": 0.08914375305175781, "step": 3942 }, { "epoch": 0.5494321744583014, "grad_norm": 0.3353284001350403, "learning_rate": 1.9081177878506306e-05, "loss": 0.07427597045898438, "step": 3943 }, { "epoch": 0.5495715181495158, "grad_norm": 0.47984886169433594, "learning_rate": 1.907176079982165e-05, "loss": 0.12317848205566406, "step": 3944 }, { "epoch": 0.5497108618407301, "grad_norm": 0.307580828666687, "learning_rate": 1.9062343927370556e-05, "loss": 0.07515335083007812, "step": 3945 }, { "epoch": 0.5498502055319445, "grad_norm": 0.2059253454208374, "learning_rate": 1.905292726324524e-05, "loss": 0.05857563018798828, "step": 3946 }, { "epoch": 0.5499895492231589, "grad_norm": 0.25506484508514404, "learning_rate": 1.9043510809537857e-05, "loss": 0.0672006607055664, "step": 3947 }, { "epoch": 0.5501288929143733, "grad_norm": 0.42814165353775024, "learning_rate": 1.9034094568340532e-05, "loss": 0.08703994750976562, "step": 3948 }, { "epoch": 0.5502682366055877, "grad_norm": 0.4826018512248993, "learning_rate": 1.9024678541745343e-05, "loss": 0.09036445617675781, "step": 3949 }, { "epoch": 0.550407580296802, "grad_norm": 0.2568722069263458, "learning_rate": 1.901526273184432e-05, "loss": 0.07078981399536133, "step": 3950 }, { "epoch": 0.5505469239880164, "grad_norm": 0.5473968386650085, "learning_rate": 1.900584714072943e-05, "loss": 0.10625076293945312, "step": 3951 }, { "epoch": 0.5506862676792308, "grad_norm": 0.6474319696426392, "learning_rate": 1.8996431770492622e-05, "loss": 0.10700035095214844, "step": 3952 }, { "epoch": 0.5508256113704452, "grad_norm": 0.5131282806396484, "learning_rate": 1.8987016623225748e-05, "loss": 0.0976104736328125, "step": 3953 }, { "epoch": 0.5509649550616595, "grad_norm": 0.5160492658615112, "learning_rate": 1.897760170102066e-05, "loss": 0.11835670471191406, "step": 3954 }, { "epoch": 0.5511042987528739, "grad_norm": 0.26706376671791077, "learning_rate": 1.8968187005969126e-05, "loss": 0.06664276123046875, "step": 3955 }, { "epoch": 0.5512436424440883, "grad_norm": 0.30322617292404175, "learning_rate": 1.8958772540162887e-05, "loss": 0.07867145538330078, "step": 3956 }, { "epoch": 0.5513829861353027, "grad_norm": 0.2494422048330307, "learning_rate": 1.8949358305693625e-05, "loss": 0.05819129943847656, "step": 3957 }, { "epoch": 0.551522329826517, "grad_norm": 0.671815812587738, "learning_rate": 1.8939944304652952e-05, "loss": 0.1355133056640625, "step": 3958 }, { "epoch": 0.5516616735177314, "grad_norm": 0.44608765840530396, "learning_rate": 1.8930530539132456e-05, "loss": 0.09113693237304688, "step": 3959 }, { "epoch": 0.5518010172089458, "grad_norm": 0.5076836347579956, "learning_rate": 1.8921117011223655e-05, "loss": 0.11048030853271484, "step": 3960 }, { "epoch": 0.5519403609001603, "grad_norm": 0.403266966342926, "learning_rate": 1.8911703723018025e-05, "loss": 0.10024070739746094, "step": 3961 }, { "epoch": 0.5520797045913747, "grad_norm": 0.40321964025497437, "learning_rate": 1.8902290676606987e-05, "loss": 0.09119606018066406, "step": 3962 }, { "epoch": 0.552219048282589, "grad_norm": 0.40207070112228394, "learning_rate": 1.8892877874081895e-05, "loss": 0.11675262451171875, "step": 3963 }, { "epoch": 0.5523583919738034, "grad_norm": 0.3285926878452301, "learning_rate": 1.8883465317534055e-05, "loss": 0.08487701416015625, "step": 3964 }, { "epoch": 0.5524977356650178, "grad_norm": 0.503216028213501, "learning_rate": 1.887405300905474e-05, "loss": 0.07838821411132812, "step": 3965 }, { "epoch": 0.5526370793562322, "grad_norm": 0.4430154263973236, "learning_rate": 1.886464095073514e-05, "loss": 0.08891105651855469, "step": 3966 }, { "epoch": 0.5527764230474466, "grad_norm": 0.35276368260383606, "learning_rate": 1.88552291446664e-05, "loss": 0.07384872436523438, "step": 3967 }, { "epoch": 0.5529157667386609, "grad_norm": 0.3404218554496765, "learning_rate": 1.884581759293961e-05, "loss": 0.08827590942382812, "step": 3968 }, { "epoch": 0.5530551104298753, "grad_norm": 0.4768119156360626, "learning_rate": 1.883640629764579e-05, "loss": 0.08662796020507812, "step": 3969 }, { "epoch": 0.5531944541210897, "grad_norm": 0.38938379287719727, "learning_rate": 1.8826995260875937e-05, "loss": 0.07959604263305664, "step": 3970 }, { "epoch": 0.5533337978123041, "grad_norm": 0.4274638593196869, "learning_rate": 1.8817584484720947e-05, "loss": 0.10473442077636719, "step": 3971 }, { "epoch": 0.5534731415035185, "grad_norm": 0.2948648929595947, "learning_rate": 1.8808173971271695e-05, "loss": 0.07672786712646484, "step": 3972 }, { "epoch": 0.5536124851947328, "grad_norm": 0.35727739334106445, "learning_rate": 1.8798763722618982e-05, "loss": 0.06604671478271484, "step": 3973 }, { "epoch": 0.5537518288859472, "grad_norm": 0.4010270833969116, "learning_rate": 1.878935374085354e-05, "loss": 0.0767364501953125, "step": 3974 }, { "epoch": 0.5538911725771616, "grad_norm": 0.5224695801734924, "learning_rate": 1.8779944028066057e-05, "loss": 0.10113525390625, "step": 3975 }, { "epoch": 0.554030516268376, "grad_norm": 0.6470497846603394, "learning_rate": 1.8770534586347152e-05, "loss": 0.11547183990478516, "step": 3976 }, { "epoch": 0.5541698599595903, "grad_norm": 0.45190829038619995, "learning_rate": 1.8761125417787398e-05, "loss": 0.09692192077636719, "step": 3977 }, { "epoch": 0.5543092036508047, "grad_norm": 0.6564455032348633, "learning_rate": 1.8751716524477298e-05, "loss": 0.13176918029785156, "step": 3978 }, { "epoch": 0.5544485473420191, "grad_norm": 0.7814648151397705, "learning_rate": 1.874230790850728e-05, "loss": 0.12874603271484375, "step": 3979 }, { "epoch": 0.5545878910332335, "grad_norm": 0.2591119706630707, "learning_rate": 1.8732899571967728e-05, "loss": 0.07242202758789062, "step": 3980 }, { "epoch": 0.5547272347244478, "grad_norm": 0.5433376431465149, "learning_rate": 1.8723491516948968e-05, "loss": 0.10040903091430664, "step": 3981 }, { "epoch": 0.5548665784156622, "grad_norm": 0.5682857632637024, "learning_rate": 1.871408374554125e-05, "loss": 0.10281181335449219, "step": 3982 }, { "epoch": 0.5550059221068766, "grad_norm": 0.2822878658771515, "learning_rate": 1.8704676259834768e-05, "loss": 0.0699472427368164, "step": 3983 }, { "epoch": 0.555145265798091, "grad_norm": 0.25710529088974, "learning_rate": 1.8695269061919642e-05, "loss": 0.06441307067871094, "step": 3984 }, { "epoch": 0.5552846094893054, "grad_norm": 0.40189847350120544, "learning_rate": 1.8685862153885947e-05, "loss": 0.09173011779785156, "step": 3985 }, { "epoch": 0.5554239531805197, "grad_norm": 0.2719210982322693, "learning_rate": 1.867645553782368e-05, "loss": 0.06520462036132812, "step": 3986 }, { "epoch": 0.5555632968717341, "grad_norm": 0.23362241685390472, "learning_rate": 1.866704921582277e-05, "loss": 0.062259674072265625, "step": 3987 }, { "epoch": 0.5557026405629485, "grad_norm": 0.2743144631385803, "learning_rate": 1.86576431899731e-05, "loss": 0.08186721801757812, "step": 3988 }, { "epoch": 0.5558419842541629, "grad_norm": 0.5928040146827698, "learning_rate": 1.864823746236446e-05, "loss": 0.12438011169433594, "step": 3989 }, { "epoch": 0.5559813279453772, "grad_norm": 0.525719165802002, "learning_rate": 1.8638832035086598e-05, "loss": 0.10658931732177734, "step": 3990 }, { "epoch": 0.5561206716365916, "grad_norm": 0.41032901406288147, "learning_rate": 1.862942691022918e-05, "loss": 0.08048820495605469, "step": 3991 }, { "epoch": 0.556260015327806, "grad_norm": 0.6040444374084473, "learning_rate": 1.8620022089881812e-05, "loss": 0.09832763671875, "step": 3992 }, { "epoch": 0.5563993590190204, "grad_norm": 0.2767842710018158, "learning_rate": 1.861061757613403e-05, "loss": 0.06828975677490234, "step": 3993 }, { "epoch": 0.5565387027102348, "grad_norm": 0.3068370819091797, "learning_rate": 1.8601213371075308e-05, "loss": 0.08157730102539062, "step": 3994 }, { "epoch": 0.5566780464014491, "grad_norm": 0.2696853280067444, "learning_rate": 1.8591809476795034e-05, "loss": 0.08797454833984375, "step": 3995 }, { "epoch": 0.5568173900926635, "grad_norm": 0.41997241973876953, "learning_rate": 1.8582405895382544e-05, "loss": 0.10389137268066406, "step": 3996 }, { "epoch": 0.5569567337838779, "grad_norm": 0.3913429081439972, "learning_rate": 1.8573002628927102e-05, "loss": 0.10517311096191406, "step": 3997 }, { "epoch": 0.5570960774750923, "grad_norm": 0.6394292116165161, "learning_rate": 1.8563599679517898e-05, "loss": 0.10970687866210938, "step": 3998 }, { "epoch": 0.5572354211663066, "grad_norm": 0.4419870674610138, "learning_rate": 1.8554197049244054e-05, "loss": 0.10516548156738281, "step": 3999 }, { "epoch": 0.557374764857521, "grad_norm": 0.22656752169132233, "learning_rate": 1.854479474019461e-05, "loss": 0.06470775604248047, "step": 4000 }, { "epoch": 0.5575141085487355, "grad_norm": 0.3976730704307556, "learning_rate": 1.8535392754458555e-05, "loss": 0.10113143920898438, "step": 4001 }, { "epoch": 0.5576534522399499, "grad_norm": 0.3042682409286499, "learning_rate": 1.8525991094124795e-05, "loss": 0.07716178894042969, "step": 4002 }, { "epoch": 0.5577927959311643, "grad_norm": 0.503616213798523, "learning_rate": 1.8516589761282155e-05, "loss": 0.10811328887939453, "step": 4003 }, { "epoch": 0.5579321396223786, "grad_norm": 0.3720170259475708, "learning_rate": 1.850718875801942e-05, "loss": 0.08322334289550781, "step": 4004 }, { "epoch": 0.558071483313593, "grad_norm": 0.26879093050956726, "learning_rate": 1.8497788086425243e-05, "loss": 0.06410980224609375, "step": 4005 }, { "epoch": 0.5582108270048074, "grad_norm": 0.5390329360961914, "learning_rate": 1.8488387748588266e-05, "loss": 0.10888481140136719, "step": 4006 }, { "epoch": 0.5583501706960218, "grad_norm": 0.27489978075027466, "learning_rate": 1.8478987746597017e-05, "loss": 0.06244993209838867, "step": 4007 }, { "epoch": 0.5584895143872362, "grad_norm": 0.5846813321113586, "learning_rate": 1.8469588082539963e-05, "loss": 0.10392570495605469, "step": 4008 }, { "epoch": 0.5586288580784505, "grad_norm": 0.4215579330921173, "learning_rate": 1.8460188758505502e-05, "loss": 0.08225822448730469, "step": 4009 }, { "epoch": 0.5587682017696649, "grad_norm": 0.3585793673992157, "learning_rate": 1.8450789776581947e-05, "loss": 0.0606536865234375, "step": 4010 }, { "epoch": 0.5589075454608793, "grad_norm": 0.3971717655658722, "learning_rate": 1.844139113885753e-05, "loss": 0.07358407974243164, "step": 4011 }, { "epoch": 0.5590468891520937, "grad_norm": 0.43331003189086914, "learning_rate": 1.8431992847420418e-05, "loss": 0.08680152893066406, "step": 4012 }, { "epoch": 0.559186232843308, "grad_norm": 0.5721511244773865, "learning_rate": 1.8422594904358696e-05, "loss": 0.11266708374023438, "step": 4013 }, { "epoch": 0.5593255765345224, "grad_norm": 0.3700628876686096, "learning_rate": 1.8413197311760377e-05, "loss": 0.08453559875488281, "step": 4014 }, { "epoch": 0.5594649202257368, "grad_norm": 0.4493331015110016, "learning_rate": 1.8403800071713392e-05, "loss": 0.09166145324707031, "step": 4015 }, { "epoch": 0.5596042639169512, "grad_norm": 0.3408151865005493, "learning_rate": 1.839440318630558e-05, "loss": 0.08568382263183594, "step": 4016 }, { "epoch": 0.5597436076081655, "grad_norm": 0.5381366610527039, "learning_rate": 1.838500665762473e-05, "loss": 0.1011343002319336, "step": 4017 }, { "epoch": 0.5598829512993799, "grad_norm": 0.4305996894836426, "learning_rate": 1.8375610487758527e-05, "loss": 0.1070709228515625, "step": 4018 }, { "epoch": 0.5600222949905943, "grad_norm": 0.4391328990459442, "learning_rate": 1.8366214678794584e-05, "loss": 0.109039306640625, "step": 4019 }, { "epoch": 0.5601616386818087, "grad_norm": 0.5724929571151733, "learning_rate": 1.8356819232820452e-05, "loss": 0.10242938995361328, "step": 4020 }, { "epoch": 0.5603009823730231, "grad_norm": 0.6282013654708862, "learning_rate": 1.834742415192356e-05, "loss": 0.13050460815429688, "step": 4021 }, { "epoch": 0.5604403260642374, "grad_norm": 0.2273886352777481, "learning_rate": 1.8338029438191298e-05, "loss": 0.05887031555175781, "step": 4022 }, { "epoch": 0.5605796697554518, "grad_norm": 0.3999176621437073, "learning_rate": 1.832863509371095e-05, "loss": 0.09090042114257812, "step": 4023 }, { "epoch": 0.5607190134466662, "grad_norm": 0.22331783175468445, "learning_rate": 1.831924112056972e-05, "loss": 0.07464981079101562, "step": 4024 }, { "epoch": 0.5608583571378806, "grad_norm": 0.6754215359687805, "learning_rate": 1.8309847520854753e-05, "loss": 0.1148386001586914, "step": 4025 }, { "epoch": 0.560997700829095, "grad_norm": 0.6209039688110352, "learning_rate": 1.8300454296653076e-05, "loss": 0.14847946166992188, "step": 4026 }, { "epoch": 0.5611370445203093, "grad_norm": 0.33953696489334106, "learning_rate": 1.829106145005165e-05, "loss": 0.09502220153808594, "step": 4027 }, { "epoch": 0.5612763882115237, "grad_norm": 0.3697056472301483, "learning_rate": 1.828166898313735e-05, "loss": 0.08188819885253906, "step": 4028 }, { "epoch": 0.5614157319027381, "grad_norm": 0.49544212222099304, "learning_rate": 1.8272276897996977e-05, "loss": 0.09785842895507812, "step": 4029 }, { "epoch": 0.5615550755939525, "grad_norm": 0.27257394790649414, "learning_rate": 1.8262885196717232e-05, "loss": 0.07114028930664062, "step": 4030 }, { "epoch": 0.5616944192851668, "grad_norm": 0.44563552737236023, "learning_rate": 1.8253493881384743e-05, "loss": 0.076019287109375, "step": 4031 }, { "epoch": 0.5618337629763812, "grad_norm": 0.5177099108695984, "learning_rate": 1.8244102954086032e-05, "loss": 0.10845184326171875, "step": 4032 }, { "epoch": 0.5619731066675956, "grad_norm": 0.7175803184509277, "learning_rate": 1.823471241690756e-05, "loss": 0.12463569641113281, "step": 4033 }, { "epoch": 0.56211245035881, "grad_norm": 0.5181860327720642, "learning_rate": 1.8225322271935686e-05, "loss": 0.09370231628417969, "step": 4034 }, { "epoch": 0.5622517940500243, "grad_norm": 1.0214121341705322, "learning_rate": 1.8215932521256683e-05, "loss": 0.12355899810791016, "step": 4035 }, { "epoch": 0.5623911377412387, "grad_norm": 0.5820346474647522, "learning_rate": 1.8206543166956754e-05, "loss": 0.10139846801757812, "step": 4036 }, { "epoch": 0.5625304814324531, "grad_norm": 0.19709883630275726, "learning_rate": 1.8197154211121976e-05, "loss": 0.06758785247802734, "step": 4037 }, { "epoch": 0.5626698251236675, "grad_norm": 1.3540277481079102, "learning_rate": 1.818776565583838e-05, "loss": 0.14343833923339844, "step": 4038 }, { "epoch": 0.5628091688148819, "grad_norm": 0.7221106290817261, "learning_rate": 1.8178377503191875e-05, "loss": 0.11934661865234375, "step": 4039 }, { "epoch": 0.5629485125060962, "grad_norm": 0.3034663200378418, "learning_rate": 1.8168989755268303e-05, "loss": 0.08368492126464844, "step": 4040 }, { "epoch": 0.5630878561973107, "grad_norm": 0.32387328147888184, "learning_rate": 1.815960241415341e-05, "loss": 0.07765388488769531, "step": 4041 }, { "epoch": 0.5632271998885251, "grad_norm": 0.2618301808834076, "learning_rate": 1.815021548193284e-05, "loss": 0.07066631317138672, "step": 4042 }, { "epoch": 0.5633665435797395, "grad_norm": 0.4455222189426422, "learning_rate": 1.814082896069216e-05, "loss": 0.09250068664550781, "step": 4043 }, { "epoch": 0.5635058872709539, "grad_norm": 0.2420588731765747, "learning_rate": 1.813144285251683e-05, "loss": 0.0748443603515625, "step": 4044 }, { "epoch": 0.5636452309621682, "grad_norm": 0.2739112973213196, "learning_rate": 1.8122057159492248e-05, "loss": 0.06808853149414062, "step": 4045 }, { "epoch": 0.5637845746533826, "grad_norm": 0.3326997458934784, "learning_rate": 1.8112671883703688e-05, "loss": 0.08143806457519531, "step": 4046 }, { "epoch": 0.563923918344597, "grad_norm": 0.3509039878845215, "learning_rate": 1.8103287027236352e-05, "loss": 0.09311485290527344, "step": 4047 }, { "epoch": 0.5640632620358114, "grad_norm": 0.5972915291786194, "learning_rate": 1.8093902592175328e-05, "loss": 0.10935211181640625, "step": 4048 }, { "epoch": 0.5642026057270257, "grad_norm": 0.2382107973098755, "learning_rate": 1.8084518580605634e-05, "loss": 0.05924034118652344, "step": 4049 }, { "epoch": 0.5643419494182401, "grad_norm": 0.3819197118282318, "learning_rate": 1.807513499461218e-05, "loss": 0.09655189514160156, "step": 4050 }, { "epoch": 0.5644812931094545, "grad_norm": 0.3384595513343811, "learning_rate": 1.8065751836279784e-05, "loss": 0.08532905578613281, "step": 4051 }, { "epoch": 0.5646206368006689, "grad_norm": 0.5121312141418457, "learning_rate": 1.805636910769318e-05, "loss": 0.10368061065673828, "step": 4052 }, { "epoch": 0.5647599804918833, "grad_norm": 0.6872484683990479, "learning_rate": 1.8046986810936974e-05, "loss": 0.13508224487304688, "step": 4053 }, { "epoch": 0.5648993241830976, "grad_norm": 0.5166663527488708, "learning_rate": 1.8037604948095714e-05, "loss": 0.1068572998046875, "step": 4054 }, { "epoch": 0.565038667874312, "grad_norm": 0.3752843737602234, "learning_rate": 1.802822352125383e-05, "loss": 0.092254638671875, "step": 4055 }, { "epoch": 0.5651780115655264, "grad_norm": 0.2951204478740692, "learning_rate": 1.8018842532495667e-05, "loss": 0.09109878540039062, "step": 4056 }, { "epoch": 0.5653173552567408, "grad_norm": 0.4980812966823578, "learning_rate": 1.8009461983905466e-05, "loss": 0.11092185974121094, "step": 4057 }, { "epoch": 0.5654566989479551, "grad_norm": 0.3126406967639923, "learning_rate": 1.8000081877567362e-05, "loss": 0.0808248519897461, "step": 4058 }, { "epoch": 0.5655960426391695, "grad_norm": 0.45106491446495056, "learning_rate": 1.799070221556541e-05, "loss": 0.09257698059082031, "step": 4059 }, { "epoch": 0.5657353863303839, "grad_norm": 0.5763440728187561, "learning_rate": 1.7981322999983547e-05, "loss": 0.09988784790039062, "step": 4060 }, { "epoch": 0.5658747300215983, "grad_norm": 0.4370148777961731, "learning_rate": 1.7971944232905627e-05, "loss": 0.0983123779296875, "step": 4061 }, { "epoch": 0.5660140737128126, "grad_norm": 0.5264624357223511, "learning_rate": 1.7962565916415406e-05, "loss": 0.11218643188476562, "step": 4062 }, { "epoch": 0.566153417404027, "grad_norm": 0.5439543128013611, "learning_rate": 1.7953188052596514e-05, "loss": 0.10368537902832031, "step": 4063 }, { "epoch": 0.5662927610952414, "grad_norm": 0.4733785092830658, "learning_rate": 1.7943810643532506e-05, "loss": 0.12571144104003906, "step": 4064 }, { "epoch": 0.5664321047864558, "grad_norm": 0.5838122963905334, "learning_rate": 1.7934433691306834e-05, "loss": 0.10927009582519531, "step": 4065 }, { "epoch": 0.5665714484776702, "grad_norm": 0.4894682466983795, "learning_rate": 1.7925057198002836e-05, "loss": 0.11099052429199219, "step": 4066 }, { "epoch": 0.5667107921688845, "grad_norm": 0.6455749273300171, "learning_rate": 1.7915681165703754e-05, "loss": 0.10688972473144531, "step": 4067 }, { "epoch": 0.5668501358600989, "grad_norm": 0.30797144770622253, "learning_rate": 1.7906305596492747e-05, "loss": 0.08696746826171875, "step": 4068 }, { "epoch": 0.5669894795513133, "grad_norm": 0.2736218571662903, "learning_rate": 1.7896930492452816e-05, "loss": 0.07781410217285156, "step": 4069 }, { "epoch": 0.5671288232425277, "grad_norm": 0.6503137946128845, "learning_rate": 1.788755585566693e-05, "loss": 0.10112190246582031, "step": 4070 }, { "epoch": 0.567268166933742, "grad_norm": 0.5114457011222839, "learning_rate": 1.7878181688217894e-05, "loss": 0.115997314453125, "step": 4071 }, { "epoch": 0.5674075106249564, "grad_norm": 0.36321181058883667, "learning_rate": 1.7868807992188448e-05, "loss": 0.06845283508300781, "step": 4072 }, { "epoch": 0.5675468543161708, "grad_norm": 0.5832300186157227, "learning_rate": 1.7859434769661218e-05, "loss": 0.09729957580566406, "step": 4073 }, { "epoch": 0.5676861980073852, "grad_norm": 0.3772968351840973, "learning_rate": 1.7850062022718708e-05, "loss": 0.08876419067382812, "step": 4074 }, { "epoch": 0.5678255416985996, "grad_norm": 0.45914870500564575, "learning_rate": 1.7840689753443328e-05, "loss": 0.09173965454101562, "step": 4075 }, { "epoch": 0.5679648853898139, "grad_norm": 0.678042471408844, "learning_rate": 1.7831317963917388e-05, "loss": 0.11501121520996094, "step": 4076 }, { "epoch": 0.5681042290810283, "grad_norm": 0.597774863243103, "learning_rate": 1.7821946656223088e-05, "loss": 0.09260940551757812, "step": 4077 }, { "epoch": 0.5682435727722427, "grad_norm": 0.3199504017829895, "learning_rate": 1.7812575832442518e-05, "loss": 0.08275985717773438, "step": 4078 }, { "epoch": 0.5683829164634571, "grad_norm": 0.26731351017951965, "learning_rate": 1.7803205494657652e-05, "loss": 0.06479549407958984, "step": 4079 }, { "epoch": 0.5685222601546714, "grad_norm": 0.47408196330070496, "learning_rate": 1.7793835644950373e-05, "loss": 0.0783834457397461, "step": 4080 }, { "epoch": 0.5686616038458859, "grad_norm": 1.0532084703445435, "learning_rate": 1.7784466285402445e-05, "loss": 0.141387939453125, "step": 4081 }, { "epoch": 0.5688009475371003, "grad_norm": 0.24096480011940002, "learning_rate": 1.777509741809553e-05, "loss": 0.06844520568847656, "step": 4082 }, { "epoch": 0.5689402912283147, "grad_norm": 0.2607787251472473, "learning_rate": 1.7765729045111177e-05, "loss": 0.07101249694824219, "step": 4083 }, { "epoch": 0.5690796349195291, "grad_norm": 0.6411575078964233, "learning_rate": 1.775636116853081e-05, "loss": 0.11623764038085938, "step": 4084 }, { "epoch": 0.5692189786107434, "grad_norm": 0.4082685112953186, "learning_rate": 1.7746993790435777e-05, "loss": 0.08844757080078125, "step": 4085 }, { "epoch": 0.5693583223019578, "grad_norm": 0.2897104322910309, "learning_rate": 1.773762691290728e-05, "loss": 0.07357215881347656, "step": 4086 }, { "epoch": 0.5694976659931722, "grad_norm": 0.2369295358657837, "learning_rate": 1.7728260538026432e-05, "loss": 0.05613899230957031, "step": 4087 }, { "epoch": 0.5696370096843866, "grad_norm": 0.6680482625961304, "learning_rate": 1.7718894667874235e-05, "loss": 0.10429096221923828, "step": 4088 }, { "epoch": 0.569776353375601, "grad_norm": 0.6774418354034424, "learning_rate": 1.7709529304531567e-05, "loss": 0.10048294067382812, "step": 4089 }, { "epoch": 0.5699156970668153, "grad_norm": 0.22904351353645325, "learning_rate": 1.7700164450079188e-05, "loss": 0.0669546127319336, "step": 4090 }, { "epoch": 0.5700550407580297, "grad_norm": 1.034339427947998, "learning_rate": 1.769080010659776e-05, "loss": 0.1301860809326172, "step": 4091 }, { "epoch": 0.5701943844492441, "grad_norm": 0.4823663532733917, "learning_rate": 1.768143627616783e-05, "loss": 0.08158111572265625, "step": 4092 }, { "epoch": 0.5703337281404585, "grad_norm": 0.4055899977684021, "learning_rate": 1.7672072960869828e-05, "loss": 0.09107780456542969, "step": 4093 }, { "epoch": 0.5704730718316728, "grad_norm": 0.3230876326560974, "learning_rate": 1.766271016278407e-05, "loss": 0.08565521240234375, "step": 4094 }, { "epoch": 0.5706124155228872, "grad_norm": 0.3029000461101532, "learning_rate": 1.7653347883990748e-05, "loss": 0.06458663940429688, "step": 4095 }, { "epoch": 0.5707517592141016, "grad_norm": 0.3183416426181793, "learning_rate": 1.764398612656995e-05, "loss": 0.0647268295288086, "step": 4096 }, { "epoch": 0.570891102905316, "grad_norm": 0.5179814696311951, "learning_rate": 1.7634624892601647e-05, "loss": 0.09415960311889648, "step": 4097 }, { "epoch": 0.5710304465965303, "grad_norm": 0.8599585294723511, "learning_rate": 1.762526418416569e-05, "loss": 0.1350078582763672, "step": 4098 }, { "epoch": 0.5711697902877447, "grad_norm": 0.30145689845085144, "learning_rate": 1.7615904003341822e-05, "loss": 0.07440376281738281, "step": 4099 }, { "epoch": 0.5713091339789591, "grad_norm": 0.21508269011974335, "learning_rate": 1.7606544352209644e-05, "loss": 0.06680107116699219, "step": 4100 }, { "epoch": 0.5714484776701735, "grad_norm": 0.4374901056289673, "learning_rate": 1.7597185232848673e-05, "loss": 0.10190391540527344, "step": 4101 }, { "epoch": 0.5715878213613879, "grad_norm": 0.2996353507041931, "learning_rate": 1.7587826647338285e-05, "loss": 0.07889366149902344, "step": 4102 }, { "epoch": 0.5717271650526022, "grad_norm": 0.6963887214660645, "learning_rate": 1.757846859775774e-05, "loss": 0.08677482604980469, "step": 4103 }, { "epoch": 0.5718665087438166, "grad_norm": 0.5192792415618896, "learning_rate": 1.7569111086186196e-05, "loss": 0.0999603271484375, "step": 4104 }, { "epoch": 0.572005852435031, "grad_norm": 0.7261146903038025, "learning_rate": 1.7559754114702672e-05, "loss": 0.10765457153320312, "step": 4105 }, { "epoch": 0.5721451961262454, "grad_norm": 0.32212814688682556, "learning_rate": 1.755039768538607e-05, "loss": 0.08531761169433594, "step": 4106 }, { "epoch": 0.5722845398174597, "grad_norm": 0.2693021595478058, "learning_rate": 1.7541041800315173e-05, "loss": 0.08331871032714844, "step": 4107 }, { "epoch": 0.5724238835086741, "grad_norm": 0.6828738451004028, "learning_rate": 1.7531686461568648e-05, "loss": 0.1011962890625, "step": 4108 }, { "epoch": 0.5725632271998885, "grad_norm": 0.5507255792617798, "learning_rate": 1.752233167122504e-05, "loss": 0.1263275146484375, "step": 4109 }, { "epoch": 0.5727025708911029, "grad_norm": 0.3005565106868744, "learning_rate": 1.7512977431362777e-05, "loss": 0.07114219665527344, "step": 4110 }, { "epoch": 0.5728419145823173, "grad_norm": 0.3698022663593292, "learning_rate": 1.750362374406014e-05, "loss": 0.09227180480957031, "step": 4111 }, { "epoch": 0.5729812582735316, "grad_norm": 0.2717233896255493, "learning_rate": 1.749427061139531e-05, "loss": 0.06452369689941406, "step": 4112 }, { "epoch": 0.573120601964746, "grad_norm": 0.37814950942993164, "learning_rate": 1.7484918035446352e-05, "loss": 0.07384681701660156, "step": 4113 }, { "epoch": 0.5732599456559604, "grad_norm": 0.46844303607940674, "learning_rate": 1.7475566018291185e-05, "loss": 0.10261154174804688, "step": 4114 }, { "epoch": 0.5733992893471748, "grad_norm": 0.3850955367088318, "learning_rate": 1.7466214562007618e-05, "loss": 0.08108711242675781, "step": 4115 }, { "epoch": 0.5735386330383891, "grad_norm": 0.7322114109992981, "learning_rate": 1.745686366867332e-05, "loss": 0.11954116821289062, "step": 4116 }, { "epoch": 0.5736779767296035, "grad_norm": 0.36043980717658997, "learning_rate": 1.7447513340365855e-05, "loss": 0.09004974365234375, "step": 4117 }, { "epoch": 0.5738173204208179, "grad_norm": 0.303821861743927, "learning_rate": 1.7438163579162658e-05, "loss": 0.08235931396484375, "step": 4118 }, { "epoch": 0.5739566641120323, "grad_norm": 0.29861196875572205, "learning_rate": 1.7428814387141016e-05, "loss": 0.07965278625488281, "step": 4119 }, { "epoch": 0.5740960078032467, "grad_norm": 0.4718315303325653, "learning_rate": 1.741946576637813e-05, "loss": 0.09920692443847656, "step": 4120 }, { "epoch": 0.5742353514944611, "grad_norm": 0.49304017424583435, "learning_rate": 1.7410117718951026e-05, "loss": 0.1141204833984375, "step": 4121 }, { "epoch": 0.5743746951856755, "grad_norm": 0.45490750670433044, "learning_rate": 1.740077024693664e-05, "loss": 0.08880043029785156, "step": 4122 }, { "epoch": 0.5745140388768899, "grad_norm": 0.4145594537258148, "learning_rate": 1.739142335241176e-05, "loss": 0.08373641967773438, "step": 4123 }, { "epoch": 0.5746533825681043, "grad_norm": 0.26137974858283997, "learning_rate": 1.7382077037453057e-05, "loss": 0.07437324523925781, "step": 4124 }, { "epoch": 0.5747927262593187, "grad_norm": 0.3278530240058899, "learning_rate": 1.7372731304137072e-05, "loss": 0.07338714599609375, "step": 4125 }, { "epoch": 0.574932069950533, "grad_norm": 0.2983459234237671, "learning_rate": 1.736338615454021e-05, "loss": 0.07404756546020508, "step": 4126 }, { "epoch": 0.5750714136417474, "grad_norm": 0.420315682888031, "learning_rate": 1.7354041590738747e-05, "loss": 0.07807540893554688, "step": 4127 }, { "epoch": 0.5752107573329618, "grad_norm": 0.3425889313220978, "learning_rate": 1.734469761480883e-05, "loss": 0.07760047912597656, "step": 4128 }, { "epoch": 0.5753501010241762, "grad_norm": 0.30964651703834534, "learning_rate": 1.733535422882649e-05, "loss": 0.0778341293334961, "step": 4129 }, { "epoch": 0.5754894447153905, "grad_norm": 0.4785287082195282, "learning_rate": 1.73260114348676e-05, "loss": 0.10133552551269531, "step": 4130 }, { "epoch": 0.5756287884066049, "grad_norm": 0.33403992652893066, "learning_rate": 1.7316669235007927e-05, "loss": 0.08171653747558594, "step": 4131 }, { "epoch": 0.5757681320978193, "grad_norm": 0.2903982698917389, "learning_rate": 1.7307327631323078e-05, "loss": 0.07427024841308594, "step": 4132 }, { "epoch": 0.5759074757890337, "grad_norm": 0.6040825843811035, "learning_rate": 1.7297986625888563e-05, "loss": 0.10210132598876953, "step": 4133 }, { "epoch": 0.576046819480248, "grad_norm": 0.28627389669418335, "learning_rate": 1.728864622077973e-05, "loss": 0.07282638549804688, "step": 4134 }, { "epoch": 0.5761861631714624, "grad_norm": 0.28188356757164, "learning_rate": 1.72793064180718e-05, "loss": 0.07866859436035156, "step": 4135 }, { "epoch": 0.5763255068626768, "grad_norm": 0.4858262240886688, "learning_rate": 1.7269967219839882e-05, "loss": 0.09848785400390625, "step": 4136 }, { "epoch": 0.5764648505538912, "grad_norm": 0.33621469140052795, "learning_rate": 1.7260628628158907e-05, "loss": 0.07585906982421875, "step": 4137 }, { "epoch": 0.5766041942451056, "grad_norm": 0.3468513786792755, "learning_rate": 1.7251290645103716e-05, "loss": 0.0803232192993164, "step": 4138 }, { "epoch": 0.5767435379363199, "grad_norm": 0.5121119618415833, "learning_rate": 1.7241953272748987e-05, "loss": 0.1042633056640625, "step": 4139 }, { "epoch": 0.5768828816275343, "grad_norm": 0.3060786724090576, "learning_rate": 1.7232616513169272e-05, "loss": 0.08213043212890625, "step": 4140 }, { "epoch": 0.5770222253187487, "grad_norm": 0.3026684522628784, "learning_rate": 1.7223280368438993e-05, "loss": 0.08101844787597656, "step": 4141 }, { "epoch": 0.5771615690099631, "grad_norm": 0.6800536513328552, "learning_rate": 1.7213944840632422e-05, "loss": 0.1369953155517578, "step": 4142 }, { "epoch": 0.5773009127011774, "grad_norm": 0.6473787426948547, "learning_rate": 1.7204609931823702e-05, "loss": 0.14793968200683594, "step": 4143 }, { "epoch": 0.5774402563923918, "grad_norm": 0.3120273947715759, "learning_rate": 1.7195275644086827e-05, "loss": 0.08077526092529297, "step": 4144 }, { "epoch": 0.5775796000836062, "grad_norm": 0.24476101994514465, "learning_rate": 1.7185941979495676e-05, "loss": 0.06314754486083984, "step": 4145 }, { "epoch": 0.5777189437748206, "grad_norm": 0.2898106575012207, "learning_rate": 1.717660894012397e-05, "loss": 0.07238006591796875, "step": 4146 }, { "epoch": 0.577858287466035, "grad_norm": 0.23891085386276245, "learning_rate": 1.7167276528045308e-05, "loss": 0.07794380187988281, "step": 4147 }, { "epoch": 0.5779976311572493, "grad_norm": 0.45459455251693726, "learning_rate": 1.7157944745333114e-05, "loss": 0.09162521362304688, "step": 4148 }, { "epoch": 0.5781369748484637, "grad_norm": 0.6221371293067932, "learning_rate": 1.714861359406072e-05, "loss": 0.1309833526611328, "step": 4149 }, { "epoch": 0.5782763185396781, "grad_norm": 0.6048424243927002, "learning_rate": 1.7139283076301287e-05, "loss": 0.11709403991699219, "step": 4150 }, { "epoch": 0.5784156622308925, "grad_norm": 0.23226934671401978, "learning_rate": 1.7129953194127837e-05, "loss": 0.05693531036376953, "step": 4151 }, { "epoch": 0.5785550059221068, "grad_norm": 0.3674107789993286, "learning_rate": 1.712062394961328e-05, "loss": 0.08276176452636719, "step": 4152 }, { "epoch": 0.5786943496133212, "grad_norm": 0.2901719808578491, "learning_rate": 1.7111295344830324e-05, "loss": 0.07941055297851562, "step": 4153 }, { "epoch": 0.5788336933045356, "grad_norm": 0.5011548399925232, "learning_rate": 1.7101967381851604e-05, "loss": 0.08764457702636719, "step": 4154 }, { "epoch": 0.57897303699575, "grad_norm": 0.5297576189041138, "learning_rate": 1.709264006274956e-05, "loss": 0.1109609603881836, "step": 4155 }, { "epoch": 0.5791123806869644, "grad_norm": 0.6955164670944214, "learning_rate": 1.7083313389596523e-05, "loss": 0.1135702133178711, "step": 4156 }, { "epoch": 0.5792517243781787, "grad_norm": 0.5102815628051758, "learning_rate": 1.7073987364464664e-05, "loss": 0.1122589111328125, "step": 4157 }, { "epoch": 0.5793910680693931, "grad_norm": 0.2615004777908325, "learning_rate": 1.7064661989426012e-05, "loss": 0.06870841979980469, "step": 4158 }, { "epoch": 0.5795304117606075, "grad_norm": 0.34260252118110657, "learning_rate": 1.7055337266552446e-05, "loss": 0.07820701599121094, "step": 4159 }, { "epoch": 0.5796697554518219, "grad_norm": 0.31398245692253113, "learning_rate": 1.704601319791571e-05, "loss": 0.07948493957519531, "step": 4160 }, { "epoch": 0.5798090991430362, "grad_norm": 0.18907572329044342, "learning_rate": 1.7036689785587404e-05, "loss": 0.0648050308227539, "step": 4161 }, { "epoch": 0.5799484428342507, "grad_norm": 0.2440865933895111, "learning_rate": 1.7027367031638976e-05, "loss": 0.07743167877197266, "step": 4162 }, { "epoch": 0.5800877865254651, "grad_norm": 0.3275556266307831, "learning_rate": 1.7018044938141728e-05, "loss": 0.07736778259277344, "step": 4163 }, { "epoch": 0.5802271302166795, "grad_norm": 0.4042894244194031, "learning_rate": 1.700872350716681e-05, "loss": 0.08747482299804688, "step": 4164 }, { "epoch": 0.5803664739078939, "grad_norm": 0.2917661666870117, "learning_rate": 1.6999402740785238e-05, "loss": 0.08220863342285156, "step": 4165 }, { "epoch": 0.5805058175991082, "grad_norm": 0.382846474647522, "learning_rate": 1.6990082641067876e-05, "loss": 0.08656120300292969, "step": 4166 }, { "epoch": 0.5806451612903226, "grad_norm": 0.38001111149787903, "learning_rate": 1.6980763210085425e-05, "loss": 0.0933380126953125, "step": 4167 }, { "epoch": 0.580784504981537, "grad_norm": 0.41645997762680054, "learning_rate": 1.6971444449908474e-05, "loss": 0.08356094360351562, "step": 4168 }, { "epoch": 0.5809238486727514, "grad_norm": 0.30447179079055786, "learning_rate": 1.696212636260741e-05, "loss": 0.084991455078125, "step": 4169 }, { "epoch": 0.5810631923639658, "grad_norm": 0.4125811755657196, "learning_rate": 1.6952808950252518e-05, "loss": 0.09144783020019531, "step": 4170 }, { "epoch": 0.5812025360551801, "grad_norm": 0.4988737106323242, "learning_rate": 1.69434922149139e-05, "loss": 0.09589958190917969, "step": 4171 }, { "epoch": 0.5813418797463945, "grad_norm": 0.5201955437660217, "learning_rate": 1.693417615866154e-05, "loss": 0.13245582580566406, "step": 4172 }, { "epoch": 0.5814812234376089, "grad_norm": 0.4207427203655243, "learning_rate": 1.6924860783565245e-05, "loss": 0.08881568908691406, "step": 4173 }, { "epoch": 0.5816205671288233, "grad_norm": 0.4924398362636566, "learning_rate": 1.691554609169467e-05, "loss": 0.1154470443725586, "step": 4174 }, { "epoch": 0.5817599108200376, "grad_norm": 0.3551323413848877, "learning_rate": 1.6906232085119342e-05, "loss": 0.08148527145385742, "step": 4175 }, { "epoch": 0.581899254511252, "grad_norm": 0.45390087366104126, "learning_rate": 1.6896918765908604e-05, "loss": 0.10584640502929688, "step": 4176 }, { "epoch": 0.5820385982024664, "grad_norm": 0.36133262515068054, "learning_rate": 1.6887606136131673e-05, "loss": 0.08881950378417969, "step": 4177 }, { "epoch": 0.5821779418936808, "grad_norm": 0.376630574464798, "learning_rate": 1.687829419785761e-05, "loss": 0.09702873229980469, "step": 4178 }, { "epoch": 0.5823172855848952, "grad_norm": 0.1584012806415558, "learning_rate": 1.68689829531553e-05, "loss": 0.05718803405761719, "step": 4179 }, { "epoch": 0.5824566292761095, "grad_norm": 0.35900601744651794, "learning_rate": 1.6859672404093494e-05, "loss": 0.08628273010253906, "step": 4180 }, { "epoch": 0.5825959729673239, "grad_norm": 0.32951533794403076, "learning_rate": 1.6850362552740786e-05, "loss": 0.07745933532714844, "step": 4181 }, { "epoch": 0.5827353166585383, "grad_norm": 0.27356448769569397, "learning_rate": 1.6841053401165614e-05, "loss": 0.06383132934570312, "step": 4182 }, { "epoch": 0.5828746603497527, "grad_norm": 0.41488924622535706, "learning_rate": 1.683174495143625e-05, "loss": 0.09416580200195312, "step": 4183 }, { "epoch": 0.583014004040967, "grad_norm": 0.35064804553985596, "learning_rate": 1.6822437205620834e-05, "loss": 0.08542633056640625, "step": 4184 }, { "epoch": 0.5831533477321814, "grad_norm": 0.21964366734027863, "learning_rate": 1.681313016578732e-05, "loss": 0.06488466262817383, "step": 4185 }, { "epoch": 0.5832926914233958, "grad_norm": 0.41592612862586975, "learning_rate": 1.680382383400353e-05, "loss": 0.097503662109375, "step": 4186 }, { "epoch": 0.5834320351146102, "grad_norm": 0.3102315068244934, "learning_rate": 1.679451821233711e-05, "loss": 0.09441566467285156, "step": 4187 }, { "epoch": 0.5835713788058245, "grad_norm": 0.3010697066783905, "learning_rate": 1.6785213302855562e-05, "loss": 0.08429527282714844, "step": 4188 }, { "epoch": 0.5837107224970389, "grad_norm": 0.5403000712394714, "learning_rate": 1.6775909107626227e-05, "loss": 0.09599685668945312, "step": 4189 }, { "epoch": 0.5838500661882533, "grad_norm": 0.24770641326904297, "learning_rate": 1.6766605628716277e-05, "loss": 0.0584564208984375, "step": 4190 }, { "epoch": 0.5839894098794677, "grad_norm": 0.6927403211593628, "learning_rate": 1.675730286819274e-05, "loss": 0.1335010528564453, "step": 4191 }, { "epoch": 0.5841287535706821, "grad_norm": 0.530586302280426, "learning_rate": 1.6748000828122465e-05, "loss": 0.10285568237304688, "step": 4192 }, { "epoch": 0.5842680972618964, "grad_norm": 0.42310282588005066, "learning_rate": 1.673869951057217e-05, "loss": 0.10139083862304688, "step": 4193 }, { "epoch": 0.5844074409531108, "grad_norm": 0.5413349866867065, "learning_rate": 1.6729398917608387e-05, "loss": 0.11145305633544922, "step": 4194 }, { "epoch": 0.5845467846443252, "grad_norm": 0.6309617161750793, "learning_rate": 1.6720099051297494e-05, "loss": 0.12165641784667969, "step": 4195 }, { "epoch": 0.5846861283355396, "grad_norm": 0.39024996757507324, "learning_rate": 1.6710799913705706e-05, "loss": 0.0751943588256836, "step": 4196 }, { "epoch": 0.584825472026754, "grad_norm": 0.5698543190956116, "learning_rate": 1.6701501506899087e-05, "loss": 0.08866691589355469, "step": 4197 }, { "epoch": 0.5849648157179683, "grad_norm": 0.39603039622306824, "learning_rate": 1.6692203832943527e-05, "loss": 0.08042526245117188, "step": 4198 }, { "epoch": 0.5851041594091827, "grad_norm": 0.3647635877132416, "learning_rate": 1.6682906893904754e-05, "loss": 0.07810211181640625, "step": 4199 }, { "epoch": 0.5852435031003971, "grad_norm": 0.23473447561264038, "learning_rate": 1.6673610691848346e-05, "loss": 0.0657510757446289, "step": 4200 }, { "epoch": 0.5853828467916115, "grad_norm": 0.2577277719974518, "learning_rate": 1.6664315228839696e-05, "loss": 0.06799983978271484, "step": 4201 }, { "epoch": 0.5855221904828259, "grad_norm": 0.4780316650867462, "learning_rate": 1.6655020506944046e-05, "loss": 0.0970001220703125, "step": 4202 }, { "epoch": 0.5856615341740403, "grad_norm": 0.4202057421207428, "learning_rate": 1.664572652822647e-05, "loss": 0.0806121826171875, "step": 4203 }, { "epoch": 0.5858008778652547, "grad_norm": 0.4580095112323761, "learning_rate": 1.6636433294751883e-05, "loss": 0.10182571411132812, "step": 4204 }, { "epoch": 0.5859402215564691, "grad_norm": 0.447134792804718, "learning_rate": 1.662714080858503e-05, "loss": 0.0942983627319336, "step": 4205 }, { "epoch": 0.5860795652476835, "grad_norm": 0.4373663067817688, "learning_rate": 1.6617849071790484e-05, "loss": 0.08539962768554688, "step": 4206 }, { "epoch": 0.5862189089388978, "grad_norm": 0.3297964036464691, "learning_rate": 1.6608558086432655e-05, "loss": 0.08370208740234375, "step": 4207 }, { "epoch": 0.5863582526301122, "grad_norm": 0.3247125446796417, "learning_rate": 1.6599267854575788e-05, "loss": 0.06982040405273438, "step": 4208 }, { "epoch": 0.5864975963213266, "grad_norm": 0.2441520094871521, "learning_rate": 1.6589978378283967e-05, "loss": 0.0726776123046875, "step": 4209 }, { "epoch": 0.586636940012541, "grad_norm": 0.5570501089096069, "learning_rate": 1.6580689659621106e-05, "loss": 0.12085533142089844, "step": 4210 }, { "epoch": 0.5867762837037553, "grad_norm": 0.3606414794921875, "learning_rate": 1.6571401700650934e-05, "loss": 0.07482051849365234, "step": 4211 }, { "epoch": 0.5869156273949697, "grad_norm": 0.3009836673736572, "learning_rate": 1.6562114503437017e-05, "loss": 0.06842041015625, "step": 4212 }, { "epoch": 0.5870549710861841, "grad_norm": 0.4016965925693512, "learning_rate": 1.6552828070042782e-05, "loss": 0.08827400207519531, "step": 4213 }, { "epoch": 0.5871943147773985, "grad_norm": 0.5499261021614075, "learning_rate": 1.6543542402531446e-05, "loss": 0.10329055786132812, "step": 4214 }, { "epoch": 0.5873336584686129, "grad_norm": 0.7325122952461243, "learning_rate": 1.6534257502966078e-05, "loss": 0.11194229125976562, "step": 4215 }, { "epoch": 0.5874730021598272, "grad_norm": 0.21600063145160675, "learning_rate": 1.6524973373409563e-05, "loss": 0.05808258056640625, "step": 4216 }, { "epoch": 0.5876123458510416, "grad_norm": 0.4098205268383026, "learning_rate": 1.651569001592463e-05, "loss": 0.08368682861328125, "step": 4217 }, { "epoch": 0.587751689542256, "grad_norm": 0.5150419473648071, "learning_rate": 1.6506407432573828e-05, "loss": 0.10800552368164062, "step": 4218 }, { "epoch": 0.5878910332334704, "grad_norm": 0.3412403166294098, "learning_rate": 1.6497125625419533e-05, "loss": 0.06986331939697266, "step": 4219 }, { "epoch": 0.5880303769246847, "grad_norm": 0.7018589377403259, "learning_rate": 1.6487844596523955e-05, "loss": 0.12097930908203125, "step": 4220 }, { "epoch": 0.5881697206158991, "grad_norm": 0.39494186639785767, "learning_rate": 1.6478564347949127e-05, "loss": 0.10202980041503906, "step": 4221 }, { "epoch": 0.5883090643071135, "grad_norm": 0.2755142152309418, "learning_rate": 1.6469284881756898e-05, "loss": 0.07446479797363281, "step": 4222 }, { "epoch": 0.5884484079983279, "grad_norm": 0.23825016617774963, "learning_rate": 1.6460006200008963e-05, "loss": 0.06912994384765625, "step": 4223 }, { "epoch": 0.5885877516895422, "grad_norm": 0.42187055945396423, "learning_rate": 1.645072830476683e-05, "loss": 0.07721138000488281, "step": 4224 }, { "epoch": 0.5887270953807566, "grad_norm": 0.45180749893188477, "learning_rate": 1.644145119809184e-05, "loss": 0.0998830795288086, "step": 4225 }, { "epoch": 0.588866439071971, "grad_norm": 0.5356190800666809, "learning_rate": 1.643217488204515e-05, "loss": 0.08208560943603516, "step": 4226 }, { "epoch": 0.5890057827631854, "grad_norm": 0.26010578870773315, "learning_rate": 1.6422899358687745e-05, "loss": 0.0724649429321289, "step": 4227 }, { "epoch": 0.5891451264543998, "grad_norm": 0.3078444004058838, "learning_rate": 1.641362463008043e-05, "loss": 0.06835269927978516, "step": 4228 }, { "epoch": 0.5892844701456141, "grad_norm": 0.34110966324806213, "learning_rate": 1.6404350698283853e-05, "loss": 0.08257293701171875, "step": 4229 }, { "epoch": 0.5894238138368285, "grad_norm": 1.0389535427093506, "learning_rate": 1.6395077565358458e-05, "loss": 0.15961074829101562, "step": 4230 }, { "epoch": 0.5895631575280429, "grad_norm": 0.39979010820388794, "learning_rate": 1.6385805233364528e-05, "loss": 0.10228347778320312, "step": 4231 }, { "epoch": 0.5897025012192573, "grad_norm": 0.31581878662109375, "learning_rate": 1.6376533704362155e-05, "loss": 0.08399009704589844, "step": 4232 }, { "epoch": 0.5898418449104716, "grad_norm": 0.443676620721817, "learning_rate": 1.6367262980411273e-05, "loss": 0.09247589111328125, "step": 4233 }, { "epoch": 0.589981188601686, "grad_norm": 0.477750301361084, "learning_rate": 1.635799306357162e-05, "loss": 0.0908355712890625, "step": 4234 }, { "epoch": 0.5901205322929004, "grad_norm": 0.21010470390319824, "learning_rate": 1.6348723955902754e-05, "loss": 0.06458854675292969, "step": 4235 }, { "epoch": 0.5902598759841148, "grad_norm": 0.49260467290878296, "learning_rate": 1.6339455659464073e-05, "loss": 0.1141519546508789, "step": 4236 }, { "epoch": 0.5903992196753292, "grad_norm": 0.42476221919059753, "learning_rate": 1.6330188176314772e-05, "loss": 0.09370231628417969, "step": 4237 }, { "epoch": 0.5905385633665435, "grad_norm": 0.3329406976699829, "learning_rate": 1.6320921508513874e-05, "loss": 0.07845783233642578, "step": 4238 }, { "epoch": 0.5906779070577579, "grad_norm": 0.5825566053390503, "learning_rate": 1.6311655658120214e-05, "loss": 0.09480857849121094, "step": 4239 }, { "epoch": 0.5908172507489723, "grad_norm": 0.6060634255409241, "learning_rate": 1.630239062719247e-05, "loss": 0.11422157287597656, "step": 4240 }, { "epoch": 0.5909565944401867, "grad_norm": 0.2410358488559723, "learning_rate": 1.6293126417789107e-05, "loss": 0.06637001037597656, "step": 4241 }, { "epoch": 0.5910959381314012, "grad_norm": 0.7789146900177002, "learning_rate": 1.628386303196843e-05, "loss": 0.114776611328125, "step": 4242 }, { "epoch": 0.5912352818226155, "grad_norm": 0.4817928075790405, "learning_rate": 1.627460047178854e-05, "loss": 0.10793685913085938, "step": 4243 }, { "epoch": 0.5913746255138299, "grad_norm": 0.7872316241264343, "learning_rate": 1.6265338739307374e-05, "loss": 0.1177215576171875, "step": 4244 }, { "epoch": 0.5915139692050443, "grad_norm": 0.5085355043411255, "learning_rate": 1.6256077836582677e-05, "loss": 0.1152496337890625, "step": 4245 }, { "epoch": 0.5916533128962587, "grad_norm": 0.4017309248447418, "learning_rate": 1.624681776567201e-05, "loss": 0.08722496032714844, "step": 4246 }, { "epoch": 0.591792656587473, "grad_norm": 0.30937114357948303, "learning_rate": 1.6237558528632754e-05, "loss": 0.08802604675292969, "step": 4247 }, { "epoch": 0.5919320002786874, "grad_norm": 0.41937875747680664, "learning_rate": 1.6228300127522083e-05, "loss": 0.0906362533569336, "step": 4248 }, { "epoch": 0.5920713439699018, "grad_norm": 0.259197860956192, "learning_rate": 1.6219042564397023e-05, "loss": 0.07859420776367188, "step": 4249 }, { "epoch": 0.5922106876611162, "grad_norm": 0.38673698902130127, "learning_rate": 1.620978584131438e-05, "loss": 0.09707832336425781, "step": 4250 }, { "epoch": 0.5923500313523306, "grad_norm": 0.43385303020477295, "learning_rate": 1.620052996033079e-05, "loss": 0.10590362548828125, "step": 4251 }, { "epoch": 0.5924893750435449, "grad_norm": 0.2349770963191986, "learning_rate": 1.619127492350271e-05, "loss": 0.07269668579101562, "step": 4252 }, { "epoch": 0.5926287187347593, "grad_norm": 0.7964914441108704, "learning_rate": 1.6182020732886377e-05, "loss": 0.10938549041748047, "step": 4253 }, { "epoch": 0.5927680624259737, "grad_norm": 0.20606297254562378, "learning_rate": 1.6172767390537874e-05, "loss": 0.06501960754394531, "step": 4254 }, { "epoch": 0.5929074061171881, "grad_norm": 0.7406693696975708, "learning_rate": 1.6163514898513076e-05, "loss": 0.12823486328125, "step": 4255 }, { "epoch": 0.5930467498084024, "grad_norm": 0.27187579870224, "learning_rate": 1.6154263258867683e-05, "loss": 0.07677268981933594, "step": 4256 }, { "epoch": 0.5931860934996168, "grad_norm": 0.5245862007141113, "learning_rate": 1.6145012473657197e-05, "loss": 0.11178302764892578, "step": 4257 }, { "epoch": 0.5933254371908312, "grad_norm": 0.35551878809928894, "learning_rate": 1.613576254493693e-05, "loss": 0.08932876586914062, "step": 4258 }, { "epoch": 0.5934647808820456, "grad_norm": 0.42301812767982483, "learning_rate": 1.6126513474762e-05, "loss": 0.08414077758789062, "step": 4259 }, { "epoch": 0.59360412457326, "grad_norm": 0.6257287263870239, "learning_rate": 1.6117265265187337e-05, "loss": 0.10829544067382812, "step": 4260 }, { "epoch": 0.5937434682644743, "grad_norm": 0.36155927181243896, "learning_rate": 1.6108017918267692e-05, "loss": 0.07869148254394531, "step": 4261 }, { "epoch": 0.5938828119556887, "grad_norm": 0.4308112859725952, "learning_rate": 1.6098771436057613e-05, "loss": 0.09504127502441406, "step": 4262 }, { "epoch": 0.5940221556469031, "grad_norm": 0.2474232167005539, "learning_rate": 1.6089525820611453e-05, "loss": 0.07116317749023438, "step": 4263 }, { "epoch": 0.5941614993381175, "grad_norm": 0.39799797534942627, "learning_rate": 1.6080281073983375e-05, "loss": 0.1151885986328125, "step": 4264 }, { "epoch": 0.5943008430293318, "grad_norm": 0.33493489027023315, "learning_rate": 1.6071037198227353e-05, "loss": 0.0895700454711914, "step": 4265 }, { "epoch": 0.5944401867205462, "grad_norm": 0.40401968359947205, "learning_rate": 1.606179419539717e-05, "loss": 0.09290885925292969, "step": 4266 }, { "epoch": 0.5945795304117606, "grad_norm": 0.4128902554512024, "learning_rate": 1.60525520675464e-05, "loss": 0.08153820037841797, "step": 4267 }, { "epoch": 0.594718874102975, "grad_norm": 0.22877612709999084, "learning_rate": 1.6043310816728453e-05, "loss": 0.06336021423339844, "step": 4268 }, { "epoch": 0.5948582177941893, "grad_norm": 0.5616901516914368, "learning_rate": 1.6034070444996498e-05, "loss": 0.09675312042236328, "step": 4269 }, { "epoch": 0.5949975614854037, "grad_norm": 0.2755493223667145, "learning_rate": 1.6024830954403547e-05, "loss": 0.07352447509765625, "step": 4270 }, { "epoch": 0.5951369051766181, "grad_norm": 0.2607648968696594, "learning_rate": 1.60155923470024e-05, "loss": 0.07292938232421875, "step": 4271 }, { "epoch": 0.5952762488678325, "grad_norm": 0.7093551754951477, "learning_rate": 1.6006354624845672e-05, "loss": 0.10014915466308594, "step": 4272 }, { "epoch": 0.5954155925590469, "grad_norm": 0.29763171076774597, "learning_rate": 1.5997117789985776e-05, "loss": 0.07940959930419922, "step": 4273 }, { "epoch": 0.5955549362502612, "grad_norm": 0.29729560017585754, "learning_rate": 1.5987881844474916e-05, "loss": 0.08623886108398438, "step": 4274 }, { "epoch": 0.5956942799414756, "grad_norm": 0.32531899213790894, "learning_rate": 1.597864679036511e-05, "loss": 0.08503532409667969, "step": 4275 }, { "epoch": 0.59583362363269, "grad_norm": 0.3625545799732208, "learning_rate": 1.5969412629708175e-05, "loss": 0.091552734375, "step": 4276 }, { "epoch": 0.5959729673239044, "grad_norm": 0.4198531210422516, "learning_rate": 1.5960179364555736e-05, "loss": 0.10091495513916016, "step": 4277 }, { "epoch": 0.5961123110151187, "grad_norm": 0.6772096157073975, "learning_rate": 1.5950946996959215e-05, "loss": 0.1438426971435547, "step": 4278 }, { "epoch": 0.5962516547063331, "grad_norm": 0.3440890908241272, "learning_rate": 1.594171552896983e-05, "loss": 0.09016990661621094, "step": 4279 }, { "epoch": 0.5963909983975475, "grad_norm": 0.6459162831306458, "learning_rate": 1.5932484962638596e-05, "loss": 0.1296067237854004, "step": 4280 }, { "epoch": 0.5965303420887619, "grad_norm": 0.30490806698799133, "learning_rate": 1.5923255300016343e-05, "loss": 0.0869755744934082, "step": 4281 }, { "epoch": 0.5966696857799764, "grad_norm": 0.41827458143234253, "learning_rate": 1.5914026543153692e-05, "loss": 0.09578704833984375, "step": 4282 }, { "epoch": 0.5968090294711907, "grad_norm": 0.4666815996170044, "learning_rate": 1.5904798694101052e-05, "loss": 0.10046577453613281, "step": 4283 }, { "epoch": 0.5969483731624051, "grad_norm": 0.5194708108901978, "learning_rate": 1.5895571754908656e-05, "loss": 0.08269500732421875, "step": 4284 }, { "epoch": 0.5970877168536195, "grad_norm": 0.41763946413993835, "learning_rate": 1.5886345727626506e-05, "loss": 0.08837890625, "step": 4285 }, { "epoch": 0.5972270605448339, "grad_norm": 0.4195820093154907, "learning_rate": 1.587712061430442e-05, "loss": 0.08429336547851562, "step": 4286 }, { "epoch": 0.5973664042360483, "grad_norm": 0.8402834534645081, "learning_rate": 1.5867896416992002e-05, "loss": 0.10921764373779297, "step": 4287 }, { "epoch": 0.5975057479272626, "grad_norm": 0.5043755769729614, "learning_rate": 1.5858673137738664e-05, "loss": 0.0875253677368164, "step": 4288 }, { "epoch": 0.597645091618477, "grad_norm": 0.38318386673927307, "learning_rate": 1.5849450778593615e-05, "loss": 0.08261680603027344, "step": 4289 }, { "epoch": 0.5977844353096914, "grad_norm": 0.30514639616012573, "learning_rate": 1.5840229341605837e-05, "loss": 0.07734107971191406, "step": 4290 }, { "epoch": 0.5979237790009058, "grad_norm": 0.4954380691051483, "learning_rate": 1.5831008828824134e-05, "loss": 0.10290718078613281, "step": 4291 }, { "epoch": 0.5980631226921201, "grad_norm": 0.6537814736366272, "learning_rate": 1.582178924229708e-05, "loss": 0.11701202392578125, "step": 4292 }, { "epoch": 0.5982024663833345, "grad_norm": 0.44405457377433777, "learning_rate": 1.5812570584073076e-05, "loss": 0.06870651245117188, "step": 4293 }, { "epoch": 0.5983418100745489, "grad_norm": 0.306293785572052, "learning_rate": 1.580335285620028e-05, "loss": 0.08233642578125, "step": 4294 }, { "epoch": 0.5984811537657633, "grad_norm": 0.48364943265914917, "learning_rate": 1.5794136060726682e-05, "loss": 0.1042642593383789, "step": 4295 }, { "epoch": 0.5986204974569777, "grad_norm": 0.39942795038223267, "learning_rate": 1.5784920199700015e-05, "loss": 0.09087276458740234, "step": 4296 }, { "epoch": 0.598759841148192, "grad_norm": 0.43801358342170715, "learning_rate": 1.5775705275167854e-05, "loss": 0.0872802734375, "step": 4297 }, { "epoch": 0.5988991848394064, "grad_norm": 0.40151938796043396, "learning_rate": 1.576649128917754e-05, "loss": 0.09026050567626953, "step": 4298 }, { "epoch": 0.5990385285306208, "grad_norm": 0.5017002820968628, "learning_rate": 1.5757278243776203e-05, "loss": 0.09078788757324219, "step": 4299 }, { "epoch": 0.5991778722218352, "grad_norm": 0.1967606395483017, "learning_rate": 1.5748066141010785e-05, "loss": 0.05249214172363281, "step": 4300 }, { "epoch": 0.5993172159130495, "grad_norm": 0.38431331515312195, "learning_rate": 1.5738854982927993e-05, "loss": 0.08099746704101562, "step": 4301 }, { "epoch": 0.5994565596042639, "grad_norm": 0.4610428512096405, "learning_rate": 1.5729644771574343e-05, "loss": 0.08088874816894531, "step": 4302 }, { "epoch": 0.5995959032954783, "grad_norm": 0.3018493354320526, "learning_rate": 1.572043550899612e-05, "loss": 0.0682525634765625, "step": 4303 }, { "epoch": 0.5997352469866927, "grad_norm": 0.6429982781410217, "learning_rate": 1.5711227197239435e-05, "loss": 0.08879470825195312, "step": 4304 }, { "epoch": 0.599874590677907, "grad_norm": 0.3359735906124115, "learning_rate": 1.5702019838350153e-05, "loss": 0.06863117218017578, "step": 4305 }, { "epoch": 0.6000139343691214, "grad_norm": 0.4144202768802643, "learning_rate": 1.5692813434373934e-05, "loss": 0.08248519897460938, "step": 4306 }, { "epoch": 0.6001532780603358, "grad_norm": 0.3569704294204712, "learning_rate": 1.5683607987356236e-05, "loss": 0.08209800720214844, "step": 4307 }, { "epoch": 0.6002926217515502, "grad_norm": 0.6100709438323975, "learning_rate": 1.5674403499342292e-05, "loss": 0.09993934631347656, "step": 4308 }, { "epoch": 0.6004319654427646, "grad_norm": 0.8249526023864746, "learning_rate": 1.566519997237714e-05, "loss": 0.09371757507324219, "step": 4309 }, { "epoch": 0.6005713091339789, "grad_norm": 0.25579071044921875, "learning_rate": 1.5655997408505595e-05, "loss": 0.07179069519042969, "step": 4310 }, { "epoch": 0.6007106528251933, "grad_norm": 0.5067604780197144, "learning_rate": 1.5646795809772246e-05, "loss": 0.12005043029785156, "step": 4311 }, { "epoch": 0.6008499965164077, "grad_norm": 0.3436828851699829, "learning_rate": 1.563759517822148e-05, "loss": 0.07510757446289062, "step": 4312 }, { "epoch": 0.6009893402076221, "grad_norm": 0.345567911863327, "learning_rate": 1.562839551589747e-05, "loss": 0.08088111877441406, "step": 4313 }, { "epoch": 0.6011286838988364, "grad_norm": 0.4882480204105377, "learning_rate": 1.5619196824844174e-05, "loss": 0.09399604797363281, "step": 4314 }, { "epoch": 0.6012680275900508, "grad_norm": 0.34947770833969116, "learning_rate": 1.5609999107105322e-05, "loss": 0.09238815307617188, "step": 4315 }, { "epoch": 0.6014073712812652, "grad_norm": 0.5666284561157227, "learning_rate": 1.5600802364724456e-05, "loss": 0.110137939453125, "step": 4316 }, { "epoch": 0.6015467149724796, "grad_norm": 0.2855277955532074, "learning_rate": 1.559160659974486e-05, "loss": 0.06323432922363281, "step": 4317 }, { "epoch": 0.601686058663694, "grad_norm": 0.2140055149793625, "learning_rate": 1.5582411814209633e-05, "loss": 0.07003593444824219, "step": 4318 }, { "epoch": 0.6018254023549083, "grad_norm": 0.44561001658439636, "learning_rate": 1.5573218010161642e-05, "loss": 0.09647274017333984, "step": 4319 }, { "epoch": 0.6019647460461227, "grad_norm": 0.3052234351634979, "learning_rate": 1.556402518964355e-05, "loss": 0.07429122924804688, "step": 4320 }, { "epoch": 0.6021040897373371, "grad_norm": 0.4643697738647461, "learning_rate": 1.5554833354697787e-05, "loss": 0.09038734436035156, "step": 4321 }, { "epoch": 0.6022434334285516, "grad_norm": 0.33749672770500183, "learning_rate": 1.5545642507366566e-05, "loss": 0.09173965454101562, "step": 4322 }, { "epoch": 0.602382777119766, "grad_norm": 0.24376077950000763, "learning_rate": 1.5536452649691884e-05, "loss": 0.06325149536132812, "step": 4323 }, { "epoch": 0.6025221208109803, "grad_norm": 0.4440436065196991, "learning_rate": 1.5527263783715515e-05, "loss": 0.08221960067749023, "step": 4324 }, { "epoch": 0.6026614645021947, "grad_norm": 0.37502971291542053, "learning_rate": 1.551807591147902e-05, "loss": 0.11469650268554688, "step": 4325 }, { "epoch": 0.6028008081934091, "grad_norm": 0.26141172647476196, "learning_rate": 1.5508889035023738e-05, "loss": 0.07856941223144531, "step": 4326 }, { "epoch": 0.6029401518846235, "grad_norm": 0.4522486627101898, "learning_rate": 1.549970315639078e-05, "loss": 0.09422111511230469, "step": 4327 }, { "epoch": 0.6030794955758378, "grad_norm": 0.387520432472229, "learning_rate": 1.5490518277621028e-05, "loss": 0.07677650451660156, "step": 4328 }, { "epoch": 0.6032188392670522, "grad_norm": 0.4246736764907837, "learning_rate": 1.5481334400755167e-05, "loss": 0.09272956848144531, "step": 4329 }, { "epoch": 0.6033581829582666, "grad_norm": 0.38624492287635803, "learning_rate": 1.547215152783364e-05, "loss": 0.10107612609863281, "step": 4330 }, { "epoch": 0.603497526649481, "grad_norm": 0.4081531763076782, "learning_rate": 1.5462969660896677e-05, "loss": 0.10271549224853516, "step": 4331 }, { "epoch": 0.6036368703406954, "grad_norm": 0.5028614401817322, "learning_rate": 1.545378880198426e-05, "loss": 0.11981391906738281, "step": 4332 }, { "epoch": 0.6037762140319097, "grad_norm": 0.44842812418937683, "learning_rate": 1.544460895313619e-05, "loss": 0.1009979248046875, "step": 4333 }, { "epoch": 0.6039155577231241, "grad_norm": 0.5853284001350403, "learning_rate": 1.5435430116392003e-05, "loss": 0.10016822814941406, "step": 4334 }, { "epoch": 0.6040549014143385, "grad_norm": 0.5033504962921143, "learning_rate": 1.542625229379103e-05, "loss": 0.09569168090820312, "step": 4335 }, { "epoch": 0.6041942451055529, "grad_norm": 0.6556228399276733, "learning_rate": 1.5417075487372384e-05, "loss": 0.1215982437133789, "step": 4336 }, { "epoch": 0.6043335887967672, "grad_norm": 0.4698899984359741, "learning_rate": 1.5407899699174936e-05, "loss": 0.0881967544555664, "step": 4337 }, { "epoch": 0.6044729324879816, "grad_norm": 0.7146233916282654, "learning_rate": 1.5398724931237334e-05, "loss": 0.10245323181152344, "step": 4338 }, { "epoch": 0.604612276179196, "grad_norm": 0.3442179560661316, "learning_rate": 1.5389551185598e-05, "loss": 0.08383750915527344, "step": 4339 }, { "epoch": 0.6047516198704104, "grad_norm": 0.2657568156719208, "learning_rate": 1.5380378464295133e-05, "loss": 0.06733894348144531, "step": 4340 }, { "epoch": 0.6048909635616248, "grad_norm": 0.2510393261909485, "learning_rate": 1.537120676936671e-05, "loss": 0.05688667297363281, "step": 4341 }, { "epoch": 0.6050303072528391, "grad_norm": 0.26058146357536316, "learning_rate": 1.5362036102850465e-05, "loss": 0.08105087280273438, "step": 4342 }, { "epoch": 0.6051696509440535, "grad_norm": 0.47928401827812195, "learning_rate": 1.535286646678391e-05, "loss": 0.10331153869628906, "step": 4343 }, { "epoch": 0.6053089946352679, "grad_norm": 0.2748623192310333, "learning_rate": 1.5343697863204323e-05, "loss": 0.07226753234863281, "step": 4344 }, { "epoch": 0.6054483383264823, "grad_norm": 0.754459023475647, "learning_rate": 1.533453029414877e-05, "loss": 0.12672805786132812, "step": 4345 }, { "epoch": 0.6055876820176966, "grad_norm": 0.44312620162963867, "learning_rate": 1.5325363761654075e-05, "loss": 0.08278560638427734, "step": 4346 }, { "epoch": 0.605727025708911, "grad_norm": 0.5666711926460266, "learning_rate": 1.5316198267756834e-05, "loss": 0.08909988403320312, "step": 4347 }, { "epoch": 0.6058663694001254, "grad_norm": 0.5665215849876404, "learning_rate": 1.5307033814493392e-05, "loss": 0.1345987319946289, "step": 4348 }, { "epoch": 0.6060057130913398, "grad_norm": 0.6628089547157288, "learning_rate": 1.5297870403899898e-05, "loss": 0.11827659606933594, "step": 4349 }, { "epoch": 0.6061450567825541, "grad_norm": 0.27774930000305176, "learning_rate": 1.528870803801225e-05, "loss": 0.0630340576171875, "step": 4350 }, { "epoch": 0.6062844004737685, "grad_norm": 0.4380408525466919, "learning_rate": 1.5279546718866113e-05, "loss": 0.10371017456054688, "step": 4351 }, { "epoch": 0.6064237441649829, "grad_norm": 0.2545788586139679, "learning_rate": 1.5270386448496926e-05, "loss": 0.06620025634765625, "step": 4352 }, { "epoch": 0.6065630878561973, "grad_norm": 1.0077630281448364, "learning_rate": 1.5261227228939896e-05, "loss": 0.1563739776611328, "step": 4353 }, { "epoch": 0.6067024315474117, "grad_norm": 0.4680543839931488, "learning_rate": 1.5252069062229985e-05, "loss": 0.09190940856933594, "step": 4354 }, { "epoch": 0.606841775238626, "grad_norm": 0.4419260621070862, "learning_rate": 1.5242911950401929e-05, "loss": 0.08714866638183594, "step": 4355 }, { "epoch": 0.6069811189298404, "grad_norm": 0.24258480966091156, "learning_rate": 1.5233755895490232e-05, "loss": 0.06897163391113281, "step": 4356 }, { "epoch": 0.6071204626210548, "grad_norm": 0.25801438093185425, "learning_rate": 1.522460089952916e-05, "loss": 0.08353519439697266, "step": 4357 }, { "epoch": 0.6072598063122692, "grad_norm": 0.30918967723846436, "learning_rate": 1.521544696455275e-05, "loss": 0.08315086364746094, "step": 4358 }, { "epoch": 0.6073991500034835, "grad_norm": 0.432313472032547, "learning_rate": 1.520629409259479e-05, "loss": 0.10988235473632812, "step": 4359 }, { "epoch": 0.6075384936946979, "grad_norm": 0.3778811991214752, "learning_rate": 1.5197142285688831e-05, "loss": 0.08148479461669922, "step": 4360 }, { "epoch": 0.6076778373859123, "grad_norm": 0.3434462249279022, "learning_rate": 1.518799154586821e-05, "loss": 0.07528877258300781, "step": 4361 }, { "epoch": 0.6078171810771267, "grad_norm": 0.4497319459915161, "learning_rate": 1.5178841875166008e-05, "loss": 0.0948944091796875, "step": 4362 }, { "epoch": 0.6079565247683412, "grad_norm": 0.2841293215751648, "learning_rate": 1.5169693275615079e-05, "loss": 0.06557941436767578, "step": 4363 }, { "epoch": 0.6080958684595555, "grad_norm": 0.4516821503639221, "learning_rate": 1.5160545749248014e-05, "loss": 0.09394645690917969, "step": 4364 }, { "epoch": 0.6082352121507699, "grad_norm": 0.39465266466140747, "learning_rate": 1.5151399298097204e-05, "loss": 0.08049297332763672, "step": 4365 }, { "epoch": 0.6083745558419843, "grad_norm": 0.365246057510376, "learning_rate": 1.5142253924194774e-05, "loss": 0.07372379302978516, "step": 4366 }, { "epoch": 0.6085138995331987, "grad_norm": 0.45779022574424744, "learning_rate": 1.5133109629572614e-05, "loss": 0.08776092529296875, "step": 4367 }, { "epoch": 0.608653243224413, "grad_norm": 0.400137335062027, "learning_rate": 1.5123966416262392e-05, "loss": 0.08966255187988281, "step": 4368 }, { "epoch": 0.6087925869156274, "grad_norm": 0.4529951512813568, "learning_rate": 1.51148242862955e-05, "loss": 0.09659385681152344, "step": 4369 }, { "epoch": 0.6089319306068418, "grad_norm": 0.5005354285240173, "learning_rate": 1.5105683241703123e-05, "loss": 0.10712623596191406, "step": 4370 }, { "epoch": 0.6090712742980562, "grad_norm": 0.42617982625961304, "learning_rate": 1.5096543284516188e-05, "loss": 0.09533500671386719, "step": 4371 }, { "epoch": 0.6092106179892706, "grad_norm": 0.21783317625522614, "learning_rate": 1.5087404416765392e-05, "loss": 0.06021881103515625, "step": 4372 }, { "epoch": 0.6093499616804849, "grad_norm": 0.5120546817779541, "learning_rate": 1.5078266640481178e-05, "loss": 0.10751533508300781, "step": 4373 }, { "epoch": 0.6094893053716993, "grad_norm": 0.46297717094421387, "learning_rate": 1.5069129957693755e-05, "loss": 0.07459831237792969, "step": 4374 }, { "epoch": 0.6096286490629137, "grad_norm": 0.39219221472740173, "learning_rate": 1.5059994370433078e-05, "loss": 0.08032989501953125, "step": 4375 }, { "epoch": 0.6097679927541281, "grad_norm": 0.5103345513343811, "learning_rate": 1.5050859880728865e-05, "loss": 0.10629749298095703, "step": 4376 }, { "epoch": 0.6099073364453425, "grad_norm": 0.2514292895793915, "learning_rate": 1.50417264906106e-05, "loss": 0.07557487487792969, "step": 4377 }, { "epoch": 0.6100466801365568, "grad_norm": 0.6229480504989624, "learning_rate": 1.5032594202107509e-05, "loss": 0.115631103515625, "step": 4378 }, { "epoch": 0.6101860238277712, "grad_norm": 0.24010883271694183, "learning_rate": 1.5023463017248582e-05, "loss": 0.06472015380859375, "step": 4379 }, { "epoch": 0.6103253675189856, "grad_norm": 0.35683804750442505, "learning_rate": 1.501433293806255e-05, "loss": 0.08292293548583984, "step": 4380 }, { "epoch": 0.6104647112102, "grad_norm": 0.8884021639823914, "learning_rate": 1.5005203966577919e-05, "loss": 0.15869712829589844, "step": 4381 }, { "epoch": 0.6106040549014143, "grad_norm": 0.3223867416381836, "learning_rate": 1.4996076104822929e-05, "loss": 0.07839488983154297, "step": 4382 }, { "epoch": 0.6107433985926287, "grad_norm": 0.22706077992916107, "learning_rate": 1.498694935482559e-05, "loss": 0.06637859344482422, "step": 4383 }, { "epoch": 0.6108827422838431, "grad_norm": 0.5740638971328735, "learning_rate": 1.4977823718613657e-05, "loss": 0.11501502990722656, "step": 4384 }, { "epoch": 0.6110220859750575, "grad_norm": 0.5040796399116516, "learning_rate": 1.4968699198214634e-05, "loss": 0.11604690551757812, "step": 4385 }, { "epoch": 0.6111614296662718, "grad_norm": 0.2882956564426422, "learning_rate": 1.495957579565578e-05, "loss": 0.08512687683105469, "step": 4386 }, { "epoch": 0.6113007733574862, "grad_norm": 0.5011085271835327, "learning_rate": 1.495045351296411e-05, "loss": 0.10135269165039062, "step": 4387 }, { "epoch": 0.6114401170487006, "grad_norm": 0.3740328252315521, "learning_rate": 1.4941332352166385e-05, "loss": 0.0727996826171875, "step": 4388 }, { "epoch": 0.611579460739915, "grad_norm": 0.33597666025161743, "learning_rate": 1.4932212315289123e-05, "loss": 0.08660125732421875, "step": 4389 }, { "epoch": 0.6117188044311294, "grad_norm": 0.21418540179729462, "learning_rate": 1.4923093404358588e-05, "loss": 0.059624671936035156, "step": 4390 }, { "epoch": 0.6118581481223437, "grad_norm": 0.33718788623809814, "learning_rate": 1.4913975621400787e-05, "loss": 0.0784912109375, "step": 4391 }, { "epoch": 0.6119974918135581, "grad_norm": 0.46394258737564087, "learning_rate": 1.4904858968441485e-05, "loss": 0.09051132202148438, "step": 4392 }, { "epoch": 0.6121368355047725, "grad_norm": 0.5971395373344421, "learning_rate": 1.4895743447506196e-05, "loss": 0.10840606689453125, "step": 4393 }, { "epoch": 0.6122761791959869, "grad_norm": 0.40696173906326294, "learning_rate": 1.4886629060620181e-05, "loss": 0.07547950744628906, "step": 4394 }, { "epoch": 0.6124155228872012, "grad_norm": 0.20598849654197693, "learning_rate": 1.4877515809808459e-05, "loss": 0.06276321411132812, "step": 4395 }, { "epoch": 0.6125548665784156, "grad_norm": 0.2464522123336792, "learning_rate": 1.4868403697095764e-05, "loss": 0.07397651672363281, "step": 4396 }, { "epoch": 0.61269421026963, "grad_norm": 0.33375898003578186, "learning_rate": 1.4859292724506613e-05, "loss": 0.09561920166015625, "step": 4397 }, { "epoch": 0.6128335539608444, "grad_norm": 0.6788848042488098, "learning_rate": 1.4850182894065258e-05, "loss": 0.08547210693359375, "step": 4398 }, { "epoch": 0.6129728976520588, "grad_norm": 0.4669043719768524, "learning_rate": 1.4841074207795684e-05, "loss": 0.11097908020019531, "step": 4399 }, { "epoch": 0.6131122413432731, "grad_norm": 0.473991334438324, "learning_rate": 1.483196666772165e-05, "loss": 0.10281181335449219, "step": 4400 }, { "epoch": 0.6132515850344875, "grad_norm": 0.3439995348453522, "learning_rate": 1.482286027586663e-05, "loss": 0.07788848876953125, "step": 4401 }, { "epoch": 0.6133909287257019, "grad_norm": 0.2826524078845978, "learning_rate": 1.4813755034253862e-05, "loss": 0.06554794311523438, "step": 4402 }, { "epoch": 0.6135302724169164, "grad_norm": 0.4150504171848297, "learning_rate": 1.4804650944906316e-05, "loss": 0.09797477722167969, "step": 4403 }, { "epoch": 0.6136696161081308, "grad_norm": 0.2973020076751709, "learning_rate": 1.4795548009846723e-05, "loss": 0.07308578491210938, "step": 4404 }, { "epoch": 0.6138089597993451, "grad_norm": 0.7574416995048523, "learning_rate": 1.4786446231097546e-05, "loss": 0.11719512939453125, "step": 4405 }, { "epoch": 0.6139483034905595, "grad_norm": 0.21564653515815735, "learning_rate": 1.4777345610680987e-05, "loss": 0.07436084747314453, "step": 4406 }, { "epoch": 0.6140876471817739, "grad_norm": 0.3153612017631531, "learning_rate": 1.4768246150618995e-05, "loss": 0.07751274108886719, "step": 4407 }, { "epoch": 0.6142269908729883, "grad_norm": 1.010072946548462, "learning_rate": 1.4759147852933263e-05, "loss": 0.1707611083984375, "step": 4408 }, { "epoch": 0.6143663345642026, "grad_norm": 0.29089871048927307, "learning_rate": 1.4750050719645227e-05, "loss": 0.07923316955566406, "step": 4409 }, { "epoch": 0.614505678255417, "grad_norm": 0.42160436511039734, "learning_rate": 1.4740954752776064e-05, "loss": 0.0824432373046875, "step": 4410 }, { "epoch": 0.6146450219466314, "grad_norm": 0.4928901195526123, "learning_rate": 1.47318599543467e-05, "loss": 0.08845138549804688, "step": 4411 }, { "epoch": 0.6147843656378458, "grad_norm": 0.6962693929672241, "learning_rate": 1.4722766326377769e-05, "loss": 0.13092613220214844, "step": 4412 }, { "epoch": 0.6149237093290602, "grad_norm": 0.2583491802215576, "learning_rate": 1.4713673870889682e-05, "loss": 0.05639171600341797, "step": 4413 }, { "epoch": 0.6150630530202745, "grad_norm": 0.249579519033432, "learning_rate": 1.4704582589902571e-05, "loss": 0.0738992691040039, "step": 4414 }, { "epoch": 0.6152023967114889, "grad_norm": 0.2860461473464966, "learning_rate": 1.4695492485436308e-05, "loss": 0.08080863952636719, "step": 4415 }, { "epoch": 0.6153417404027033, "grad_norm": 0.37590473890304565, "learning_rate": 1.4686403559510522e-05, "loss": 0.07728195190429688, "step": 4416 }, { "epoch": 0.6154810840939177, "grad_norm": 0.40981897711753845, "learning_rate": 1.4677315814144549e-05, "loss": 0.07838058471679688, "step": 4417 }, { "epoch": 0.615620427785132, "grad_norm": 0.3278336524963379, "learning_rate": 1.4668229251357482e-05, "loss": 0.10363578796386719, "step": 4418 }, { "epoch": 0.6157597714763464, "grad_norm": 0.3095282316207886, "learning_rate": 1.4659143873168145e-05, "loss": 0.09001827239990234, "step": 4419 }, { "epoch": 0.6158991151675608, "grad_norm": 0.3958961069583893, "learning_rate": 1.4650059681595109e-05, "loss": 0.09400367736816406, "step": 4420 }, { "epoch": 0.6160384588587752, "grad_norm": 0.3282521963119507, "learning_rate": 1.4640976678656674e-05, "loss": 0.09108352661132812, "step": 4421 }, { "epoch": 0.6161778025499896, "grad_norm": 0.6200852990150452, "learning_rate": 1.463189486637087e-05, "loss": 0.12784957885742188, "step": 4422 }, { "epoch": 0.6163171462412039, "grad_norm": 0.4753054082393646, "learning_rate": 1.4622814246755468e-05, "loss": 0.09352493286132812, "step": 4423 }, { "epoch": 0.6164564899324183, "grad_norm": 0.5033499002456665, "learning_rate": 1.4613734821827976e-05, "loss": 0.10152626037597656, "step": 4424 }, { "epoch": 0.6165958336236327, "grad_norm": 0.3000742793083191, "learning_rate": 1.4604656593605637e-05, "loss": 0.07673454284667969, "step": 4425 }, { "epoch": 0.6167351773148471, "grad_norm": 0.29957759380340576, "learning_rate": 1.4595579564105432e-05, "loss": 0.07199954986572266, "step": 4426 }, { "epoch": 0.6168745210060614, "grad_norm": 0.8721594214439392, "learning_rate": 1.458650373534406e-05, "loss": 0.11328887939453125, "step": 4427 }, { "epoch": 0.6170138646972758, "grad_norm": 0.28031790256500244, "learning_rate": 1.457742910933796e-05, "loss": 0.06940746307373047, "step": 4428 }, { "epoch": 0.6171532083884902, "grad_norm": 0.5306028723716736, "learning_rate": 1.4568355688103318e-05, "loss": 0.1288623809814453, "step": 4429 }, { "epoch": 0.6172925520797046, "grad_norm": 0.47203969955444336, "learning_rate": 1.4559283473656031e-05, "loss": 0.09840202331542969, "step": 4430 }, { "epoch": 0.617431895770919, "grad_norm": 0.4191051125526428, "learning_rate": 1.4550212468011742e-05, "loss": 0.11074638366699219, "step": 4431 }, { "epoch": 0.6175712394621333, "grad_norm": 0.5333910584449768, "learning_rate": 1.454114267318583e-05, "loss": 0.09947395324707031, "step": 4432 }, { "epoch": 0.6177105831533477, "grad_norm": 0.3333050608634949, "learning_rate": 1.4532074091193385e-05, "loss": 0.0719146728515625, "step": 4433 }, { "epoch": 0.6178499268445621, "grad_norm": 0.42985668778419495, "learning_rate": 1.4523006724049238e-05, "loss": 0.09824657440185547, "step": 4434 }, { "epoch": 0.6179892705357765, "grad_norm": 0.16967526078224182, "learning_rate": 1.4513940573767955e-05, "loss": 0.057842254638671875, "step": 4435 }, { "epoch": 0.6181286142269908, "grad_norm": 0.3704566955566406, "learning_rate": 1.450487564236383e-05, "loss": 0.07242012023925781, "step": 4436 }, { "epoch": 0.6182679579182052, "grad_norm": 0.6515153646469116, "learning_rate": 1.4495811931850886e-05, "loss": 0.14598846435546875, "step": 4437 }, { "epoch": 0.6184073016094196, "grad_norm": 0.44885873794555664, "learning_rate": 1.4486749444242862e-05, "loss": 0.11011886596679688, "step": 4438 }, { "epoch": 0.618546645300634, "grad_norm": 0.3461162745952606, "learning_rate": 1.447768818155324e-05, "loss": 0.08923149108886719, "step": 4439 }, { "epoch": 0.6186859889918483, "grad_norm": 0.42780423164367676, "learning_rate": 1.446862814579523e-05, "loss": 0.08513259887695312, "step": 4440 }, { "epoch": 0.6188253326830627, "grad_norm": 0.2889516353607178, "learning_rate": 1.4459569338981765e-05, "loss": 0.06806564331054688, "step": 4441 }, { "epoch": 0.6189646763742771, "grad_norm": 0.5077807307243347, "learning_rate": 1.4450511763125506e-05, "loss": 0.09954071044921875, "step": 4442 }, { "epoch": 0.6191040200654916, "grad_norm": 0.5910297632217407, "learning_rate": 1.444145542023883e-05, "loss": 0.11904525756835938, "step": 4443 }, { "epoch": 0.619243363756706, "grad_norm": 0.42604178190231323, "learning_rate": 1.4432400312333854e-05, "loss": 0.10444831848144531, "step": 4444 }, { "epoch": 0.6193827074479203, "grad_norm": 0.39210307598114014, "learning_rate": 1.4423346441422422e-05, "loss": 0.09297657012939453, "step": 4445 }, { "epoch": 0.6195220511391347, "grad_norm": 0.5853728652000427, "learning_rate": 1.4414293809516094e-05, "loss": 0.10874652862548828, "step": 4446 }, { "epoch": 0.6196613948303491, "grad_norm": 0.2693627178668976, "learning_rate": 1.4405242418626153e-05, "loss": 0.0760040283203125, "step": 4447 }, { "epoch": 0.6198007385215635, "grad_norm": 0.4167273938655853, "learning_rate": 1.4396192270763622e-05, "loss": 0.09534454345703125, "step": 4448 }, { "epoch": 0.6199400822127779, "grad_norm": 0.3117867708206177, "learning_rate": 1.4387143367939231e-05, "loss": 0.08014869689941406, "step": 4449 }, { "epoch": 0.6200794259039922, "grad_norm": 0.3887786567211151, "learning_rate": 1.4378095712163439e-05, "loss": 0.07845544815063477, "step": 4450 }, { "epoch": 0.6202187695952066, "grad_norm": 0.6843225359916687, "learning_rate": 1.4369049305446423e-05, "loss": 0.09704971313476562, "step": 4451 }, { "epoch": 0.620358113286421, "grad_norm": 0.3312377333641052, "learning_rate": 1.4360004149798101e-05, "loss": 0.08862686157226562, "step": 4452 }, { "epoch": 0.6204974569776354, "grad_norm": 0.47888845205307007, "learning_rate": 1.4350960247228096e-05, "loss": 0.0956573486328125, "step": 4453 }, { "epoch": 0.6206368006688497, "grad_norm": 0.3487328588962555, "learning_rate": 1.4341917599745751e-05, "loss": 0.08647537231445312, "step": 4454 }, { "epoch": 0.6207761443600641, "grad_norm": 0.24733611941337585, "learning_rate": 1.4332876209360136e-05, "loss": 0.07207107543945312, "step": 4455 }, { "epoch": 0.6209154880512785, "grad_norm": 0.3883887827396393, "learning_rate": 1.4323836078080046e-05, "loss": 0.10354900360107422, "step": 4456 }, { "epoch": 0.6210548317424929, "grad_norm": 0.3598092496395111, "learning_rate": 1.4314797207913995e-05, "loss": 0.07749176025390625, "step": 4457 }, { "epoch": 0.6211941754337073, "grad_norm": 0.3052160143852234, "learning_rate": 1.4305759600870208e-05, "loss": 0.07677555084228516, "step": 4458 }, { "epoch": 0.6213335191249216, "grad_norm": 0.6839844584465027, "learning_rate": 1.4296723258956635e-05, "loss": 0.11998176574707031, "step": 4459 }, { "epoch": 0.621472862816136, "grad_norm": 0.6222801208496094, "learning_rate": 1.428768818418094e-05, "loss": 0.10771846771240234, "step": 4460 }, { "epoch": 0.6216122065073504, "grad_norm": 0.3153083026409149, "learning_rate": 1.4278654378550522e-05, "loss": 0.0909423828125, "step": 4461 }, { "epoch": 0.6217515501985648, "grad_norm": 0.38315221667289734, "learning_rate": 1.4269621844072481e-05, "loss": 0.08524513244628906, "step": 4462 }, { "epoch": 0.6218908938897791, "grad_norm": 0.3358972370624542, "learning_rate": 1.4260590582753641e-05, "loss": 0.078521728515625, "step": 4463 }, { "epoch": 0.6220302375809935, "grad_norm": 0.345201313495636, "learning_rate": 1.4251560596600536e-05, "loss": 0.11107826232910156, "step": 4464 }, { "epoch": 0.6221695812722079, "grad_norm": 0.23416604101657867, "learning_rate": 1.4242531887619428e-05, "loss": 0.06717395782470703, "step": 4465 }, { "epoch": 0.6223089249634223, "grad_norm": 0.4715441167354584, "learning_rate": 1.4233504457816291e-05, "loss": 0.09518623352050781, "step": 4466 }, { "epoch": 0.6224482686546366, "grad_norm": 0.44404715299606323, "learning_rate": 1.4224478309196808e-05, "loss": 0.07884025573730469, "step": 4467 }, { "epoch": 0.622587612345851, "grad_norm": 0.5194984078407288, "learning_rate": 1.4215453443766391e-05, "loss": 0.09008026123046875, "step": 4468 }, { "epoch": 0.6227269560370654, "grad_norm": 0.4031558632850647, "learning_rate": 1.420642986353016e-05, "loss": 0.0775909423828125, "step": 4469 }, { "epoch": 0.6228662997282798, "grad_norm": 0.27821147441864014, "learning_rate": 1.4197407570492941e-05, "loss": 0.0841522216796875, "step": 4470 }, { "epoch": 0.6230056434194942, "grad_norm": 0.624203622341156, "learning_rate": 1.4188386566659276e-05, "loss": 0.10931110382080078, "step": 4471 }, { "epoch": 0.6231449871107085, "grad_norm": 0.4313911199569702, "learning_rate": 1.4179366854033441e-05, "loss": 0.09364795684814453, "step": 4472 }, { "epoch": 0.6232843308019229, "grad_norm": 0.32367846369743347, "learning_rate": 1.4170348434619405e-05, "loss": 0.0687856674194336, "step": 4473 }, { "epoch": 0.6234236744931373, "grad_norm": 0.21808283030986786, "learning_rate": 1.4161331310420856e-05, "loss": 0.05828380584716797, "step": 4474 }, { "epoch": 0.6235630181843517, "grad_norm": 0.7091418504714966, "learning_rate": 1.4152315483441188e-05, "loss": 0.10678863525390625, "step": 4475 }, { "epoch": 0.623702361875566, "grad_norm": 0.2861049771308899, "learning_rate": 1.414330095568351e-05, "loss": 0.08919143676757812, "step": 4476 }, { "epoch": 0.6238417055667804, "grad_norm": 0.39125311374664307, "learning_rate": 1.4134287729150653e-05, "loss": 0.09955406188964844, "step": 4477 }, { "epoch": 0.6239810492579948, "grad_norm": 0.2715814411640167, "learning_rate": 1.4125275805845147e-05, "loss": 0.0682535171508789, "step": 4478 }, { "epoch": 0.6241203929492092, "grad_norm": 0.4069819450378418, "learning_rate": 1.4116265187769239e-05, "loss": 0.10981941223144531, "step": 4479 }, { "epoch": 0.6242597366404236, "grad_norm": 0.3776310980319977, "learning_rate": 1.4107255876924865e-05, "loss": 0.09056663513183594, "step": 4480 }, { "epoch": 0.6243990803316379, "grad_norm": 0.419537752866745, "learning_rate": 1.409824787531371e-05, "loss": 0.08897018432617188, "step": 4481 }, { "epoch": 0.6245384240228523, "grad_norm": 0.504173755645752, "learning_rate": 1.408924118493714e-05, "loss": 0.09207344055175781, "step": 4482 }, { "epoch": 0.6246777677140668, "grad_norm": 0.3685634136199951, "learning_rate": 1.4080235807796225e-05, "loss": 0.0875701904296875, "step": 4483 }, { "epoch": 0.6248171114052812, "grad_norm": 0.7345386147499084, "learning_rate": 1.4071231745891768e-05, "loss": 0.10126495361328125, "step": 4484 }, { "epoch": 0.6249564550964956, "grad_norm": 0.6616860032081604, "learning_rate": 1.4062229001224268e-05, "loss": 0.10773658752441406, "step": 4485 }, { "epoch": 0.6250957987877099, "grad_norm": 0.4704466462135315, "learning_rate": 1.4053227575793917e-05, "loss": 0.09924507141113281, "step": 4486 }, { "epoch": 0.6252351424789243, "grad_norm": 0.7378404140472412, "learning_rate": 1.4044227471600627e-05, "loss": 0.1233367919921875, "step": 4487 }, { "epoch": 0.6253744861701387, "grad_norm": 0.39872685074806213, "learning_rate": 1.4035228690644023e-05, "loss": 0.092498779296875, "step": 4488 }, { "epoch": 0.6255138298613531, "grad_norm": 0.8097701668739319, "learning_rate": 1.4026231234923429e-05, "loss": 0.11079788208007812, "step": 4489 }, { "epoch": 0.6256531735525674, "grad_norm": 0.2856326103210449, "learning_rate": 1.4017235106437871e-05, "loss": 0.07610607147216797, "step": 4490 }, { "epoch": 0.6257925172437818, "grad_norm": 0.4556337893009186, "learning_rate": 1.4008240307186084e-05, "loss": 0.09707832336425781, "step": 4491 }, { "epoch": 0.6259318609349962, "grad_norm": 0.47844991087913513, "learning_rate": 1.3999246839166499e-05, "loss": 0.07442665100097656, "step": 4492 }, { "epoch": 0.6260712046262106, "grad_norm": 0.42635342478752136, "learning_rate": 1.399025470437727e-05, "loss": 0.08244895935058594, "step": 4493 }, { "epoch": 0.626210548317425, "grad_norm": 0.6095706224441528, "learning_rate": 1.398126390481624e-05, "loss": 0.09479618072509766, "step": 4494 }, { "epoch": 0.6263498920086393, "grad_norm": 0.5305610299110413, "learning_rate": 1.3972274442480971e-05, "loss": 0.09630680084228516, "step": 4495 }, { "epoch": 0.6264892356998537, "grad_norm": 0.6241780519485474, "learning_rate": 1.3963286319368695e-05, "loss": 0.09165191650390625, "step": 4496 }, { "epoch": 0.6266285793910681, "grad_norm": 0.6201301217079163, "learning_rate": 1.395429953747638e-05, "loss": 0.08475208282470703, "step": 4497 }, { "epoch": 0.6267679230822825, "grad_norm": 0.3653271496295929, "learning_rate": 1.3945314098800684e-05, "loss": 0.07399368286132812, "step": 4498 }, { "epoch": 0.6269072667734968, "grad_norm": 0.5206770896911621, "learning_rate": 1.3936330005337959e-05, "loss": 0.10593605041503906, "step": 4499 }, { "epoch": 0.6270466104647112, "grad_norm": 0.21271751821041107, "learning_rate": 1.392734725908428e-05, "loss": 0.07289695739746094, "step": 4500 }, { "epoch": 0.6271859541559256, "grad_norm": 0.43345266580581665, "learning_rate": 1.3918365862035395e-05, "loss": 0.07505035400390625, "step": 4501 }, { "epoch": 0.62732529784714, "grad_norm": 0.5195086002349854, "learning_rate": 1.3909385816186767e-05, "loss": 0.08366966247558594, "step": 4502 }, { "epoch": 0.6274646415383544, "grad_norm": 0.2996053099632263, "learning_rate": 1.390040712353356e-05, "loss": 0.072784423828125, "step": 4503 }, { "epoch": 0.6276039852295687, "grad_norm": 0.34629565477371216, "learning_rate": 1.3891429786070634e-05, "loss": 0.0764455795288086, "step": 4504 }, { "epoch": 0.6277433289207831, "grad_norm": 0.5912364721298218, "learning_rate": 1.3882453805792549e-05, "loss": 0.11140251159667969, "step": 4505 }, { "epoch": 0.6278826726119975, "grad_norm": 0.325692355632782, "learning_rate": 1.3873479184693568e-05, "loss": 0.08506584167480469, "step": 4506 }, { "epoch": 0.6280220163032119, "grad_norm": 0.5380306839942932, "learning_rate": 1.3864505924767637e-05, "loss": 0.10743331909179688, "step": 4507 }, { "epoch": 0.6281613599944262, "grad_norm": 0.26450660824775696, "learning_rate": 1.3855534028008411e-05, "loss": 0.06974220275878906, "step": 4508 }, { "epoch": 0.6283007036856406, "grad_norm": 0.21581071615219116, "learning_rate": 1.3846563496409245e-05, "loss": 0.062229156494140625, "step": 4509 }, { "epoch": 0.628440047376855, "grad_norm": 0.17179259657859802, "learning_rate": 1.383759433196318e-05, "loss": 0.054993629455566406, "step": 4510 }, { "epoch": 0.6285793910680694, "grad_norm": 0.6132241487503052, "learning_rate": 1.3828626536662978e-05, "loss": 0.11154747009277344, "step": 4511 }, { "epoch": 0.6287187347592837, "grad_norm": 0.2281607687473297, "learning_rate": 1.3819660112501054e-05, "loss": 0.06328296661376953, "step": 4512 }, { "epoch": 0.6288580784504981, "grad_norm": 0.29085204005241394, "learning_rate": 1.3810695061469556e-05, "loss": 0.0788583755493164, "step": 4513 }, { "epoch": 0.6289974221417125, "grad_norm": 0.46856850385665894, "learning_rate": 1.3801731385560312e-05, "loss": 0.09288787841796875, "step": 4514 }, { "epoch": 0.6291367658329269, "grad_norm": 0.8923294544219971, "learning_rate": 1.3792769086764839e-05, "loss": 0.1098031997680664, "step": 4515 }, { "epoch": 0.6292761095241413, "grad_norm": 1.0878287553787231, "learning_rate": 1.3783808167074373e-05, "loss": 0.13759231567382812, "step": 4516 }, { "epoch": 0.6294154532153556, "grad_norm": 0.2399330586194992, "learning_rate": 1.3774848628479807e-05, "loss": 0.06270408630371094, "step": 4517 }, { "epoch": 0.62955479690657, "grad_norm": 0.42587384581565857, "learning_rate": 1.3765890472971755e-05, "loss": 0.09809303283691406, "step": 4518 }, { "epoch": 0.6296941405977844, "grad_norm": 0.8285608887672424, "learning_rate": 1.3756933702540506e-05, "loss": 0.1540231704711914, "step": 4519 }, { "epoch": 0.6298334842889988, "grad_norm": 0.6803591251373291, "learning_rate": 1.3747978319176064e-05, "loss": 0.13019943237304688, "step": 4520 }, { "epoch": 0.6299728279802131, "grad_norm": 0.3261933922767639, "learning_rate": 1.3739024324868107e-05, "loss": 0.08071708679199219, "step": 4521 }, { "epoch": 0.6301121716714275, "grad_norm": 0.3934943974018097, "learning_rate": 1.3730071721605999e-05, "loss": 0.07923507690429688, "step": 4522 }, { "epoch": 0.6302515153626419, "grad_norm": 0.4387754797935486, "learning_rate": 1.3721120511378811e-05, "loss": 0.10785102844238281, "step": 4523 }, { "epoch": 0.6303908590538564, "grad_norm": 0.6049151420593262, "learning_rate": 1.3712170696175289e-05, "loss": 0.1043853759765625, "step": 4524 }, { "epoch": 0.6305302027450708, "grad_norm": 0.4257989823818207, "learning_rate": 1.3703222277983892e-05, "loss": 0.093017578125, "step": 4525 }, { "epoch": 0.6306695464362851, "grad_norm": 0.5541534423828125, "learning_rate": 1.3694275258792742e-05, "loss": 0.11624908447265625, "step": 4526 }, { "epoch": 0.6308088901274995, "grad_norm": 0.5030675530433655, "learning_rate": 1.368532964058968e-05, "loss": 0.1008453369140625, "step": 4527 }, { "epoch": 0.6309482338187139, "grad_norm": 0.45348939299583435, "learning_rate": 1.3676385425362193e-05, "loss": 0.11217308044433594, "step": 4528 }, { "epoch": 0.6310875775099283, "grad_norm": 0.4023277461528778, "learning_rate": 1.3667442615097497e-05, "loss": 0.09495162963867188, "step": 4529 }, { "epoch": 0.6312269212011427, "grad_norm": 0.4912661910057068, "learning_rate": 1.3658501211782478e-05, "loss": 0.10956382751464844, "step": 4530 }, { "epoch": 0.631366264892357, "grad_norm": 0.4192134737968445, "learning_rate": 1.3649561217403707e-05, "loss": 0.10927963256835938, "step": 4531 }, { "epoch": 0.6315056085835714, "grad_norm": 0.3961367607116699, "learning_rate": 1.3640622633947459e-05, "loss": 0.07685661315917969, "step": 4532 }, { "epoch": 0.6316449522747858, "grad_norm": 0.38368305563926697, "learning_rate": 1.3631685463399668e-05, "loss": 0.09729385375976562, "step": 4533 }, { "epoch": 0.6317842959660002, "grad_norm": 0.4135507047176361, "learning_rate": 1.3622749707745979e-05, "loss": 0.09932136535644531, "step": 4534 }, { "epoch": 0.6319236396572145, "grad_norm": 0.4372934401035309, "learning_rate": 1.3613815368971705e-05, "loss": 0.0998687744140625, "step": 4535 }, { "epoch": 0.6320629833484289, "grad_norm": 0.7867633700370789, "learning_rate": 1.360488244906186e-05, "loss": 0.12827110290527344, "step": 4536 }, { "epoch": 0.6322023270396433, "grad_norm": 0.4166977107524872, "learning_rate": 1.3595950950001139e-05, "loss": 0.0872802734375, "step": 4537 }, { "epoch": 0.6323416707308577, "grad_norm": 0.39581069350242615, "learning_rate": 1.3587020873773901e-05, "loss": 0.08702945709228516, "step": 4538 }, { "epoch": 0.632481014422072, "grad_norm": 0.25799399614334106, "learning_rate": 1.3578092222364214e-05, "loss": 0.07033729553222656, "step": 4539 }, { "epoch": 0.6326203581132864, "grad_norm": 0.5845621228218079, "learning_rate": 1.3569164997755821e-05, "loss": 0.12522125244140625, "step": 4540 }, { "epoch": 0.6327597018045008, "grad_norm": 0.31499022245407104, "learning_rate": 1.3560239201932151e-05, "loss": 0.07937908172607422, "step": 4541 }, { "epoch": 0.6328990454957152, "grad_norm": 0.4379984140396118, "learning_rate": 1.35513148368763e-05, "loss": 0.09891891479492188, "step": 4542 }, { "epoch": 0.6330383891869296, "grad_norm": 0.42195138335227966, "learning_rate": 1.3542391904571082e-05, "loss": 0.09016990661621094, "step": 4543 }, { "epoch": 0.6331777328781439, "grad_norm": 0.2361210435628891, "learning_rate": 1.3533470406998941e-05, "loss": 0.06784629821777344, "step": 4544 }, { "epoch": 0.6333170765693583, "grad_norm": 0.2582574188709259, "learning_rate": 1.3524550346142044e-05, "loss": 0.06952762603759766, "step": 4545 }, { "epoch": 0.6334564202605727, "grad_norm": 0.6075373291969299, "learning_rate": 1.3515631723982223e-05, "loss": 0.10183143615722656, "step": 4546 }, { "epoch": 0.6335957639517871, "grad_norm": 0.39811813831329346, "learning_rate": 1.3506714542500986e-05, "loss": 0.08579254150390625, "step": 4547 }, { "epoch": 0.6337351076430014, "grad_norm": 0.3367428183555603, "learning_rate": 1.3497798803679547e-05, "loss": 0.09622573852539062, "step": 4548 }, { "epoch": 0.6338744513342158, "grad_norm": 0.35608991980552673, "learning_rate": 1.348888450949876e-05, "loss": 0.07709741592407227, "step": 4549 }, { "epoch": 0.6340137950254302, "grad_norm": 0.395068883895874, "learning_rate": 1.3479971661939183e-05, "loss": 0.09599494934082031, "step": 4550 }, { "epoch": 0.6341531387166446, "grad_norm": 0.7665305137634277, "learning_rate": 1.3471060262981044e-05, "loss": 0.10598564147949219, "step": 4551 }, { "epoch": 0.634292482407859, "grad_norm": 0.473127543926239, "learning_rate": 1.346215031460426e-05, "loss": 0.0818023681640625, "step": 4552 }, { "epoch": 0.6344318260990733, "grad_norm": 0.4177418649196625, "learning_rate": 1.3453241818788421e-05, "loss": 0.08900070190429688, "step": 4553 }, { "epoch": 0.6345711697902877, "grad_norm": 0.4634590446949005, "learning_rate": 1.3444334777512778e-05, "loss": 0.10011863708496094, "step": 4554 }, { "epoch": 0.6347105134815021, "grad_norm": 0.2542758882045746, "learning_rate": 1.3435429192756275e-05, "loss": 0.07648658752441406, "step": 4555 }, { "epoch": 0.6348498571727165, "grad_norm": 0.4355270564556122, "learning_rate": 1.342652506649754e-05, "loss": 0.10840129852294922, "step": 4556 }, { "epoch": 0.6349892008639308, "grad_norm": 0.4770478904247284, "learning_rate": 1.3417622400714859e-05, "loss": 0.10806655883789062, "step": 4557 }, { "epoch": 0.6351285445551452, "grad_norm": 0.46158450841903687, "learning_rate": 1.3408721197386205e-05, "loss": 0.09137535095214844, "step": 4558 }, { "epoch": 0.6352678882463596, "grad_norm": 0.34236040711402893, "learning_rate": 1.3399821458489215e-05, "loss": 0.07886123657226562, "step": 4559 }, { "epoch": 0.635407231937574, "grad_norm": 0.3457069396972656, "learning_rate": 1.339092318600121e-05, "loss": 0.08760261535644531, "step": 4560 }, { "epoch": 0.6355465756287884, "grad_norm": 0.9576771259307861, "learning_rate": 1.3382026381899191e-05, "loss": 0.12982749938964844, "step": 4561 }, { "epoch": 0.6356859193200027, "grad_norm": 0.48679786920547485, "learning_rate": 1.3373131048159817e-05, "loss": 0.10580253601074219, "step": 4562 }, { "epoch": 0.6358252630112171, "grad_norm": 0.27350515127182007, "learning_rate": 1.3364237186759426e-05, "loss": 0.07553672790527344, "step": 4563 }, { "epoch": 0.6359646067024316, "grad_norm": 0.33626028895378113, "learning_rate": 1.3355344799674042e-05, "loss": 0.08385276794433594, "step": 4564 }, { "epoch": 0.636103950393646, "grad_norm": 0.5877096056938171, "learning_rate": 1.3346453888879341e-05, "loss": 0.11372756958007812, "step": 4565 }, { "epoch": 0.6362432940848604, "grad_norm": 0.44561782479286194, "learning_rate": 1.3337564456350682e-05, "loss": 0.10261154174804688, "step": 4566 }, { "epoch": 0.6363826377760747, "grad_norm": 0.35637980699539185, "learning_rate": 1.3328676504063092e-05, "loss": 0.06494712829589844, "step": 4567 }, { "epoch": 0.6365219814672891, "grad_norm": 0.5346208810806274, "learning_rate": 1.3319790033991278e-05, "loss": 0.11942672729492188, "step": 4568 }, { "epoch": 0.6366613251585035, "grad_norm": 0.5020948052406311, "learning_rate": 1.331090504810961e-05, "loss": 0.11946678161621094, "step": 4569 }, { "epoch": 0.6368006688497179, "grad_norm": 0.4568309783935547, "learning_rate": 1.3302021548392122e-05, "loss": 0.09709358215332031, "step": 4570 }, { "epoch": 0.6369400125409322, "grad_norm": 0.34954169392585754, "learning_rate": 1.3293139536812522e-05, "loss": 0.08315277099609375, "step": 4571 }, { "epoch": 0.6370793562321466, "grad_norm": 0.40242668986320496, "learning_rate": 1.3284259015344205e-05, "loss": 0.09887409210205078, "step": 4572 }, { "epoch": 0.637218699923361, "grad_norm": 0.7618058323860168, "learning_rate": 1.327537998596021e-05, "loss": 0.12411689758300781, "step": 4573 }, { "epoch": 0.6373580436145754, "grad_norm": 0.16569648683071136, "learning_rate": 1.326650245063326e-05, "loss": 0.056082725524902344, "step": 4574 }, { "epoch": 0.6374973873057898, "grad_norm": 0.29899483919143677, "learning_rate": 1.3257626411335733e-05, "loss": 0.0890045166015625, "step": 4575 }, { "epoch": 0.6376367309970041, "grad_norm": 0.8893817663192749, "learning_rate": 1.3248751870039682e-05, "loss": 0.11517333984375, "step": 4576 }, { "epoch": 0.6377760746882185, "grad_norm": 0.47740551829338074, "learning_rate": 1.3239878828716837e-05, "loss": 0.09613609313964844, "step": 4577 }, { "epoch": 0.6379154183794329, "grad_norm": 0.49468278884887695, "learning_rate": 1.3231007289338579e-05, "loss": 0.08887434005737305, "step": 4578 }, { "epoch": 0.6380547620706473, "grad_norm": 0.2836676239967346, "learning_rate": 1.322213725387596e-05, "loss": 0.07103538513183594, "step": 4579 }, { "epoch": 0.6381941057618616, "grad_norm": 0.6250537633895874, "learning_rate": 1.321326872429971e-05, "loss": 0.0993194580078125, "step": 4580 }, { "epoch": 0.638333449453076, "grad_norm": 0.28257840871810913, "learning_rate": 1.3204401702580199e-05, "loss": 0.07997512817382812, "step": 4581 }, { "epoch": 0.6384727931442904, "grad_norm": 0.47230294346809387, "learning_rate": 1.3195536190687485e-05, "loss": 0.0904378890991211, "step": 4582 }, { "epoch": 0.6386121368355048, "grad_norm": 0.2652273178100586, "learning_rate": 1.3186672190591279e-05, "loss": 0.06971168518066406, "step": 4583 }, { "epoch": 0.6387514805267192, "grad_norm": 0.5331162810325623, "learning_rate": 1.3177809704260964e-05, "loss": 0.0934295654296875, "step": 4584 }, { "epoch": 0.6388908242179335, "grad_norm": 0.24835404753684998, "learning_rate": 1.3168948733665583e-05, "loss": 0.07886505126953125, "step": 4585 }, { "epoch": 0.6390301679091479, "grad_norm": 0.5997633934020996, "learning_rate": 1.3160089280773834e-05, "loss": 0.1280221939086914, "step": 4586 }, { "epoch": 0.6391695116003623, "grad_norm": 0.41046106815338135, "learning_rate": 1.3151231347554085e-05, "loss": 0.0800013542175293, "step": 4587 }, { "epoch": 0.6393088552915767, "grad_norm": 0.27543550729751587, "learning_rate": 1.3142374935974373e-05, "loss": 0.08016014099121094, "step": 4588 }, { "epoch": 0.639448198982791, "grad_norm": 0.2125909924507141, "learning_rate": 1.313352004800239e-05, "loss": 0.06027793884277344, "step": 4589 }, { "epoch": 0.6395875426740054, "grad_norm": 0.44318610429763794, "learning_rate": 1.312466668560549e-05, "loss": 0.1065073013305664, "step": 4590 }, { "epoch": 0.6397268863652198, "grad_norm": 0.3339952528476715, "learning_rate": 1.3115814850750686e-05, "loss": 0.06838703155517578, "step": 4591 }, { "epoch": 0.6398662300564342, "grad_norm": 0.3748065233230591, "learning_rate": 1.3106964545404645e-05, "loss": 0.08554649353027344, "step": 4592 }, { "epoch": 0.6400055737476485, "grad_norm": 0.469242125749588, "learning_rate": 1.3098115771533718e-05, "loss": 0.09850692749023438, "step": 4593 }, { "epoch": 0.6401449174388629, "grad_norm": 0.3519827425479889, "learning_rate": 1.3089268531103887e-05, "loss": 0.07600212097167969, "step": 4594 }, { "epoch": 0.6402842611300773, "grad_norm": 0.39018818736076355, "learning_rate": 1.3080422826080828e-05, "loss": 0.08947944641113281, "step": 4595 }, { "epoch": 0.6404236048212917, "grad_norm": 0.4192550778388977, "learning_rate": 1.3071578658429828e-05, "loss": 0.09604454040527344, "step": 4596 }, { "epoch": 0.6405629485125061, "grad_norm": 0.37847158312797546, "learning_rate": 1.3062736030115877e-05, "loss": 0.07626533508300781, "step": 4597 }, { "epoch": 0.6407022922037204, "grad_norm": 0.6921459436416626, "learning_rate": 1.3053894943103598e-05, "loss": 0.09898185729980469, "step": 4598 }, { "epoch": 0.6408416358949348, "grad_norm": 0.3878924250602722, "learning_rate": 1.3045055399357276e-05, "loss": 0.09195327758789062, "step": 4599 }, { "epoch": 0.6409809795861492, "grad_norm": 0.7339143753051758, "learning_rate": 1.3036217400840865e-05, "loss": 0.10531997680664062, "step": 4600 }, { "epoch": 0.6411203232773636, "grad_norm": 0.25794997811317444, "learning_rate": 1.3027380949517964e-05, "loss": 0.0682382583618164, "step": 4601 }, { "epoch": 0.641259666968578, "grad_norm": 0.39499691128730774, "learning_rate": 1.3018546047351828e-05, "loss": 0.07992744445800781, "step": 4602 }, { "epoch": 0.6413990106597923, "grad_norm": 0.3541356027126312, "learning_rate": 1.3009712696305363e-05, "loss": 0.08928108215332031, "step": 4603 }, { "epoch": 0.6415383543510068, "grad_norm": 0.30947551131248474, "learning_rate": 1.3000880898341155e-05, "loss": 0.08517837524414062, "step": 4604 }, { "epoch": 0.6416776980422212, "grad_norm": 0.40523189306259155, "learning_rate": 1.2992050655421413e-05, "loss": 0.08661079406738281, "step": 4605 }, { "epoch": 0.6418170417334356, "grad_norm": 0.3283569812774658, "learning_rate": 1.2983221969508028e-05, "loss": 0.08480644226074219, "step": 4606 }, { "epoch": 0.64195638542465, "grad_norm": 0.43372541666030884, "learning_rate": 1.2974394842562523e-05, "loss": 0.11783790588378906, "step": 4607 }, { "epoch": 0.6420957291158643, "grad_norm": 0.3714885711669922, "learning_rate": 1.2965569276546081e-05, "loss": 0.08965301513671875, "step": 4608 }, { "epoch": 0.6422350728070787, "grad_norm": 0.49691861867904663, "learning_rate": 1.2956745273419551e-05, "loss": 0.09466361999511719, "step": 4609 }, { "epoch": 0.6423744164982931, "grad_norm": 0.42134004831314087, "learning_rate": 1.2947922835143415e-05, "loss": 0.09312820434570312, "step": 4610 }, { "epoch": 0.6425137601895075, "grad_norm": 0.2938251495361328, "learning_rate": 1.2939101963677838e-05, "loss": 0.07767105102539062, "step": 4611 }, { "epoch": 0.6426531038807218, "grad_norm": 0.5558946132659912, "learning_rate": 1.2930282660982592e-05, "loss": 0.10945892333984375, "step": 4612 }, { "epoch": 0.6427924475719362, "grad_norm": 0.5071899890899658, "learning_rate": 1.2921464929017134e-05, "loss": 0.1343231201171875, "step": 4613 }, { "epoch": 0.6429317912631506, "grad_norm": 0.5871200561523438, "learning_rate": 1.2912648769740563e-05, "loss": 0.11411094665527344, "step": 4614 }, { "epoch": 0.643071134954365, "grad_norm": 0.4432167708873749, "learning_rate": 1.2903834185111625e-05, "loss": 0.083038330078125, "step": 4615 }, { "epoch": 0.6432104786455793, "grad_norm": 0.6252923607826233, "learning_rate": 1.2895021177088733e-05, "loss": 0.12012481689453125, "step": 4616 }, { "epoch": 0.6433498223367937, "grad_norm": 0.6448109745979309, "learning_rate": 1.2886209747629921e-05, "loss": 0.1269359588623047, "step": 4617 }, { "epoch": 0.6434891660280081, "grad_norm": 0.41856157779693604, "learning_rate": 1.2877399898692892e-05, "loss": 0.08974266052246094, "step": 4618 }, { "epoch": 0.6436285097192225, "grad_norm": 0.4617769122123718, "learning_rate": 1.286859163223499e-05, "loss": 0.10261917114257812, "step": 4619 }, { "epoch": 0.6437678534104369, "grad_norm": 0.4924931824207306, "learning_rate": 1.2859784950213218e-05, "loss": 0.096282958984375, "step": 4620 }, { "epoch": 0.6439071971016512, "grad_norm": 0.3940213620662689, "learning_rate": 1.2850979854584216e-05, "loss": 0.08867645263671875, "step": 4621 }, { "epoch": 0.6440465407928656, "grad_norm": 0.4150657653808594, "learning_rate": 1.2842176347304283e-05, "loss": 0.09499549865722656, "step": 4622 }, { "epoch": 0.64418588448408, "grad_norm": 0.34833264350891113, "learning_rate": 1.2833374430329341e-05, "loss": 0.08764076232910156, "step": 4623 }, { "epoch": 0.6443252281752944, "grad_norm": 0.1880774050951004, "learning_rate": 1.2824574105614983e-05, "loss": 0.05837059020996094, "step": 4624 }, { "epoch": 0.6444645718665087, "grad_norm": 0.24805030226707458, "learning_rate": 1.2815775375116442e-05, "loss": 0.0693817138671875, "step": 4625 }, { "epoch": 0.6446039155577231, "grad_norm": 0.35106992721557617, "learning_rate": 1.280697824078859e-05, "loss": 0.08980178833007812, "step": 4626 }, { "epoch": 0.6447432592489375, "grad_norm": 0.1822257786989212, "learning_rate": 1.2798182704585968e-05, "loss": 0.05999755859375, "step": 4627 }, { "epoch": 0.6448826029401519, "grad_norm": 0.22310209274291992, "learning_rate": 1.2789388768462715e-05, "loss": 0.06826400756835938, "step": 4628 }, { "epoch": 0.6450219466313662, "grad_norm": 0.4085978865623474, "learning_rate": 1.2780596434372663e-05, "loss": 0.08998680114746094, "step": 4629 }, { "epoch": 0.6451612903225806, "grad_norm": 0.4442988634109497, "learning_rate": 1.2771805704269258e-05, "loss": 0.08619022369384766, "step": 4630 }, { "epoch": 0.645300634013795, "grad_norm": 0.3271266520023346, "learning_rate": 1.2763016580105601e-05, "loss": 0.08275985717773438, "step": 4631 }, { "epoch": 0.6454399777050094, "grad_norm": 0.672417402267456, "learning_rate": 1.2754229063834448e-05, "loss": 0.11099004745483398, "step": 4632 }, { "epoch": 0.6455793213962238, "grad_norm": 0.3289915919303894, "learning_rate": 1.2745443157408164e-05, "loss": 0.07958316802978516, "step": 4633 }, { "epoch": 0.6457186650874381, "grad_norm": 0.7165187001228333, "learning_rate": 1.2736658862778788e-05, "loss": 0.10890674591064453, "step": 4634 }, { "epoch": 0.6458580087786525, "grad_norm": 0.28138500452041626, "learning_rate": 1.2727876181897982e-05, "loss": 0.07460594177246094, "step": 4635 }, { "epoch": 0.6459973524698669, "grad_norm": 0.782300591468811, "learning_rate": 1.2719095116717069e-05, "loss": 0.1223001480102539, "step": 4636 }, { "epoch": 0.6461366961610813, "grad_norm": 0.47515788674354553, "learning_rate": 1.2710315669186994e-05, "loss": 0.08977508544921875, "step": 4637 }, { "epoch": 0.6462760398522956, "grad_norm": 0.3960266709327698, "learning_rate": 1.2701537841258358e-05, "loss": 0.0918121337890625, "step": 4638 }, { "epoch": 0.64641538354351, "grad_norm": 0.7674164175987244, "learning_rate": 1.2692761634881377e-05, "loss": 0.12964630126953125, "step": 4639 }, { "epoch": 0.6465547272347244, "grad_norm": 0.23392018675804138, "learning_rate": 1.2683987052005938e-05, "loss": 0.06695938110351562, "step": 4640 }, { "epoch": 0.6466940709259388, "grad_norm": 0.514554500579834, "learning_rate": 1.2675214094581547e-05, "loss": 0.08422279357910156, "step": 4641 }, { "epoch": 0.6468334146171532, "grad_norm": 0.5896017551422119, "learning_rate": 1.2666442764557352e-05, "loss": 0.12107276916503906, "step": 4642 }, { "epoch": 0.6469727583083675, "grad_norm": 0.33691656589508057, "learning_rate": 1.2657673063882161e-05, "loss": 0.07555294036865234, "step": 4643 }, { "epoch": 0.647112101999582, "grad_norm": 0.9936367869377136, "learning_rate": 1.2648904994504374e-05, "loss": 0.12989425659179688, "step": 4644 }, { "epoch": 0.6472514456907964, "grad_norm": 0.33850833773612976, "learning_rate": 1.2640138558372073e-05, "loss": 0.09196281433105469, "step": 4645 }, { "epoch": 0.6473907893820108, "grad_norm": 0.31961238384246826, "learning_rate": 1.2631373757432957e-05, "loss": 0.08641815185546875, "step": 4646 }, { "epoch": 0.6475301330732252, "grad_norm": 0.45488911867141724, "learning_rate": 1.2622610593634356e-05, "loss": 0.1076040267944336, "step": 4647 }, { "epoch": 0.6476694767644395, "grad_norm": 0.4775375723838806, "learning_rate": 1.2613849068923266e-05, "loss": 0.11142730712890625, "step": 4648 }, { "epoch": 0.6478088204556539, "grad_norm": 0.21747806668281555, "learning_rate": 1.2605089185246277e-05, "loss": 0.06620121002197266, "step": 4649 }, { "epoch": 0.6479481641468683, "grad_norm": 0.6578227877616882, "learning_rate": 1.2596330944549642e-05, "loss": 0.11129617691040039, "step": 4650 }, { "epoch": 0.6480875078380827, "grad_norm": 1.1994502544403076, "learning_rate": 1.2587574348779238e-05, "loss": 0.13140487670898438, "step": 4651 }, { "epoch": 0.648226851529297, "grad_norm": 0.528349757194519, "learning_rate": 1.2578819399880591e-05, "loss": 0.11322307586669922, "step": 4652 }, { "epoch": 0.6483661952205114, "grad_norm": 0.6469501852989197, "learning_rate": 1.2570066099798847e-05, "loss": 0.10960960388183594, "step": 4653 }, { "epoch": 0.6485055389117258, "grad_norm": 0.5228561162948608, "learning_rate": 1.2561314450478785e-05, "loss": 0.11450767517089844, "step": 4654 }, { "epoch": 0.6486448826029402, "grad_norm": 0.6406696438789368, "learning_rate": 1.255256445386482e-05, "loss": 0.11263084411621094, "step": 4655 }, { "epoch": 0.6487842262941546, "grad_norm": 0.48905622959136963, "learning_rate": 1.2543816111901008e-05, "loss": 0.07970237731933594, "step": 4656 }, { "epoch": 0.6489235699853689, "grad_norm": 0.39804914593696594, "learning_rate": 1.253506942653103e-05, "loss": 0.08022403717041016, "step": 4657 }, { "epoch": 0.6490629136765833, "grad_norm": 0.35044676065444946, "learning_rate": 1.2526324399698193e-05, "loss": 0.08477115631103516, "step": 4658 }, { "epoch": 0.6492022573677977, "grad_norm": 0.893737256526947, "learning_rate": 1.2517581033345461e-05, "loss": 0.10933208465576172, "step": 4659 }, { "epoch": 0.6493416010590121, "grad_norm": 0.28521543741226196, "learning_rate": 1.2508839329415384e-05, "loss": 0.07487678527832031, "step": 4660 }, { "epoch": 0.6494809447502264, "grad_norm": 0.540998101234436, "learning_rate": 1.2500099289850185e-05, "loss": 0.10055732727050781, "step": 4661 }, { "epoch": 0.6496202884414408, "grad_norm": 0.5850149393081665, "learning_rate": 1.2491360916591697e-05, "loss": 0.11150932312011719, "step": 4662 }, { "epoch": 0.6497596321326552, "grad_norm": 0.8154765367507935, "learning_rate": 1.2482624211581387e-05, "loss": 0.093658447265625, "step": 4663 }, { "epoch": 0.6498989758238696, "grad_norm": 0.41117632389068604, "learning_rate": 1.2473889176760361e-05, "loss": 0.10511493682861328, "step": 4664 }, { "epoch": 0.650038319515084, "grad_norm": 0.24195298552513123, "learning_rate": 1.246515581406933e-05, "loss": 0.06624794006347656, "step": 4665 }, { "epoch": 0.6501776632062983, "grad_norm": 0.29970526695251465, "learning_rate": 1.2456424125448655e-05, "loss": 0.0819711685180664, "step": 4666 }, { "epoch": 0.6503170068975127, "grad_norm": 0.4756598472595215, "learning_rate": 1.2447694112838309e-05, "loss": 0.11324310302734375, "step": 4667 }, { "epoch": 0.6504563505887271, "grad_norm": 0.3584001660346985, "learning_rate": 1.2438965778177918e-05, "loss": 0.095367431640625, "step": 4668 }, { "epoch": 0.6505956942799415, "grad_norm": 0.5195461511611938, "learning_rate": 1.243023912340671e-05, "loss": 0.09329605102539062, "step": 4669 }, { "epoch": 0.6507350379711558, "grad_norm": 0.7271227836608887, "learning_rate": 1.2421514150463546e-05, "loss": 0.13684844970703125, "step": 4670 }, { "epoch": 0.6508743816623702, "grad_norm": 0.5592740774154663, "learning_rate": 1.2412790861286914e-05, "loss": 0.11276054382324219, "step": 4671 }, { "epoch": 0.6510137253535846, "grad_norm": 0.44399118423461914, "learning_rate": 1.2404069257814939e-05, "loss": 0.11736106872558594, "step": 4672 }, { "epoch": 0.651153069044799, "grad_norm": 0.5937772393226624, "learning_rate": 1.2395349341985355e-05, "loss": 0.10880851745605469, "step": 4673 }, { "epoch": 0.6512924127360133, "grad_norm": 0.3940418064594269, "learning_rate": 1.2386631115735525e-05, "loss": 0.08755683898925781, "step": 4674 }, { "epoch": 0.6514317564272277, "grad_norm": 0.46224796772003174, "learning_rate": 1.2377914581002459e-05, "loss": 0.09561538696289062, "step": 4675 }, { "epoch": 0.6515711001184421, "grad_norm": 0.47966834902763367, "learning_rate": 1.2369199739722744e-05, "loss": 0.0870046615600586, "step": 4676 }, { "epoch": 0.6517104438096565, "grad_norm": 0.3773738145828247, "learning_rate": 1.2360486593832639e-05, "loss": 0.090057373046875, "step": 4677 }, { "epoch": 0.6518497875008709, "grad_norm": 0.7012414336204529, "learning_rate": 1.2351775145267996e-05, "loss": 0.13275527954101562, "step": 4678 }, { "epoch": 0.6519891311920852, "grad_norm": 0.24457143247127533, "learning_rate": 1.2343065395964304e-05, "loss": 0.06409263610839844, "step": 4679 }, { "epoch": 0.6521284748832996, "grad_norm": 0.26543641090393066, "learning_rate": 1.2334357347856678e-05, "loss": 0.07217979431152344, "step": 4680 }, { "epoch": 0.652267818574514, "grad_norm": 0.5168633460998535, "learning_rate": 1.2325651002879835e-05, "loss": 0.11470794677734375, "step": 4681 }, { "epoch": 0.6524071622657284, "grad_norm": 0.3655026853084564, "learning_rate": 1.2316946362968129e-05, "loss": 0.07867717742919922, "step": 4682 }, { "epoch": 0.6525465059569427, "grad_norm": 0.4323385953903198, "learning_rate": 1.230824343005553e-05, "loss": 0.09076690673828125, "step": 4683 }, { "epoch": 0.6526858496481572, "grad_norm": 0.32415640354156494, "learning_rate": 1.2299542206075641e-05, "loss": 0.07686996459960938, "step": 4684 }, { "epoch": 0.6528251933393716, "grad_norm": 0.19900569319725037, "learning_rate": 1.2290842692961673e-05, "loss": 0.06326007843017578, "step": 4685 }, { "epoch": 0.652964537030586, "grad_norm": 0.27827340364456177, "learning_rate": 1.2282144892646453e-05, "loss": 0.07333660125732422, "step": 4686 }, { "epoch": 0.6531038807218004, "grad_norm": 0.3371955454349518, "learning_rate": 1.227344880706243e-05, "loss": 0.0788259506225586, "step": 4687 }, { "epoch": 0.6532432244130147, "grad_norm": 0.2088494449853897, "learning_rate": 1.2264754438141684e-05, "loss": 0.0638885498046875, "step": 4688 }, { "epoch": 0.6533825681042291, "grad_norm": 0.2508700489997864, "learning_rate": 1.2256061787815908e-05, "loss": 0.07767677307128906, "step": 4689 }, { "epoch": 0.6535219117954435, "grad_norm": 0.3616940379142761, "learning_rate": 1.2247370858016407e-05, "loss": 0.0838165283203125, "step": 4690 }, { "epoch": 0.6536612554866579, "grad_norm": 0.9274809956550598, "learning_rate": 1.22386816506741e-05, "loss": 0.1276092529296875, "step": 4691 }, { "epoch": 0.6538005991778723, "grad_norm": 0.25568634271621704, "learning_rate": 1.2229994167719537e-05, "loss": 0.06070137023925781, "step": 4692 }, { "epoch": 0.6539399428690866, "grad_norm": 0.6080480813980103, "learning_rate": 1.2221308411082877e-05, "loss": 0.1146697998046875, "step": 4693 }, { "epoch": 0.654079286560301, "grad_norm": 0.44333502650260925, "learning_rate": 1.2212624382693896e-05, "loss": 0.09967422485351562, "step": 4694 }, { "epoch": 0.6542186302515154, "grad_norm": 0.4627359211444855, "learning_rate": 1.220394208448199e-05, "loss": 0.10509490966796875, "step": 4695 }, { "epoch": 0.6543579739427298, "grad_norm": 0.276304692029953, "learning_rate": 1.2195261518376173e-05, "loss": 0.0648183822631836, "step": 4696 }, { "epoch": 0.6544973176339441, "grad_norm": 0.3660314381122589, "learning_rate": 1.2186582686305056e-05, "loss": 0.09733772277832031, "step": 4697 }, { "epoch": 0.6546366613251585, "grad_norm": 0.2941841185092926, "learning_rate": 1.2177905590196884e-05, "loss": 0.07593154907226562, "step": 4698 }, { "epoch": 0.6547760050163729, "grad_norm": 0.29108163714408875, "learning_rate": 1.2169230231979503e-05, "loss": 0.0818629264831543, "step": 4699 }, { "epoch": 0.6549153487075873, "grad_norm": 0.3985455632209778, "learning_rate": 1.216055661358039e-05, "loss": 0.0948333740234375, "step": 4700 }, { "epoch": 0.6550546923988017, "grad_norm": 0.5750653743743896, "learning_rate": 1.215188473692662e-05, "loss": 0.11328506469726562, "step": 4701 }, { "epoch": 0.655194036090016, "grad_norm": 0.28588059544563293, "learning_rate": 1.2143214603944889e-05, "loss": 0.06926965713500977, "step": 4702 }, { "epoch": 0.6553333797812304, "grad_norm": 0.4526432752609253, "learning_rate": 1.213454621656149e-05, "loss": 0.09573936462402344, "step": 4703 }, { "epoch": 0.6554727234724448, "grad_norm": 0.3729448616504669, "learning_rate": 1.2125879576702354e-05, "loss": 0.08625030517578125, "step": 4704 }, { "epoch": 0.6556120671636592, "grad_norm": 0.176182821393013, "learning_rate": 1.211721468629301e-05, "loss": 0.05028343200683594, "step": 4705 }, { "epoch": 0.6557514108548735, "grad_norm": 0.28093671798706055, "learning_rate": 1.2108551547258598e-05, "loss": 0.0761423110961914, "step": 4706 }, { "epoch": 0.6558907545460879, "grad_norm": 0.49263814091682434, "learning_rate": 1.2099890161523864e-05, "loss": 0.10873794555664062, "step": 4707 }, { "epoch": 0.6560300982373023, "grad_norm": 0.2510289251804352, "learning_rate": 1.209123053101317e-05, "loss": 0.08315420150756836, "step": 4708 }, { "epoch": 0.6561694419285167, "grad_norm": 0.39222031831741333, "learning_rate": 1.2082572657650494e-05, "loss": 0.09826183319091797, "step": 4709 }, { "epoch": 0.656308785619731, "grad_norm": 0.4111989438533783, "learning_rate": 1.2073916543359415e-05, "loss": 0.09436798095703125, "step": 4710 }, { "epoch": 0.6564481293109454, "grad_norm": 0.20286114513874054, "learning_rate": 1.2065262190063132e-05, "loss": 0.06338882446289062, "step": 4711 }, { "epoch": 0.6565874730021598, "grad_norm": 0.4902791678905487, "learning_rate": 1.2056609599684426e-05, "loss": 0.10645294189453125, "step": 4712 }, { "epoch": 0.6567268166933742, "grad_norm": 0.4025605618953705, "learning_rate": 1.2047958774145722e-05, "loss": 0.08800697326660156, "step": 4713 }, { "epoch": 0.6568661603845886, "grad_norm": 0.48924538493156433, "learning_rate": 1.2039309715369033e-05, "loss": 0.08595657348632812, "step": 4714 }, { "epoch": 0.6570055040758029, "grad_norm": 0.4121195375919342, "learning_rate": 1.203066242527597e-05, "loss": 0.10023880004882812, "step": 4715 }, { "epoch": 0.6571448477670173, "grad_norm": 0.8792543411254883, "learning_rate": 1.2022016905787779e-05, "loss": 0.11711692810058594, "step": 4716 }, { "epoch": 0.6572841914582317, "grad_norm": 0.4625588655471802, "learning_rate": 1.2013373158825297e-05, "loss": 0.10661697387695312, "step": 4717 }, { "epoch": 0.6574235351494461, "grad_norm": 0.39131924510002136, "learning_rate": 1.2004731186308956e-05, "loss": 0.09503936767578125, "step": 4718 }, { "epoch": 0.6575628788406604, "grad_norm": 0.371127188205719, "learning_rate": 1.1996090990158804e-05, "loss": 0.08177375793457031, "step": 4719 }, { "epoch": 0.6577022225318748, "grad_norm": 0.3497975766658783, "learning_rate": 1.198745257229451e-05, "loss": 0.06208324432373047, "step": 4720 }, { "epoch": 0.6578415662230892, "grad_norm": 0.1970861554145813, "learning_rate": 1.197881593463532e-05, "loss": 0.06342267990112305, "step": 4721 }, { "epoch": 0.6579809099143036, "grad_norm": 0.31941401958465576, "learning_rate": 1.197018107910011e-05, "loss": 0.08168792724609375, "step": 4722 }, { "epoch": 0.658120253605518, "grad_norm": 0.590983510017395, "learning_rate": 1.1961548007607335e-05, "loss": 0.1244049072265625, "step": 4723 }, { "epoch": 0.6582595972967323, "grad_norm": 0.436781108379364, "learning_rate": 1.1952916722075068e-05, "loss": 0.07187271118164062, "step": 4724 }, { "epoch": 0.6583989409879468, "grad_norm": 0.6713334918022156, "learning_rate": 1.1944287224420991e-05, "loss": 0.12765121459960938, "step": 4725 }, { "epoch": 0.6585382846791612, "grad_norm": 0.32080692052841187, "learning_rate": 1.1935659516562375e-05, "loss": 0.07808208465576172, "step": 4726 }, { "epoch": 0.6586776283703756, "grad_norm": 0.32225891947746277, "learning_rate": 1.1927033600416113e-05, "loss": 0.07587051391601562, "step": 4727 }, { "epoch": 0.65881697206159, "grad_norm": 0.4505733251571655, "learning_rate": 1.1918409477898668e-05, "loss": 0.08877754211425781, "step": 4728 }, { "epoch": 0.6589563157528043, "grad_norm": 0.7813950181007385, "learning_rate": 1.1909787150926128e-05, "loss": 0.13432025909423828, "step": 4729 }, { "epoch": 0.6590956594440187, "grad_norm": 0.32935529947280884, "learning_rate": 1.1901166621414184e-05, "loss": 0.07494544982910156, "step": 4730 }, { "epoch": 0.6592350031352331, "grad_norm": 0.6717613935470581, "learning_rate": 1.1892547891278115e-05, "loss": 0.1503772735595703, "step": 4731 }, { "epoch": 0.6593743468264475, "grad_norm": 0.8573989868164062, "learning_rate": 1.1883930962432811e-05, "loss": 0.16273117065429688, "step": 4732 }, { "epoch": 0.6595136905176618, "grad_norm": 0.5054094791412354, "learning_rate": 1.1875315836792755e-05, "loss": 0.1061086654663086, "step": 4733 }, { "epoch": 0.6596530342088762, "grad_norm": 0.3250320553779602, "learning_rate": 1.1866702516272031e-05, "loss": 0.07829666137695312, "step": 4734 }, { "epoch": 0.6597923779000906, "grad_norm": 0.8544875979423523, "learning_rate": 1.1858091002784315e-05, "loss": 0.12312126159667969, "step": 4735 }, { "epoch": 0.659931721591305, "grad_norm": 0.25590428709983826, "learning_rate": 1.18494812982429e-05, "loss": 0.07162284851074219, "step": 4736 }, { "epoch": 0.6600710652825194, "grad_norm": 0.4091190993785858, "learning_rate": 1.1840873404560662e-05, "loss": 0.07591819763183594, "step": 4737 }, { "epoch": 0.6602104089737337, "grad_norm": 0.46941789984703064, "learning_rate": 1.1832267323650081e-05, "loss": 0.10377883911132812, "step": 4738 }, { "epoch": 0.6603497526649481, "grad_norm": 0.47158774733543396, "learning_rate": 1.1823663057423218e-05, "loss": 0.08356094360351562, "step": 4739 }, { "epoch": 0.6604890963561625, "grad_norm": 0.496063768863678, "learning_rate": 1.1815060607791761e-05, "loss": 0.09033393859863281, "step": 4740 }, { "epoch": 0.6606284400473769, "grad_norm": 0.2974962592124939, "learning_rate": 1.1806459976666972e-05, "loss": 0.06977653503417969, "step": 4741 }, { "epoch": 0.6607677837385912, "grad_norm": 0.3974471390247345, "learning_rate": 1.1797861165959707e-05, "loss": 0.08163070678710938, "step": 4742 }, { "epoch": 0.6609071274298056, "grad_norm": 0.36079445481300354, "learning_rate": 1.1789264177580448e-05, "loss": 0.08262157440185547, "step": 4743 }, { "epoch": 0.66104647112102, "grad_norm": 0.32545045018196106, "learning_rate": 1.1780669013439224e-05, "loss": 0.078582763671875, "step": 4744 }, { "epoch": 0.6611858148122344, "grad_norm": 0.4704546630382538, "learning_rate": 1.1772075675445695e-05, "loss": 0.09307861328125, "step": 4745 }, { "epoch": 0.6613251585034488, "grad_norm": 0.23987379670143127, "learning_rate": 1.1763484165509108e-05, "loss": 0.07114219665527344, "step": 4746 }, { "epoch": 0.6614645021946631, "grad_norm": 0.29970765113830566, "learning_rate": 1.1754894485538288e-05, "loss": 0.07055854797363281, "step": 4747 }, { "epoch": 0.6616038458858775, "grad_norm": 0.4378312826156616, "learning_rate": 1.1746306637441684e-05, "loss": 0.0856485366821289, "step": 4748 }, { "epoch": 0.6617431895770919, "grad_norm": 0.6849618554115295, "learning_rate": 1.1737720623127307e-05, "loss": 0.10413169860839844, "step": 4749 }, { "epoch": 0.6618825332683063, "grad_norm": 0.4879036545753479, "learning_rate": 1.1729136444502775e-05, "loss": 0.08490133285522461, "step": 4750 }, { "epoch": 0.6620218769595206, "grad_norm": 0.5964678525924683, "learning_rate": 1.1720554103475297e-05, "loss": 0.10919189453125, "step": 4751 }, { "epoch": 0.662161220650735, "grad_norm": 0.5085734724998474, "learning_rate": 1.1711973601951676e-05, "loss": 0.09035682678222656, "step": 4752 }, { "epoch": 0.6623005643419494, "grad_norm": 0.889439582824707, "learning_rate": 1.1703394941838302e-05, "loss": 0.08391761779785156, "step": 4753 }, { "epoch": 0.6624399080331638, "grad_norm": 0.6769018173217773, "learning_rate": 1.1694818125041163e-05, "loss": 0.09373664855957031, "step": 4754 }, { "epoch": 0.6625792517243781, "grad_norm": 0.2515867352485657, "learning_rate": 1.1686243153465817e-05, "loss": 0.07453536987304688, "step": 4755 }, { "epoch": 0.6627185954155925, "grad_norm": 0.3582587242126465, "learning_rate": 1.1677670029017437e-05, "loss": 0.09323883056640625, "step": 4756 }, { "epoch": 0.6628579391068069, "grad_norm": 0.589493453502655, "learning_rate": 1.1669098753600777e-05, "loss": 0.09296131134033203, "step": 4757 }, { "epoch": 0.6629972827980213, "grad_norm": 0.7530641555786133, "learning_rate": 1.1660529329120173e-05, "loss": 0.12426948547363281, "step": 4758 }, { "epoch": 0.6631366264892357, "grad_norm": 0.36014506220817566, "learning_rate": 1.1651961757479567e-05, "loss": 0.0916147232055664, "step": 4759 }, { "epoch": 0.66327597018045, "grad_norm": 0.40629875659942627, "learning_rate": 1.1643396040582468e-05, "loss": 0.09214019775390625, "step": 4760 }, { "epoch": 0.6634153138716644, "grad_norm": 0.2881063222885132, "learning_rate": 1.1634832180331976e-05, "loss": 0.08112525939941406, "step": 4761 }, { "epoch": 0.6635546575628788, "grad_norm": 0.5497850179672241, "learning_rate": 1.1626270178630796e-05, "loss": 0.11384010314941406, "step": 4762 }, { "epoch": 0.6636940012540932, "grad_norm": 0.5820058584213257, "learning_rate": 1.1617710037381214e-05, "loss": 0.10714340209960938, "step": 4763 }, { "epoch": 0.6638333449453075, "grad_norm": 0.6494174599647522, "learning_rate": 1.1609151758485088e-05, "loss": 0.12042045593261719, "step": 4764 }, { "epoch": 0.663972688636522, "grad_norm": 0.4189845025539398, "learning_rate": 1.160059534384387e-05, "loss": 0.08283805847167969, "step": 4765 }, { "epoch": 0.6641120323277364, "grad_norm": 0.7069938778877258, "learning_rate": 1.1592040795358604e-05, "loss": 0.14244842529296875, "step": 4766 }, { "epoch": 0.6642513760189508, "grad_norm": 0.3832804560661316, "learning_rate": 1.1583488114929924e-05, "loss": 0.09211540222167969, "step": 4767 }, { "epoch": 0.6643907197101652, "grad_norm": 0.3414803445339203, "learning_rate": 1.1574937304458023e-05, "loss": 0.08011817932128906, "step": 4768 }, { "epoch": 0.6645300634013795, "grad_norm": 0.23895381391048431, "learning_rate": 1.1566388365842717e-05, "loss": 0.06811141967773438, "step": 4769 }, { "epoch": 0.6646694070925939, "grad_norm": 0.18140864372253418, "learning_rate": 1.1557841300983363e-05, "loss": 0.05545806884765625, "step": 4770 }, { "epoch": 0.6648087507838083, "grad_norm": 0.35216325521469116, "learning_rate": 1.1549296111778942e-05, "loss": 0.07842063903808594, "step": 4771 }, { "epoch": 0.6649480944750227, "grad_norm": 0.45460858941078186, "learning_rate": 1.1540752800127986e-05, "loss": 0.09504318237304688, "step": 4772 }, { "epoch": 0.665087438166237, "grad_norm": 0.3307091295719147, "learning_rate": 1.1532211367928628e-05, "loss": 0.07523250579833984, "step": 4773 }, { "epoch": 0.6652267818574514, "grad_norm": 0.3051570653915405, "learning_rate": 1.152367181707859e-05, "loss": 0.06237220764160156, "step": 4774 }, { "epoch": 0.6653661255486658, "grad_norm": 0.20572714507579803, "learning_rate": 1.1515134149475156e-05, "loss": 0.05329275131225586, "step": 4775 }, { "epoch": 0.6655054692398802, "grad_norm": 0.6783502101898193, "learning_rate": 1.1506598367015194e-05, "loss": 0.10324287414550781, "step": 4776 }, { "epoch": 0.6656448129310946, "grad_norm": 0.38690873980522156, "learning_rate": 1.1498064471595167e-05, "loss": 0.07210350036621094, "step": 4777 }, { "epoch": 0.6657841566223089, "grad_norm": 0.49211564660072327, "learning_rate": 1.1489532465111122e-05, "loss": 0.10010623931884766, "step": 4778 }, { "epoch": 0.6659235003135233, "grad_norm": 0.3164088726043701, "learning_rate": 1.1481002349458655e-05, "loss": 0.08154678344726562, "step": 4779 }, { "epoch": 0.6660628440047377, "grad_norm": 0.5095874667167664, "learning_rate": 1.1472474126532981e-05, "loss": 0.10906410217285156, "step": 4780 }, { "epoch": 0.6662021876959521, "grad_norm": 0.18728159368038177, "learning_rate": 1.1463947798228871e-05, "loss": 0.054947853088378906, "step": 4781 }, { "epoch": 0.6663415313871665, "grad_norm": 0.8258336186408997, "learning_rate": 1.1455423366440673e-05, "loss": 0.10771417617797852, "step": 4782 }, { "epoch": 0.6664808750783808, "grad_norm": 0.4428306221961975, "learning_rate": 1.1446900833062325e-05, "loss": 0.08477020263671875, "step": 4783 }, { "epoch": 0.6666202187695952, "grad_norm": 0.3239222466945648, "learning_rate": 1.1438380199987341e-05, "loss": 0.08702850341796875, "step": 4784 }, { "epoch": 0.6667595624608096, "grad_norm": 0.4847688674926758, "learning_rate": 1.1429861469108827e-05, "loss": 0.09185934066772461, "step": 4785 }, { "epoch": 0.666898906152024, "grad_norm": 0.7997563481330872, "learning_rate": 1.1421344642319418e-05, "loss": 0.11021232604980469, "step": 4786 }, { "epoch": 0.6670382498432383, "grad_norm": 0.2723369598388672, "learning_rate": 1.1412829721511378e-05, "loss": 0.08430862426757812, "step": 4787 }, { "epoch": 0.6671775935344527, "grad_norm": 0.492503821849823, "learning_rate": 1.140431670857653e-05, "loss": 0.10603523254394531, "step": 4788 }, { "epoch": 0.6673169372256671, "grad_norm": 0.2270796000957489, "learning_rate": 1.1395805605406263e-05, "loss": 0.06230449676513672, "step": 4789 }, { "epoch": 0.6674562809168815, "grad_norm": 0.2156296968460083, "learning_rate": 1.1387296413891551e-05, "loss": 0.06845664978027344, "step": 4790 }, { "epoch": 0.6675956246080959, "grad_norm": 0.35421469807624817, "learning_rate": 1.1378789135922954e-05, "loss": 0.08222198486328125, "step": 4791 }, { "epoch": 0.6677349682993102, "grad_norm": 0.453458309173584, "learning_rate": 1.1370283773390582e-05, "loss": 0.1114664077758789, "step": 4792 }, { "epoch": 0.6678743119905246, "grad_norm": 0.41679617762565613, "learning_rate": 1.136178032818413e-05, "loss": 0.08990669250488281, "step": 4793 }, { "epoch": 0.668013655681739, "grad_norm": 0.28685396909713745, "learning_rate": 1.1353278802192875e-05, "loss": 0.08612537384033203, "step": 4794 }, { "epoch": 0.6681529993729534, "grad_norm": 0.32269519567489624, "learning_rate": 1.1344779197305674e-05, "loss": 0.08080482482910156, "step": 4795 }, { "epoch": 0.6682923430641677, "grad_norm": 0.6157500147819519, "learning_rate": 1.1336281515410927e-05, "loss": 0.12169075012207031, "step": 4796 }, { "epoch": 0.6684316867553821, "grad_norm": 0.5253986120223999, "learning_rate": 1.1327785758396627e-05, "loss": 0.11471939086914062, "step": 4797 }, { "epoch": 0.6685710304465965, "grad_norm": 0.30481013655662537, "learning_rate": 1.131929192815034e-05, "loss": 0.08031272888183594, "step": 4798 }, { "epoch": 0.6687103741378109, "grad_norm": 0.4991978108882904, "learning_rate": 1.1310800026559213e-05, "loss": 0.10347175598144531, "step": 4799 }, { "epoch": 0.6688497178290252, "grad_norm": 0.41814640164375305, "learning_rate": 1.130231005550993e-05, "loss": 0.08360099792480469, "step": 4800 }, { "epoch": 0.6689890615202396, "grad_norm": 0.49600547552108765, "learning_rate": 1.1293822016888792e-05, "loss": 0.10889005661010742, "step": 4801 }, { "epoch": 0.669128405211454, "grad_norm": 0.5936664938926697, "learning_rate": 1.1285335912581628e-05, "loss": 0.10289478302001953, "step": 4802 }, { "epoch": 0.6692677489026684, "grad_norm": 0.28394562005996704, "learning_rate": 1.1276851744473874e-05, "loss": 0.06980514526367188, "step": 4803 }, { "epoch": 0.6694070925938828, "grad_norm": 0.3888055682182312, "learning_rate": 1.12683695144505e-05, "loss": 0.07971572875976562, "step": 4804 }, { "epoch": 0.6695464362850972, "grad_norm": 0.33685749769210815, "learning_rate": 1.1259889224396072e-05, "loss": 0.07966232299804688, "step": 4805 }, { "epoch": 0.6696857799763116, "grad_norm": 0.6230636239051819, "learning_rate": 1.1251410876194729e-05, "loss": 0.12115097045898438, "step": 4806 }, { "epoch": 0.669825123667526, "grad_norm": 0.37338998913764954, "learning_rate": 1.1242934471730153e-05, "loss": 0.0861368179321289, "step": 4807 }, { "epoch": 0.6699644673587404, "grad_norm": 0.6596747040748596, "learning_rate": 1.1234460012885603e-05, "loss": 0.1042623519897461, "step": 4808 }, { "epoch": 0.6701038110499548, "grad_norm": 0.553026556968689, "learning_rate": 1.122598750154392e-05, "loss": 0.09097003936767578, "step": 4809 }, { "epoch": 0.6702431547411691, "grad_norm": 0.3573400676250458, "learning_rate": 1.1217516939587507e-05, "loss": 0.08969688415527344, "step": 4810 }, { "epoch": 0.6703824984323835, "grad_norm": 0.24873799085617065, "learning_rate": 1.1209048328898313e-05, "loss": 0.06612205505371094, "step": 4811 }, { "epoch": 0.6705218421235979, "grad_norm": 0.42092999815940857, "learning_rate": 1.1200581671357886e-05, "loss": 0.09557151794433594, "step": 4812 }, { "epoch": 0.6706611858148123, "grad_norm": 0.5001912713050842, "learning_rate": 1.1192116968847313e-05, "loss": 0.09222030639648438, "step": 4813 }, { "epoch": 0.6708005295060266, "grad_norm": 0.6193772554397583, "learning_rate": 1.1183654223247268e-05, "loss": 0.1329517364501953, "step": 4814 }, { "epoch": 0.670939873197241, "grad_norm": 0.5607995986938477, "learning_rate": 1.1175193436437968e-05, "loss": 0.11072921752929688, "step": 4815 }, { "epoch": 0.6710792168884554, "grad_norm": 0.3842204213142395, "learning_rate": 1.116673461029921e-05, "loss": 0.08977127075195312, "step": 4816 }, { "epoch": 0.6712185605796698, "grad_norm": 0.4703288674354553, "learning_rate": 1.1158277746710373e-05, "loss": 0.10358428955078125, "step": 4817 }, { "epoch": 0.6713579042708842, "grad_norm": 0.46156299114227295, "learning_rate": 1.1149822847550345e-05, "loss": 0.108123779296875, "step": 4818 }, { "epoch": 0.6714972479620985, "grad_norm": 0.6436176896095276, "learning_rate": 1.1141369914697627e-05, "loss": 0.1276569366455078, "step": 4819 }, { "epoch": 0.6716365916533129, "grad_norm": 0.23828712105751038, "learning_rate": 1.1132918950030274e-05, "loss": 0.07295036315917969, "step": 4820 }, { "epoch": 0.6717759353445273, "grad_norm": 0.2682456970214844, "learning_rate": 1.1124469955425885e-05, "loss": 0.06686210632324219, "step": 4821 }, { "epoch": 0.6719152790357417, "grad_norm": 0.38418298959732056, "learning_rate": 1.1116022932761648e-05, "loss": 0.09996223449707031, "step": 4822 }, { "epoch": 0.672054622726956, "grad_norm": 0.3907872140407562, "learning_rate": 1.1107577883914282e-05, "loss": 0.08451461791992188, "step": 4823 }, { "epoch": 0.6721939664181704, "grad_norm": 0.5194570422172546, "learning_rate": 1.10991348107601e-05, "loss": 0.08857917785644531, "step": 4824 }, { "epoch": 0.6723333101093848, "grad_norm": 0.3620331585407257, "learning_rate": 1.1090693715174947e-05, "loss": 0.08185386657714844, "step": 4825 }, { "epoch": 0.6724726538005992, "grad_norm": 0.8948733806610107, "learning_rate": 1.1082254599034248e-05, "loss": 0.12167549133300781, "step": 4826 }, { "epoch": 0.6726119974918136, "grad_norm": 0.2811999022960663, "learning_rate": 1.1073817464212989e-05, "loss": 0.07607841491699219, "step": 4827 }, { "epoch": 0.6727513411830279, "grad_norm": 0.3258133828639984, "learning_rate": 1.1065382312585698e-05, "loss": 0.08303260803222656, "step": 4828 }, { "epoch": 0.6728906848742423, "grad_norm": 0.3795014023780823, "learning_rate": 1.1056949146026472e-05, "loss": 0.08002090454101562, "step": 4829 }, { "epoch": 0.6730300285654567, "grad_norm": 0.4429343342781067, "learning_rate": 1.1048517966408969e-05, "loss": 0.07542610168457031, "step": 4830 }, { "epoch": 0.6731693722566711, "grad_norm": 0.8556801676750183, "learning_rate": 1.104008877560642e-05, "loss": 0.09728050231933594, "step": 4831 }, { "epoch": 0.6733087159478854, "grad_norm": 0.43739795684814453, "learning_rate": 1.1031661575491577e-05, "loss": 0.10685539245605469, "step": 4832 }, { "epoch": 0.6734480596390998, "grad_norm": 0.6545463800430298, "learning_rate": 1.1023236367936789e-05, "loss": 0.10175704956054688, "step": 4833 }, { "epoch": 0.6735874033303142, "grad_norm": 0.31383681297302246, "learning_rate": 1.1014813154813928e-05, "loss": 0.07999801635742188, "step": 4834 }, { "epoch": 0.6737267470215286, "grad_norm": 0.4482263922691345, "learning_rate": 1.1006391937994459e-05, "loss": 0.11433219909667969, "step": 4835 }, { "epoch": 0.673866090712743, "grad_norm": 0.41278183460235596, "learning_rate": 1.0997972719349363e-05, "loss": 0.10252952575683594, "step": 4836 }, { "epoch": 0.6740054344039573, "grad_norm": 0.4635907709598541, "learning_rate": 1.0989555500749211e-05, "loss": 0.09451150894165039, "step": 4837 }, { "epoch": 0.6741447780951717, "grad_norm": 0.26358306407928467, "learning_rate": 1.0981140284064122e-05, "loss": 0.07045364379882812, "step": 4838 }, { "epoch": 0.6742841217863861, "grad_norm": 0.26832911372184753, "learning_rate": 1.097272707116376e-05, "loss": 0.08025932312011719, "step": 4839 }, { "epoch": 0.6744234654776005, "grad_norm": 0.38217592239379883, "learning_rate": 1.0964315863917337e-05, "loss": 0.07806777954101562, "step": 4840 }, { "epoch": 0.6745628091688148, "grad_norm": 0.6055396795272827, "learning_rate": 1.0955906664193641e-05, "loss": 0.12038993835449219, "step": 4841 }, { "epoch": 0.6747021528600292, "grad_norm": 0.5734759569168091, "learning_rate": 1.0947499473861012e-05, "loss": 0.12209892272949219, "step": 4842 }, { "epoch": 0.6748414965512436, "grad_norm": 0.227107435464859, "learning_rate": 1.0939094294787327e-05, "loss": 0.0742330551147461, "step": 4843 }, { "epoch": 0.674980840242458, "grad_norm": 0.34071043133735657, "learning_rate": 1.0930691128840019e-05, "loss": 0.08356285095214844, "step": 4844 }, { "epoch": 0.6751201839336725, "grad_norm": 0.4039389491081238, "learning_rate": 1.0922289977886087e-05, "loss": 0.09166908264160156, "step": 4845 }, { "epoch": 0.6752595276248868, "grad_norm": 0.39852574467658997, "learning_rate": 1.091389084379208e-05, "loss": 0.08418750762939453, "step": 4846 }, { "epoch": 0.6753988713161012, "grad_norm": 0.5096591114997864, "learning_rate": 1.0905493728424079e-05, "loss": 0.09003353118896484, "step": 4847 }, { "epoch": 0.6755382150073156, "grad_norm": 0.4495689868927002, "learning_rate": 1.0897098633647745e-05, "loss": 0.09197425842285156, "step": 4848 }, { "epoch": 0.67567755869853, "grad_norm": 0.3575702905654907, "learning_rate": 1.0888705561328283e-05, "loss": 0.08242607116699219, "step": 4849 }, { "epoch": 0.6758169023897443, "grad_norm": 0.4976254105567932, "learning_rate": 1.088031451333042e-05, "loss": 0.10451602935791016, "step": 4850 }, { "epoch": 0.6759562460809587, "grad_norm": 0.3875209093093872, "learning_rate": 1.0871925491518462e-05, "loss": 0.08437156677246094, "step": 4851 }, { "epoch": 0.6760955897721731, "grad_norm": 0.3933204114437103, "learning_rate": 1.0863538497756263e-05, "loss": 0.09020614624023438, "step": 4852 }, { "epoch": 0.6762349334633875, "grad_norm": 0.28586864471435547, "learning_rate": 1.085515353390723e-05, "loss": 0.07935142517089844, "step": 4853 }, { "epoch": 0.6763742771546019, "grad_norm": 0.27550914883613586, "learning_rate": 1.0846770601834303e-05, "loss": 0.07664871215820312, "step": 4854 }, { "epoch": 0.6765136208458162, "grad_norm": 0.29771947860717773, "learning_rate": 1.0838389703399965e-05, "loss": 0.08447647094726562, "step": 4855 }, { "epoch": 0.6766529645370306, "grad_norm": 0.38749960064888, "learning_rate": 1.0830010840466282e-05, "loss": 0.0803384780883789, "step": 4856 }, { "epoch": 0.676792308228245, "grad_norm": 0.268987238407135, "learning_rate": 1.0821634014894827e-05, "loss": 0.07018470764160156, "step": 4857 }, { "epoch": 0.6769316519194594, "grad_norm": 0.5637885332107544, "learning_rate": 1.0813259228546746e-05, "loss": 0.12892723083496094, "step": 4858 }, { "epoch": 0.6770709956106737, "grad_norm": 0.46264615654945374, "learning_rate": 1.080488648328274e-05, "loss": 0.08848762512207031, "step": 4859 }, { "epoch": 0.6772103393018881, "grad_norm": 0.37554994225502014, "learning_rate": 1.0796515780963026e-05, "loss": 0.08361244201660156, "step": 4860 }, { "epoch": 0.6773496829931025, "grad_norm": 0.4743032455444336, "learning_rate": 1.078814712344738e-05, "loss": 0.10505294799804688, "step": 4861 }, { "epoch": 0.6774890266843169, "grad_norm": 0.38195300102233887, "learning_rate": 1.0779780512595136e-05, "loss": 0.09167098999023438, "step": 4862 }, { "epoch": 0.6776283703755313, "grad_norm": 0.35630303621292114, "learning_rate": 1.0771415950265163e-05, "loss": 0.08433341979980469, "step": 4863 }, { "epoch": 0.6777677140667456, "grad_norm": 0.5027072429656982, "learning_rate": 1.0763053438315876e-05, "loss": 0.11568069458007812, "step": 4864 }, { "epoch": 0.67790705775796, "grad_norm": 0.7148174047470093, "learning_rate": 1.0754692978605226e-05, "loss": 0.14002609252929688, "step": 4865 }, { "epoch": 0.6780464014491744, "grad_norm": 0.2682296335697174, "learning_rate": 1.074633457299072e-05, "loss": 0.07629776000976562, "step": 4866 }, { "epoch": 0.6781857451403888, "grad_norm": 0.397986501455307, "learning_rate": 1.0737978223329413e-05, "loss": 0.07889175415039062, "step": 4867 }, { "epoch": 0.6783250888316031, "grad_norm": 0.6775274872779846, "learning_rate": 1.0729623931477886e-05, "loss": 0.11358642578125, "step": 4868 }, { "epoch": 0.6784644325228175, "grad_norm": 0.43099498748779297, "learning_rate": 1.0721271699292272e-05, "loss": 0.08658599853515625, "step": 4869 }, { "epoch": 0.6786037762140319, "grad_norm": 0.5844345688819885, "learning_rate": 1.0712921528628258e-05, "loss": 0.09583854675292969, "step": 4870 }, { "epoch": 0.6787431199052463, "grad_norm": 0.4084152579307556, "learning_rate": 1.0704573421341053e-05, "loss": 0.10770893096923828, "step": 4871 }, { "epoch": 0.6788824635964607, "grad_norm": 0.2944040894508362, "learning_rate": 1.0696227379285409e-05, "loss": 0.08372306823730469, "step": 4872 }, { "epoch": 0.679021807287675, "grad_norm": 0.38478100299835205, "learning_rate": 1.0687883404315631e-05, "loss": 0.09403800964355469, "step": 4873 }, { "epoch": 0.6791611509788894, "grad_norm": 0.3841521441936493, "learning_rate": 1.0679541498285568e-05, "loss": 0.10695457458496094, "step": 4874 }, { "epoch": 0.6793004946701038, "grad_norm": 0.45469430088996887, "learning_rate": 1.0671201663048595e-05, "loss": 0.12291145324707031, "step": 4875 }, { "epoch": 0.6794398383613182, "grad_norm": 0.4401262402534485, "learning_rate": 1.0662863900457627e-05, "loss": 0.08165168762207031, "step": 4876 }, { "epoch": 0.6795791820525325, "grad_norm": 0.3655790090560913, "learning_rate": 1.0654528212365127e-05, "loss": 0.07804679870605469, "step": 4877 }, { "epoch": 0.6797185257437469, "grad_norm": 0.4522641599178314, "learning_rate": 1.0646194600623106e-05, "loss": 0.0957651138305664, "step": 4878 }, { "epoch": 0.6798578694349613, "grad_norm": 0.7998565435409546, "learning_rate": 1.0637863067083087e-05, "loss": 0.14170455932617188, "step": 4879 }, { "epoch": 0.6799972131261757, "grad_norm": 0.36890023946762085, "learning_rate": 1.0629533613596162e-05, "loss": 0.08062934875488281, "step": 4880 }, { "epoch": 0.68013655681739, "grad_norm": 0.3289673626422882, "learning_rate": 1.0621206242012936e-05, "loss": 0.08096885681152344, "step": 4881 }, { "epoch": 0.6802759005086044, "grad_norm": 0.3105328679084778, "learning_rate": 1.061288095418355e-05, "loss": 0.0748286247253418, "step": 4882 }, { "epoch": 0.6804152441998188, "grad_norm": 0.39645084738731384, "learning_rate": 1.060455775195771e-05, "loss": 0.08550834655761719, "step": 4883 }, { "epoch": 0.6805545878910332, "grad_norm": 0.6365534663200378, "learning_rate": 1.0596236637184631e-05, "loss": 0.106964111328125, "step": 4884 }, { "epoch": 0.6806939315822477, "grad_norm": 0.5241995453834534, "learning_rate": 1.058791761171309e-05, "loss": 0.08218955993652344, "step": 4885 }, { "epoch": 0.680833275273462, "grad_norm": 0.25713127851486206, "learning_rate": 1.0579600677391375e-05, "loss": 0.07313919067382812, "step": 4886 }, { "epoch": 0.6809726189646764, "grad_norm": 1.0088403224945068, "learning_rate": 1.0571285836067308e-05, "loss": 0.1389312744140625, "step": 4887 }, { "epoch": 0.6811119626558908, "grad_norm": 0.3513859510421753, "learning_rate": 1.0562973089588278e-05, "loss": 0.07870197296142578, "step": 4888 }, { "epoch": 0.6812513063471052, "grad_norm": 0.4524022340774536, "learning_rate": 1.0554662439801167e-05, "loss": 0.09143352508544922, "step": 4889 }, { "epoch": 0.6813906500383196, "grad_norm": 0.2927952706813812, "learning_rate": 1.0546353888552418e-05, "loss": 0.06948280334472656, "step": 4890 }, { "epoch": 0.6815299937295339, "grad_norm": 0.3145082890987396, "learning_rate": 1.0538047437688018e-05, "loss": 0.08067893981933594, "step": 4891 }, { "epoch": 0.6816693374207483, "grad_norm": 0.2631840705871582, "learning_rate": 1.0529743089053452e-05, "loss": 0.07291603088378906, "step": 4892 }, { "epoch": 0.6818086811119627, "grad_norm": 0.4137323796749115, "learning_rate": 1.0521440844493758e-05, "loss": 0.09266090393066406, "step": 4893 }, { "epoch": 0.6819480248031771, "grad_norm": 0.3972265422344208, "learning_rate": 1.0513140705853506e-05, "loss": 0.09338665008544922, "step": 4894 }, { "epoch": 0.6820873684943914, "grad_norm": 0.354187935590744, "learning_rate": 1.0504842674976811e-05, "loss": 0.0923757553100586, "step": 4895 }, { "epoch": 0.6822267121856058, "grad_norm": 0.31886106729507446, "learning_rate": 1.0496546753707295e-05, "loss": 0.07342910766601562, "step": 4896 }, { "epoch": 0.6823660558768202, "grad_norm": 0.35081911087036133, "learning_rate": 1.0488252943888115e-05, "loss": 0.09543418884277344, "step": 4897 }, { "epoch": 0.6825053995680346, "grad_norm": 0.281780868768692, "learning_rate": 1.0479961247361974e-05, "loss": 0.08587074279785156, "step": 4898 }, { "epoch": 0.682644743259249, "grad_norm": 0.2448401004076004, "learning_rate": 1.0471671665971104e-05, "loss": 0.06395721435546875, "step": 4899 }, { "epoch": 0.6827840869504633, "grad_norm": 0.35675546526908875, "learning_rate": 1.0463384201557248e-05, "loss": 0.08567047119140625, "step": 4900 }, { "epoch": 0.6829234306416777, "grad_norm": 0.4261457920074463, "learning_rate": 1.0455098855961705e-05, "loss": 0.08635807037353516, "step": 4901 }, { "epoch": 0.6830627743328921, "grad_norm": 0.3776802122592926, "learning_rate": 1.0446815631025276e-05, "loss": 0.09564781188964844, "step": 4902 }, { "epoch": 0.6832021180241065, "grad_norm": 0.4416855573654175, "learning_rate": 1.0438534528588319e-05, "loss": 0.07860565185546875, "step": 4903 }, { "epoch": 0.6833414617153208, "grad_norm": 0.2753089368343353, "learning_rate": 1.0430255550490686e-05, "loss": 0.06847572326660156, "step": 4904 }, { "epoch": 0.6834808054065352, "grad_norm": 0.23387877643108368, "learning_rate": 1.0421978698571791e-05, "loss": 0.06739139556884766, "step": 4905 }, { "epoch": 0.6836201490977496, "grad_norm": 0.41215506196022034, "learning_rate": 1.0413703974670568e-05, "loss": 0.10003089904785156, "step": 4906 }, { "epoch": 0.683759492788964, "grad_norm": 0.34148383140563965, "learning_rate": 1.0405431380625461e-05, "loss": 0.08628654479980469, "step": 4907 }, { "epoch": 0.6838988364801784, "grad_norm": 0.23727312684059143, "learning_rate": 1.0397160918274447e-05, "loss": 0.061435699462890625, "step": 4908 }, { "epoch": 0.6840381801713927, "grad_norm": 0.3175593316555023, "learning_rate": 1.038889258945504e-05, "loss": 0.08549118041992188, "step": 4909 }, { "epoch": 0.6841775238626071, "grad_norm": 0.49968087673187256, "learning_rate": 1.0380626396004282e-05, "loss": 0.10831642150878906, "step": 4910 }, { "epoch": 0.6843168675538215, "grad_norm": 0.27669012546539307, "learning_rate": 1.0372362339758717e-05, "loss": 0.05743885040283203, "step": 4911 }, { "epoch": 0.6844562112450359, "grad_norm": 0.505900502204895, "learning_rate": 1.0364100422554445e-05, "loss": 0.09831809997558594, "step": 4912 }, { "epoch": 0.6845955549362502, "grad_norm": 0.6580058336257935, "learning_rate": 1.0355840646227063e-05, "loss": 0.14297866821289062, "step": 4913 }, { "epoch": 0.6847348986274646, "grad_norm": 0.25964322686195374, "learning_rate": 1.0347583012611713e-05, "loss": 0.062328338623046875, "step": 4914 }, { "epoch": 0.684874242318679, "grad_norm": 0.6367183327674866, "learning_rate": 1.0339327523543043e-05, "loss": 0.13318252563476562, "step": 4915 }, { "epoch": 0.6850135860098934, "grad_norm": 0.8033723831176758, "learning_rate": 1.0331074180855243e-05, "loss": 0.1332406997680664, "step": 4916 }, { "epoch": 0.6851529297011077, "grad_norm": 0.4216654300689697, "learning_rate": 1.0322822986382026e-05, "loss": 0.09020042419433594, "step": 4917 }, { "epoch": 0.6852922733923221, "grad_norm": 0.33353284001350403, "learning_rate": 1.0314573941956593e-05, "loss": 0.08016586303710938, "step": 4918 }, { "epoch": 0.6854316170835365, "grad_norm": 0.19543087482452393, "learning_rate": 1.0306327049411711e-05, "loss": 0.06568145751953125, "step": 4919 }, { "epoch": 0.6855709607747509, "grad_norm": 0.26474276185035706, "learning_rate": 1.0298082310579653e-05, "loss": 0.07256889343261719, "step": 4920 }, { "epoch": 0.6857103044659653, "grad_norm": 0.48136138916015625, "learning_rate": 1.0289839727292202e-05, "loss": 0.08955860137939453, "step": 4921 }, { "epoch": 0.6858496481571796, "grad_norm": 0.2271389216184616, "learning_rate": 1.0281599301380676e-05, "loss": 0.06487083435058594, "step": 4922 }, { "epoch": 0.685988991848394, "grad_norm": 0.3940986394882202, "learning_rate": 1.0273361034675915e-05, "loss": 0.08514213562011719, "step": 4923 }, { "epoch": 0.6861283355396084, "grad_norm": 0.3297352194786072, "learning_rate": 1.0265124929008272e-05, "loss": 0.08451652526855469, "step": 4924 }, { "epoch": 0.6862676792308228, "grad_norm": 0.4284960925579071, "learning_rate": 1.0256890986207612e-05, "loss": 0.09431838989257812, "step": 4925 }, { "epoch": 0.6864070229220373, "grad_norm": 0.6568474769592285, "learning_rate": 1.0248659208103336e-05, "loss": 0.11489295959472656, "step": 4926 }, { "epoch": 0.6865463666132516, "grad_norm": 0.3984948694705963, "learning_rate": 1.0240429596524361e-05, "loss": 0.10499000549316406, "step": 4927 }, { "epoch": 0.686685710304466, "grad_norm": 0.34218165278434753, "learning_rate": 1.0232202153299118e-05, "loss": 0.08030128479003906, "step": 4928 }, { "epoch": 0.6868250539956804, "grad_norm": 0.5568341612815857, "learning_rate": 1.0223976880255546e-05, "loss": 0.10932540893554688, "step": 4929 }, { "epoch": 0.6869643976868948, "grad_norm": 0.391722708940506, "learning_rate": 1.0215753779221119e-05, "loss": 0.07804012298583984, "step": 4930 }, { "epoch": 0.6871037413781091, "grad_norm": 0.3797610104084015, "learning_rate": 1.0207532852022833e-05, "loss": 0.08785676956176758, "step": 4931 }, { "epoch": 0.6872430850693235, "grad_norm": 0.2856076955795288, "learning_rate": 1.0199314100487175e-05, "loss": 0.08537960052490234, "step": 4932 }, { "epoch": 0.6873824287605379, "grad_norm": 0.7224351763725281, "learning_rate": 1.0191097526440177e-05, "loss": 0.12424850463867188, "step": 4933 }, { "epoch": 0.6875217724517523, "grad_norm": 0.3226775527000427, "learning_rate": 1.0182883131707357e-05, "loss": 0.08257007598876953, "step": 4934 }, { "epoch": 0.6876611161429667, "grad_norm": 0.44007226824760437, "learning_rate": 1.017467091811379e-05, "loss": 0.1140604019165039, "step": 4935 }, { "epoch": 0.687800459834181, "grad_norm": 0.45145511627197266, "learning_rate": 1.0166460887484018e-05, "loss": 0.08194923400878906, "step": 4936 }, { "epoch": 0.6879398035253954, "grad_norm": 0.34159597754478455, "learning_rate": 1.0158253041642132e-05, "loss": 0.08581352233886719, "step": 4937 }, { "epoch": 0.6880791472166098, "grad_norm": 0.3290710151195526, "learning_rate": 1.0150047382411738e-05, "loss": 0.0875406265258789, "step": 4938 }, { "epoch": 0.6882184909078242, "grad_norm": 0.4611169397830963, "learning_rate": 1.0141843911615938e-05, "loss": 0.1058807373046875, "step": 4939 }, { "epoch": 0.6883578345990385, "grad_norm": 0.46143314242362976, "learning_rate": 1.0133642631077348e-05, "loss": 0.11905670166015625, "step": 4940 }, { "epoch": 0.6884971782902529, "grad_norm": 0.46544548869132996, "learning_rate": 1.0125443542618112e-05, "loss": 0.09161567687988281, "step": 4941 }, { "epoch": 0.6886365219814673, "grad_norm": 0.3057534694671631, "learning_rate": 1.0117246648059888e-05, "loss": 0.07961463928222656, "step": 4942 }, { "epoch": 0.6887758656726817, "grad_norm": 0.363075852394104, "learning_rate": 1.0109051949223825e-05, "loss": 0.08548927307128906, "step": 4943 }, { "epoch": 0.688915209363896, "grad_norm": 0.2729121446609497, "learning_rate": 1.0100859447930614e-05, "loss": 0.06646919250488281, "step": 4944 }, { "epoch": 0.6890545530551104, "grad_norm": 0.44278791546821594, "learning_rate": 1.0092669146000422e-05, "loss": 0.09829521179199219, "step": 4945 }, { "epoch": 0.6891938967463248, "grad_norm": 0.23910470306873322, "learning_rate": 1.0084481045252965e-05, "loss": 0.06440305709838867, "step": 4946 }, { "epoch": 0.6893332404375392, "grad_norm": 0.5476571917533875, "learning_rate": 1.0076295147507437e-05, "loss": 0.10954856872558594, "step": 4947 }, { "epoch": 0.6894725841287536, "grad_norm": 0.3782988488674164, "learning_rate": 1.0068111454582565e-05, "loss": 0.09658432006835938, "step": 4948 }, { "epoch": 0.6896119278199679, "grad_norm": 0.323027104139328, "learning_rate": 1.0059929968296597e-05, "loss": 0.08370208740234375, "step": 4949 }, { "epoch": 0.6897512715111823, "grad_norm": 0.31005731225013733, "learning_rate": 1.005175069046724e-05, "loss": 0.08263778686523438, "step": 4950 }, { "epoch": 0.6898906152023967, "grad_norm": 0.35136526823043823, "learning_rate": 1.004357362291175e-05, "loss": 0.08499526977539062, "step": 4951 }, { "epoch": 0.6900299588936111, "grad_norm": 0.2737633287906647, "learning_rate": 1.0035398767446897e-05, "loss": 0.0638427734375, "step": 4952 }, { "epoch": 0.6901693025848255, "grad_norm": 0.2198205292224884, "learning_rate": 1.0027226125888951e-05, "loss": 0.07228469848632812, "step": 4953 }, { "epoch": 0.6903086462760398, "grad_norm": 0.23611025512218475, "learning_rate": 1.0019055700053677e-05, "loss": 0.06723785400390625, "step": 4954 }, { "epoch": 0.6904479899672542, "grad_norm": 0.5975042581558228, "learning_rate": 1.0010887491756347e-05, "loss": 0.09978961944580078, "step": 4955 }, { "epoch": 0.6905873336584686, "grad_norm": 0.3045758008956909, "learning_rate": 1.0002721502811772e-05, "loss": 0.08376121520996094, "step": 4956 }, { "epoch": 0.690726677349683, "grad_norm": 0.4116871654987335, "learning_rate": 9.994557735034227e-06, "loss": 0.08355426788330078, "step": 4957 }, { "epoch": 0.6908660210408973, "grad_norm": 0.22577494382858276, "learning_rate": 9.986396190237526e-06, "loss": 0.064727783203125, "step": 4958 }, { "epoch": 0.6910053647321117, "grad_norm": 0.6208799481391907, "learning_rate": 9.978236870234984e-06, "loss": 0.11593437194824219, "step": 4959 }, { "epoch": 0.6911447084233261, "grad_norm": 0.267959862947464, "learning_rate": 9.970079776839412e-06, "loss": 0.07221126556396484, "step": 4960 }, { "epoch": 0.6912840521145405, "grad_norm": 0.3604404628276825, "learning_rate": 9.961924911863117e-06, "loss": 0.08163166046142578, "step": 4961 }, { "epoch": 0.6914233958057548, "grad_norm": 0.732986569404602, "learning_rate": 9.953772277117933e-06, "loss": 0.13184356689453125, "step": 4962 }, { "epoch": 0.6915627394969692, "grad_norm": 0.29597803950309753, "learning_rate": 9.945621874415197e-06, "loss": 0.07835578918457031, "step": 4963 }, { "epoch": 0.6917020831881836, "grad_norm": 0.21243169903755188, "learning_rate": 9.937473705565728e-06, "loss": 0.06528472900390625, "step": 4964 }, { "epoch": 0.691841426879398, "grad_norm": 0.5123072266578674, "learning_rate": 9.929327772379877e-06, "loss": 0.0998992919921875, "step": 4965 }, { "epoch": 0.6919807705706125, "grad_norm": 0.5419296622276306, "learning_rate": 9.921184076667472e-06, "loss": 0.10455513000488281, "step": 4966 }, { "epoch": 0.6921201142618268, "grad_norm": 0.2582698464393616, "learning_rate": 9.913042620237868e-06, "loss": 0.061771392822265625, "step": 4967 }, { "epoch": 0.6922594579530412, "grad_norm": 0.44821691513061523, "learning_rate": 9.904903404899898e-06, "loss": 0.11999893188476562, "step": 4968 }, { "epoch": 0.6923988016442556, "grad_norm": 0.20230096578598022, "learning_rate": 9.896766432461914e-06, "loss": 0.05615043640136719, "step": 4969 }, { "epoch": 0.69253814533547, "grad_norm": 0.3491203784942627, "learning_rate": 9.88863170473178e-06, "loss": 0.08565330505371094, "step": 4970 }, { "epoch": 0.6926774890266844, "grad_norm": 0.27427130937576294, "learning_rate": 9.880499223516831e-06, "loss": 0.0642547607421875, "step": 4971 }, { "epoch": 0.6928168327178987, "grad_norm": 0.4668982923030853, "learning_rate": 9.872368990623915e-06, "loss": 0.09759140014648438, "step": 4972 }, { "epoch": 0.6929561764091131, "grad_norm": 0.5656536817550659, "learning_rate": 9.864241007859392e-06, "loss": 0.09711837768554688, "step": 4973 }, { "epoch": 0.6930955201003275, "grad_norm": 0.3404300808906555, "learning_rate": 9.856115277029123e-06, "loss": 0.08415603637695312, "step": 4974 }, { "epoch": 0.6932348637915419, "grad_norm": 0.3858882486820221, "learning_rate": 9.84799179993845e-06, "loss": 0.09690380096435547, "step": 4975 }, { "epoch": 0.6933742074827562, "grad_norm": 0.3579595685005188, "learning_rate": 9.839870578392216e-06, "loss": 0.07880020141601562, "step": 4976 }, { "epoch": 0.6935135511739706, "grad_norm": 0.40616241097450256, "learning_rate": 9.83175161419478e-06, "loss": 0.09547042846679688, "step": 4977 }, { "epoch": 0.693652894865185, "grad_norm": 0.32607316970825195, "learning_rate": 9.82363490915e-06, "loss": 0.08511924743652344, "step": 4978 }, { "epoch": 0.6937922385563994, "grad_norm": 0.2519657015800476, "learning_rate": 9.815520465061201e-06, "loss": 0.06199455261230469, "step": 4979 }, { "epoch": 0.6939315822476138, "grad_norm": 0.6344687342643738, "learning_rate": 9.807408283731244e-06, "loss": 0.09833145141601562, "step": 4980 }, { "epoch": 0.6940709259388281, "grad_norm": 0.29017555713653564, "learning_rate": 9.799298366962478e-06, "loss": 0.07285594940185547, "step": 4981 }, { "epoch": 0.6942102696300425, "grad_norm": 0.4909520149230957, "learning_rate": 9.791190716556713e-06, "loss": 0.09121131896972656, "step": 4982 }, { "epoch": 0.6943496133212569, "grad_norm": 0.5514956116676331, "learning_rate": 9.7830853343153e-06, "loss": 0.10633182525634766, "step": 4983 }, { "epoch": 0.6944889570124713, "grad_norm": 0.44479334354400635, "learning_rate": 9.774982222039072e-06, "loss": 0.09659957885742188, "step": 4984 }, { "epoch": 0.6946283007036856, "grad_norm": 0.3229827880859375, "learning_rate": 9.766881381528357e-06, "loss": 0.07755231857299805, "step": 4985 }, { "epoch": 0.6947676443949, "grad_norm": 0.5126659870147705, "learning_rate": 9.758782814582977e-06, "loss": 0.08667945861816406, "step": 4986 }, { "epoch": 0.6949069880861144, "grad_norm": 0.37956535816192627, "learning_rate": 9.750686523002233e-06, "loss": 0.08022117614746094, "step": 4987 }, { "epoch": 0.6950463317773288, "grad_norm": 0.3261547088623047, "learning_rate": 9.742592508584958e-06, "loss": 0.0773477554321289, "step": 4988 }, { "epoch": 0.6951856754685432, "grad_norm": 0.2754902243614197, "learning_rate": 9.734500773129438e-06, "loss": 0.06836414337158203, "step": 4989 }, { "epoch": 0.6953250191597575, "grad_norm": 0.70970219373703, "learning_rate": 9.726411318433482e-06, "loss": 0.11392402648925781, "step": 4990 }, { "epoch": 0.6954643628509719, "grad_norm": 0.24002085626125336, "learning_rate": 9.71832414629439e-06, "loss": 0.06633949279785156, "step": 4991 }, { "epoch": 0.6956037065421863, "grad_norm": 0.281584769487381, "learning_rate": 9.710239258508935e-06, "loss": 0.08434677124023438, "step": 4992 }, { "epoch": 0.6957430502334007, "grad_norm": 0.5980426073074341, "learning_rate": 9.702156656873391e-06, "loss": 0.11432933807373047, "step": 4993 }, { "epoch": 0.695882393924615, "grad_norm": 0.30097121000289917, "learning_rate": 9.694076343183534e-06, "loss": 0.08535194396972656, "step": 4994 }, { "epoch": 0.6960217376158294, "grad_norm": 0.5965678691864014, "learning_rate": 9.685998319234634e-06, "loss": 0.08912277221679688, "step": 4995 }, { "epoch": 0.6961610813070438, "grad_norm": 1.1033999919891357, "learning_rate": 9.677922586821434e-06, "loss": 0.1808605194091797, "step": 4996 }, { "epoch": 0.6963004249982582, "grad_norm": 0.40111514925956726, "learning_rate": 9.669849147738171e-06, "loss": 0.09754276275634766, "step": 4997 }, { "epoch": 0.6964397686894725, "grad_norm": 0.17840033769607544, "learning_rate": 9.661778003778583e-06, "loss": 0.055922508239746094, "step": 4998 }, { "epoch": 0.6965791123806869, "grad_norm": 0.6950664520263672, "learning_rate": 9.653709156735908e-06, "loss": 0.10890865325927734, "step": 4999 }, { "epoch": 0.6967184560719013, "grad_norm": 0.48984822630882263, "learning_rate": 9.64564260840284e-06, "loss": 0.11248588562011719, "step": 5000 }, { "epoch": 0.6968577997631157, "grad_norm": 0.43112874031066895, "learning_rate": 9.63757836057159e-06, "loss": 0.11424636840820312, "step": 5001 }, { "epoch": 0.6969971434543301, "grad_norm": 0.3635101616382599, "learning_rate": 9.629516415033859e-06, "loss": 0.06601142883300781, "step": 5002 }, { "epoch": 0.6971364871455444, "grad_norm": 0.35844650864601135, "learning_rate": 9.621456773580817e-06, "loss": 0.07467079162597656, "step": 5003 }, { "epoch": 0.6972758308367588, "grad_norm": 0.5324035286903381, "learning_rate": 9.613399438003128e-06, "loss": 0.09906578063964844, "step": 5004 }, { "epoch": 0.6974151745279732, "grad_norm": 0.35989972949028015, "learning_rate": 9.605344410090954e-06, "loss": 0.07703399658203125, "step": 5005 }, { "epoch": 0.6975545182191877, "grad_norm": 0.5261275768280029, "learning_rate": 9.597291691633942e-06, "loss": 0.09986114501953125, "step": 5006 }, { "epoch": 0.6976938619104021, "grad_norm": 0.2537088692188263, "learning_rate": 9.589241284421221e-06, "loss": 0.0699310302734375, "step": 5007 }, { "epoch": 0.6978332056016164, "grad_norm": 0.40790513157844543, "learning_rate": 9.581193190241398e-06, "loss": 0.09140586853027344, "step": 5008 }, { "epoch": 0.6979725492928308, "grad_norm": 0.37301063537597656, "learning_rate": 9.57314741088258e-06, "loss": 0.09044075012207031, "step": 5009 }, { "epoch": 0.6981118929840452, "grad_norm": 0.3905682861804962, "learning_rate": 9.565103948132368e-06, "loss": 0.08413505554199219, "step": 5010 }, { "epoch": 0.6982512366752596, "grad_norm": 0.27527716755867004, "learning_rate": 9.557062803777817e-06, "loss": 0.07195281982421875, "step": 5011 }, { "epoch": 0.698390580366474, "grad_norm": 0.2630101144313812, "learning_rate": 9.549023979605503e-06, "loss": 0.070037841796875, "step": 5012 }, { "epoch": 0.6985299240576883, "grad_norm": 0.40293508768081665, "learning_rate": 9.540987477401454e-06, "loss": 0.07933235168457031, "step": 5013 }, { "epoch": 0.6986692677489027, "grad_norm": 0.38914942741394043, "learning_rate": 9.53295329895121e-06, "loss": 0.08435249328613281, "step": 5014 }, { "epoch": 0.6988086114401171, "grad_norm": 0.40008339285850525, "learning_rate": 9.52492144603977e-06, "loss": 0.07492542266845703, "step": 5015 }, { "epoch": 0.6989479551313315, "grad_norm": 0.5118096470832825, "learning_rate": 9.516891920451634e-06, "loss": 0.10499382019042969, "step": 5016 }, { "epoch": 0.6990872988225458, "grad_norm": 0.4209776520729065, "learning_rate": 9.50886472397079e-06, "loss": 0.09214401245117188, "step": 5017 }, { "epoch": 0.6992266425137602, "grad_norm": 0.3492922782897949, "learning_rate": 9.500839858380684e-06, "loss": 0.08222389221191406, "step": 5018 }, { "epoch": 0.6993659862049746, "grad_norm": 0.39044255018234253, "learning_rate": 9.492817325464256e-06, "loss": 0.08613777160644531, "step": 5019 }, { "epoch": 0.699505329896189, "grad_norm": 0.4876403510570526, "learning_rate": 9.484797127003942e-06, "loss": 0.09149360656738281, "step": 5020 }, { "epoch": 0.6996446735874033, "grad_norm": 0.25706058740615845, "learning_rate": 9.476779264781633e-06, "loss": 0.0632781982421875, "step": 5021 }, { "epoch": 0.6997840172786177, "grad_norm": 0.3330318331718445, "learning_rate": 9.468763740578721e-06, "loss": 0.07349395751953125, "step": 5022 }, { "epoch": 0.6999233609698321, "grad_norm": 0.6070712804794312, "learning_rate": 9.460750556176085e-06, "loss": 0.10130786895751953, "step": 5023 }, { "epoch": 0.7000627046610465, "grad_norm": 0.46403488516807556, "learning_rate": 9.452739713354055e-06, "loss": 0.08974075317382812, "step": 5024 }, { "epoch": 0.7002020483522609, "grad_norm": 0.5460727214813232, "learning_rate": 9.444731213892458e-06, "loss": 0.10877418518066406, "step": 5025 }, { "epoch": 0.7003413920434752, "grad_norm": 0.5589982867240906, "learning_rate": 9.436725059570605e-06, "loss": 0.08188438415527344, "step": 5026 }, { "epoch": 0.7004807357346896, "grad_norm": 0.4819416403770447, "learning_rate": 9.428721252167286e-06, "loss": 0.10689735412597656, "step": 5027 }, { "epoch": 0.700620079425904, "grad_norm": 0.8366943001747131, "learning_rate": 9.420719793460758e-06, "loss": 0.14647865295410156, "step": 5028 }, { "epoch": 0.7007594231171184, "grad_norm": 0.5976685285568237, "learning_rate": 9.412720685228755e-06, "loss": 0.11793899536132812, "step": 5029 }, { "epoch": 0.7008987668083327, "grad_norm": 0.273333877325058, "learning_rate": 9.404723929248507e-06, "loss": 0.07473564147949219, "step": 5030 }, { "epoch": 0.7010381104995471, "grad_norm": 0.5806134939193726, "learning_rate": 9.396729527296712e-06, "loss": 0.08838844299316406, "step": 5031 }, { "epoch": 0.7011774541907615, "grad_norm": 0.2642543613910675, "learning_rate": 9.388737481149534e-06, "loss": 0.06691360473632812, "step": 5032 }, { "epoch": 0.7013167978819759, "grad_norm": 0.4547598361968994, "learning_rate": 9.380747792582635e-06, "loss": 0.08000564575195312, "step": 5033 }, { "epoch": 0.7014561415731903, "grad_norm": 0.38699135184288025, "learning_rate": 9.372760463371127e-06, "loss": 0.0940399169921875, "step": 5034 }, { "epoch": 0.7015954852644046, "grad_norm": 0.4875674545764923, "learning_rate": 9.364775495289628e-06, "loss": 0.0942850112915039, "step": 5035 }, { "epoch": 0.701734828955619, "grad_norm": 0.34513965249061584, "learning_rate": 9.3567928901122e-06, "loss": 0.07867431640625, "step": 5036 }, { "epoch": 0.7018741726468334, "grad_norm": 0.5730146169662476, "learning_rate": 9.348812649612404e-06, "loss": 0.09345674514770508, "step": 5037 }, { "epoch": 0.7020135163380478, "grad_norm": 0.46924862265586853, "learning_rate": 9.340834775563275e-06, "loss": 0.11110115051269531, "step": 5038 }, { "epoch": 0.7021528600292621, "grad_norm": 0.41912055015563965, "learning_rate": 9.332859269737303e-06, "loss": 0.09215927124023438, "step": 5039 }, { "epoch": 0.7022922037204765, "grad_norm": 0.3693384826183319, "learning_rate": 9.32488613390646e-06, "loss": 0.06923675537109375, "step": 5040 }, { "epoch": 0.7024315474116909, "grad_norm": 0.34351763129234314, "learning_rate": 9.316915369842201e-06, "loss": 0.07317924499511719, "step": 5041 }, { "epoch": 0.7025708911029053, "grad_norm": 0.3105156421661377, "learning_rate": 9.308946979315456e-06, "loss": 0.0794219970703125, "step": 5042 }, { "epoch": 0.7027102347941196, "grad_norm": 0.3036092221736908, "learning_rate": 9.300980964096604e-06, "loss": 0.0716257095336914, "step": 5043 }, { "epoch": 0.702849578485334, "grad_norm": 0.3073762357234955, "learning_rate": 9.293017325955524e-06, "loss": 0.07764244079589844, "step": 5044 }, { "epoch": 0.7029889221765484, "grad_norm": 0.41103121638298035, "learning_rate": 9.285056066661547e-06, "loss": 0.09472084045410156, "step": 5045 }, { "epoch": 0.7031282658677629, "grad_norm": 0.23458358645439148, "learning_rate": 9.277097187983489e-06, "loss": 0.07300376892089844, "step": 5046 }, { "epoch": 0.7032676095589773, "grad_norm": 0.45855510234832764, "learning_rate": 9.269140691689622e-06, "loss": 0.09363555908203125, "step": 5047 }, { "epoch": 0.7034069532501916, "grad_norm": 0.6549604535102844, "learning_rate": 9.261186579547703e-06, "loss": 0.09659004211425781, "step": 5048 }, { "epoch": 0.703546296941406, "grad_norm": 0.6095008850097656, "learning_rate": 9.253234853324968e-06, "loss": 0.12381935119628906, "step": 5049 }, { "epoch": 0.7036856406326204, "grad_norm": 0.47968077659606934, "learning_rate": 9.245285514788082e-06, "loss": 0.10022354125976562, "step": 5050 }, { "epoch": 0.7038249843238348, "grad_norm": 0.5633729100227356, "learning_rate": 9.237338565703222e-06, "loss": 0.12637710571289062, "step": 5051 }, { "epoch": 0.7039643280150492, "grad_norm": 0.46615126729011536, "learning_rate": 9.229394007836017e-06, "loss": 0.10266304016113281, "step": 5052 }, { "epoch": 0.7041036717062635, "grad_norm": 0.4680469036102295, "learning_rate": 9.221451842951572e-06, "loss": 0.09732437133789062, "step": 5053 }, { "epoch": 0.7042430153974779, "grad_norm": 0.6042613983154297, "learning_rate": 9.21351207281445e-06, "loss": 0.11707878112792969, "step": 5054 }, { "epoch": 0.7043823590886923, "grad_norm": 0.4946025013923645, "learning_rate": 9.205574699188677e-06, "loss": 0.09771537780761719, "step": 5055 }, { "epoch": 0.7045217027799067, "grad_norm": 0.3503205180168152, "learning_rate": 9.197639723837775e-06, "loss": 0.08000564575195312, "step": 5056 }, { "epoch": 0.704661046471121, "grad_norm": 0.3377445340156555, "learning_rate": 9.189707148524697e-06, "loss": 0.06848335266113281, "step": 5057 }, { "epoch": 0.7048003901623354, "grad_norm": 0.46059450507164, "learning_rate": 9.181776975011882e-06, "loss": 0.1086273193359375, "step": 5058 }, { "epoch": 0.7049397338535498, "grad_norm": 0.2698400616645813, "learning_rate": 9.173849205061251e-06, "loss": 0.0711674690246582, "step": 5059 }, { "epoch": 0.7050790775447642, "grad_norm": 0.21385367214679718, "learning_rate": 9.165923840434162e-06, "loss": 0.06261634826660156, "step": 5060 }, { "epoch": 0.7052184212359786, "grad_norm": 0.5526517033576965, "learning_rate": 9.15800088289144e-06, "loss": 0.1278858184814453, "step": 5061 }, { "epoch": 0.7053577649271929, "grad_norm": 0.35767436027526855, "learning_rate": 9.150080334193394e-06, "loss": 0.09249687194824219, "step": 5062 }, { "epoch": 0.7054971086184073, "grad_norm": 0.2869773209095001, "learning_rate": 9.142162196099799e-06, "loss": 0.07478904724121094, "step": 5063 }, { "epoch": 0.7056364523096217, "grad_norm": 0.5399680733680725, "learning_rate": 9.134246470369868e-06, "loss": 0.09094667434692383, "step": 5064 }, { "epoch": 0.7057757960008361, "grad_norm": 0.31981995701789856, "learning_rate": 9.126333158762309e-06, "loss": 0.0757455825805664, "step": 5065 }, { "epoch": 0.7059151396920504, "grad_norm": 0.2379252314567566, "learning_rate": 9.118422263035264e-06, "loss": 0.07272195816040039, "step": 5066 }, { "epoch": 0.7060544833832648, "grad_norm": 0.29845714569091797, "learning_rate": 9.110513784946368e-06, "loss": 0.0746011734008789, "step": 5067 }, { "epoch": 0.7061938270744792, "grad_norm": 0.3006128966808319, "learning_rate": 9.102607726252692e-06, "loss": 0.07023334503173828, "step": 5068 }, { "epoch": 0.7063331707656936, "grad_norm": 0.5126835107803345, "learning_rate": 9.094704088710788e-06, "loss": 0.11052894592285156, "step": 5069 }, { "epoch": 0.706472514456908, "grad_norm": 0.36278772354125977, "learning_rate": 9.08680287407667e-06, "loss": 0.07686328887939453, "step": 5070 }, { "epoch": 0.7066118581481223, "grad_norm": 0.4253590404987335, "learning_rate": 9.078904084105802e-06, "loss": 0.10579395294189453, "step": 5071 }, { "epoch": 0.7067512018393367, "grad_norm": 0.604463517665863, "learning_rate": 9.071007720553104e-06, "loss": 0.09521102905273438, "step": 5072 }, { "epoch": 0.7068905455305511, "grad_norm": 0.4090389311313629, "learning_rate": 9.06311378517298e-06, "loss": 0.07865142822265625, "step": 5073 }, { "epoch": 0.7070298892217655, "grad_norm": 0.2793520987033844, "learning_rate": 9.055222279719284e-06, "loss": 0.06829833984375, "step": 5074 }, { "epoch": 0.7071692329129798, "grad_norm": 0.6008466482162476, "learning_rate": 9.047333205945318e-06, "loss": 0.12237358093261719, "step": 5075 }, { "epoch": 0.7073085766041942, "grad_norm": 0.4721815586090088, "learning_rate": 9.039446565603868e-06, "loss": 0.09187817573547363, "step": 5076 }, { "epoch": 0.7074479202954086, "grad_norm": 0.3796568810939789, "learning_rate": 9.03156236044715e-06, "loss": 0.0887298583984375, "step": 5077 }, { "epoch": 0.707587263986623, "grad_norm": 0.346814900636673, "learning_rate": 9.023680592226868e-06, "loss": 0.06937313079833984, "step": 5078 }, { "epoch": 0.7077266076778373, "grad_norm": 0.32224252820014954, "learning_rate": 9.015801262694157e-06, "loss": 0.08800125122070312, "step": 5079 }, { "epoch": 0.7078659513690517, "grad_norm": 0.3186229467391968, "learning_rate": 9.007924373599634e-06, "loss": 0.08640098571777344, "step": 5080 }, { "epoch": 0.7080052950602661, "grad_norm": 0.2802046239376068, "learning_rate": 9.000049926693375e-06, "loss": 0.06688499450683594, "step": 5081 }, { "epoch": 0.7081446387514805, "grad_norm": 0.46237489581108093, "learning_rate": 8.992177923724876e-06, "loss": 0.1093902587890625, "step": 5082 }, { "epoch": 0.7082839824426949, "grad_norm": 0.6200442314147949, "learning_rate": 8.98430836644313e-06, "loss": 0.10738563537597656, "step": 5083 }, { "epoch": 0.7084233261339092, "grad_norm": 0.42910531163215637, "learning_rate": 8.97644125659657e-06, "loss": 0.08350181579589844, "step": 5084 }, { "epoch": 0.7085626698251236, "grad_norm": 0.3607664108276367, "learning_rate": 8.968576595933098e-06, "loss": 0.07746124267578125, "step": 5085 }, { "epoch": 0.7087020135163381, "grad_norm": 0.3402417004108429, "learning_rate": 8.960714386200056e-06, "loss": 0.07793235778808594, "step": 5086 }, { "epoch": 0.7088413572075525, "grad_norm": 0.45876577496528625, "learning_rate": 8.95285462914424e-06, "loss": 0.0984954833984375, "step": 5087 }, { "epoch": 0.7089807008987669, "grad_norm": 0.36876380443573, "learning_rate": 8.94499732651192e-06, "loss": 0.08346176147460938, "step": 5088 }, { "epoch": 0.7091200445899812, "grad_norm": 0.43128451704978943, "learning_rate": 8.937142480048797e-06, "loss": 0.08225059509277344, "step": 5089 }, { "epoch": 0.7092593882811956, "grad_norm": 0.3870275020599365, "learning_rate": 8.929290091500045e-06, "loss": 0.09326362609863281, "step": 5090 }, { "epoch": 0.70939873197241, "grad_norm": 0.4229520857334137, "learning_rate": 8.921440162610295e-06, "loss": 0.07924079895019531, "step": 5091 }, { "epoch": 0.7095380756636244, "grad_norm": 0.3940028250217438, "learning_rate": 8.913592695123613e-06, "loss": 0.07867813110351562, "step": 5092 }, { "epoch": 0.7096774193548387, "grad_norm": 0.40666428208351135, "learning_rate": 8.905747690783517e-06, "loss": 0.08480262756347656, "step": 5093 }, { "epoch": 0.7098167630460531, "grad_norm": 0.29039615392684937, "learning_rate": 8.897905151333002e-06, "loss": 0.0776824951171875, "step": 5094 }, { "epoch": 0.7099561067372675, "grad_norm": 0.34529373049736023, "learning_rate": 8.890065078514503e-06, "loss": 0.07745933532714844, "step": 5095 }, { "epoch": 0.7100954504284819, "grad_norm": 0.29522383213043213, "learning_rate": 8.882227474069892e-06, "loss": 0.07621955871582031, "step": 5096 }, { "epoch": 0.7102347941196963, "grad_norm": 0.4033275842666626, "learning_rate": 8.874392339740518e-06, "loss": 0.09445571899414062, "step": 5097 }, { "epoch": 0.7103741378109106, "grad_norm": 0.635503888130188, "learning_rate": 8.866559677267162e-06, "loss": 0.12297248840332031, "step": 5098 }, { "epoch": 0.710513481502125, "grad_norm": 0.28513672947883606, "learning_rate": 8.858729488390068e-06, "loss": 0.06948471069335938, "step": 5099 }, { "epoch": 0.7106528251933394, "grad_norm": 0.3871840238571167, "learning_rate": 8.850901774848916e-06, "loss": 0.10392379760742188, "step": 5100 }, { "epoch": 0.7107921688845538, "grad_norm": 0.3118007779121399, "learning_rate": 8.843076538382853e-06, "loss": 0.06787872314453125, "step": 5101 }, { "epoch": 0.7109315125757681, "grad_norm": 0.3747655153274536, "learning_rate": 8.835253780730472e-06, "loss": 0.0836944580078125, "step": 5102 }, { "epoch": 0.7110708562669825, "grad_norm": 0.35966306924819946, "learning_rate": 8.827433503629805e-06, "loss": 0.08930683135986328, "step": 5103 }, { "epoch": 0.7112101999581969, "grad_norm": 0.6194000244140625, "learning_rate": 8.819615708818335e-06, "loss": 0.10882568359375, "step": 5104 }, { "epoch": 0.7113495436494113, "grad_norm": 0.32348307967185974, "learning_rate": 8.811800398032999e-06, "loss": 0.07853364944458008, "step": 5105 }, { "epoch": 0.7114888873406257, "grad_norm": 0.2370113730430603, "learning_rate": 8.803987573010191e-06, "loss": 0.0627756118774414, "step": 5106 }, { "epoch": 0.71162823103184, "grad_norm": 0.6447005867958069, "learning_rate": 8.796177235485736e-06, "loss": 0.0991220474243164, "step": 5107 }, { "epoch": 0.7117675747230544, "grad_norm": 0.46245312690734863, "learning_rate": 8.788369387194904e-06, "loss": 0.10154342651367188, "step": 5108 }, { "epoch": 0.7119069184142688, "grad_norm": 0.5178766250610352, "learning_rate": 8.78056402987243e-06, "loss": 0.09460639953613281, "step": 5109 }, { "epoch": 0.7120462621054832, "grad_norm": 0.2147335708141327, "learning_rate": 8.772761165252488e-06, "loss": 0.06746482849121094, "step": 5110 }, { "epoch": 0.7121856057966975, "grad_norm": 0.3610585927963257, "learning_rate": 8.76496079506869e-06, "loss": 0.06991767883300781, "step": 5111 }, { "epoch": 0.7123249494879119, "grad_norm": 0.544852077960968, "learning_rate": 8.757162921054099e-06, "loss": 0.10314083099365234, "step": 5112 }, { "epoch": 0.7124642931791263, "grad_norm": 0.6036769151687622, "learning_rate": 8.749367544941238e-06, "loss": 0.09000015258789062, "step": 5113 }, { "epoch": 0.7126036368703407, "grad_norm": 0.289544016122818, "learning_rate": 8.741574668462053e-06, "loss": 0.07390594482421875, "step": 5114 }, { "epoch": 0.712742980561555, "grad_norm": 0.7487357258796692, "learning_rate": 8.733784293347934e-06, "loss": 0.12241554260253906, "step": 5115 }, { "epoch": 0.7128823242527694, "grad_norm": 0.361358106136322, "learning_rate": 8.725996421329733e-06, "loss": 0.08399295806884766, "step": 5116 }, { "epoch": 0.7130216679439838, "grad_norm": 0.4703681170940399, "learning_rate": 8.718211054137744e-06, "loss": 0.07787322998046875, "step": 5117 }, { "epoch": 0.7131610116351982, "grad_norm": 0.47227171063423157, "learning_rate": 8.710428193501692e-06, "loss": 0.10083770751953125, "step": 5118 }, { "epoch": 0.7133003553264126, "grad_norm": 0.3163149356842041, "learning_rate": 8.702647841150743e-06, "loss": 0.07923126220703125, "step": 5119 }, { "epoch": 0.7134396990176269, "grad_norm": 0.43398594856262207, "learning_rate": 8.694869998813527e-06, "loss": 0.08489704132080078, "step": 5120 }, { "epoch": 0.7135790427088413, "grad_norm": 0.40075892210006714, "learning_rate": 8.68709466821809e-06, "loss": 0.07426166534423828, "step": 5121 }, { "epoch": 0.7137183864000557, "grad_norm": 0.40390872955322266, "learning_rate": 8.67932185109194e-06, "loss": 0.08249330520629883, "step": 5122 }, { "epoch": 0.7138577300912701, "grad_norm": 0.7273439168930054, "learning_rate": 8.671551549162025e-06, "loss": 0.12781524658203125, "step": 5123 }, { "epoch": 0.7139970737824844, "grad_norm": 0.39760667085647583, "learning_rate": 8.663783764154726e-06, "loss": 0.07667922973632812, "step": 5124 }, { "epoch": 0.7141364174736988, "grad_norm": 0.6939367651939392, "learning_rate": 8.656018497795855e-06, "loss": 0.11000251770019531, "step": 5125 }, { "epoch": 0.7142757611649132, "grad_norm": 0.3765064775943756, "learning_rate": 8.648255751810686e-06, "loss": 0.09102058410644531, "step": 5126 }, { "epoch": 0.7144151048561277, "grad_norm": 0.23896130919456482, "learning_rate": 8.640495527923931e-06, "loss": 0.06025886535644531, "step": 5127 }, { "epoch": 0.7145544485473421, "grad_norm": 0.4112388789653778, "learning_rate": 8.632737827859729e-06, "loss": 0.08991622924804688, "step": 5128 }, { "epoch": 0.7146937922385564, "grad_norm": 0.442043274641037, "learning_rate": 8.624982653341656e-06, "loss": 0.09262752532958984, "step": 5129 }, { "epoch": 0.7148331359297708, "grad_norm": 0.3844919204711914, "learning_rate": 8.61723000609274e-06, "loss": 0.07954025268554688, "step": 5130 }, { "epoch": 0.7149724796209852, "grad_norm": 0.6022707223892212, "learning_rate": 8.609479887835453e-06, "loss": 0.10756492614746094, "step": 5131 }, { "epoch": 0.7151118233121996, "grad_norm": 0.4081715941429138, "learning_rate": 8.601732300291674e-06, "loss": 0.07703113555908203, "step": 5132 }, { "epoch": 0.715251167003414, "grad_norm": 0.7200596332550049, "learning_rate": 8.593987245182754e-06, "loss": 0.12595748901367188, "step": 5133 }, { "epoch": 0.7153905106946283, "grad_norm": 0.4747401773929596, "learning_rate": 8.586244724229471e-06, "loss": 0.114227294921875, "step": 5134 }, { "epoch": 0.7155298543858427, "grad_norm": 0.2698807120323181, "learning_rate": 8.57850473915203e-06, "loss": 0.07819461822509766, "step": 5135 }, { "epoch": 0.7156691980770571, "grad_norm": 0.317402720451355, "learning_rate": 8.57076729167007e-06, "loss": 0.06857109069824219, "step": 5136 }, { "epoch": 0.7158085417682715, "grad_norm": 0.6743189692497253, "learning_rate": 8.563032383502685e-06, "loss": 0.12783050537109375, "step": 5137 }, { "epoch": 0.7159478854594858, "grad_norm": 1.5394914150238037, "learning_rate": 8.555300016368403e-06, "loss": 0.15388917922973633, "step": 5138 }, { "epoch": 0.7160872291507002, "grad_norm": 0.3642120659351349, "learning_rate": 8.547570191985168e-06, "loss": 0.0774688720703125, "step": 5139 }, { "epoch": 0.7162265728419146, "grad_norm": 0.36346563696861267, "learning_rate": 8.539842912070367e-06, "loss": 0.08487510681152344, "step": 5140 }, { "epoch": 0.716365916533129, "grad_norm": 0.31676119565963745, "learning_rate": 8.532118178340829e-06, "loss": 0.0726633071899414, "step": 5141 }, { "epoch": 0.7165052602243434, "grad_norm": 0.4287249743938446, "learning_rate": 8.524395992512827e-06, "loss": 0.09098434448242188, "step": 5142 }, { "epoch": 0.7166446039155577, "grad_norm": 0.32714638113975525, "learning_rate": 8.516676356302031e-06, "loss": 0.08021354675292969, "step": 5143 }, { "epoch": 0.7167839476067721, "grad_norm": 0.42301255464553833, "learning_rate": 8.508959271423589e-06, "loss": 0.09534835815429688, "step": 5144 }, { "epoch": 0.7169232912979865, "grad_norm": 0.7229560017585754, "learning_rate": 8.501244739592045e-06, "loss": 0.10836982727050781, "step": 5145 }, { "epoch": 0.7170626349892009, "grad_norm": 0.650001585483551, "learning_rate": 8.493532762521406e-06, "loss": 0.10709941387176514, "step": 5146 }, { "epoch": 0.7172019786804152, "grad_norm": 0.3265645503997803, "learning_rate": 8.485823341925084e-06, "loss": 0.07647895812988281, "step": 5147 }, { "epoch": 0.7173413223716296, "grad_norm": 0.30761218070983887, "learning_rate": 8.47811647951594e-06, "loss": 0.08565711975097656, "step": 5148 }, { "epoch": 0.717480666062844, "grad_norm": 0.6360039710998535, "learning_rate": 8.470412177006281e-06, "loss": 0.11336040496826172, "step": 5149 }, { "epoch": 0.7176200097540584, "grad_norm": 0.48578715324401855, "learning_rate": 8.462710436107796e-06, "loss": 0.10680580139160156, "step": 5150 }, { "epoch": 0.7177593534452728, "grad_norm": 0.49643251299858093, "learning_rate": 8.455011258531653e-06, "loss": 0.09367179870605469, "step": 5151 }, { "epoch": 0.7178986971364871, "grad_norm": 0.3269549012184143, "learning_rate": 8.44731464598843e-06, "loss": 0.07808589935302734, "step": 5152 }, { "epoch": 0.7180380408277015, "grad_norm": 1.104999303817749, "learning_rate": 8.439620600188147e-06, "loss": 0.1510181427001953, "step": 5153 }, { "epoch": 0.7181773845189159, "grad_norm": 0.5524479150772095, "learning_rate": 8.431929122840234e-06, "loss": 0.11038398742675781, "step": 5154 }, { "epoch": 0.7183167282101303, "grad_norm": 0.3176952302455902, "learning_rate": 8.424240215653571e-06, "loss": 0.082275390625, "step": 5155 }, { "epoch": 0.7184560719013446, "grad_norm": 0.4152560532093048, "learning_rate": 8.416553880336456e-06, "loss": 0.09180355072021484, "step": 5156 }, { "epoch": 0.718595415592559, "grad_norm": 0.3988531827926636, "learning_rate": 8.408870118596606e-06, "loss": 0.08794403076171875, "step": 5157 }, { "epoch": 0.7187347592837734, "grad_norm": 0.48612943291664124, "learning_rate": 8.401188932141184e-06, "loss": 0.11799430847167969, "step": 5158 }, { "epoch": 0.7188741029749878, "grad_norm": 0.2978157103061676, "learning_rate": 8.393510322676784e-06, "loss": 0.07317352294921875, "step": 5159 }, { "epoch": 0.7190134466662021, "grad_norm": 0.4608181118965149, "learning_rate": 8.385834291909409e-06, "loss": 0.10091590881347656, "step": 5160 }, { "epoch": 0.7191527903574165, "grad_norm": 0.2656065821647644, "learning_rate": 8.378160841544493e-06, "loss": 0.06689071655273438, "step": 5161 }, { "epoch": 0.7192921340486309, "grad_norm": 0.5172542929649353, "learning_rate": 8.370489973286907e-06, "loss": 0.09465599060058594, "step": 5162 }, { "epoch": 0.7194314777398453, "grad_norm": 0.5902307629585266, "learning_rate": 8.362821688840947e-06, "loss": 0.12065887451171875, "step": 5163 }, { "epoch": 0.7195708214310597, "grad_norm": 0.47906601428985596, "learning_rate": 8.355155989910322e-06, "loss": 0.11677932739257812, "step": 5164 }, { "epoch": 0.719710165122274, "grad_norm": 0.3854982256889343, "learning_rate": 8.347492878198185e-06, "loss": 0.08113861083984375, "step": 5165 }, { "epoch": 0.7198495088134884, "grad_norm": 0.3745138347148895, "learning_rate": 8.339832355407093e-06, "loss": 0.07414436340332031, "step": 5166 }, { "epoch": 0.7199888525047029, "grad_norm": 0.29264095425605774, "learning_rate": 8.332174423239052e-06, "loss": 0.07708597183227539, "step": 5167 }, { "epoch": 0.7201281961959173, "grad_norm": 0.7210025191307068, "learning_rate": 8.324519083395467e-06, "loss": 0.10997867584228516, "step": 5168 }, { "epoch": 0.7202675398871317, "grad_norm": 0.2720508575439453, "learning_rate": 8.316866337577185e-06, "loss": 0.07496452331542969, "step": 5169 }, { "epoch": 0.720406883578346, "grad_norm": 0.5303398966789246, "learning_rate": 8.309216187484482e-06, "loss": 0.11944007873535156, "step": 5170 }, { "epoch": 0.7205462272695604, "grad_norm": 0.3748815059661865, "learning_rate": 8.301568634817034e-06, "loss": 0.0949106216430664, "step": 5171 }, { "epoch": 0.7206855709607748, "grad_norm": 0.1971912682056427, "learning_rate": 8.29392368127395e-06, "loss": 0.06019020080566406, "step": 5172 }, { "epoch": 0.7208249146519892, "grad_norm": 0.8516234755516052, "learning_rate": 8.286281328553769e-06, "loss": 0.1330556869506836, "step": 5173 }, { "epoch": 0.7209642583432035, "grad_norm": 0.3069489002227783, "learning_rate": 8.278641578354453e-06, "loss": 0.07458877563476562, "step": 5174 }, { "epoch": 0.7211036020344179, "grad_norm": 0.2986977696418762, "learning_rate": 8.271004432373372e-06, "loss": 0.08624076843261719, "step": 5175 }, { "epoch": 0.7212429457256323, "grad_norm": 0.30304375290870667, "learning_rate": 8.263369892307334e-06, "loss": 0.07644271850585938, "step": 5176 }, { "epoch": 0.7213822894168467, "grad_norm": 0.4310969114303589, "learning_rate": 8.255737959852548e-06, "loss": 0.09969139099121094, "step": 5177 }, { "epoch": 0.7215216331080611, "grad_norm": 0.5641813278198242, "learning_rate": 8.248108636704666e-06, "loss": 0.106292724609375, "step": 5178 }, { "epoch": 0.7216609767992754, "grad_norm": 0.5855363607406616, "learning_rate": 8.240481924558739e-06, "loss": 0.11700439453125, "step": 5179 }, { "epoch": 0.7218003204904898, "grad_norm": 0.5434195399284363, "learning_rate": 8.232857825109256e-06, "loss": 0.08960437774658203, "step": 5180 }, { "epoch": 0.7219396641817042, "grad_norm": 0.3627125322818756, "learning_rate": 8.225236340050127e-06, "loss": 0.08090591430664062, "step": 5181 }, { "epoch": 0.7220790078729186, "grad_norm": 0.3675297796726227, "learning_rate": 8.217617471074648e-06, "loss": 0.09488868713378906, "step": 5182 }, { "epoch": 0.7222183515641329, "grad_norm": 0.3407084047794342, "learning_rate": 8.210001219875569e-06, "loss": 0.08603858947753906, "step": 5183 }, { "epoch": 0.7223576952553473, "grad_norm": 0.3816525638103485, "learning_rate": 8.202387588145051e-06, "loss": 0.0806427001953125, "step": 5184 }, { "epoch": 0.7224970389465617, "grad_norm": 0.3510199189186096, "learning_rate": 8.194776577574673e-06, "loss": 0.07478523254394531, "step": 5185 }, { "epoch": 0.7226363826377761, "grad_norm": 0.5290048718452454, "learning_rate": 8.187168189855421e-06, "loss": 0.1089315414428711, "step": 5186 }, { "epoch": 0.7227757263289905, "grad_norm": 0.3169694244861603, "learning_rate": 8.179562426677699e-06, "loss": 0.07172203063964844, "step": 5187 }, { "epoch": 0.7229150700202048, "grad_norm": 0.2740107774734497, "learning_rate": 8.171959289731348e-06, "loss": 0.06768798828125, "step": 5188 }, { "epoch": 0.7230544137114192, "grad_norm": 0.42453867197036743, "learning_rate": 8.164358780705596e-06, "loss": 0.0925741195678711, "step": 5189 }, { "epoch": 0.7231937574026336, "grad_norm": 0.4400254786014557, "learning_rate": 8.156760901289111e-06, "loss": 0.0936279296875, "step": 5190 }, { "epoch": 0.723333101093848, "grad_norm": 0.42008212208747864, "learning_rate": 8.149165653169976e-06, "loss": 0.09064865112304688, "step": 5191 }, { "epoch": 0.7234724447850623, "grad_norm": 0.9223529696464539, "learning_rate": 8.141573038035675e-06, "loss": 0.10186386108398438, "step": 5192 }, { "epoch": 0.7236117884762767, "grad_norm": 0.9249104261398315, "learning_rate": 8.133983057573103e-06, "loss": 0.14863204956054688, "step": 5193 }, { "epoch": 0.7237511321674911, "grad_norm": 0.43765610456466675, "learning_rate": 8.12639571346859e-06, "loss": 0.10097503662109375, "step": 5194 }, { "epoch": 0.7238904758587055, "grad_norm": 0.28605374693870544, "learning_rate": 8.118811007407878e-06, "loss": 0.06598663330078125, "step": 5195 }, { "epoch": 0.7240298195499199, "grad_norm": 0.3929120600223541, "learning_rate": 8.111228941076101e-06, "loss": 0.07799625396728516, "step": 5196 }, { "epoch": 0.7241691632411342, "grad_norm": 0.46199655532836914, "learning_rate": 8.103649516157835e-06, "loss": 0.0844879150390625, "step": 5197 }, { "epoch": 0.7243085069323486, "grad_norm": 0.41551336646080017, "learning_rate": 8.096072734337042e-06, "loss": 0.08631134033203125, "step": 5198 }, { "epoch": 0.724447850623563, "grad_norm": 0.8455735445022583, "learning_rate": 8.088498597297121e-06, "loss": 0.11363410949707031, "step": 5199 }, { "epoch": 0.7245871943147774, "grad_norm": 0.19877387583255768, "learning_rate": 8.080927106720862e-06, "loss": 0.06332778930664062, "step": 5200 }, { "epoch": 0.7247265380059917, "grad_norm": 0.37594079971313477, "learning_rate": 8.073358264290483e-06, "loss": 0.08067703247070312, "step": 5201 }, { "epoch": 0.7248658816972061, "grad_norm": 0.41934120655059814, "learning_rate": 8.065792071687615e-06, "loss": 0.09765243530273438, "step": 5202 }, { "epoch": 0.7250052253884205, "grad_norm": 0.31635111570358276, "learning_rate": 8.058228530593283e-06, "loss": 0.08084964752197266, "step": 5203 }, { "epoch": 0.7251445690796349, "grad_norm": 0.41760557889938354, "learning_rate": 8.050667642687933e-06, "loss": 0.11375045776367188, "step": 5204 }, { "epoch": 0.7252839127708492, "grad_norm": 0.4118405878543854, "learning_rate": 8.043109409651424e-06, "loss": 0.09345436096191406, "step": 5205 }, { "epoch": 0.7254232564620636, "grad_norm": 0.33801305294036865, "learning_rate": 8.03555383316303e-06, "loss": 0.08185386657714844, "step": 5206 }, { "epoch": 0.7255626001532781, "grad_norm": 0.4541131556034088, "learning_rate": 8.028000914901422e-06, "loss": 0.09766387939453125, "step": 5207 }, { "epoch": 0.7257019438444925, "grad_norm": 0.3576096296310425, "learning_rate": 8.020450656544679e-06, "loss": 0.09553861618041992, "step": 5208 }, { "epoch": 0.7258412875357069, "grad_norm": 0.5684376955032349, "learning_rate": 8.012903059770301e-06, "loss": 0.09291648864746094, "step": 5209 }, { "epoch": 0.7259806312269212, "grad_norm": 0.27862709760665894, "learning_rate": 8.005358126255199e-06, "loss": 0.07385444641113281, "step": 5210 }, { "epoch": 0.7261199749181356, "grad_norm": 0.28414681553840637, "learning_rate": 7.997815857675673e-06, "loss": 0.07109832763671875, "step": 5211 }, { "epoch": 0.72625931860935, "grad_norm": 0.5604143142700195, "learning_rate": 7.990276255707449e-06, "loss": 0.13321542739868164, "step": 5212 }, { "epoch": 0.7263986623005644, "grad_norm": 0.41795778274536133, "learning_rate": 7.982739322025663e-06, "loss": 0.07536888122558594, "step": 5213 }, { "epoch": 0.7265380059917788, "grad_norm": 0.2303725779056549, "learning_rate": 7.97520505830484e-06, "loss": 0.06885910034179688, "step": 5214 }, { "epoch": 0.7266773496829931, "grad_norm": 0.49004650115966797, "learning_rate": 7.967673466218914e-06, "loss": 0.11131477355957031, "step": 5215 }, { "epoch": 0.7268166933742075, "grad_norm": 0.37181439995765686, "learning_rate": 7.960144547441242e-06, "loss": 0.08528709411621094, "step": 5216 }, { "epoch": 0.7269560370654219, "grad_norm": 0.3378145396709442, "learning_rate": 7.952618303644584e-06, "loss": 0.09295272827148438, "step": 5217 }, { "epoch": 0.7270953807566363, "grad_norm": 0.2376299798488617, "learning_rate": 7.945094736501094e-06, "loss": 0.06809425354003906, "step": 5218 }, { "epoch": 0.7272347244478506, "grad_norm": 0.24654662609100342, "learning_rate": 7.937573847682325e-06, "loss": 0.06886863708496094, "step": 5219 }, { "epoch": 0.727374068139065, "grad_norm": 0.6324467062950134, "learning_rate": 7.930055638859267e-06, "loss": 0.10488510131835938, "step": 5220 }, { "epoch": 0.7275134118302794, "grad_norm": 0.5221948623657227, "learning_rate": 7.922540111702275e-06, "loss": 0.10553550720214844, "step": 5221 }, { "epoch": 0.7276527555214938, "grad_norm": 0.6176754236221313, "learning_rate": 7.915027267881139e-06, "loss": 0.10611343383789062, "step": 5222 }, { "epoch": 0.7277920992127082, "grad_norm": 0.4901888072490692, "learning_rate": 7.907517109065046e-06, "loss": 0.09523391723632812, "step": 5223 }, { "epoch": 0.7279314429039225, "grad_norm": 0.20954963564872742, "learning_rate": 7.900009636922576e-06, "loss": 0.06577491760253906, "step": 5224 }, { "epoch": 0.7280707865951369, "grad_norm": 0.4891282021999359, "learning_rate": 7.89250485312171e-06, "loss": 0.10221195220947266, "step": 5225 }, { "epoch": 0.7282101302863513, "grad_norm": 0.5454464554786682, "learning_rate": 7.885002759329845e-06, "loss": 0.1093902587890625, "step": 5226 }, { "epoch": 0.7283494739775657, "grad_norm": 0.36035647988319397, "learning_rate": 7.877503357213787e-06, "loss": 0.08115196228027344, "step": 5227 }, { "epoch": 0.72848881766878, "grad_norm": 0.4631879925727844, "learning_rate": 7.870006648439712e-06, "loss": 0.11802482604980469, "step": 5228 }, { "epoch": 0.7286281613599944, "grad_norm": 0.2796304225921631, "learning_rate": 7.862512634673237e-06, "loss": 0.07593154907226562, "step": 5229 }, { "epoch": 0.7287675050512088, "grad_norm": 0.19027690589427948, "learning_rate": 7.855021317579341e-06, "loss": 0.059413909912109375, "step": 5230 }, { "epoch": 0.7289068487424232, "grad_norm": 0.39414161443710327, "learning_rate": 7.847532698822442e-06, "loss": 0.08272361755371094, "step": 5231 }, { "epoch": 0.7290461924336376, "grad_norm": 0.2920358180999756, "learning_rate": 7.840046780066325e-06, "loss": 0.07503509521484375, "step": 5232 }, { "epoch": 0.7291855361248519, "grad_norm": 0.413739413022995, "learning_rate": 7.832563562974196e-06, "loss": 0.09159469604492188, "step": 5233 }, { "epoch": 0.7293248798160663, "grad_norm": 0.41796594858169556, "learning_rate": 7.825083049208665e-06, "loss": 0.08780097961425781, "step": 5234 }, { "epoch": 0.7294642235072807, "grad_norm": 0.3385968506336212, "learning_rate": 7.817605240431718e-06, "loss": 0.097625732421875, "step": 5235 }, { "epoch": 0.7296035671984951, "grad_norm": 0.35310012102127075, "learning_rate": 7.810130138304755e-06, "loss": 0.08129501342773438, "step": 5236 }, { "epoch": 0.7297429108897094, "grad_norm": 0.4573066830635071, "learning_rate": 7.802657744488575e-06, "loss": 0.09685707092285156, "step": 5237 }, { "epoch": 0.7298822545809238, "grad_norm": 0.5055689811706543, "learning_rate": 7.79518806064338e-06, "loss": 0.11349296569824219, "step": 5238 }, { "epoch": 0.7300215982721382, "grad_norm": 0.2029559165239334, "learning_rate": 7.78772108842876e-06, "loss": 0.06750869750976562, "step": 5239 }, { "epoch": 0.7301609419633526, "grad_norm": 0.31361621618270874, "learning_rate": 7.780256829503692e-06, "loss": 0.07887077331542969, "step": 5240 }, { "epoch": 0.730300285654567, "grad_norm": 0.28357189893722534, "learning_rate": 7.772795285526578e-06, "loss": 0.06754493713378906, "step": 5241 }, { "epoch": 0.7304396293457813, "grad_norm": 0.5571112036705017, "learning_rate": 7.765336458155205e-06, "loss": 0.1217947006225586, "step": 5242 }, { "epoch": 0.7305789730369957, "grad_norm": 0.3396696448326111, "learning_rate": 7.757880349046742e-06, "loss": 0.0818939208984375, "step": 5243 }, { "epoch": 0.7307183167282101, "grad_norm": 0.5522879958152771, "learning_rate": 7.750426959857782e-06, "loss": 0.10689544677734375, "step": 5244 }, { "epoch": 0.7308576604194245, "grad_norm": 0.45427289605140686, "learning_rate": 7.74297629224428e-06, "loss": 0.08866119384765625, "step": 5245 }, { "epoch": 0.7309970041106388, "grad_norm": 0.5256122946739197, "learning_rate": 7.735528347861623e-06, "loss": 0.1161651611328125, "step": 5246 }, { "epoch": 0.7311363478018533, "grad_norm": 0.4239473044872284, "learning_rate": 7.728083128364555e-06, "loss": 0.08913612365722656, "step": 5247 }, { "epoch": 0.7312756914930677, "grad_norm": 0.4787594676017761, "learning_rate": 7.720640635407244e-06, "loss": 0.09527015686035156, "step": 5248 }, { "epoch": 0.7314150351842821, "grad_norm": 0.23695620894432068, "learning_rate": 7.713200870643246e-06, "loss": 0.06842422485351562, "step": 5249 }, { "epoch": 0.7315543788754965, "grad_norm": 0.5999666452407837, "learning_rate": 7.705763835725507e-06, "loss": 0.1345539093017578, "step": 5250 }, { "epoch": 0.7316937225667108, "grad_norm": 0.4842648208141327, "learning_rate": 7.69832953230635e-06, "loss": 0.11662101745605469, "step": 5251 }, { "epoch": 0.7318330662579252, "grad_norm": 0.5254838466644287, "learning_rate": 7.69089796203752e-06, "loss": 0.11694192886352539, "step": 5252 }, { "epoch": 0.7319724099491396, "grad_norm": 0.4872169494628906, "learning_rate": 7.683469126570152e-06, "loss": 0.10805892944335938, "step": 5253 }, { "epoch": 0.732111753640354, "grad_norm": 0.6147377490997314, "learning_rate": 7.67604302755474e-06, "loss": 0.10059547424316406, "step": 5254 }, { "epoch": 0.7322510973315683, "grad_norm": 0.7399958968162537, "learning_rate": 7.668619666641216e-06, "loss": 0.11346578598022461, "step": 5255 }, { "epoch": 0.7323904410227827, "grad_norm": 0.3494555950164795, "learning_rate": 7.661199045478874e-06, "loss": 0.08198165893554688, "step": 5256 }, { "epoch": 0.7325297847139971, "grad_norm": 0.20980234444141388, "learning_rate": 7.653781165716396e-06, "loss": 0.053356170654296875, "step": 5257 }, { "epoch": 0.7326691284052115, "grad_norm": 0.3114386200904846, "learning_rate": 7.646366029001873e-06, "loss": 0.06892585754394531, "step": 5258 }, { "epoch": 0.7328084720964259, "grad_norm": 0.5675535202026367, "learning_rate": 7.638953636982789e-06, "loss": 0.11671257019042969, "step": 5259 }, { "epoch": 0.7329478157876402, "grad_norm": 0.6186447739601135, "learning_rate": 7.631543991305998e-06, "loss": 0.09685134887695312, "step": 5260 }, { "epoch": 0.7330871594788546, "grad_norm": 0.5841290354728699, "learning_rate": 7.62413709361775e-06, "loss": 0.08207225799560547, "step": 5261 }, { "epoch": 0.733226503170069, "grad_norm": 0.36281493306159973, "learning_rate": 7.616732945563692e-06, "loss": 0.07967281341552734, "step": 5262 }, { "epoch": 0.7333658468612834, "grad_norm": 0.4248640239238739, "learning_rate": 7.609331548788865e-06, "loss": 0.08991622924804688, "step": 5263 }, { "epoch": 0.7335051905524977, "grad_norm": 0.38816145062446594, "learning_rate": 7.601932904937679e-06, "loss": 0.0976715087890625, "step": 5264 }, { "epoch": 0.7336445342437121, "grad_norm": 0.5123850107192993, "learning_rate": 7.594537015653949e-06, "loss": 0.09481048583984375, "step": 5265 }, { "epoch": 0.7337838779349265, "grad_norm": 0.37643522024154663, "learning_rate": 7.5871438825808786e-06, "loss": 0.08455467224121094, "step": 5266 }, { "epoch": 0.7339232216261409, "grad_norm": 0.28728827834129333, "learning_rate": 7.579753507361048e-06, "loss": 0.07533073425292969, "step": 5267 }, { "epoch": 0.7340625653173553, "grad_norm": 0.33166998624801636, "learning_rate": 7.572365891636422e-06, "loss": 0.07481765747070312, "step": 5268 }, { "epoch": 0.7342019090085696, "grad_norm": 0.7789461612701416, "learning_rate": 7.5649810370483666e-06, "loss": 0.11207962036132812, "step": 5269 }, { "epoch": 0.734341252699784, "grad_norm": 0.36456742882728577, "learning_rate": 7.557598945237634e-06, "loss": 0.09762763977050781, "step": 5270 }, { "epoch": 0.7344805963909984, "grad_norm": 0.2507241368293762, "learning_rate": 7.550219617844354e-06, "loss": 0.0762176513671875, "step": 5271 }, { "epoch": 0.7346199400822128, "grad_norm": 0.5907347202301025, "learning_rate": 7.542843056508034e-06, "loss": 0.1425609588623047, "step": 5272 }, { "epoch": 0.7347592837734271, "grad_norm": 0.22814048826694489, "learning_rate": 7.535469262867583e-06, "loss": 0.062175750732421875, "step": 5273 }, { "epoch": 0.7348986274646415, "grad_norm": 0.3741365969181061, "learning_rate": 7.528098238561301e-06, "loss": 0.09040260314941406, "step": 5274 }, { "epoch": 0.7350379711558559, "grad_norm": 0.20582439005374908, "learning_rate": 7.520729985226842e-06, "loss": 0.06995010375976562, "step": 5275 }, { "epoch": 0.7351773148470703, "grad_norm": 0.35709893703460693, "learning_rate": 7.513364504501283e-06, "loss": 0.09345054626464844, "step": 5276 }, { "epoch": 0.7353166585382847, "grad_norm": 0.5450581312179565, "learning_rate": 7.506001798021049e-06, "loss": 0.09265708923339844, "step": 5277 }, { "epoch": 0.735456002229499, "grad_norm": 0.2907041907310486, "learning_rate": 7.498641867421981e-06, "loss": 0.07734107971191406, "step": 5278 }, { "epoch": 0.7355953459207134, "grad_norm": 0.2761858105659485, "learning_rate": 7.4912847143392706e-06, "loss": 0.07556962966918945, "step": 5279 }, { "epoch": 0.7357346896119278, "grad_norm": 0.4762260615825653, "learning_rate": 7.483930340407519e-06, "loss": 0.08451175689697266, "step": 5280 }, { "epoch": 0.7358740333031422, "grad_norm": 0.3836508095264435, "learning_rate": 7.476578747260712e-06, "loss": 0.09780693054199219, "step": 5281 }, { "epoch": 0.7360133769943565, "grad_norm": 0.4055038094520569, "learning_rate": 7.469229936532179e-06, "loss": 0.0874786376953125, "step": 5282 }, { "epoch": 0.7361527206855709, "grad_norm": 0.539602518081665, "learning_rate": 7.46188390985467e-06, "loss": 0.1146240234375, "step": 5283 }, { "epoch": 0.7362920643767853, "grad_norm": 0.2968486547470093, "learning_rate": 7.454540668860309e-06, "loss": 0.081634521484375, "step": 5284 }, { "epoch": 0.7364314080679997, "grad_norm": 0.20609451830387115, "learning_rate": 7.4472002151805985e-06, "loss": 0.06349563598632812, "step": 5285 }, { "epoch": 0.736570751759214, "grad_norm": 0.3517487049102783, "learning_rate": 7.4398625504464105e-06, "loss": 0.08644676208496094, "step": 5286 }, { "epoch": 0.7367100954504285, "grad_norm": 0.58217853307724, "learning_rate": 7.432527676288015e-06, "loss": 0.13733863830566406, "step": 5287 }, { "epoch": 0.7368494391416429, "grad_norm": 0.4435776174068451, "learning_rate": 7.425195594335053e-06, "loss": 0.09581756591796875, "step": 5288 }, { "epoch": 0.7369887828328573, "grad_norm": 0.19641825556755066, "learning_rate": 7.417866306216532e-06, "loss": 0.05942535400390625, "step": 5289 }, { "epoch": 0.7371281265240717, "grad_norm": 0.2820185422897339, "learning_rate": 7.4105398135608645e-06, "loss": 0.07469654083251953, "step": 5290 }, { "epoch": 0.737267470215286, "grad_norm": 0.510333240032196, "learning_rate": 7.403216117995835e-06, "loss": 0.09612464904785156, "step": 5291 }, { "epoch": 0.7374068139065004, "grad_norm": 0.4178142547607422, "learning_rate": 7.395895221148594e-06, "loss": 0.09444808959960938, "step": 5292 }, { "epoch": 0.7375461575977148, "grad_norm": 0.733722984790802, "learning_rate": 7.388577124645671e-06, "loss": 0.12704849243164062, "step": 5293 }, { "epoch": 0.7376855012889292, "grad_norm": 0.2610227167606354, "learning_rate": 7.381261830112989e-06, "loss": 0.06534099578857422, "step": 5294 }, { "epoch": 0.7378248449801436, "grad_norm": 0.3517918884754181, "learning_rate": 7.373949339175843e-06, "loss": 0.08024120330810547, "step": 5295 }, { "epoch": 0.7379641886713579, "grad_norm": 0.7256416082382202, "learning_rate": 7.366639653458889e-06, "loss": 0.12233352661132812, "step": 5296 }, { "epoch": 0.7381035323625723, "grad_norm": 0.4519415497779846, "learning_rate": 7.359332774586188e-06, "loss": 0.09293365478515625, "step": 5297 }, { "epoch": 0.7382428760537867, "grad_norm": 0.5059886574745178, "learning_rate": 7.352028704181145e-06, "loss": 0.08743095397949219, "step": 5298 }, { "epoch": 0.7383822197450011, "grad_norm": 0.2818610668182373, "learning_rate": 7.344727443866573e-06, "loss": 0.0724020004272461, "step": 5299 }, { "epoch": 0.7385215634362154, "grad_norm": 0.24482236802577972, "learning_rate": 7.3374289952646305e-06, "loss": 0.06834030151367188, "step": 5300 }, { "epoch": 0.7386609071274298, "grad_norm": 0.5065958499908447, "learning_rate": 7.330133359996876e-06, "loss": 0.10745620727539062, "step": 5301 }, { "epoch": 0.7388002508186442, "grad_norm": 0.4421199858188629, "learning_rate": 7.322840539684235e-06, "loss": 0.09714508056640625, "step": 5302 }, { "epoch": 0.7389395945098586, "grad_norm": 0.44000640511512756, "learning_rate": 7.3155505359470046e-06, "loss": 0.08619213104248047, "step": 5303 }, { "epoch": 0.739078938201073, "grad_norm": 0.4587100148200989, "learning_rate": 7.308263350404845e-06, "loss": 0.1168050765991211, "step": 5304 }, { "epoch": 0.7392182818922873, "grad_norm": 0.4899766147136688, "learning_rate": 7.3009789846768116e-06, "loss": 0.10746002197265625, "step": 5305 }, { "epoch": 0.7393576255835017, "grad_norm": 0.4415782392024994, "learning_rate": 7.2936974403813336e-06, "loss": 0.10408592224121094, "step": 5306 }, { "epoch": 0.7394969692747161, "grad_norm": 0.34515702724456787, "learning_rate": 7.286418719136186e-06, "loss": 0.07592487335205078, "step": 5307 }, { "epoch": 0.7396363129659305, "grad_norm": 0.47330015897750854, "learning_rate": 7.279142822558549e-06, "loss": 0.08856391906738281, "step": 5308 }, { "epoch": 0.7397756566571448, "grad_norm": 0.5328090190887451, "learning_rate": 7.271869752264949e-06, "loss": 0.10736751556396484, "step": 5309 }, { "epoch": 0.7399150003483592, "grad_norm": 0.4696916937828064, "learning_rate": 7.264599509871309e-06, "loss": 0.12309455871582031, "step": 5310 }, { "epoch": 0.7400543440395736, "grad_norm": 0.5248294472694397, "learning_rate": 7.2573320969928974e-06, "loss": 0.1027841567993164, "step": 5311 }, { "epoch": 0.740193687730788, "grad_norm": 0.4099537432193756, "learning_rate": 7.250067515244373e-06, "loss": 0.08274650573730469, "step": 5312 }, { "epoch": 0.7403330314220024, "grad_norm": 0.33823150396347046, "learning_rate": 7.2428057662397665e-06, "loss": 0.07387733459472656, "step": 5313 }, { "epoch": 0.7404723751132167, "grad_norm": 0.3604521155357361, "learning_rate": 7.235546851592468e-06, "loss": 0.08296585083007812, "step": 5314 }, { "epoch": 0.7406117188044311, "grad_norm": 0.5736944079399109, "learning_rate": 7.228290772915234e-06, "loss": 0.12298965454101562, "step": 5315 }, { "epoch": 0.7407510624956455, "grad_norm": 0.21735985577106476, "learning_rate": 7.22103753182021e-06, "loss": 0.054782867431640625, "step": 5316 }, { "epoch": 0.7408904061868599, "grad_norm": 0.9880133867263794, "learning_rate": 7.213787129918901e-06, "loss": 0.11534690856933594, "step": 5317 }, { "epoch": 0.7410297498780742, "grad_norm": 0.29176798462867737, "learning_rate": 7.206539568822179e-06, "loss": 0.07190132141113281, "step": 5318 }, { "epoch": 0.7411690935692886, "grad_norm": 0.32569587230682373, "learning_rate": 7.199294850140279e-06, "loss": 0.08114337921142578, "step": 5319 }, { "epoch": 0.741308437260503, "grad_norm": 0.43902289867401123, "learning_rate": 7.1920529754828235e-06, "loss": 0.09025764465332031, "step": 5320 }, { "epoch": 0.7414477809517174, "grad_norm": 0.576722264289856, "learning_rate": 7.184813946458782e-06, "loss": 0.1326751708984375, "step": 5321 }, { "epoch": 0.7415871246429317, "grad_norm": 0.4572266638278961, "learning_rate": 7.177577764676504e-06, "loss": 0.08825874328613281, "step": 5322 }, { "epoch": 0.7417264683341461, "grad_norm": 0.5242393016815186, "learning_rate": 7.170344431743707e-06, "loss": 0.10158061981201172, "step": 5323 }, { "epoch": 0.7418658120253605, "grad_norm": 0.5776848793029785, "learning_rate": 7.163113949267484e-06, "loss": 0.09824752807617188, "step": 5324 }, { "epoch": 0.7420051557165749, "grad_norm": 0.5140780806541443, "learning_rate": 7.155886318854257e-06, "loss": 0.09934616088867188, "step": 5325 }, { "epoch": 0.7421444994077893, "grad_norm": 0.4499206840991974, "learning_rate": 7.148661542109854e-06, "loss": 0.09918212890625, "step": 5326 }, { "epoch": 0.7422838430990036, "grad_norm": 0.2885253131389618, "learning_rate": 7.141439620639463e-06, "loss": 0.08504486083984375, "step": 5327 }, { "epoch": 0.7424231867902181, "grad_norm": 0.3308602273464203, "learning_rate": 7.134220556047613e-06, "loss": 0.09352684020996094, "step": 5328 }, { "epoch": 0.7425625304814325, "grad_norm": 0.40207818150520325, "learning_rate": 7.127004349938234e-06, "loss": 0.09848403930664062, "step": 5329 }, { "epoch": 0.7427018741726469, "grad_norm": 0.4874917268753052, "learning_rate": 7.119791003914584e-06, "loss": 0.09022808074951172, "step": 5330 }, { "epoch": 0.7428412178638613, "grad_norm": 0.3834069073200226, "learning_rate": 7.112580519579322e-06, "loss": 0.0862579345703125, "step": 5331 }, { "epoch": 0.7429805615550756, "grad_norm": 0.4183783233165741, "learning_rate": 7.105372898534435e-06, "loss": 0.09955406188964844, "step": 5332 }, { "epoch": 0.74311990524629, "grad_norm": 0.43529242277145386, "learning_rate": 7.098168142381301e-06, "loss": 0.08988571166992188, "step": 5333 }, { "epoch": 0.7432592489375044, "grad_norm": 0.25895678997039795, "learning_rate": 7.090966252720659e-06, "loss": 0.07162952423095703, "step": 5334 }, { "epoch": 0.7433985926287188, "grad_norm": 0.48741841316223145, "learning_rate": 7.083767231152598e-06, "loss": 0.09276580810546875, "step": 5335 }, { "epoch": 0.7435379363199331, "grad_norm": 0.5888963937759399, "learning_rate": 7.076571079276569e-06, "loss": 0.10720252990722656, "step": 5336 }, { "epoch": 0.7436772800111475, "grad_norm": 0.6804060935974121, "learning_rate": 7.069377798691397e-06, "loss": 0.10675048828125, "step": 5337 }, { "epoch": 0.7438166237023619, "grad_norm": 0.49692052602767944, "learning_rate": 7.0621873909952765e-06, "loss": 0.10655021667480469, "step": 5338 }, { "epoch": 0.7439559673935763, "grad_norm": 0.6393535733222961, "learning_rate": 7.05499985778574e-06, "loss": 0.11463356018066406, "step": 5339 }, { "epoch": 0.7440953110847907, "grad_norm": 0.2779795527458191, "learning_rate": 7.047815200659691e-06, "loss": 0.07988166809082031, "step": 5340 }, { "epoch": 0.744234654776005, "grad_norm": 0.47452548146247864, "learning_rate": 7.040633421213401e-06, "loss": 0.10791873931884766, "step": 5341 }, { "epoch": 0.7443739984672194, "grad_norm": 0.44544273614883423, "learning_rate": 7.033454521042502e-06, "loss": 0.09237480163574219, "step": 5342 }, { "epoch": 0.7445133421584338, "grad_norm": 0.5560184121131897, "learning_rate": 7.026278501741972e-06, "loss": 0.10832786560058594, "step": 5343 }, { "epoch": 0.7446526858496482, "grad_norm": 0.8864823579788208, "learning_rate": 7.019105364906165e-06, "loss": 0.1255340576171875, "step": 5344 }, { "epoch": 0.7447920295408625, "grad_norm": 0.8156023025512695, "learning_rate": 7.011935112128791e-06, "loss": 0.13353538513183594, "step": 5345 }, { "epoch": 0.7449313732320769, "grad_norm": 0.2233741581439972, "learning_rate": 7.004767745002916e-06, "loss": 0.067474365234375, "step": 5346 }, { "epoch": 0.7450707169232913, "grad_norm": 0.39506271481513977, "learning_rate": 6.997603265120951e-06, "loss": 0.10436248779296875, "step": 5347 }, { "epoch": 0.7452100606145057, "grad_norm": 0.7981248497962952, "learning_rate": 6.990441674074695e-06, "loss": 0.13108444213867188, "step": 5348 }, { "epoch": 0.74534940430572, "grad_norm": 0.36387279629707336, "learning_rate": 6.98328297345529e-06, "loss": 0.09256649017333984, "step": 5349 }, { "epoch": 0.7454887479969344, "grad_norm": 0.5401174426078796, "learning_rate": 6.9761271648532306e-06, "loss": 0.10043525695800781, "step": 5350 }, { "epoch": 0.7456280916881488, "grad_norm": 0.5807051062583923, "learning_rate": 6.968974249858371e-06, "loss": 0.10367202758789062, "step": 5351 }, { "epoch": 0.7457674353793632, "grad_norm": 0.4250313937664032, "learning_rate": 6.9618242300599284e-06, "loss": 0.0880126953125, "step": 5352 }, { "epoch": 0.7459067790705776, "grad_norm": 0.3035208582878113, "learning_rate": 6.9546771070464815e-06, "loss": 0.07814979553222656, "step": 5353 }, { "epoch": 0.7460461227617919, "grad_norm": 0.626301646232605, "learning_rate": 6.947532882405945e-06, "loss": 0.1120452880859375, "step": 5354 }, { "epoch": 0.7461854664530063, "grad_norm": 0.3918405771255493, "learning_rate": 6.940391557725616e-06, "loss": 0.072967529296875, "step": 5355 }, { "epoch": 0.7463248101442207, "grad_norm": 0.42057374119758606, "learning_rate": 6.933253134592128e-06, "loss": 0.09484481811523438, "step": 5356 }, { "epoch": 0.7464641538354351, "grad_norm": 0.34041526913642883, "learning_rate": 6.9261176145914655e-06, "loss": 0.08697509765625, "step": 5357 }, { "epoch": 0.7466034975266495, "grad_norm": 0.4116964638233185, "learning_rate": 6.9189849993089905e-06, "loss": 0.09820365905761719, "step": 5358 }, { "epoch": 0.7467428412178638, "grad_norm": 0.3223320245742798, "learning_rate": 6.911855290329408e-06, "loss": 0.09090805053710938, "step": 5359 }, { "epoch": 0.7468821849090782, "grad_norm": 0.5847012996673584, "learning_rate": 6.904728489236767e-06, "loss": 0.10735130310058594, "step": 5360 }, { "epoch": 0.7470215286002926, "grad_norm": 0.631091296672821, "learning_rate": 6.897604597614491e-06, "loss": 0.11324691772460938, "step": 5361 }, { "epoch": 0.747160872291507, "grad_norm": 0.42836451530456543, "learning_rate": 6.890483617045336e-06, "loss": 0.0932769775390625, "step": 5362 }, { "epoch": 0.7473002159827213, "grad_norm": 0.6613325476646423, "learning_rate": 6.883365549111432e-06, "loss": 0.11774444580078125, "step": 5363 }, { "epoch": 0.7474395596739357, "grad_norm": 0.25608140230178833, "learning_rate": 6.876250395394237e-06, "loss": 0.0735931396484375, "step": 5364 }, { "epoch": 0.7475789033651501, "grad_norm": 0.7796933650970459, "learning_rate": 6.869138157474586e-06, "loss": 0.13457107543945312, "step": 5365 }, { "epoch": 0.7477182470563645, "grad_norm": 0.28765279054641724, "learning_rate": 6.862028836932659e-06, "loss": 0.06716537475585938, "step": 5366 }, { "epoch": 0.7478575907475788, "grad_norm": 0.47190096974372864, "learning_rate": 6.854922435347979e-06, "loss": 0.10978317260742188, "step": 5367 }, { "epoch": 0.7479969344387933, "grad_norm": 0.9470019936561584, "learning_rate": 6.847818954299421e-06, "loss": 0.16739654541015625, "step": 5368 }, { "epoch": 0.7481362781300077, "grad_norm": 0.4666794538497925, "learning_rate": 6.840718395365222e-06, "loss": 0.10582828521728516, "step": 5369 }, { "epoch": 0.7482756218212221, "grad_norm": 0.29082751274108887, "learning_rate": 6.833620760122972e-06, "loss": 0.07102394104003906, "step": 5370 }, { "epoch": 0.7484149655124365, "grad_norm": 0.4226193428039551, "learning_rate": 6.826526050149594e-06, "loss": 0.11211395263671875, "step": 5371 }, { "epoch": 0.7485543092036508, "grad_norm": 0.4173780679702759, "learning_rate": 6.819434267021366e-06, "loss": 0.0889892578125, "step": 5372 }, { "epoch": 0.7486936528948652, "grad_norm": 0.45405134558677673, "learning_rate": 6.812345412313926e-06, "loss": 0.10229873657226562, "step": 5373 }, { "epoch": 0.7488329965860796, "grad_norm": 0.31372514367103577, "learning_rate": 6.805259487602261e-06, "loss": 0.07987022399902344, "step": 5374 }, { "epoch": 0.748972340277294, "grad_norm": 0.3155912756919861, "learning_rate": 6.798176494460693e-06, "loss": 0.08767318725585938, "step": 5375 }, { "epoch": 0.7491116839685084, "grad_norm": 0.39213237166404724, "learning_rate": 6.791096434462909e-06, "loss": 0.0906209945678711, "step": 5376 }, { "epoch": 0.7492510276597227, "grad_norm": 0.19547727704048157, "learning_rate": 6.7840193091819285e-06, "loss": 0.05392146110534668, "step": 5377 }, { "epoch": 0.7493903713509371, "grad_norm": 0.3898513615131378, "learning_rate": 6.776945120190137e-06, "loss": 0.09250068664550781, "step": 5378 }, { "epoch": 0.7495297150421515, "grad_norm": 0.2871211767196655, "learning_rate": 6.769873869059247e-06, "loss": 0.07194328308105469, "step": 5379 }, { "epoch": 0.7496690587333659, "grad_norm": 0.6385897397994995, "learning_rate": 6.762805557360335e-06, "loss": 0.10859298706054688, "step": 5380 }, { "epoch": 0.7498084024245802, "grad_norm": 0.2599533796310425, "learning_rate": 6.755740186663822e-06, "loss": 0.06892967224121094, "step": 5381 }, { "epoch": 0.7499477461157946, "grad_norm": 0.3884093165397644, "learning_rate": 6.748677758539468e-06, "loss": 0.08082294464111328, "step": 5382 }, { "epoch": 0.750087089807009, "grad_norm": 0.8340564966201782, "learning_rate": 6.741618274556379e-06, "loss": 0.15668296813964844, "step": 5383 }, { "epoch": 0.7502264334982234, "grad_norm": 0.5126813054084778, "learning_rate": 6.734561736283014e-06, "loss": 0.08181953430175781, "step": 5384 }, { "epoch": 0.7503657771894378, "grad_norm": 0.33062878251075745, "learning_rate": 6.727508145287183e-06, "loss": 0.07105255126953125, "step": 5385 }, { "epoch": 0.7505051208806521, "grad_norm": 0.509175717830658, "learning_rate": 6.720457503136017e-06, "loss": 0.09186744689941406, "step": 5386 }, { "epoch": 0.7506444645718665, "grad_norm": 0.25099271535873413, "learning_rate": 6.713409811396028e-06, "loss": 0.06648826599121094, "step": 5387 }, { "epoch": 0.7507838082630809, "grad_norm": 0.35701096057891846, "learning_rate": 6.706365071633037e-06, "loss": 0.08265209197998047, "step": 5388 }, { "epoch": 0.7509231519542953, "grad_norm": 0.6357024908065796, "learning_rate": 6.699323285412222e-06, "loss": 0.10497093200683594, "step": 5389 }, { "epoch": 0.7510624956455096, "grad_norm": 0.43411293625831604, "learning_rate": 6.692284454298115e-06, "loss": 0.0875101089477539, "step": 5390 }, { "epoch": 0.751201839336724, "grad_norm": 0.21202746033668518, "learning_rate": 6.685248579854589e-06, "loss": 0.06991004943847656, "step": 5391 }, { "epoch": 0.7513411830279384, "grad_norm": 0.8575679063796997, "learning_rate": 6.678215663644845e-06, "loss": 0.1467437744140625, "step": 5392 }, { "epoch": 0.7514805267191528, "grad_norm": 0.3817749619483948, "learning_rate": 6.671185707231434e-06, "loss": 0.08721923828125, "step": 5393 }, { "epoch": 0.7516198704103672, "grad_norm": 0.23752187192440033, "learning_rate": 6.664158712176256e-06, "loss": 0.07210922241210938, "step": 5394 }, { "epoch": 0.7517592141015815, "grad_norm": 0.3807697892189026, "learning_rate": 6.657134680040558e-06, "loss": 0.09526824951171875, "step": 5395 }, { "epoch": 0.7518985577927959, "grad_norm": 0.30172351002693176, "learning_rate": 6.650113612384903e-06, "loss": 0.07815361022949219, "step": 5396 }, { "epoch": 0.7520379014840103, "grad_norm": 0.4551621973514557, "learning_rate": 6.643095510769229e-06, "loss": 0.09092330932617188, "step": 5397 }, { "epoch": 0.7521772451752247, "grad_norm": 0.6545920372009277, "learning_rate": 6.63608037675278e-06, "loss": 0.12024831771850586, "step": 5398 }, { "epoch": 0.752316588866439, "grad_norm": 0.2520396411418915, "learning_rate": 6.629068211894176e-06, "loss": 0.06812477111816406, "step": 5399 }, { "epoch": 0.7524559325576534, "grad_norm": 0.4703008234500885, "learning_rate": 6.622059017751346e-06, "loss": 0.09744071960449219, "step": 5400 }, { "epoch": 0.7525952762488678, "grad_norm": 0.38878002762794495, "learning_rate": 6.615052795881576e-06, "loss": 0.08088350296020508, "step": 5401 }, { "epoch": 0.7527346199400822, "grad_norm": 0.49537014961242676, "learning_rate": 6.6080495478415e-06, "loss": 0.10373878479003906, "step": 5402 }, { "epoch": 0.7528739636312965, "grad_norm": 0.2493787556886673, "learning_rate": 6.60104927518707e-06, "loss": 0.06769561767578125, "step": 5403 }, { "epoch": 0.7530133073225109, "grad_norm": 0.5747022032737732, "learning_rate": 6.594051979473582e-06, "loss": 0.09014892578125, "step": 5404 }, { "epoch": 0.7531526510137253, "grad_norm": 0.4645173251628876, "learning_rate": 6.58705766225568e-06, "loss": 0.08118724822998047, "step": 5405 }, { "epoch": 0.7532919947049397, "grad_norm": 0.41708967089653015, "learning_rate": 6.580066325087351e-06, "loss": 0.08668899536132812, "step": 5406 }, { "epoch": 0.7534313383961541, "grad_norm": 0.2979566156864166, "learning_rate": 6.573077969521892e-06, "loss": 0.09412384033203125, "step": 5407 }, { "epoch": 0.7535706820873685, "grad_norm": 0.29419851303100586, "learning_rate": 6.566092597111977e-06, "loss": 0.06911849975585938, "step": 5408 }, { "epoch": 0.7537100257785829, "grad_norm": 0.32274869084358215, "learning_rate": 6.559110209409578e-06, "loss": 0.07682609558105469, "step": 5409 }, { "epoch": 0.7538493694697973, "grad_norm": 0.3934105336666107, "learning_rate": 6.552130807966035e-06, "loss": 0.08617210388183594, "step": 5410 }, { "epoch": 0.7539887131610117, "grad_norm": 0.5384942293167114, "learning_rate": 6.5451543943320005e-06, "loss": 0.10007476806640625, "step": 5411 }, { "epoch": 0.7541280568522261, "grad_norm": 0.2780991196632385, "learning_rate": 6.538180970057482e-06, "loss": 0.06848621368408203, "step": 5412 }, { "epoch": 0.7542674005434404, "grad_norm": 0.6526579856872559, "learning_rate": 6.531210536691819e-06, "loss": 0.10422706604003906, "step": 5413 }, { "epoch": 0.7544067442346548, "grad_norm": 0.4447813630104065, "learning_rate": 6.524243095783675e-06, "loss": 0.10326576232910156, "step": 5414 }, { "epoch": 0.7545460879258692, "grad_norm": 0.570479154586792, "learning_rate": 6.517278648881054e-06, "loss": 0.1380290985107422, "step": 5415 }, { "epoch": 0.7546854316170836, "grad_norm": 0.3983791768550873, "learning_rate": 6.5103171975312995e-06, "loss": 0.1024017333984375, "step": 5416 }, { "epoch": 0.754824775308298, "grad_norm": 0.35590988397598267, "learning_rate": 6.503358743281098e-06, "loss": 0.09337806701660156, "step": 5417 }, { "epoch": 0.7549641189995123, "grad_norm": 0.23069748282432556, "learning_rate": 6.496403287676443e-06, "loss": 0.06593894958496094, "step": 5418 }, { "epoch": 0.7551034626907267, "grad_norm": 0.32855483889579773, "learning_rate": 6.489450832262692e-06, "loss": 0.07491683959960938, "step": 5419 }, { "epoch": 0.7552428063819411, "grad_norm": 0.5590240955352783, "learning_rate": 6.482501378584511e-06, "loss": 0.10749435424804688, "step": 5420 }, { "epoch": 0.7553821500731555, "grad_norm": 0.35634827613830566, "learning_rate": 6.475554928185912e-06, "loss": 0.08460807800292969, "step": 5421 }, { "epoch": 0.7555214937643698, "grad_norm": 0.5878504514694214, "learning_rate": 6.468611482610238e-06, "loss": 0.11061286926269531, "step": 5422 }, { "epoch": 0.7556608374555842, "grad_norm": 0.26749250292778015, "learning_rate": 6.461671043400166e-06, "loss": 0.07394790649414062, "step": 5423 }, { "epoch": 0.7558001811467986, "grad_norm": 0.5609726309776306, "learning_rate": 6.454733612097717e-06, "loss": 0.12810325622558594, "step": 5424 }, { "epoch": 0.755939524838013, "grad_norm": 0.44277918338775635, "learning_rate": 6.4477991902442e-06, "loss": 0.10015392303466797, "step": 5425 }, { "epoch": 0.7560788685292273, "grad_norm": 0.37568068504333496, "learning_rate": 6.440867779380302e-06, "loss": 0.08872032165527344, "step": 5426 }, { "epoch": 0.7562182122204417, "grad_norm": 0.28825080394744873, "learning_rate": 6.43393938104603e-06, "loss": 0.06662559509277344, "step": 5427 }, { "epoch": 0.7563575559116561, "grad_norm": 0.35220131278038025, "learning_rate": 6.427013996780702e-06, "loss": 0.08224868774414062, "step": 5428 }, { "epoch": 0.7564968996028705, "grad_norm": 0.6465475559234619, "learning_rate": 6.420091628122995e-06, "loss": 0.1110992431640625, "step": 5429 }, { "epoch": 0.7566362432940849, "grad_norm": 0.5498183369636536, "learning_rate": 6.413172276610886e-06, "loss": 0.10001659393310547, "step": 5430 }, { "epoch": 0.7567755869852992, "grad_norm": 0.5067670941352844, "learning_rate": 6.406255943781711e-06, "loss": 0.09395980834960938, "step": 5431 }, { "epoch": 0.7569149306765136, "grad_norm": 0.4883936643600464, "learning_rate": 6.3993426311721095e-06, "loss": 0.10237503051757812, "step": 5432 }, { "epoch": 0.757054274367728, "grad_norm": 0.6049573421478271, "learning_rate": 6.3924323403180685e-06, "loss": 0.12650299072265625, "step": 5433 }, { "epoch": 0.7571936180589424, "grad_norm": 0.3602919578552246, "learning_rate": 6.385525072754899e-06, "loss": 0.08189773559570312, "step": 5434 }, { "epoch": 0.7573329617501567, "grad_norm": 0.3477003574371338, "learning_rate": 6.378620830017237e-06, "loss": 0.08017921447753906, "step": 5435 }, { "epoch": 0.7574723054413711, "grad_norm": 0.44920551776885986, "learning_rate": 6.371719613639036e-06, "loss": 0.11045646667480469, "step": 5436 }, { "epoch": 0.7576116491325855, "grad_norm": 1.0397391319274902, "learning_rate": 6.3648214251536e-06, "loss": 0.13048744201660156, "step": 5437 }, { "epoch": 0.7577509928237999, "grad_norm": 0.6216824650764465, "learning_rate": 6.357926266093552e-06, "loss": 0.10159492492675781, "step": 5438 }, { "epoch": 0.7578903365150143, "grad_norm": 0.44514256715774536, "learning_rate": 6.351034137990828e-06, "loss": 0.079864501953125, "step": 5439 }, { "epoch": 0.7580296802062286, "grad_norm": 0.23278479278087616, "learning_rate": 6.344145042376715e-06, "loss": 0.05853271484375, "step": 5440 }, { "epoch": 0.758169023897443, "grad_norm": 0.5219769477844238, "learning_rate": 6.337258980781797e-06, "loss": 0.09805488586425781, "step": 5441 }, { "epoch": 0.7583083675886574, "grad_norm": 0.5640836358070374, "learning_rate": 6.330375954736014e-06, "loss": 0.13898849487304688, "step": 5442 }, { "epoch": 0.7584477112798718, "grad_norm": 0.41109687089920044, "learning_rate": 6.323495965768605e-06, "loss": 0.11105728149414062, "step": 5443 }, { "epoch": 0.7585870549710861, "grad_norm": 0.5368844270706177, "learning_rate": 6.3166190154081545e-06, "loss": 0.08562040328979492, "step": 5444 }, { "epoch": 0.7587263986623005, "grad_norm": 0.2581464946269989, "learning_rate": 6.309745105182566e-06, "loss": 0.07268905639648438, "step": 5445 }, { "epoch": 0.7588657423535149, "grad_norm": 0.7036799192428589, "learning_rate": 6.302874236619061e-06, "loss": 0.11418724060058594, "step": 5446 }, { "epoch": 0.7590050860447293, "grad_norm": 0.3938368260860443, "learning_rate": 6.296006411244184e-06, "loss": 0.07325172424316406, "step": 5447 }, { "epoch": 0.7591444297359438, "grad_norm": 0.19827838242053986, "learning_rate": 6.289141630583815e-06, "loss": 0.06301307678222656, "step": 5448 }, { "epoch": 0.7592837734271581, "grad_norm": 0.4769246280193329, "learning_rate": 6.28227989616316e-06, "loss": 0.10567283630371094, "step": 5449 }, { "epoch": 0.7594231171183725, "grad_norm": 0.4471933841705322, "learning_rate": 6.275421209506729e-06, "loss": 0.11755561828613281, "step": 5450 }, { "epoch": 0.7595624608095869, "grad_norm": 0.48801907896995544, "learning_rate": 6.268565572138361e-06, "loss": 0.08877182006835938, "step": 5451 }, { "epoch": 0.7597018045008013, "grad_norm": 0.4508819282054901, "learning_rate": 6.2617129855812295e-06, "loss": 0.10540008544921875, "step": 5452 }, { "epoch": 0.7598411481920156, "grad_norm": 0.6613976359367371, "learning_rate": 6.254863451357829e-06, "loss": 0.11122798919677734, "step": 5453 }, { "epoch": 0.75998049188323, "grad_norm": 1.0758857727050781, "learning_rate": 6.248016970989957e-06, "loss": 0.139434814453125, "step": 5454 }, { "epoch": 0.7601198355744444, "grad_norm": 0.47574397921562195, "learning_rate": 6.241173545998749e-06, "loss": 0.10753250122070312, "step": 5455 }, { "epoch": 0.7602591792656588, "grad_norm": 0.2507322132587433, "learning_rate": 6.2343331779046745e-06, "loss": 0.07076644897460938, "step": 5456 }, { "epoch": 0.7603985229568732, "grad_norm": 0.4457326829433441, "learning_rate": 6.227495868227477e-06, "loss": 0.0998220443725586, "step": 5457 }, { "epoch": 0.7605378666480875, "grad_norm": 0.2072775512933731, "learning_rate": 6.220661618486268e-06, "loss": 0.061641693115234375, "step": 5458 }, { "epoch": 0.7606772103393019, "grad_norm": 0.34904319047927856, "learning_rate": 6.2138304301994654e-06, "loss": 0.079803466796875, "step": 5459 }, { "epoch": 0.7608165540305163, "grad_norm": 0.26476773619651794, "learning_rate": 6.207002304884793e-06, "loss": 0.06981658935546875, "step": 5460 }, { "epoch": 0.7609558977217307, "grad_norm": 0.252936989068985, "learning_rate": 6.200177244059313e-06, "loss": 0.07707786560058594, "step": 5461 }, { "epoch": 0.761095241412945, "grad_norm": 0.6309165358543396, "learning_rate": 6.19335524923939e-06, "loss": 0.11703681945800781, "step": 5462 }, { "epoch": 0.7612345851041594, "grad_norm": 0.45011815428733826, "learning_rate": 6.186536321940724e-06, "loss": 0.08563995361328125, "step": 5463 }, { "epoch": 0.7613739287953738, "grad_norm": 0.45311301946640015, "learning_rate": 6.179720463678314e-06, "loss": 0.10060882568359375, "step": 5464 }, { "epoch": 0.7615132724865882, "grad_norm": 0.38116151094436646, "learning_rate": 6.172907675966495e-06, "loss": 0.07877540588378906, "step": 5465 }, { "epoch": 0.7616526161778026, "grad_norm": 0.34794652462005615, "learning_rate": 6.16609796031892e-06, "loss": 0.06635856628417969, "step": 5466 }, { "epoch": 0.7617919598690169, "grad_norm": 0.36350882053375244, "learning_rate": 6.159291318248544e-06, "loss": 0.07588005065917969, "step": 5467 }, { "epoch": 0.7619313035602313, "grad_norm": 0.42620009183883667, "learning_rate": 6.152487751267641e-06, "loss": 0.0813446044921875, "step": 5468 }, { "epoch": 0.7620706472514457, "grad_norm": 0.60691899061203, "learning_rate": 6.145687260887818e-06, "loss": 0.12911415100097656, "step": 5469 }, { "epoch": 0.7622099909426601, "grad_norm": 0.3409835398197174, "learning_rate": 6.138889848619991e-06, "loss": 0.07182979583740234, "step": 5470 }, { "epoch": 0.7623493346338744, "grad_norm": 0.6642006635665894, "learning_rate": 6.132095515974385e-06, "loss": 0.10474300384521484, "step": 5471 }, { "epoch": 0.7624886783250888, "grad_norm": 0.40079253911972046, "learning_rate": 6.125304264460541e-06, "loss": 0.078277587890625, "step": 5472 }, { "epoch": 0.7626280220163032, "grad_norm": 0.31252363324165344, "learning_rate": 6.118516095587321e-06, "loss": 0.07710742950439453, "step": 5473 }, { "epoch": 0.7627673657075176, "grad_norm": 0.3905729353427887, "learning_rate": 6.1117310108629156e-06, "loss": 0.08337783813476562, "step": 5474 }, { "epoch": 0.762906709398732, "grad_norm": 0.3330124020576477, "learning_rate": 6.104949011794796e-06, "loss": 0.08472251892089844, "step": 5475 }, { "epoch": 0.7630460530899463, "grad_norm": 0.4900970458984375, "learning_rate": 6.098170099889777e-06, "loss": 0.11617851257324219, "step": 5476 }, { "epoch": 0.7631853967811607, "grad_norm": 0.3379685878753662, "learning_rate": 6.0913942766539855e-06, "loss": 0.07949018478393555, "step": 5477 }, { "epoch": 0.7633247404723751, "grad_norm": 0.5697566866874695, "learning_rate": 6.084621543592846e-06, "loss": 0.09378433227539062, "step": 5478 }, { "epoch": 0.7634640841635895, "grad_norm": 0.38149791955947876, "learning_rate": 6.0778519022110985e-06, "loss": 0.09528732299804688, "step": 5479 }, { "epoch": 0.7636034278548038, "grad_norm": 0.47144532203674316, "learning_rate": 6.071085354012812e-06, "loss": 0.07474136352539062, "step": 5480 }, { "epoch": 0.7637427715460182, "grad_norm": 0.39604148268699646, "learning_rate": 6.064321900501362e-06, "loss": 0.08310317993164062, "step": 5481 }, { "epoch": 0.7638821152372326, "grad_norm": 0.3993886113166809, "learning_rate": 6.057561543179429e-06, "loss": 0.08984375, "step": 5482 }, { "epoch": 0.764021458928447, "grad_norm": 0.4063778519630432, "learning_rate": 6.050804283549005e-06, "loss": 0.09253883361816406, "step": 5483 }, { "epoch": 0.7641608026196614, "grad_norm": 0.35410287976264954, "learning_rate": 6.0440501231114025e-06, "loss": 0.08151626586914062, "step": 5484 }, { "epoch": 0.7643001463108757, "grad_norm": 0.39233893156051636, "learning_rate": 6.03729906336725e-06, "loss": 0.07767963409423828, "step": 5485 }, { "epoch": 0.7644394900020901, "grad_norm": 0.5650411248207092, "learning_rate": 6.030551105816465e-06, "loss": 0.10586166381835938, "step": 5486 }, { "epoch": 0.7645788336933045, "grad_norm": 0.4234045445919037, "learning_rate": 6.0238062519583015e-06, "loss": 0.09926033020019531, "step": 5487 }, { "epoch": 0.764718177384519, "grad_norm": 0.4568929076194763, "learning_rate": 6.017064503291307e-06, "loss": 0.09903907775878906, "step": 5488 }, { "epoch": 0.7648575210757333, "grad_norm": 0.538196861743927, "learning_rate": 6.01032586131334e-06, "loss": 0.11121177673339844, "step": 5489 }, { "epoch": 0.7649968647669477, "grad_norm": 0.655273973941803, "learning_rate": 6.0035903275215755e-06, "loss": 0.09779071807861328, "step": 5490 }, { "epoch": 0.7651362084581621, "grad_norm": 0.30899253487586975, "learning_rate": 5.996857903412503e-06, "loss": 0.07186508178710938, "step": 5491 }, { "epoch": 0.7652755521493765, "grad_norm": 0.38374000787734985, "learning_rate": 5.990128590481907e-06, "loss": 0.08440780639648438, "step": 5492 }, { "epoch": 0.7654148958405909, "grad_norm": 0.4119897782802582, "learning_rate": 5.983402390224882e-06, "loss": 0.08262062072753906, "step": 5493 }, { "epoch": 0.7655542395318052, "grad_norm": 0.3379344642162323, "learning_rate": 5.97667930413584e-06, "loss": 0.080535888671875, "step": 5494 }, { "epoch": 0.7656935832230196, "grad_norm": 0.45356515049934387, "learning_rate": 5.969959333708506e-06, "loss": 0.07735633850097656, "step": 5495 }, { "epoch": 0.765832926914234, "grad_norm": 0.35990554094314575, "learning_rate": 5.963242480435889e-06, "loss": 0.09521961212158203, "step": 5496 }, { "epoch": 0.7659722706054484, "grad_norm": 0.4882234036922455, "learning_rate": 5.95652874581033e-06, "loss": 0.09277915954589844, "step": 5497 }, { "epoch": 0.7661116142966627, "grad_norm": 0.33397021889686584, "learning_rate": 5.9498181313234726e-06, "loss": 0.08430719375610352, "step": 5498 }, { "epoch": 0.7662509579878771, "grad_norm": 0.430325984954834, "learning_rate": 5.943110638466254e-06, "loss": 0.09017753601074219, "step": 5499 }, { "epoch": 0.7663903016790915, "grad_norm": 0.27272331714630127, "learning_rate": 5.9364062687289226e-06, "loss": 0.06490516662597656, "step": 5500 }, { "epoch": 0.7665296453703059, "grad_norm": 0.6101490259170532, "learning_rate": 5.929705023601038e-06, "loss": 0.09710025787353516, "step": 5501 }, { "epoch": 0.7666689890615203, "grad_norm": 0.6803492307662964, "learning_rate": 5.923006904571476e-06, "loss": 0.12291812896728516, "step": 5502 }, { "epoch": 0.7668083327527346, "grad_norm": 0.1838691532611847, "learning_rate": 5.9163119131283966e-06, "loss": 0.057074546813964844, "step": 5503 }, { "epoch": 0.766947676443949, "grad_norm": 0.5395563244819641, "learning_rate": 5.909620050759266e-06, "loss": 0.11144733428955078, "step": 5504 }, { "epoch": 0.7670870201351634, "grad_norm": 0.31303671002388, "learning_rate": 5.902931318950871e-06, "loss": 0.07358169555664062, "step": 5505 }, { "epoch": 0.7672263638263778, "grad_norm": 0.4697885811328888, "learning_rate": 5.896245719189304e-06, "loss": 0.0849456787109375, "step": 5506 }, { "epoch": 0.7673657075175921, "grad_norm": 0.28624579310417175, "learning_rate": 5.889563252959935e-06, "loss": 0.07756233215332031, "step": 5507 }, { "epoch": 0.7675050512088065, "grad_norm": 0.303877592086792, "learning_rate": 5.882883921747473e-06, "loss": 0.07081031799316406, "step": 5508 }, { "epoch": 0.7676443949000209, "grad_norm": 0.23901784420013428, "learning_rate": 5.876207727035897e-06, "loss": 0.06409931182861328, "step": 5509 }, { "epoch": 0.7677837385912353, "grad_norm": 0.33295851945877075, "learning_rate": 5.869534670308519e-06, "loss": 0.0933990478515625, "step": 5510 }, { "epoch": 0.7679230822824497, "grad_norm": 0.3850051462650299, "learning_rate": 5.862864753047926e-06, "loss": 0.07532501220703125, "step": 5511 }, { "epoch": 0.768062425973664, "grad_norm": 0.47002583742141724, "learning_rate": 5.856197976736029e-06, "loss": 0.10259819030761719, "step": 5512 }, { "epoch": 0.7682017696648784, "grad_norm": 0.49294349551200867, "learning_rate": 5.8495343428540395e-06, "loss": 0.09721755981445312, "step": 5513 }, { "epoch": 0.7683411133560928, "grad_norm": 0.49381691217422485, "learning_rate": 5.84287385288246e-06, "loss": 0.11520957946777344, "step": 5514 }, { "epoch": 0.7684804570473072, "grad_norm": 0.4883812963962555, "learning_rate": 5.836216508301089e-06, "loss": 0.0957489013671875, "step": 5515 }, { "epoch": 0.7686198007385215, "grad_norm": 0.29631319642066956, "learning_rate": 5.829562310589048e-06, "loss": 0.07316017150878906, "step": 5516 }, { "epoch": 0.7687591444297359, "grad_norm": 0.3663982152938843, "learning_rate": 5.822911261224748e-06, "loss": 0.06248760223388672, "step": 5517 }, { "epoch": 0.7688984881209503, "grad_norm": 0.4885948598384857, "learning_rate": 5.816263361685892e-06, "loss": 0.09225845336914062, "step": 5518 }, { "epoch": 0.7690378318121647, "grad_norm": 0.6836559772491455, "learning_rate": 5.809618613449503e-06, "loss": 0.10099411010742188, "step": 5519 }, { "epoch": 0.769177175503379, "grad_norm": 0.4479810297489166, "learning_rate": 5.802977017991888e-06, "loss": 0.09682273864746094, "step": 5520 }, { "epoch": 0.7693165191945934, "grad_norm": 0.4251222312450409, "learning_rate": 5.7963385767886496e-06, "loss": 0.09038352966308594, "step": 5521 }, { "epoch": 0.7694558628858078, "grad_norm": 0.4616117775440216, "learning_rate": 5.789703291314703e-06, "loss": 0.10788917541503906, "step": 5522 }, { "epoch": 0.7695952065770222, "grad_norm": 0.2932809293270111, "learning_rate": 5.783071163044259e-06, "loss": 0.06887245178222656, "step": 5523 }, { "epoch": 0.7697345502682366, "grad_norm": 0.3136294484138489, "learning_rate": 5.776442193450835e-06, "loss": 0.0786275863647461, "step": 5524 }, { "epoch": 0.7698738939594509, "grad_norm": 0.28551772236824036, "learning_rate": 5.769816384007216e-06, "loss": 0.07081794738769531, "step": 5525 }, { "epoch": 0.7700132376506653, "grad_norm": 0.7451992630958557, "learning_rate": 5.7631937361855175e-06, "loss": 0.1413872241973877, "step": 5526 }, { "epoch": 0.7701525813418797, "grad_norm": 0.47857722640037537, "learning_rate": 5.756574251457141e-06, "loss": 0.08774089813232422, "step": 5527 }, { "epoch": 0.7702919250330941, "grad_norm": 0.4239037036895752, "learning_rate": 5.74995793129278e-06, "loss": 0.09536552429199219, "step": 5528 }, { "epoch": 0.7704312687243086, "grad_norm": 0.4977094233036041, "learning_rate": 5.743344777162439e-06, "loss": 0.09756088256835938, "step": 5529 }, { "epoch": 0.7705706124155229, "grad_norm": 0.6402494311332703, "learning_rate": 5.736734790535394e-06, "loss": 0.10307693481445312, "step": 5530 }, { "epoch": 0.7707099561067373, "grad_norm": 0.5160558223724365, "learning_rate": 5.7301279728802525e-06, "loss": 0.08398151397705078, "step": 5531 }, { "epoch": 0.7708492997979517, "grad_norm": 0.5487915277481079, "learning_rate": 5.72352432566488e-06, "loss": 0.10751914978027344, "step": 5532 }, { "epoch": 0.7709886434891661, "grad_norm": 0.3306606411933899, "learning_rate": 5.716923850356466e-06, "loss": 0.08510017395019531, "step": 5533 }, { "epoch": 0.7711279871803804, "grad_norm": 0.5041764974594116, "learning_rate": 5.71032654842149e-06, "loss": 0.09428596496582031, "step": 5534 }, { "epoch": 0.7712673308715948, "grad_norm": 0.24946224689483643, "learning_rate": 5.703732421325716e-06, "loss": 0.0734100341796875, "step": 5535 }, { "epoch": 0.7714066745628092, "grad_norm": 0.7002207636833191, "learning_rate": 5.697141470534204e-06, "loss": 0.10991287231445312, "step": 5536 }, { "epoch": 0.7715460182540236, "grad_norm": 0.45745134353637695, "learning_rate": 5.690553697511316e-06, "loss": 0.10733795166015625, "step": 5537 }, { "epoch": 0.771685361945238, "grad_norm": 0.46072080731391907, "learning_rate": 5.683969103720712e-06, "loss": 0.0908966064453125, "step": 5538 }, { "epoch": 0.7718247056364523, "grad_norm": 0.5528069138526917, "learning_rate": 5.677387690625329e-06, "loss": 0.11517524719238281, "step": 5539 }, { "epoch": 0.7719640493276667, "grad_norm": 0.31519708037376404, "learning_rate": 5.670809459687414e-06, "loss": 0.07634162902832031, "step": 5540 }, { "epoch": 0.7721033930188811, "grad_norm": 0.2139206975698471, "learning_rate": 5.66423441236849e-06, "loss": 0.06270027160644531, "step": 5541 }, { "epoch": 0.7722427367100955, "grad_norm": 0.3998766541481018, "learning_rate": 5.657662550129394e-06, "loss": 0.08623504638671875, "step": 5542 }, { "epoch": 0.7723820804013098, "grad_norm": 0.3840036392211914, "learning_rate": 5.6510938744302356e-06, "loss": 0.09958648681640625, "step": 5543 }, { "epoch": 0.7725214240925242, "grad_norm": 0.38918596506118774, "learning_rate": 5.644528386730424e-06, "loss": 0.08456611633300781, "step": 5544 }, { "epoch": 0.7726607677837386, "grad_norm": 0.4766157269477844, "learning_rate": 5.637966088488671e-06, "loss": 0.08827400207519531, "step": 5545 }, { "epoch": 0.772800111474953, "grad_norm": 0.6479074954986572, "learning_rate": 5.631406981162961e-06, "loss": 0.10059928894042969, "step": 5546 }, { "epoch": 0.7729394551661674, "grad_norm": 0.3935074210166931, "learning_rate": 5.624851066210575e-06, "loss": 0.07424545288085938, "step": 5547 }, { "epoch": 0.7730787988573817, "grad_norm": 0.33431318402290344, "learning_rate": 5.618298345088091e-06, "loss": 0.08692455291748047, "step": 5548 }, { "epoch": 0.7732181425485961, "grad_norm": 0.60147625207901, "learning_rate": 5.611748819251382e-06, "loss": 0.11832809448242188, "step": 5549 }, { "epoch": 0.7733574862398105, "grad_norm": 0.5956194996833801, "learning_rate": 5.605202490155588e-06, "loss": 0.07928943634033203, "step": 5550 }, { "epoch": 0.7734968299310249, "grad_norm": 0.5900894999504089, "learning_rate": 5.5986593592551694e-06, "loss": 0.10564613342285156, "step": 5551 }, { "epoch": 0.7736361736222392, "grad_norm": 0.26753196120262146, "learning_rate": 5.592119428003848e-06, "loss": 0.07294082641601562, "step": 5552 }, { "epoch": 0.7737755173134536, "grad_norm": 0.5651355981826782, "learning_rate": 5.585582697854657e-06, "loss": 0.10560798645019531, "step": 5553 }, { "epoch": 0.773914861004668, "grad_norm": 0.30781468749046326, "learning_rate": 5.579049170259896e-06, "loss": 0.07196426391601562, "step": 5554 }, { "epoch": 0.7740542046958824, "grad_norm": 0.44568315148353577, "learning_rate": 5.572518846671176e-06, "loss": 0.07974910736083984, "step": 5555 }, { "epoch": 0.7741935483870968, "grad_norm": 0.33378368616104126, "learning_rate": 5.565991728539395e-06, "loss": 0.07940101623535156, "step": 5556 }, { "epoch": 0.7743328920783111, "grad_norm": 0.4446316659450531, "learning_rate": 5.559467817314705e-06, "loss": 0.08727455139160156, "step": 5557 }, { "epoch": 0.7744722357695255, "grad_norm": 0.47160443663597107, "learning_rate": 5.552947114446583e-06, "loss": 0.07864952087402344, "step": 5558 }, { "epoch": 0.7746115794607399, "grad_norm": 0.5461838841438293, "learning_rate": 5.546429621383786e-06, "loss": 0.09097099304199219, "step": 5559 }, { "epoch": 0.7747509231519543, "grad_norm": 0.3548069894313812, "learning_rate": 5.5399153395743425e-06, "loss": 0.08426284790039062, "step": 5560 }, { "epoch": 0.7748902668431686, "grad_norm": 0.32090651988983154, "learning_rate": 5.533404270465585e-06, "loss": 0.07843208312988281, "step": 5561 }, { "epoch": 0.775029610534383, "grad_norm": 0.4261753261089325, "learning_rate": 5.526896415504115e-06, "loss": 0.08635997772216797, "step": 5562 }, { "epoch": 0.7751689542255974, "grad_norm": 0.342864453792572, "learning_rate": 5.520391776135841e-06, "loss": 0.0870370864868164, "step": 5563 }, { "epoch": 0.7753082979168118, "grad_norm": 0.42381200194358826, "learning_rate": 5.513890353805933e-06, "loss": 0.09576988220214844, "step": 5564 }, { "epoch": 0.7754476416080262, "grad_norm": 0.6680832505226135, "learning_rate": 5.507392149958865e-06, "loss": 0.13741111755371094, "step": 5565 }, { "epoch": 0.7755869852992405, "grad_norm": 0.7433701753616333, "learning_rate": 5.500897166038397e-06, "loss": 0.11650466918945312, "step": 5566 }, { "epoch": 0.7757263289904549, "grad_norm": 0.4303596019744873, "learning_rate": 5.494405403487557e-06, "loss": 0.08211326599121094, "step": 5567 }, { "epoch": 0.7758656726816693, "grad_norm": 0.36084893345832825, "learning_rate": 5.487916863748664e-06, "loss": 0.0764923095703125, "step": 5568 }, { "epoch": 0.7760050163728838, "grad_norm": 0.6295400857925415, "learning_rate": 5.481431548263325e-06, "loss": 0.11561870574951172, "step": 5569 }, { "epoch": 0.7761443600640981, "grad_norm": 0.4392228424549103, "learning_rate": 5.474949458472438e-06, "loss": 0.08215045928955078, "step": 5570 }, { "epoch": 0.7762837037553125, "grad_norm": 0.3167163133621216, "learning_rate": 5.468470595816162e-06, "loss": 0.0771026611328125, "step": 5571 }, { "epoch": 0.7764230474465269, "grad_norm": 0.47042712569236755, "learning_rate": 5.461994961733967e-06, "loss": 0.09822845458984375, "step": 5572 }, { "epoch": 0.7765623911377413, "grad_norm": 0.25248804688453674, "learning_rate": 5.455522557664576e-06, "loss": 0.06484031677246094, "step": 5573 }, { "epoch": 0.7767017348289557, "grad_norm": 0.43427860736846924, "learning_rate": 5.449053385046023e-06, "loss": 0.09099197387695312, "step": 5574 }, { "epoch": 0.77684107852017, "grad_norm": 0.48409879207611084, "learning_rate": 5.442587445315599e-06, "loss": 0.08309555053710938, "step": 5575 }, { "epoch": 0.7769804222113844, "grad_norm": 0.39322492480278015, "learning_rate": 5.436124739909892e-06, "loss": 0.08971405029296875, "step": 5576 }, { "epoch": 0.7771197659025988, "grad_norm": 0.45462679862976074, "learning_rate": 5.429665270264774e-06, "loss": 0.09873485565185547, "step": 5577 }, { "epoch": 0.7772591095938132, "grad_norm": 0.4180141091346741, "learning_rate": 5.423209037815389e-06, "loss": 0.08861541748046875, "step": 5578 }, { "epoch": 0.7773984532850275, "grad_norm": 0.7758762240409851, "learning_rate": 5.416756043996154e-06, "loss": 0.12624740600585938, "step": 5579 }, { "epoch": 0.7775377969762419, "grad_norm": 0.45777758955955505, "learning_rate": 5.4103062902407855e-06, "loss": 0.08131980895996094, "step": 5580 }, { "epoch": 0.7776771406674563, "grad_norm": 0.41972222924232483, "learning_rate": 5.403859777982279e-06, "loss": 0.10394477844238281, "step": 5581 }, { "epoch": 0.7778164843586707, "grad_norm": 0.758634626865387, "learning_rate": 5.397416508652891e-06, "loss": 0.12643051147460938, "step": 5582 }, { "epoch": 0.7779558280498851, "grad_norm": 0.3732863664627075, "learning_rate": 5.390976483684167e-06, "loss": 0.08740472793579102, "step": 5583 }, { "epoch": 0.7780951717410994, "grad_norm": 0.886250376701355, "learning_rate": 5.384539704506939e-06, "loss": 0.10259819030761719, "step": 5584 }, { "epoch": 0.7782345154323138, "grad_norm": 0.24178580939769745, "learning_rate": 5.378106172551319e-06, "loss": 0.07463836669921875, "step": 5585 }, { "epoch": 0.7783738591235282, "grad_norm": 0.4198436141014099, "learning_rate": 5.371675889246677e-06, "loss": 0.09327888488769531, "step": 5586 }, { "epoch": 0.7785132028147426, "grad_norm": 0.5162523984909058, "learning_rate": 5.3652488560216875e-06, "loss": 0.07857704162597656, "step": 5587 }, { "epoch": 0.7786525465059569, "grad_norm": 0.296465665102005, "learning_rate": 5.358825074304286e-06, "loss": 0.08316993713378906, "step": 5588 }, { "epoch": 0.7787918901971713, "grad_norm": 0.37294113636016846, "learning_rate": 5.352404545521683e-06, "loss": 0.0830841064453125, "step": 5589 }, { "epoch": 0.7789312338883857, "grad_norm": 0.41440248489379883, "learning_rate": 5.345987271100381e-06, "loss": 0.0898895263671875, "step": 5590 }, { "epoch": 0.7790705775796001, "grad_norm": 0.3802894353866577, "learning_rate": 5.339573252466155e-06, "loss": 0.07274198532104492, "step": 5591 }, { "epoch": 0.7792099212708145, "grad_norm": 0.32686641812324524, "learning_rate": 5.333162491044044e-06, "loss": 0.07315731048583984, "step": 5592 }, { "epoch": 0.7793492649620288, "grad_norm": 0.5207377672195435, "learning_rate": 5.3267549882583825e-06, "loss": 0.100677490234375, "step": 5593 }, { "epoch": 0.7794886086532432, "grad_norm": 0.23007819056510925, "learning_rate": 5.320350745532761e-06, "loss": 0.06632232666015625, "step": 5594 }, { "epoch": 0.7796279523444576, "grad_norm": 0.5154330134391785, "learning_rate": 5.3139497642900696e-06, "loss": 0.0999603271484375, "step": 5595 }, { "epoch": 0.779767296035672, "grad_norm": 0.34746482968330383, "learning_rate": 5.307552045952445e-06, "loss": 0.09080934524536133, "step": 5596 }, { "epoch": 0.7799066397268863, "grad_norm": 0.24995028972625732, "learning_rate": 5.3011575919413214e-06, "loss": 0.07035255432128906, "step": 5597 }, { "epoch": 0.7800459834181007, "grad_norm": 0.3719792366027832, "learning_rate": 5.294766403677407e-06, "loss": 0.08910274505615234, "step": 5598 }, { "epoch": 0.7801853271093151, "grad_norm": 0.5692002177238464, "learning_rate": 5.288378482580671e-06, "loss": 0.09650802612304688, "step": 5599 }, { "epoch": 0.7803246708005295, "grad_norm": 0.3977873921394348, "learning_rate": 5.281993830070357e-06, "loss": 0.08350563049316406, "step": 5600 }, { "epoch": 0.7804640144917439, "grad_norm": 0.4569106996059418, "learning_rate": 5.275612447565e-06, "loss": 0.08640003204345703, "step": 5601 }, { "epoch": 0.7806033581829582, "grad_norm": 0.44899600744247437, "learning_rate": 5.269234336482396e-06, "loss": 0.10199260711669922, "step": 5602 }, { "epoch": 0.7807427018741726, "grad_norm": 0.4218426048755646, "learning_rate": 5.262859498239614e-06, "loss": 0.10093879699707031, "step": 5603 }, { "epoch": 0.780882045565387, "grad_norm": 0.7482976913452148, "learning_rate": 5.256487934252992e-06, "loss": 0.11282920837402344, "step": 5604 }, { "epoch": 0.7810213892566014, "grad_norm": 0.6332219243049622, "learning_rate": 5.25011964593815e-06, "loss": 0.11429595947265625, "step": 5605 }, { "epoch": 0.7811607329478157, "grad_norm": 0.4180949628353119, "learning_rate": 5.243754634709983e-06, "loss": 0.07964134216308594, "step": 5606 }, { "epoch": 0.7813000766390301, "grad_norm": 0.3166016638278961, "learning_rate": 5.237392901982641e-06, "loss": 0.08282470703125, "step": 5607 }, { "epoch": 0.7814394203302445, "grad_norm": 0.26847293972969055, "learning_rate": 5.2310344491695585e-06, "loss": 0.06253623962402344, "step": 5608 }, { "epoch": 0.781578764021459, "grad_norm": 0.6265087723731995, "learning_rate": 5.2246792776834466e-06, "loss": 0.10777664184570312, "step": 5609 }, { "epoch": 0.7817181077126734, "grad_norm": 0.5848155617713928, "learning_rate": 5.218327388936273e-06, "loss": 0.10179519653320312, "step": 5610 }, { "epoch": 0.7818574514038877, "grad_norm": 0.22869634628295898, "learning_rate": 5.211978784339275e-06, "loss": 0.056471824645996094, "step": 5611 }, { "epoch": 0.7819967950951021, "grad_norm": 0.639868974685669, "learning_rate": 5.205633465302977e-06, "loss": 0.11487007141113281, "step": 5612 }, { "epoch": 0.7821361387863165, "grad_norm": 0.4403250515460968, "learning_rate": 5.199291433237168e-06, "loss": 0.08856010437011719, "step": 5613 }, { "epoch": 0.7822754824775309, "grad_norm": 0.41663333773612976, "learning_rate": 5.192952689550894e-06, "loss": 0.08444833755493164, "step": 5614 }, { "epoch": 0.7824148261687452, "grad_norm": 0.2434936910867691, "learning_rate": 5.186617235652478e-06, "loss": 0.06649303436279297, "step": 5615 }, { "epoch": 0.7825541698599596, "grad_norm": 0.3980183005332947, "learning_rate": 5.180285072949516e-06, "loss": 0.09190559387207031, "step": 5616 }, { "epoch": 0.782693513551174, "grad_norm": 0.1845494508743286, "learning_rate": 5.173956202848878e-06, "loss": 0.05714225769042969, "step": 5617 }, { "epoch": 0.7828328572423884, "grad_norm": 0.5550017952919006, "learning_rate": 5.167630626756681e-06, "loss": 0.11956214904785156, "step": 5618 }, { "epoch": 0.7829722009336028, "grad_norm": 0.6702423691749573, "learning_rate": 5.161308346078335e-06, "loss": 0.12084197998046875, "step": 5619 }, { "epoch": 0.7831115446248171, "grad_norm": 0.4007166922092438, "learning_rate": 5.154989362218501e-06, "loss": 0.075897216796875, "step": 5620 }, { "epoch": 0.7832508883160315, "grad_norm": 0.4318609833717346, "learning_rate": 5.148673676581109e-06, "loss": 0.09497261047363281, "step": 5621 }, { "epoch": 0.7833902320072459, "grad_norm": 0.39287474751472473, "learning_rate": 5.142361290569365e-06, "loss": 0.09283447265625, "step": 5622 }, { "epoch": 0.7835295756984603, "grad_norm": 0.35759952664375305, "learning_rate": 5.136052205585735e-06, "loss": 0.08503532409667969, "step": 5623 }, { "epoch": 0.7836689193896746, "grad_norm": 0.4242338240146637, "learning_rate": 5.129746423031967e-06, "loss": 0.0792999267578125, "step": 5624 }, { "epoch": 0.783808263080889, "grad_norm": 0.41541731357574463, "learning_rate": 5.123443944309039e-06, "loss": 0.08974647521972656, "step": 5625 }, { "epoch": 0.7839476067721034, "grad_norm": 0.9979515671730042, "learning_rate": 5.1171447708172285e-06, "loss": 0.14743995666503906, "step": 5626 }, { "epoch": 0.7840869504633178, "grad_norm": 0.4515165388584137, "learning_rate": 5.110848903956076e-06, "loss": 0.07520675659179688, "step": 5627 }, { "epoch": 0.7842262941545322, "grad_norm": 0.5661170482635498, "learning_rate": 5.104556345124363e-06, "loss": 0.1016998291015625, "step": 5628 }, { "epoch": 0.7843656378457465, "grad_norm": 0.3119969964027405, "learning_rate": 5.098267095720164e-06, "loss": 0.0758056640625, "step": 5629 }, { "epoch": 0.7845049815369609, "grad_norm": 0.45896095037460327, "learning_rate": 5.091981157140808e-06, "loss": 0.09566307067871094, "step": 5630 }, { "epoch": 0.7846443252281753, "grad_norm": 0.20681026577949524, "learning_rate": 5.085698530782885e-06, "loss": 0.0582122802734375, "step": 5631 }, { "epoch": 0.7847836689193897, "grad_norm": 0.367009699344635, "learning_rate": 5.079419218042243e-06, "loss": 0.07361793518066406, "step": 5632 }, { "epoch": 0.784923012610604, "grad_norm": 0.3429175913333893, "learning_rate": 5.073143220314007e-06, "loss": 0.0737762451171875, "step": 5633 }, { "epoch": 0.7850623563018184, "grad_norm": 0.26564738154411316, "learning_rate": 5.066870538992568e-06, "loss": 0.06990432739257812, "step": 5634 }, { "epoch": 0.7852016999930328, "grad_norm": 0.5979676842689514, "learning_rate": 5.060601175471567e-06, "loss": 0.09690093994140625, "step": 5635 }, { "epoch": 0.7853410436842472, "grad_norm": 0.6289647817611694, "learning_rate": 5.054335131143906e-06, "loss": 0.11433601379394531, "step": 5636 }, { "epoch": 0.7854803873754616, "grad_norm": 0.316021203994751, "learning_rate": 5.0480724074017625e-06, "loss": 0.08373260498046875, "step": 5637 }, { "epoch": 0.7856197310666759, "grad_norm": 0.7286697030067444, "learning_rate": 5.041813005636578e-06, "loss": 0.09458351135253906, "step": 5638 }, { "epoch": 0.7857590747578903, "grad_norm": 0.41320085525512695, "learning_rate": 5.035556927239036e-06, "loss": 0.08198833465576172, "step": 5639 }, { "epoch": 0.7858984184491047, "grad_norm": 0.4740714728832245, "learning_rate": 5.029304173599107e-06, "loss": 0.083587646484375, "step": 5640 }, { "epoch": 0.7860377621403191, "grad_norm": 0.5497187376022339, "learning_rate": 5.023054746106e-06, "loss": 0.12179946899414062, "step": 5641 }, { "epoch": 0.7861771058315334, "grad_norm": 0.4507594108581543, "learning_rate": 5.016808646148204e-06, "loss": 0.09488868713378906, "step": 5642 }, { "epoch": 0.7863164495227478, "grad_norm": 0.28897884488105774, "learning_rate": 5.0105658751134464e-06, "loss": 0.06509876251220703, "step": 5643 }, { "epoch": 0.7864557932139622, "grad_norm": 0.7803561091423035, "learning_rate": 5.0043264343887395e-06, "loss": 0.12209892272949219, "step": 5644 }, { "epoch": 0.7865951369051766, "grad_norm": 0.26965487003326416, "learning_rate": 4.998090325360346e-06, "loss": 0.07392120361328125, "step": 5645 }, { "epoch": 0.786734480596391, "grad_norm": 0.2806442379951477, "learning_rate": 4.991857549413784e-06, "loss": 0.06663131713867188, "step": 5646 }, { "epoch": 0.7868738242876053, "grad_norm": 0.37051138281822205, "learning_rate": 4.9856281079338265e-06, "loss": 0.08903694152832031, "step": 5647 }, { "epoch": 0.7870131679788197, "grad_norm": 0.48712053894996643, "learning_rate": 4.979402002304519e-06, "loss": 0.08421993255615234, "step": 5648 }, { "epoch": 0.7871525116700342, "grad_norm": 0.33820393681526184, "learning_rate": 4.973179233909167e-06, "loss": 0.07174873352050781, "step": 5649 }, { "epoch": 0.7872918553612486, "grad_norm": 0.28863459825515747, "learning_rate": 4.966959804130314e-06, "loss": 0.07728958129882812, "step": 5650 }, { "epoch": 0.787431199052463, "grad_norm": 0.4273664057254791, "learning_rate": 4.96074371434979e-06, "loss": 0.0938119888305664, "step": 5651 }, { "epoch": 0.7875705427436773, "grad_norm": 0.37286117672920227, "learning_rate": 4.954530965948654e-06, "loss": 0.08338451385498047, "step": 5652 }, { "epoch": 0.7877098864348917, "grad_norm": 0.2729094624519348, "learning_rate": 4.948321560307248e-06, "loss": 0.07269859313964844, "step": 5653 }, { "epoch": 0.7878492301261061, "grad_norm": 0.33811935782432556, "learning_rate": 4.942115498805151e-06, "loss": 0.067626953125, "step": 5654 }, { "epoch": 0.7879885738173205, "grad_norm": 0.20715104043483734, "learning_rate": 4.9359127828212125e-06, "loss": 0.05515241622924805, "step": 5655 }, { "epoch": 0.7881279175085348, "grad_norm": 0.19436496496200562, "learning_rate": 4.929713413733548e-06, "loss": 0.05989646911621094, "step": 5656 }, { "epoch": 0.7882672611997492, "grad_norm": 0.26916325092315674, "learning_rate": 4.92351739291949e-06, "loss": 0.06653213500976562, "step": 5657 }, { "epoch": 0.7884066048909636, "grad_norm": 0.4645390510559082, "learning_rate": 4.917324721755665e-06, "loss": 0.08740425109863281, "step": 5658 }, { "epoch": 0.788545948582178, "grad_norm": 0.5124428272247314, "learning_rate": 4.911135401617948e-06, "loss": 0.09937286376953125, "step": 5659 }, { "epoch": 0.7886852922733923, "grad_norm": 0.42513900995254517, "learning_rate": 4.904949433881457e-06, "loss": 0.08971786499023438, "step": 5660 }, { "epoch": 0.7888246359646067, "grad_norm": 0.782625138759613, "learning_rate": 4.8987668199205796e-06, "loss": 0.114959716796875, "step": 5661 }, { "epoch": 0.7889639796558211, "grad_norm": 0.17023731768131256, "learning_rate": 4.892587561108942e-06, "loss": 0.052082061767578125, "step": 5662 }, { "epoch": 0.7891033233470355, "grad_norm": 0.6163641214370728, "learning_rate": 4.886411658819448e-06, "loss": 0.10512351989746094, "step": 5663 }, { "epoch": 0.7892426670382499, "grad_norm": 0.7495293021202087, "learning_rate": 4.880239114424226e-06, "loss": 0.12883567810058594, "step": 5664 }, { "epoch": 0.7893820107294642, "grad_norm": 0.4445473551750183, "learning_rate": 4.874069929294685e-06, "loss": 0.10071754455566406, "step": 5665 }, { "epoch": 0.7895213544206786, "grad_norm": 0.4127316176891327, "learning_rate": 4.867904104801477e-06, "loss": 0.08897018432617188, "step": 5666 }, { "epoch": 0.789660698111893, "grad_norm": 0.45221707224845886, "learning_rate": 4.861741642314511e-06, "loss": 0.0730133056640625, "step": 5667 }, { "epoch": 0.7898000418031074, "grad_norm": 0.35365915298461914, "learning_rate": 4.85558254320293e-06, "loss": 0.0850381851196289, "step": 5668 }, { "epoch": 0.7899393854943217, "grad_norm": 0.22997444868087769, "learning_rate": 4.849426808835156e-06, "loss": 0.058258056640625, "step": 5669 }, { "epoch": 0.7900787291855361, "grad_norm": 0.3983100354671478, "learning_rate": 4.843274440578856e-06, "loss": 0.0897369384765625, "step": 5670 }, { "epoch": 0.7902180728767505, "grad_norm": 0.28290966153144836, "learning_rate": 4.837125439800936e-06, "loss": 0.07515859603881836, "step": 5671 }, { "epoch": 0.7903574165679649, "grad_norm": 0.545756459236145, "learning_rate": 4.830979807867577e-06, "loss": 0.09769821166992188, "step": 5672 }, { "epoch": 0.7904967602591793, "grad_norm": 0.4839855432510376, "learning_rate": 4.824837546144183e-06, "loss": 0.07847309112548828, "step": 5673 }, { "epoch": 0.7906361039503936, "grad_norm": 0.48080193996429443, "learning_rate": 4.818698655995437e-06, "loss": 0.09499359130859375, "step": 5674 }, { "epoch": 0.790775447641608, "grad_norm": 0.47343459725379944, "learning_rate": 4.812563138785249e-06, "loss": 0.1078948974609375, "step": 5675 }, { "epoch": 0.7909147913328224, "grad_norm": 0.32694512605667114, "learning_rate": 4.806430995876796e-06, "loss": 0.08149242401123047, "step": 5676 }, { "epoch": 0.7910541350240368, "grad_norm": 0.22882424294948578, "learning_rate": 4.800302228632505e-06, "loss": 0.06014060974121094, "step": 5677 }, { "epoch": 0.7911934787152511, "grad_norm": 0.3147236108779907, "learning_rate": 4.7941768384140465e-06, "loss": 0.0701303482055664, "step": 5678 }, { "epoch": 0.7913328224064655, "grad_norm": 0.30596208572387695, "learning_rate": 4.788054826582334e-06, "loss": 0.07063865661621094, "step": 5679 }, { "epoch": 0.7914721660976799, "grad_norm": 0.420888751745224, "learning_rate": 4.781936194497543e-06, "loss": 0.08429145812988281, "step": 5680 }, { "epoch": 0.7916115097888943, "grad_norm": 0.46405813097953796, "learning_rate": 4.7758209435191e-06, "loss": 0.10609626770019531, "step": 5681 }, { "epoch": 0.7917508534801087, "grad_norm": 0.34289026260375977, "learning_rate": 4.769709075005673e-06, "loss": 0.07353401184082031, "step": 5682 }, { "epoch": 0.791890197171323, "grad_norm": 0.4633822739124298, "learning_rate": 4.763600590315167e-06, "loss": 0.08964920043945312, "step": 5683 }, { "epoch": 0.7920295408625374, "grad_norm": 0.4054756164550781, "learning_rate": 4.757495490804758e-06, "loss": 0.08865165710449219, "step": 5684 }, { "epoch": 0.7921688845537518, "grad_norm": 0.6909626722335815, "learning_rate": 4.751393777830864e-06, "loss": 0.13172340393066406, "step": 5685 }, { "epoch": 0.7923082282449662, "grad_norm": 0.5144474506378174, "learning_rate": 4.745295452749137e-06, "loss": 0.09984302520751953, "step": 5686 }, { "epoch": 0.7924475719361805, "grad_norm": 0.44301581382751465, "learning_rate": 4.739200516914488e-06, "loss": 0.0847940444946289, "step": 5687 }, { "epoch": 0.7925869156273949, "grad_norm": 0.43679583072662354, "learning_rate": 4.7331089716810866e-06, "loss": 0.097686767578125, "step": 5688 }, { "epoch": 0.7927262593186094, "grad_norm": 0.5174006223678589, "learning_rate": 4.727020818402311e-06, "loss": 0.09666252136230469, "step": 5689 }, { "epoch": 0.7928656030098238, "grad_norm": 0.25460925698280334, "learning_rate": 4.720936058430818e-06, "loss": 0.07010602951049805, "step": 5690 }, { "epoch": 0.7930049467010382, "grad_norm": 0.4205535054206848, "learning_rate": 4.714854693118515e-06, "loss": 0.09518051147460938, "step": 5691 }, { "epoch": 0.7931442903922525, "grad_norm": 0.2479642778635025, "learning_rate": 4.708776723816528e-06, "loss": 0.06893539428710938, "step": 5692 }, { "epoch": 0.7932836340834669, "grad_norm": 0.3316034972667694, "learning_rate": 4.702702151875253e-06, "loss": 0.08754539489746094, "step": 5693 }, { "epoch": 0.7934229777746813, "grad_norm": 0.4822218120098114, "learning_rate": 4.696630978644312e-06, "loss": 0.09250640869140625, "step": 5694 }, { "epoch": 0.7935623214658957, "grad_norm": 0.49893999099731445, "learning_rate": 4.690563205472589e-06, "loss": 0.10248756408691406, "step": 5695 }, { "epoch": 0.79370166515711, "grad_norm": 0.5823110342025757, "learning_rate": 4.684498833708198e-06, "loss": 0.12551307678222656, "step": 5696 }, { "epoch": 0.7938410088483244, "grad_norm": 0.2669805884361267, "learning_rate": 4.678437864698507e-06, "loss": 0.07390403747558594, "step": 5697 }, { "epoch": 0.7939803525395388, "grad_norm": 0.42157742381095886, "learning_rate": 4.672380299790131e-06, "loss": 0.08240509033203125, "step": 5698 }, { "epoch": 0.7941196962307532, "grad_norm": 0.262778103351593, "learning_rate": 4.6663261403289165e-06, "loss": 0.06632804870605469, "step": 5699 }, { "epoch": 0.7942590399219676, "grad_norm": 0.2866041660308838, "learning_rate": 4.660275387659954e-06, "loss": 0.07683277130126953, "step": 5700 }, { "epoch": 0.7943983836131819, "grad_norm": 0.3691703677177429, "learning_rate": 4.654228043127589e-06, "loss": 0.07423877716064453, "step": 5701 }, { "epoch": 0.7945377273043963, "grad_norm": 0.48941630125045776, "learning_rate": 4.6481841080754064e-06, "loss": 0.09962081909179688, "step": 5702 }, { "epoch": 0.7946770709956107, "grad_norm": 0.4583169221878052, "learning_rate": 4.642143583846223e-06, "loss": 0.09332275390625, "step": 5703 }, { "epoch": 0.7948164146868251, "grad_norm": 0.45926913619041443, "learning_rate": 4.636106471782116e-06, "loss": 0.08583831787109375, "step": 5704 }, { "epoch": 0.7949557583780394, "grad_norm": 0.3149871528148651, "learning_rate": 4.630072773224381e-06, "loss": 0.08464860916137695, "step": 5705 }, { "epoch": 0.7950951020692538, "grad_norm": 0.397433340549469, "learning_rate": 4.62404248951358e-06, "loss": 0.08165359497070312, "step": 5706 }, { "epoch": 0.7952344457604682, "grad_norm": 0.2532755732536316, "learning_rate": 4.618015621989493e-06, "loss": 0.05828380584716797, "step": 5707 }, { "epoch": 0.7953737894516826, "grad_norm": 0.4045651853084564, "learning_rate": 4.6119921719911596e-06, "loss": 0.08685302734375, "step": 5708 }, { "epoch": 0.795513133142897, "grad_norm": 0.35737645626068115, "learning_rate": 4.605972140856856e-06, "loss": 0.08438873291015625, "step": 5709 }, { "epoch": 0.7956524768341113, "grad_norm": 0.6652624607086182, "learning_rate": 4.599955529924089e-06, "loss": 0.09720230102539062, "step": 5710 }, { "epoch": 0.7957918205253257, "grad_norm": 0.2874082326889038, "learning_rate": 4.593942340529612e-06, "loss": 0.07478523254394531, "step": 5711 }, { "epoch": 0.7959311642165401, "grad_norm": 0.5558286905288696, "learning_rate": 4.587932574009417e-06, "loss": 0.117584228515625, "step": 5712 }, { "epoch": 0.7960705079077545, "grad_norm": 0.5126045942306519, "learning_rate": 4.58192623169875e-06, "loss": 0.10787773132324219, "step": 5713 }, { "epoch": 0.7962098515989688, "grad_norm": 0.4443994462490082, "learning_rate": 4.575923314932072e-06, "loss": 0.10480117797851562, "step": 5714 }, { "epoch": 0.7963491952901832, "grad_norm": 0.2733587622642517, "learning_rate": 4.56992382504309e-06, "loss": 0.06936526298522949, "step": 5715 }, { "epoch": 0.7964885389813976, "grad_norm": 0.43731874227523804, "learning_rate": 4.563927763364759e-06, "loss": 0.08162117004394531, "step": 5716 }, { "epoch": 0.796627882672612, "grad_norm": 0.4961440861225128, "learning_rate": 4.557935131229274e-06, "loss": 0.11594009399414062, "step": 5717 }, { "epoch": 0.7967672263638264, "grad_norm": 0.6191819906234741, "learning_rate": 4.5519459299680465e-06, "loss": 0.12314796447753906, "step": 5718 }, { "epoch": 0.7969065700550407, "grad_norm": 0.33314236998558044, "learning_rate": 4.545960160911758e-06, "loss": 0.08730030059814453, "step": 5719 }, { "epoch": 0.7970459137462551, "grad_norm": 0.5012186169624329, "learning_rate": 4.539977825390296e-06, "loss": 0.07854652404785156, "step": 5720 }, { "epoch": 0.7971852574374695, "grad_norm": 0.3092936873435974, "learning_rate": 4.533998924732801e-06, "loss": 0.08112525939941406, "step": 5721 }, { "epoch": 0.7973246011286839, "grad_norm": 0.30831006169319153, "learning_rate": 4.528023460267648e-06, "loss": 0.06740951538085938, "step": 5722 }, { "epoch": 0.7974639448198982, "grad_norm": 0.7222568392753601, "learning_rate": 4.52205143332245e-06, "loss": 0.1507091522216797, "step": 5723 }, { "epoch": 0.7976032885111126, "grad_norm": 0.3376224637031555, "learning_rate": 4.516082845224063e-06, "loss": 0.09464073181152344, "step": 5724 }, { "epoch": 0.797742632202327, "grad_norm": 0.4256724417209625, "learning_rate": 4.510117697298564e-06, "loss": 0.08450889587402344, "step": 5725 }, { "epoch": 0.7978819758935414, "grad_norm": 0.3248347043991089, "learning_rate": 4.504155990871266e-06, "loss": 0.0777425765991211, "step": 5726 }, { "epoch": 0.7980213195847558, "grad_norm": 0.21624957025051117, "learning_rate": 4.498197727266738e-06, "loss": 0.07103157043457031, "step": 5727 }, { "epoch": 0.7981606632759701, "grad_norm": 0.38018715381622314, "learning_rate": 4.4922429078087545e-06, "loss": 0.08751296997070312, "step": 5728 }, { "epoch": 0.7983000069671845, "grad_norm": 0.33947092294692993, "learning_rate": 4.486291533820351e-06, "loss": 0.08021354675292969, "step": 5729 }, { "epoch": 0.798439350658399, "grad_norm": 0.2640308737754822, "learning_rate": 4.480343606623789e-06, "loss": 0.07692337036132812, "step": 5730 }, { "epoch": 0.7985786943496134, "grad_norm": 0.3216187059879303, "learning_rate": 4.474399127540561e-06, "loss": 0.07148361206054688, "step": 5731 }, { "epoch": 0.7987180380408277, "grad_norm": 0.2968781590461731, "learning_rate": 4.4684580978913815e-06, "loss": 0.08005905151367188, "step": 5732 }, { "epoch": 0.7988573817320421, "grad_norm": 0.777054488658905, "learning_rate": 4.462520518996225e-06, "loss": 0.12508010864257812, "step": 5733 }, { "epoch": 0.7989967254232565, "grad_norm": 0.2634645402431488, "learning_rate": 4.456586392174285e-06, "loss": 0.074249267578125, "step": 5734 }, { "epoch": 0.7991360691144709, "grad_norm": 0.47465378046035767, "learning_rate": 4.450655718743988e-06, "loss": 0.09573936462402344, "step": 5735 }, { "epoch": 0.7992754128056853, "grad_norm": 0.4410315454006195, "learning_rate": 4.444728500022988e-06, "loss": 0.07931900024414062, "step": 5736 }, { "epoch": 0.7994147564968996, "grad_norm": 0.423197865486145, "learning_rate": 4.4388047373281815e-06, "loss": 0.10241508483886719, "step": 5737 }, { "epoch": 0.799554100188114, "grad_norm": 0.3805861175060272, "learning_rate": 4.432884431975699e-06, "loss": 0.08723258972167969, "step": 5738 }, { "epoch": 0.7996934438793284, "grad_norm": 0.4521794617176056, "learning_rate": 4.426967585280888e-06, "loss": 0.07951927185058594, "step": 5739 }, { "epoch": 0.7998327875705428, "grad_norm": 0.4139499366283417, "learning_rate": 4.4210541985583455e-06, "loss": 0.08146524429321289, "step": 5740 }, { "epoch": 0.7999721312617571, "grad_norm": 0.6790738701820374, "learning_rate": 4.415144273121883e-06, "loss": 0.10047531127929688, "step": 5741 }, { "epoch": 0.8001114749529715, "grad_norm": 0.6247254014015198, "learning_rate": 4.409237810284559e-06, "loss": 0.10669422149658203, "step": 5742 }, { "epoch": 0.8002508186441859, "grad_norm": 0.7914243936538696, "learning_rate": 4.4033348113586465e-06, "loss": 0.09213638305664062, "step": 5743 }, { "epoch": 0.8003901623354003, "grad_norm": 0.2749635875225067, "learning_rate": 4.39743527765566e-06, "loss": 0.06101489067077637, "step": 5744 }, { "epoch": 0.8005295060266147, "grad_norm": 0.33767813444137573, "learning_rate": 4.391539210486346e-06, "loss": 0.07876014709472656, "step": 5745 }, { "epoch": 0.800668849717829, "grad_norm": 0.42112815380096436, "learning_rate": 4.385646611160674e-06, "loss": 0.10757255554199219, "step": 5746 }, { "epoch": 0.8008081934090434, "grad_norm": 0.3489990234375, "learning_rate": 4.379757480987836e-06, "loss": 0.08538246154785156, "step": 5747 }, { "epoch": 0.8009475371002578, "grad_norm": 0.36643949151039124, "learning_rate": 4.373871821276272e-06, "loss": 0.08438396453857422, "step": 5748 }, { "epoch": 0.8010868807914722, "grad_norm": 0.3192828595638275, "learning_rate": 4.367989633333642e-06, "loss": 0.07179450988769531, "step": 5749 }, { "epoch": 0.8012262244826865, "grad_norm": 0.548864483833313, "learning_rate": 4.362110918466826e-06, "loss": 0.10724449157714844, "step": 5750 }, { "epoch": 0.8013655681739009, "grad_norm": 0.3951129913330078, "learning_rate": 4.356235677981952e-06, "loss": 0.08402442932128906, "step": 5751 }, { "epoch": 0.8015049118651153, "grad_norm": 0.5227956771850586, "learning_rate": 4.350363913184355e-06, "loss": 0.10839080810546875, "step": 5752 }, { "epoch": 0.8016442555563297, "grad_norm": 0.45943084359169006, "learning_rate": 4.3444956253786044e-06, "loss": 0.10425376892089844, "step": 5753 }, { "epoch": 0.801783599247544, "grad_norm": 0.35157501697540283, "learning_rate": 4.338630815868505e-06, "loss": 0.08690738677978516, "step": 5754 }, { "epoch": 0.8019229429387584, "grad_norm": 0.4287568926811218, "learning_rate": 4.3327694859570824e-06, "loss": 0.08146858215332031, "step": 5755 }, { "epoch": 0.8020622866299728, "grad_norm": 0.5608795881271362, "learning_rate": 4.326911636946603e-06, "loss": 0.08159065246582031, "step": 5756 }, { "epoch": 0.8022016303211872, "grad_norm": 0.4520722031593323, "learning_rate": 4.321057270138525e-06, "loss": 0.08691787719726562, "step": 5757 }, { "epoch": 0.8023409740124016, "grad_norm": 0.29534855484962463, "learning_rate": 4.315206386833562e-06, "loss": 0.06525421142578125, "step": 5758 }, { "epoch": 0.8024803177036159, "grad_norm": 0.7585322260856628, "learning_rate": 4.309358988331658e-06, "loss": 0.12059879302978516, "step": 5759 }, { "epoch": 0.8026196613948303, "grad_norm": 0.39878302812576294, "learning_rate": 4.303515075931957e-06, "loss": 0.08762168884277344, "step": 5760 }, { "epoch": 0.8027590050860447, "grad_norm": 0.6048856377601624, "learning_rate": 4.297674650932848e-06, "loss": 0.101287841796875, "step": 5761 }, { "epoch": 0.8028983487772591, "grad_norm": 0.34144991636276245, "learning_rate": 4.2918377146319505e-06, "loss": 0.07674407958984375, "step": 5762 }, { "epoch": 0.8030376924684735, "grad_norm": 0.32711613178253174, "learning_rate": 4.286004268326085e-06, "loss": 0.07323074340820312, "step": 5763 }, { "epoch": 0.8031770361596878, "grad_norm": 0.49057304859161377, "learning_rate": 4.280174313311311e-06, "loss": 0.09510993957519531, "step": 5764 }, { "epoch": 0.8033163798509022, "grad_norm": 0.5425952672958374, "learning_rate": 4.274347850882916e-06, "loss": 0.09586715698242188, "step": 5765 }, { "epoch": 0.8034557235421166, "grad_norm": 0.20432499051094055, "learning_rate": 4.26852488233541e-06, "loss": 0.06329059600830078, "step": 5766 }, { "epoch": 0.803595067233331, "grad_norm": 0.3065548837184906, "learning_rate": 4.26270540896252e-06, "loss": 0.07519149780273438, "step": 5767 }, { "epoch": 0.8037344109245453, "grad_norm": 0.3480375111103058, "learning_rate": 4.256889432057194e-06, "loss": 0.08692550659179688, "step": 5768 }, { "epoch": 0.8038737546157597, "grad_norm": 0.5164533853530884, "learning_rate": 4.251076952911615e-06, "loss": 0.09987831115722656, "step": 5769 }, { "epoch": 0.8040130983069742, "grad_norm": 0.6816145181655884, "learning_rate": 4.245267972817189e-06, "loss": 0.09496116638183594, "step": 5770 }, { "epoch": 0.8041524419981886, "grad_norm": 0.45076847076416016, "learning_rate": 4.239462493064525e-06, "loss": 0.08380699157714844, "step": 5771 }, { "epoch": 0.804291785689403, "grad_norm": 0.4026052951812744, "learning_rate": 4.233660514943483e-06, "loss": 0.07911872863769531, "step": 5772 }, { "epoch": 0.8044311293806173, "grad_norm": 0.6147128939628601, "learning_rate": 4.227862039743118e-06, "loss": 0.09858131408691406, "step": 5773 }, { "epoch": 0.8045704730718317, "grad_norm": 0.5074610114097595, "learning_rate": 4.2220670687517275e-06, "loss": 0.09175968170166016, "step": 5774 }, { "epoch": 0.8047098167630461, "grad_norm": 0.22406648099422455, "learning_rate": 4.216275603256814e-06, "loss": 0.06613349914550781, "step": 5775 }, { "epoch": 0.8048491604542605, "grad_norm": 0.4787399172782898, "learning_rate": 4.210487644545112e-06, "loss": 0.10070323944091797, "step": 5776 }, { "epoch": 0.8049885041454748, "grad_norm": 0.3241536021232605, "learning_rate": 4.204703193902582e-06, "loss": 0.08920478820800781, "step": 5777 }, { "epoch": 0.8051278478366892, "grad_norm": 0.399618536233902, "learning_rate": 4.198922252614388e-06, "loss": 0.10127449035644531, "step": 5778 }, { "epoch": 0.8052671915279036, "grad_norm": 0.6698436737060547, "learning_rate": 4.193144821964918e-06, "loss": 0.08912849426269531, "step": 5779 }, { "epoch": 0.805406535219118, "grad_norm": 0.2682548761367798, "learning_rate": 4.1873709032377926e-06, "loss": 0.07201957702636719, "step": 5780 }, { "epoch": 0.8055458789103324, "grad_norm": 0.359358549118042, "learning_rate": 4.181600497715852e-06, "loss": 0.08260917663574219, "step": 5781 }, { "epoch": 0.8056852226015467, "grad_norm": 0.41767463088035583, "learning_rate": 4.175833606681132e-06, "loss": 0.06744194030761719, "step": 5782 }, { "epoch": 0.8058245662927611, "grad_norm": 0.30972811579704285, "learning_rate": 4.17007023141492e-06, "loss": 0.0800933837890625, "step": 5783 }, { "epoch": 0.8059639099839755, "grad_norm": 0.2272789031267166, "learning_rate": 4.164310373197693e-06, "loss": 0.06789588928222656, "step": 5784 }, { "epoch": 0.8061032536751899, "grad_norm": 0.4090959429740906, "learning_rate": 4.158554033309172e-06, "loss": 0.09096145629882812, "step": 5785 }, { "epoch": 0.8062425973664042, "grad_norm": 0.298571914434433, "learning_rate": 4.152801213028273e-06, "loss": 0.07482147216796875, "step": 5786 }, { "epoch": 0.8063819410576186, "grad_norm": 0.21629935503005981, "learning_rate": 4.147051913633147e-06, "loss": 0.0650339126586914, "step": 5787 }, { "epoch": 0.806521284748833, "grad_norm": 0.4418518841266632, "learning_rate": 4.1413061364011665e-06, "loss": 0.09589195251464844, "step": 5788 }, { "epoch": 0.8066606284400474, "grad_norm": 0.29798203706741333, "learning_rate": 4.135563882608893e-06, "loss": 0.07706546783447266, "step": 5789 }, { "epoch": 0.8067999721312618, "grad_norm": 0.4144391715526581, "learning_rate": 4.129825153532132e-06, "loss": 0.10109138488769531, "step": 5790 }, { "epoch": 0.8069393158224761, "grad_norm": 0.35099121928215027, "learning_rate": 4.124089950445906e-06, "loss": 0.07657432556152344, "step": 5791 }, { "epoch": 0.8070786595136905, "grad_norm": 0.4530881643295288, "learning_rate": 4.118358274624435e-06, "loss": 0.08520889282226562, "step": 5792 }, { "epoch": 0.8072180032049049, "grad_norm": 0.3531850576400757, "learning_rate": 4.112630127341175e-06, "loss": 0.07258892059326172, "step": 5793 }, { "epoch": 0.8073573468961193, "grad_norm": 0.2995791733264923, "learning_rate": 4.106905509868781e-06, "loss": 0.08699226379394531, "step": 5794 }, { "epoch": 0.8074966905873336, "grad_norm": 0.3795398771762848, "learning_rate": 4.101184423479143e-06, "loss": 0.08843040466308594, "step": 5795 }, { "epoch": 0.807636034278548, "grad_norm": 0.4340970814228058, "learning_rate": 4.0954668694433455e-06, "loss": 0.09912586212158203, "step": 5796 }, { "epoch": 0.8077753779697624, "grad_norm": 0.33241933584213257, "learning_rate": 4.0897528490317025e-06, "loss": 0.08241462707519531, "step": 5797 }, { "epoch": 0.8079147216609768, "grad_norm": 0.3067690134048462, "learning_rate": 4.084042363513745e-06, "loss": 0.08090782165527344, "step": 5798 }, { "epoch": 0.8080540653521912, "grad_norm": 0.44497787952423096, "learning_rate": 4.078335414158206e-06, "loss": 0.10407066345214844, "step": 5799 }, { "epoch": 0.8081934090434055, "grad_norm": 0.4380345046520233, "learning_rate": 4.0726320022330345e-06, "loss": 0.09170675277709961, "step": 5800 }, { "epoch": 0.8083327527346199, "grad_norm": 0.41976046562194824, "learning_rate": 4.066932129005403e-06, "loss": 0.1017913818359375, "step": 5801 }, { "epoch": 0.8084720964258343, "grad_norm": 0.21358807384967804, "learning_rate": 4.061235795741702e-06, "loss": 0.06260299682617188, "step": 5802 }, { "epoch": 0.8086114401170487, "grad_norm": 0.5995532274246216, "learning_rate": 4.055543003707514e-06, "loss": 0.11542654037475586, "step": 5803 }, { "epoch": 0.808750783808263, "grad_norm": 0.2147154062986374, "learning_rate": 4.049853754167656e-06, "loss": 0.06442070007324219, "step": 5804 }, { "epoch": 0.8088901274994774, "grad_norm": 0.36500343680381775, "learning_rate": 4.0441680483861415e-06, "loss": 0.09430313110351562, "step": 5805 }, { "epoch": 0.8090294711906918, "grad_norm": 0.21613256633281708, "learning_rate": 4.038485887626214e-06, "loss": 0.06231880187988281, "step": 5806 }, { "epoch": 0.8091688148819062, "grad_norm": 0.799733579158783, "learning_rate": 4.032807273150308e-06, "loss": 0.1477947235107422, "step": 5807 }, { "epoch": 0.8093081585731206, "grad_norm": 0.4835410416126251, "learning_rate": 4.02713220622009e-06, "loss": 0.09343338012695312, "step": 5808 }, { "epoch": 0.8094475022643349, "grad_norm": 0.7171555161476135, "learning_rate": 4.021460688096435e-06, "loss": 0.1707286834716797, "step": 5809 }, { "epoch": 0.8095868459555494, "grad_norm": 0.4625450372695923, "learning_rate": 4.015792720039418e-06, "loss": 0.09865570068359375, "step": 5810 }, { "epoch": 0.8097261896467638, "grad_norm": 0.5930670499801636, "learning_rate": 4.010128303308327e-06, "loss": 0.09847068786621094, "step": 5811 }, { "epoch": 0.8098655333379782, "grad_norm": 0.32328855991363525, "learning_rate": 4.004467439161672e-06, "loss": 0.08299636840820312, "step": 5812 }, { "epoch": 0.8100048770291925, "grad_norm": 0.6118909120559692, "learning_rate": 3.998810128857174e-06, "loss": 0.09056758880615234, "step": 5813 }, { "epoch": 0.8101442207204069, "grad_norm": 0.4110959768295288, "learning_rate": 3.993156373651752e-06, "loss": 0.07280921936035156, "step": 5814 }, { "epoch": 0.8102835644116213, "grad_norm": 0.26330631971359253, "learning_rate": 3.987506174801536e-06, "loss": 0.07263946533203125, "step": 5815 }, { "epoch": 0.8104229081028357, "grad_norm": 0.43202245235443115, "learning_rate": 3.981859533561876e-06, "loss": 0.0966939926147461, "step": 5816 }, { "epoch": 0.8105622517940501, "grad_norm": 0.2843722403049469, "learning_rate": 3.976216451187334e-06, "loss": 0.07382583618164062, "step": 5817 }, { "epoch": 0.8107015954852644, "grad_norm": 0.3897741436958313, "learning_rate": 3.97057692893166e-06, "loss": 0.08265113830566406, "step": 5818 }, { "epoch": 0.8108409391764788, "grad_norm": 0.3721672296524048, "learning_rate": 3.964940968047835e-06, "loss": 0.07163524627685547, "step": 5819 }, { "epoch": 0.8109802828676932, "grad_norm": 0.40209370851516724, "learning_rate": 3.959308569788052e-06, "loss": 0.08085155487060547, "step": 5820 }, { "epoch": 0.8111196265589076, "grad_norm": 0.19883792102336884, "learning_rate": 3.953679735403677e-06, "loss": 0.051334381103515625, "step": 5821 }, { "epoch": 0.811258970250122, "grad_norm": 0.19753260910511017, "learning_rate": 3.948054466145324e-06, "loss": 0.06000041961669922, "step": 5822 }, { "epoch": 0.8113983139413363, "grad_norm": 0.2998635172843933, "learning_rate": 3.942432763262794e-06, "loss": 0.08606910705566406, "step": 5823 }, { "epoch": 0.8115376576325507, "grad_norm": 0.6876741051673889, "learning_rate": 3.9368146280051104e-06, "loss": 0.09685707092285156, "step": 5824 }, { "epoch": 0.8116770013237651, "grad_norm": 0.4737551510334015, "learning_rate": 3.931200061620486e-06, "loss": 0.09328079223632812, "step": 5825 }, { "epoch": 0.8118163450149795, "grad_norm": 0.45151373744010925, "learning_rate": 3.925589065356346e-06, "loss": 0.10296249389648438, "step": 5826 }, { "epoch": 0.8119556887061938, "grad_norm": 0.43353471159935, "learning_rate": 3.919981640459336e-06, "loss": 0.09314918518066406, "step": 5827 }, { "epoch": 0.8120950323974082, "grad_norm": 0.20053760707378387, "learning_rate": 3.914377788175287e-06, "loss": 0.06680107116699219, "step": 5828 }, { "epoch": 0.8122343760886226, "grad_norm": 0.623275876045227, "learning_rate": 3.908777509749255e-06, "loss": 0.11342620849609375, "step": 5829 }, { "epoch": 0.812373719779837, "grad_norm": 0.27016758918762207, "learning_rate": 3.903180806425495e-06, "loss": 0.07428932189941406, "step": 5830 }, { "epoch": 0.8125130634710513, "grad_norm": 0.4298363924026489, "learning_rate": 3.897587679447463e-06, "loss": 0.08160972595214844, "step": 5831 }, { "epoch": 0.8126524071622657, "grad_norm": 0.5625234246253967, "learning_rate": 3.891998130057819e-06, "loss": 0.07708358764648438, "step": 5832 }, { "epoch": 0.8127917508534801, "grad_norm": 0.5247209668159485, "learning_rate": 3.886412159498439e-06, "loss": 0.0969839096069336, "step": 5833 }, { "epoch": 0.8129310945446945, "grad_norm": 0.21361549198627472, "learning_rate": 3.880829769010402e-06, "loss": 0.06264972686767578, "step": 5834 }, { "epoch": 0.8130704382359089, "grad_norm": 0.6517264246940613, "learning_rate": 3.875250959833982e-06, "loss": 0.13391685485839844, "step": 5835 }, { "epoch": 0.8132097819271232, "grad_norm": 0.17446625232696533, "learning_rate": 3.869675733208662e-06, "loss": 0.05457496643066406, "step": 5836 }, { "epoch": 0.8133491256183376, "grad_norm": 0.3378855288028717, "learning_rate": 3.8641040903731335e-06, "loss": 0.07589054107666016, "step": 5837 }, { "epoch": 0.813488469309552, "grad_norm": 0.3982596695423126, "learning_rate": 3.85853603256529e-06, "loss": 0.090057373046875, "step": 5838 }, { "epoch": 0.8136278130007664, "grad_norm": 0.3694663941860199, "learning_rate": 3.852971561022218e-06, "loss": 0.0801544189453125, "step": 5839 }, { "epoch": 0.8137671566919807, "grad_norm": 0.39266082644462585, "learning_rate": 3.8474106769802255e-06, "loss": 0.09895706176757812, "step": 5840 }, { "epoch": 0.8139065003831951, "grad_norm": 0.3252917528152466, "learning_rate": 3.841853381674814e-06, "loss": 0.09617424011230469, "step": 5841 }, { "epoch": 0.8140458440744095, "grad_norm": 0.6276628971099854, "learning_rate": 3.836299676340684e-06, "loss": 0.0860738754272461, "step": 5842 }, { "epoch": 0.8141851877656239, "grad_norm": 0.4068092405796051, "learning_rate": 3.83074956221174e-06, "loss": 0.08965110778808594, "step": 5843 }, { "epoch": 0.8143245314568383, "grad_norm": 0.5269186496734619, "learning_rate": 3.825203040521091e-06, "loss": 0.09769535064697266, "step": 5844 }, { "epoch": 0.8144638751480526, "grad_norm": 0.8164353370666504, "learning_rate": 3.819660112501053e-06, "loss": 0.12249183654785156, "step": 5845 }, { "epoch": 0.814603218839267, "grad_norm": 0.3817645013332367, "learning_rate": 3.814120779383137e-06, "loss": 0.09700870513916016, "step": 5846 }, { "epoch": 0.8147425625304814, "grad_norm": 0.5397366881370544, "learning_rate": 3.8085850423980475e-06, "loss": 0.09310340881347656, "step": 5847 }, { "epoch": 0.8148819062216958, "grad_norm": 0.47126224637031555, "learning_rate": 3.8030529027757057e-06, "loss": 0.10383796691894531, "step": 5848 }, { "epoch": 0.8150212499129101, "grad_norm": 0.5539352297782898, "learning_rate": 3.797524361745231e-06, "loss": 0.10870504379272461, "step": 5849 }, { "epoch": 0.8151605936041246, "grad_norm": 0.7726271152496338, "learning_rate": 3.7919994205349287e-06, "loss": 0.125244140625, "step": 5850 }, { "epoch": 0.815299937295339, "grad_norm": 0.4979258179664612, "learning_rate": 3.7864780803723267e-06, "loss": 0.1098785400390625, "step": 5851 }, { "epoch": 0.8154392809865534, "grad_norm": 0.479111909866333, "learning_rate": 3.7809603424841346e-06, "loss": 0.10226631164550781, "step": 5852 }, { "epoch": 0.8155786246777678, "grad_norm": 0.2699643671512604, "learning_rate": 3.7754462080962604e-06, "loss": 0.06986236572265625, "step": 5853 }, { "epoch": 0.8157179683689821, "grad_norm": 0.3852890133857727, "learning_rate": 3.769935678433827e-06, "loss": 0.07102775573730469, "step": 5854 }, { "epoch": 0.8158573120601965, "grad_norm": 0.6873718500137329, "learning_rate": 3.7644287547211476e-06, "loss": 0.10587120056152344, "step": 5855 }, { "epoch": 0.8159966557514109, "grad_norm": 0.2716093063354492, "learning_rate": 3.7589254381817397e-06, "loss": 0.07733535766601562, "step": 5856 }, { "epoch": 0.8161359994426253, "grad_norm": 0.5599513649940491, "learning_rate": 3.753425730038307e-06, "loss": 0.0971670150756836, "step": 5857 }, { "epoch": 0.8162753431338396, "grad_norm": 0.3076510727405548, "learning_rate": 3.7479296315127588e-06, "loss": 0.07508087158203125, "step": 5858 }, { "epoch": 0.816414686825054, "grad_norm": 0.3657916486263275, "learning_rate": 3.7424371438262096e-06, "loss": 0.08539915084838867, "step": 5859 }, { "epoch": 0.8165540305162684, "grad_norm": 0.38651734590530396, "learning_rate": 3.7369482681989565e-06, "loss": 0.07268714904785156, "step": 5860 }, { "epoch": 0.8166933742074828, "grad_norm": 0.3738669455051422, "learning_rate": 3.7314630058505063e-06, "loss": 0.08064079284667969, "step": 5861 }, { "epoch": 0.8168327178986972, "grad_norm": 0.3860386610031128, "learning_rate": 3.725981357999562e-06, "loss": 0.08741378784179688, "step": 5862 }, { "epoch": 0.8169720615899115, "grad_norm": 0.3698016107082367, "learning_rate": 3.72050332586402e-06, "loss": 0.08467674255371094, "step": 5863 }, { "epoch": 0.8171114052811259, "grad_norm": 0.519075334072113, "learning_rate": 3.715028910660967e-06, "loss": 0.10147666931152344, "step": 5864 }, { "epoch": 0.8172507489723403, "grad_norm": 0.3606766164302826, "learning_rate": 3.709558113606697e-06, "loss": 0.06927299499511719, "step": 5865 }, { "epoch": 0.8173900926635547, "grad_norm": 0.8066274523735046, "learning_rate": 3.704090935916702e-06, "loss": 0.09131526947021484, "step": 5866 }, { "epoch": 0.817529436354769, "grad_norm": 0.4588612914085388, "learning_rate": 3.6986273788056592e-06, "loss": 0.09450054168701172, "step": 5867 }, { "epoch": 0.8176687800459834, "grad_norm": 0.29021358489990234, "learning_rate": 3.6931674434874397e-06, "loss": 0.06563186645507812, "step": 5868 }, { "epoch": 0.8178081237371978, "grad_norm": 0.36841708421707153, "learning_rate": 3.6877111311751246e-06, "loss": 0.0891885757446289, "step": 5869 }, { "epoch": 0.8179474674284122, "grad_norm": 0.3166259229183197, "learning_rate": 3.682258443080986e-06, "loss": 0.08437347412109375, "step": 5870 }, { "epoch": 0.8180868111196266, "grad_norm": 0.44120749831199646, "learning_rate": 3.676809380416475e-06, "loss": 0.0991201400756836, "step": 5871 }, { "epoch": 0.8182261548108409, "grad_norm": 0.4555787742137909, "learning_rate": 3.671363944392259e-06, "loss": 0.0888357162475586, "step": 5872 }, { "epoch": 0.8183654985020553, "grad_norm": 0.3031899631023407, "learning_rate": 3.6659221362181827e-06, "loss": 0.07115364074707031, "step": 5873 }, { "epoch": 0.8185048421932697, "grad_norm": 0.3476454019546509, "learning_rate": 3.660483957103298e-06, "loss": 0.09343528747558594, "step": 5874 }, { "epoch": 0.8186441858844841, "grad_norm": 0.5661113858222961, "learning_rate": 3.655049408255835e-06, "loss": 0.1084747314453125, "step": 5875 }, { "epoch": 0.8187835295756984, "grad_norm": 0.34590500593185425, "learning_rate": 3.649618490883233e-06, "loss": 0.08631324768066406, "step": 5876 }, { "epoch": 0.8189228732669128, "grad_norm": 0.65434330701828, "learning_rate": 3.6441912061921205e-06, "loss": 0.12784957885742188, "step": 5877 }, { "epoch": 0.8190622169581272, "grad_norm": 0.3674112856388092, "learning_rate": 3.638767555388314e-06, "loss": 0.07309913635253906, "step": 5878 }, { "epoch": 0.8192015606493416, "grad_norm": 0.6925652623176575, "learning_rate": 3.6333475396768168e-06, "loss": 0.1035623550415039, "step": 5879 }, { "epoch": 0.819340904340556, "grad_norm": 0.45551711320877075, "learning_rate": 3.6279311602618416e-06, "loss": 0.09308242797851562, "step": 5880 }, { "epoch": 0.8194802480317703, "grad_norm": 0.306031733751297, "learning_rate": 3.6225184183467856e-06, "loss": 0.08002948760986328, "step": 5881 }, { "epoch": 0.8196195917229847, "grad_norm": 0.3192896842956543, "learning_rate": 3.6171093151342264e-06, "loss": 0.0796661376953125, "step": 5882 }, { "epoch": 0.8197589354141991, "grad_norm": 0.38856467604637146, "learning_rate": 3.611703851825956e-06, "loss": 0.08147239685058594, "step": 5883 }, { "epoch": 0.8198982791054135, "grad_norm": 0.761904239654541, "learning_rate": 3.6063020296229344e-06, "loss": 0.11430168151855469, "step": 5884 }, { "epoch": 0.8200376227966278, "grad_norm": 0.32405394315719604, "learning_rate": 3.60090384972533e-06, "loss": 0.07469940185546875, "step": 5885 }, { "epoch": 0.8201769664878422, "grad_norm": 0.2449733316898346, "learning_rate": 3.595509313332488e-06, "loss": 0.06522083282470703, "step": 5886 }, { "epoch": 0.8203163101790566, "grad_norm": 0.6892227530479431, "learning_rate": 3.5901184216429585e-06, "loss": 0.11584281921386719, "step": 5887 }, { "epoch": 0.820455653870271, "grad_norm": 0.3779495656490326, "learning_rate": 3.584731175854479e-06, "loss": 0.07533073425292969, "step": 5888 }, { "epoch": 0.8205949975614854, "grad_norm": 0.36501604318618774, "learning_rate": 3.5793475771639562e-06, "loss": 0.07806777954101562, "step": 5889 }, { "epoch": 0.8207343412526998, "grad_norm": 0.4072225093841553, "learning_rate": 3.5739676267675115e-06, "loss": 0.08334732055664062, "step": 5890 }, { "epoch": 0.8208736849439142, "grad_norm": 0.7222934365272522, "learning_rate": 3.568591325860453e-06, "loss": 0.0991058349609375, "step": 5891 }, { "epoch": 0.8210130286351286, "grad_norm": 0.5367545485496521, "learning_rate": 3.563218675637261e-06, "loss": 0.10298347473144531, "step": 5892 }, { "epoch": 0.821152372326343, "grad_norm": 0.6872527003288269, "learning_rate": 3.5578496772916205e-06, "loss": 0.12026405334472656, "step": 5893 }, { "epoch": 0.8212917160175573, "grad_norm": 0.34621474146842957, "learning_rate": 3.552484332016408e-06, "loss": 0.07516288757324219, "step": 5894 }, { "epoch": 0.8214310597087717, "grad_norm": 0.5347920060157776, "learning_rate": 3.547122641003671e-06, "loss": 0.11132526397705078, "step": 5895 }, { "epoch": 0.8215704033999861, "grad_norm": 0.41580092906951904, "learning_rate": 3.5417646054446554e-06, "loss": 0.09637069702148438, "step": 5896 }, { "epoch": 0.8217097470912005, "grad_norm": 0.5229889750480652, "learning_rate": 3.536410226529794e-06, "loss": 0.09395217895507812, "step": 5897 }, { "epoch": 0.8218490907824149, "grad_norm": 0.23738409578800201, "learning_rate": 3.5310595054487173e-06, "loss": 0.06240701675415039, "step": 5898 }, { "epoch": 0.8219884344736292, "grad_norm": 0.41276171803474426, "learning_rate": 3.525712443390226e-06, "loss": 0.09011459350585938, "step": 5899 }, { "epoch": 0.8221277781648436, "grad_norm": 0.43580058217048645, "learning_rate": 3.5203690415423086e-06, "loss": 0.08883857727050781, "step": 5900 }, { "epoch": 0.822267121856058, "grad_norm": 0.43713656067848206, "learning_rate": 3.5150293010921543e-06, "loss": 0.083587646484375, "step": 5901 }, { "epoch": 0.8224064655472724, "grad_norm": 0.5140691995620728, "learning_rate": 3.5096932232261384e-06, "loss": 0.09360122680664062, "step": 5902 }, { "epoch": 0.8225458092384867, "grad_norm": 0.32079729437828064, "learning_rate": 3.504360809129801e-06, "loss": 0.07270622253417969, "step": 5903 }, { "epoch": 0.8226851529297011, "grad_norm": 0.6171124577522278, "learning_rate": 3.4990320599878948e-06, "loss": 0.09617042541503906, "step": 5904 }, { "epoch": 0.8228244966209155, "grad_norm": 0.26502928137779236, "learning_rate": 3.493706976984337e-06, "loss": 0.06240272521972656, "step": 5905 }, { "epoch": 0.8229638403121299, "grad_norm": 0.6324767470359802, "learning_rate": 3.4883855613022476e-06, "loss": 0.11882781982421875, "step": 5906 }, { "epoch": 0.8231031840033443, "grad_norm": 0.4432928264141083, "learning_rate": 3.483067814123917e-06, "loss": 0.09133720397949219, "step": 5907 }, { "epoch": 0.8232425276945586, "grad_norm": 0.3890266418457031, "learning_rate": 3.477753736630829e-06, "loss": 0.09441375732421875, "step": 5908 }, { "epoch": 0.823381871385773, "grad_norm": 0.32935482263565063, "learning_rate": 3.4724433300036565e-06, "loss": 0.0778961181640625, "step": 5909 }, { "epoch": 0.8235212150769874, "grad_norm": 0.33957135677337646, "learning_rate": 3.467136595422247e-06, "loss": 0.07725143432617188, "step": 5910 }, { "epoch": 0.8236605587682018, "grad_norm": 0.6433339715003967, "learning_rate": 3.4618335340656263e-06, "loss": 0.11888313293457031, "step": 5911 }, { "epoch": 0.8237999024594161, "grad_norm": 0.25005996227264404, "learning_rate": 3.456534147112023e-06, "loss": 0.05613517761230469, "step": 5912 }, { "epoch": 0.8239392461506305, "grad_norm": 0.34963458776474, "learning_rate": 3.451238435738844e-06, "loss": 0.084197998046875, "step": 5913 }, { "epoch": 0.8240785898418449, "grad_norm": 0.2656228244304657, "learning_rate": 3.445946401122666e-06, "loss": 0.07004070281982422, "step": 5914 }, { "epoch": 0.8242179335330593, "grad_norm": 0.938654899597168, "learning_rate": 3.4406580444392647e-06, "loss": 0.1311187744140625, "step": 5915 }, { "epoch": 0.8243572772242737, "grad_norm": 0.3312970995903015, "learning_rate": 3.435373366863586e-06, "loss": 0.09749221801757812, "step": 5916 }, { "epoch": 0.824496620915488, "grad_norm": 0.39837902784347534, "learning_rate": 3.430092369569773e-06, "loss": 0.08923912048339844, "step": 5917 }, { "epoch": 0.8246359646067024, "grad_norm": 0.9318960905075073, "learning_rate": 3.4248150537311344e-06, "loss": 0.15502071380615234, "step": 5918 }, { "epoch": 0.8247753082979168, "grad_norm": 0.7180649638175964, "learning_rate": 3.4195414205201718e-06, "loss": 0.1254749298095703, "step": 5919 }, { "epoch": 0.8249146519891312, "grad_norm": 0.41500693559646606, "learning_rate": 3.4142714711085765e-06, "loss": 0.09228134155273438, "step": 5920 }, { "epoch": 0.8250539956803455, "grad_norm": 0.3274748623371124, "learning_rate": 3.409005206667193e-06, "loss": 0.06975364685058594, "step": 5921 }, { "epoch": 0.8251933393715599, "grad_norm": 0.30552950501441956, "learning_rate": 3.4037426283660734e-06, "loss": 0.08064556121826172, "step": 5922 }, { "epoch": 0.8253326830627743, "grad_norm": 0.5385141372680664, "learning_rate": 3.3984837373744406e-06, "loss": 0.08904647827148438, "step": 5923 }, { "epoch": 0.8254720267539887, "grad_norm": 0.4074801802635193, "learning_rate": 3.3932285348607108e-06, "loss": 0.07433128356933594, "step": 5924 }, { "epoch": 0.825611370445203, "grad_norm": 0.39350637793540955, "learning_rate": 3.387977021992459e-06, "loss": 0.0794367790222168, "step": 5925 }, { "epoch": 0.8257507141364174, "grad_norm": 0.2991810142993927, "learning_rate": 3.38272919993645e-06, "loss": 0.06830692291259766, "step": 5926 }, { "epoch": 0.8258900578276318, "grad_norm": 0.7272956371307373, "learning_rate": 3.377485069858639e-06, "loss": 0.10809326171875, "step": 5927 }, { "epoch": 0.8260294015188462, "grad_norm": 0.4145250916481018, "learning_rate": 3.372244632924142e-06, "loss": 0.07814407348632812, "step": 5928 }, { "epoch": 0.8261687452100606, "grad_norm": 0.5727525949478149, "learning_rate": 3.3670078902972693e-06, "loss": 0.11113357543945312, "step": 5929 }, { "epoch": 0.8263080889012749, "grad_norm": 0.4335343837738037, "learning_rate": 3.361774843141512e-06, "loss": 0.09780502319335938, "step": 5930 }, { "epoch": 0.8264474325924894, "grad_norm": 0.3678942024707794, "learning_rate": 3.3565454926195252e-06, "loss": 0.08487510681152344, "step": 5931 }, { "epoch": 0.8265867762837038, "grad_norm": 0.3676747977733612, "learning_rate": 3.35131983989315e-06, "loss": 0.07985591888427734, "step": 5932 }, { "epoch": 0.8267261199749182, "grad_norm": 0.44906941056251526, "learning_rate": 3.3460978861234095e-06, "loss": 0.09431838989257812, "step": 5933 }, { "epoch": 0.8268654636661326, "grad_norm": 0.2738412320613861, "learning_rate": 3.3408796324705085e-06, "loss": 0.06808280944824219, "step": 5934 }, { "epoch": 0.8270048073573469, "grad_norm": 0.46651172637939453, "learning_rate": 3.335665080093815e-06, "loss": 0.11558246612548828, "step": 5935 }, { "epoch": 0.8271441510485613, "grad_norm": 0.3002771735191345, "learning_rate": 3.3304542301518915e-06, "loss": 0.0759124755859375, "step": 5936 }, { "epoch": 0.8272834947397757, "grad_norm": 0.4293699264526367, "learning_rate": 3.325247083802463e-06, "loss": 0.0855255126953125, "step": 5937 }, { "epoch": 0.8274228384309901, "grad_norm": 0.22232414782047272, "learning_rate": 3.320043642202444e-06, "loss": 0.06422615051269531, "step": 5938 }, { "epoch": 0.8275621821222044, "grad_norm": 0.3650669753551483, "learning_rate": 3.3148439065079142e-06, "loss": 0.08199882507324219, "step": 5939 }, { "epoch": 0.8277015258134188, "grad_norm": 0.27184221148490906, "learning_rate": 3.309647877874138e-06, "loss": 0.07247352600097656, "step": 5940 }, { "epoch": 0.8278408695046332, "grad_norm": 0.5894299149513245, "learning_rate": 3.304455557455564e-06, "loss": 0.11213302612304688, "step": 5941 }, { "epoch": 0.8279802131958476, "grad_norm": 0.302002876996994, "learning_rate": 3.299266946405797e-06, "loss": 0.07868576049804688, "step": 5942 }, { "epoch": 0.828119556887062, "grad_norm": 0.30811387300491333, "learning_rate": 3.294082045877627e-06, "loss": 0.07997322082519531, "step": 5943 }, { "epoch": 0.8282589005782763, "grad_norm": 0.44835326075553894, "learning_rate": 3.2889008570230228e-06, "loss": 0.09256553649902344, "step": 5944 }, { "epoch": 0.8283982442694907, "grad_norm": 0.31270575523376465, "learning_rate": 3.2837233809931314e-06, "loss": 0.07196235656738281, "step": 5945 }, { "epoch": 0.8285375879607051, "grad_norm": 0.3924355208873749, "learning_rate": 3.278549618938267e-06, "loss": 0.09350776672363281, "step": 5946 }, { "epoch": 0.8286769316519195, "grad_norm": 0.6258882880210876, "learning_rate": 3.2733795720079133e-06, "loss": 0.10442447662353516, "step": 5947 }, { "epoch": 0.8288162753431338, "grad_norm": 0.386699378490448, "learning_rate": 3.268213241350746e-06, "loss": 0.08852005004882812, "step": 5948 }, { "epoch": 0.8289556190343482, "grad_norm": 0.28927746415138245, "learning_rate": 3.263050628114606e-06, "loss": 0.06868267059326172, "step": 5949 }, { "epoch": 0.8290949627255626, "grad_norm": 0.28275036811828613, "learning_rate": 3.2578917334465034e-06, "loss": 0.079315185546875, "step": 5950 }, { "epoch": 0.829234306416777, "grad_norm": 0.46204715967178345, "learning_rate": 3.2527365584926264e-06, "loss": 0.09205293655395508, "step": 5951 }, { "epoch": 0.8293736501079914, "grad_norm": 0.5025037527084351, "learning_rate": 3.2475851043983496e-06, "loss": 0.0969247817993164, "step": 5952 }, { "epoch": 0.8295129937992057, "grad_norm": 0.7166548371315002, "learning_rate": 3.2424373723081892e-06, "loss": 0.13725662231445312, "step": 5953 }, { "epoch": 0.8296523374904201, "grad_norm": 0.3021635413169861, "learning_rate": 3.2372933633658633e-06, "loss": 0.06968307495117188, "step": 5954 }, { "epoch": 0.8297916811816345, "grad_norm": 0.32865071296691895, "learning_rate": 3.2321530787142508e-06, "loss": 0.0791463851928711, "step": 5955 }, { "epoch": 0.8299310248728489, "grad_norm": 0.28863033652305603, "learning_rate": 3.227016519495414e-06, "loss": 0.062075138092041016, "step": 5956 }, { "epoch": 0.8300703685640632, "grad_norm": 0.6655749678611755, "learning_rate": 3.221883686850573e-06, "loss": 0.10507583618164062, "step": 5957 }, { "epoch": 0.8302097122552776, "grad_norm": 0.233371764421463, "learning_rate": 3.2167545819201227e-06, "loss": 0.06587982177734375, "step": 5958 }, { "epoch": 0.830349055946492, "grad_norm": 0.7336015105247498, "learning_rate": 3.2116292058436383e-06, "loss": 0.10976982116699219, "step": 5959 }, { "epoch": 0.8304883996377064, "grad_norm": 0.847266674041748, "learning_rate": 3.2065075597598573e-06, "loss": 0.11776447296142578, "step": 5960 }, { "epoch": 0.8306277433289208, "grad_norm": 0.23172317445278168, "learning_rate": 3.201389644806692e-06, "loss": 0.06261444091796875, "step": 5961 }, { "epoch": 0.8307670870201351, "grad_norm": 0.2808094024658203, "learning_rate": 3.1962754621212345e-06, "loss": 0.06742572784423828, "step": 5962 }, { "epoch": 0.8309064307113495, "grad_norm": 0.38375911116600037, "learning_rate": 3.1911650128397342e-06, "loss": 0.09573650360107422, "step": 5963 }, { "epoch": 0.8310457744025639, "grad_norm": 0.5335613489151001, "learning_rate": 3.1860582980976117e-06, "loss": 0.08881568908691406, "step": 5964 }, { "epoch": 0.8311851180937783, "grad_norm": 0.3622586727142334, "learning_rate": 3.180955319029464e-06, "loss": 0.0902862548828125, "step": 5965 }, { "epoch": 0.8313244617849926, "grad_norm": 0.4199208915233612, "learning_rate": 3.175856076769066e-06, "loss": 0.07773017883300781, "step": 5966 }, { "epoch": 0.831463805476207, "grad_norm": 0.5161654949188232, "learning_rate": 3.170760572449345e-06, "loss": 0.11299705505371094, "step": 5967 }, { "epoch": 0.8316031491674214, "grad_norm": 0.4099973440170288, "learning_rate": 3.1656688072024024e-06, "loss": 0.09703731536865234, "step": 5968 }, { "epoch": 0.8317424928586358, "grad_norm": 0.3924803137779236, "learning_rate": 3.160580782159517e-06, "loss": 0.08985328674316406, "step": 5969 }, { "epoch": 0.8318818365498502, "grad_norm": 0.305880069732666, "learning_rate": 3.155496498451136e-06, "loss": 0.07192802429199219, "step": 5970 }, { "epoch": 0.8320211802410646, "grad_norm": 1.0590119361877441, "learning_rate": 3.1504159572068604e-06, "loss": 0.1198577880859375, "step": 5971 }, { "epoch": 0.832160523932279, "grad_norm": 0.6996975541114807, "learning_rate": 3.1453391595554783e-06, "loss": 0.14168930053710938, "step": 5972 }, { "epoch": 0.8322998676234934, "grad_norm": 0.4013158082962036, "learning_rate": 3.140266106624941e-06, "loss": 0.08887386322021484, "step": 5973 }, { "epoch": 0.8324392113147078, "grad_norm": 0.296390175819397, "learning_rate": 3.1351967995423594e-06, "loss": 0.0687713623046875, "step": 5974 }, { "epoch": 0.8325785550059221, "grad_norm": 0.5937119722366333, "learning_rate": 3.1301312394340157e-06, "loss": 0.14336204528808594, "step": 5975 }, { "epoch": 0.8327178986971365, "grad_norm": 0.6880470514297485, "learning_rate": 3.1250694274253623e-06, "loss": 0.11455917358398438, "step": 5976 }, { "epoch": 0.8328572423883509, "grad_norm": 0.6203737258911133, "learning_rate": 3.120011364641029e-06, "loss": 0.13069534301757812, "step": 5977 }, { "epoch": 0.8329965860795653, "grad_norm": 0.5486828088760376, "learning_rate": 3.114957052204792e-06, "loss": 0.09066009521484375, "step": 5978 }, { "epoch": 0.8331359297707797, "grad_norm": 0.5083727240562439, "learning_rate": 3.1099064912396002e-06, "loss": 0.10340404510498047, "step": 5979 }, { "epoch": 0.833275273461994, "grad_norm": 0.4876103103160858, "learning_rate": 3.1048596828675805e-06, "loss": 0.08077812194824219, "step": 5980 }, { "epoch": 0.8334146171532084, "grad_norm": 0.336137592792511, "learning_rate": 3.0998166282100215e-06, "loss": 0.07556915283203125, "step": 5981 }, { "epoch": 0.8335539608444228, "grad_norm": 0.30565690994262695, "learning_rate": 3.0947773283873638e-06, "loss": 0.06825637817382812, "step": 5982 }, { "epoch": 0.8336933045356372, "grad_norm": 0.27219176292419434, "learning_rate": 3.0897417845192356e-06, "loss": 0.07198524475097656, "step": 5983 }, { "epoch": 0.8338326482268515, "grad_norm": 0.20084337890148163, "learning_rate": 3.084709997724411e-06, "loss": 0.05603218078613281, "step": 5984 }, { "epoch": 0.8339719919180659, "grad_norm": 0.529218852519989, "learning_rate": 3.079681969120849e-06, "loss": 0.09210205078125, "step": 5985 }, { "epoch": 0.8341113356092803, "grad_norm": 1.1863633394241333, "learning_rate": 3.0746576998256495e-06, "loss": 0.10213756561279297, "step": 5986 }, { "epoch": 0.8342506793004947, "grad_norm": 0.3188323676586151, "learning_rate": 3.0696371909550947e-06, "loss": 0.08150482177734375, "step": 5987 }, { "epoch": 0.8343900229917091, "grad_norm": 0.4841364324092865, "learning_rate": 3.0646204436246372e-06, "loss": 0.10869598388671875, "step": 5988 }, { "epoch": 0.8345293666829234, "grad_norm": 0.3367457091808319, "learning_rate": 3.0596074589488743e-06, "loss": 0.06162071228027344, "step": 5989 }, { "epoch": 0.8346687103741378, "grad_norm": 0.6023036241531372, "learning_rate": 3.054598238041575e-06, "loss": 0.08916091918945312, "step": 5990 }, { "epoch": 0.8348080540653522, "grad_norm": 0.44613492488861084, "learning_rate": 3.04959278201568e-06, "loss": 0.091094970703125, "step": 5991 }, { "epoch": 0.8349473977565666, "grad_norm": 0.28894999623298645, "learning_rate": 3.044591091983282e-06, "loss": 0.06940841674804688, "step": 5992 }, { "epoch": 0.835086741447781, "grad_norm": 0.29688334465026855, "learning_rate": 3.0395931690556435e-06, "loss": 0.07472991943359375, "step": 5993 }, { "epoch": 0.8352260851389953, "grad_norm": 0.34513744711875916, "learning_rate": 3.0345990143431938e-06, "loss": 0.07462596893310547, "step": 5994 }, { "epoch": 0.8353654288302097, "grad_norm": 0.49477240443229675, "learning_rate": 3.029608628955518e-06, "loss": 0.10788726806640625, "step": 5995 }, { "epoch": 0.8355047725214241, "grad_norm": 0.3080471158027649, "learning_rate": 3.0246220140013593e-06, "loss": 0.06917953491210938, "step": 5996 }, { "epoch": 0.8356441162126385, "grad_norm": 0.47105005383491516, "learning_rate": 3.019639170588633e-06, "loss": 0.08435821533203125, "step": 5997 }, { "epoch": 0.8357834599038528, "grad_norm": 0.25112777948379517, "learning_rate": 3.0146600998244226e-06, "loss": 0.07252120971679688, "step": 5998 }, { "epoch": 0.8359228035950672, "grad_norm": 0.591999351978302, "learning_rate": 3.009684802814954e-06, "loss": 0.12003326416015625, "step": 5999 }, { "epoch": 0.8360621472862816, "grad_norm": 0.758327305316925, "learning_rate": 3.004713280665621e-06, "loss": 0.11583185195922852, "step": 6000 }, { "epoch": 0.836201490977496, "grad_norm": 0.31356847286224365, "learning_rate": 2.999745534480989e-06, "loss": 0.07633399963378906, "step": 6001 }, { "epoch": 0.8363408346687103, "grad_norm": 0.3390062153339386, "learning_rate": 2.9947815653647816e-06, "loss": 0.07587337493896484, "step": 6002 }, { "epoch": 0.8364801783599247, "grad_norm": 0.29491111636161804, "learning_rate": 2.98982137441987e-06, "loss": 0.07812690734863281, "step": 6003 }, { "epoch": 0.8366195220511391, "grad_norm": 0.41523683071136475, "learning_rate": 2.9848649627483063e-06, "loss": 0.09159660339355469, "step": 6004 }, { "epoch": 0.8367588657423535, "grad_norm": 0.5694877505302429, "learning_rate": 2.97991233145128e-06, "loss": 0.11769866943359375, "step": 6005 }, { "epoch": 0.8368982094335679, "grad_norm": 0.3769380450248718, "learning_rate": 2.974963481629163e-06, "loss": 0.08779239654541016, "step": 6006 }, { "epoch": 0.8370375531247822, "grad_norm": 0.3078921437263489, "learning_rate": 2.970018414381466e-06, "loss": 0.08168983459472656, "step": 6007 }, { "epoch": 0.8371768968159966, "grad_norm": 0.5745800137519836, "learning_rate": 2.9650771308068773e-06, "loss": 0.09920406341552734, "step": 6008 }, { "epoch": 0.837316240507211, "grad_norm": 0.38986900448799133, "learning_rate": 2.960139632003243e-06, "loss": 0.09000205993652344, "step": 6009 }, { "epoch": 0.8374555841984254, "grad_norm": 0.3203256130218506, "learning_rate": 2.955205919067552e-06, "loss": 0.07768917083740234, "step": 6010 }, { "epoch": 0.8375949278896399, "grad_norm": 0.329010933637619, "learning_rate": 2.950275993095966e-06, "loss": 0.07488441467285156, "step": 6011 }, { "epoch": 0.8377342715808542, "grad_norm": 0.315508097410202, "learning_rate": 2.945349855183799e-06, "loss": 0.07185077667236328, "step": 6012 }, { "epoch": 0.8378736152720686, "grad_norm": 0.6710783839225769, "learning_rate": 2.940427506425536e-06, "loss": 0.1017303466796875, "step": 6013 }, { "epoch": 0.838012958963283, "grad_norm": 0.2549206018447876, "learning_rate": 2.9355089479148003e-06, "loss": 0.06960105895996094, "step": 6014 }, { "epoch": 0.8381523026544974, "grad_norm": 0.4311777949333191, "learning_rate": 2.9305941807443903e-06, "loss": 0.09122419357299805, "step": 6015 }, { "epoch": 0.8382916463457117, "grad_norm": 0.41985729336738586, "learning_rate": 2.925683206006249e-06, "loss": 0.07822227478027344, "step": 6016 }, { "epoch": 0.8384309900369261, "grad_norm": 0.5131974220275879, "learning_rate": 2.9207760247914895e-06, "loss": 0.10774612426757812, "step": 6017 }, { "epoch": 0.8385703337281405, "grad_norm": 0.3060045540332794, "learning_rate": 2.915872638190369e-06, "loss": 0.07098770141601562, "step": 6018 }, { "epoch": 0.8387096774193549, "grad_norm": 0.49415498971939087, "learning_rate": 2.9109730472923093e-06, "loss": 0.11088943481445312, "step": 6019 }, { "epoch": 0.8388490211105692, "grad_norm": 0.25852856040000916, "learning_rate": 2.9060772531858996e-06, "loss": 0.07455062866210938, "step": 6020 }, { "epoch": 0.8389883648017836, "grad_norm": 0.2807832658290863, "learning_rate": 2.9011852569588537e-06, "loss": 0.08133316040039062, "step": 6021 }, { "epoch": 0.839127708492998, "grad_norm": 0.3982226550579071, "learning_rate": 2.896297059698072e-06, "loss": 0.07769775390625, "step": 6022 }, { "epoch": 0.8392670521842124, "grad_norm": 0.6757959723472595, "learning_rate": 2.891412662489599e-06, "loss": 0.124725341796875, "step": 6023 }, { "epoch": 0.8394063958754268, "grad_norm": 0.44980958104133606, "learning_rate": 2.8865320664186412e-06, "loss": 0.09700393676757812, "step": 6024 }, { "epoch": 0.8395457395666411, "grad_norm": 0.25863179564476013, "learning_rate": 2.8816552725695524e-06, "loss": 0.06827640533447266, "step": 6025 }, { "epoch": 0.8396850832578555, "grad_norm": 0.42598164081573486, "learning_rate": 2.8767822820258362e-06, "loss": 0.09490489959716797, "step": 6026 }, { "epoch": 0.8398244269490699, "grad_norm": 0.2794443964958191, "learning_rate": 2.8719130958701736e-06, "loss": 0.0713658332824707, "step": 6027 }, { "epoch": 0.8399637706402843, "grad_norm": 0.42572668194770813, "learning_rate": 2.867047715184377e-06, "loss": 0.09069061279296875, "step": 6028 }, { "epoch": 0.8401031143314986, "grad_norm": 0.4264449179172516, "learning_rate": 2.8621861410494234e-06, "loss": 0.08712005615234375, "step": 6029 }, { "epoch": 0.840242458022713, "grad_norm": 0.4531373679637909, "learning_rate": 2.8573283745454515e-06, "loss": 0.0946197509765625, "step": 6030 }, { "epoch": 0.8403818017139274, "grad_norm": 0.362354576587677, "learning_rate": 2.8524744167517427e-06, "loss": 0.08356380462646484, "step": 6031 }, { "epoch": 0.8405211454051418, "grad_norm": 0.5839719772338867, "learning_rate": 2.847624268746727e-06, "loss": 0.12095832824707031, "step": 6032 }, { "epoch": 0.8406604890963562, "grad_norm": 0.4246070384979248, "learning_rate": 2.842777931608005e-06, "loss": 0.0864706039428711, "step": 6033 }, { "epoch": 0.8407998327875705, "grad_norm": 0.43371450901031494, "learning_rate": 2.8379354064123245e-06, "loss": 0.09790515899658203, "step": 6034 }, { "epoch": 0.8409391764787849, "grad_norm": 0.45481717586517334, "learning_rate": 2.8330966942355752e-06, "loss": 0.11210823059082031, "step": 6035 }, { "epoch": 0.8410785201699993, "grad_norm": 0.28368887305259705, "learning_rate": 2.828261796152818e-06, "loss": 0.06558036804199219, "step": 6036 }, { "epoch": 0.8412178638612137, "grad_norm": 0.438705712556839, "learning_rate": 2.8234307132382486e-06, "loss": 0.09321022033691406, "step": 6037 }, { "epoch": 0.841357207552428, "grad_norm": 0.3374811112880707, "learning_rate": 2.818603446565231e-06, "loss": 0.07131576538085938, "step": 6038 }, { "epoch": 0.8414965512436424, "grad_norm": 0.47786885499954224, "learning_rate": 2.813779997206265e-06, "loss": 0.08928489685058594, "step": 6039 }, { "epoch": 0.8416358949348568, "grad_norm": 0.4252340793609619, "learning_rate": 2.8089603662330155e-06, "loss": 0.08931732177734375, "step": 6040 }, { "epoch": 0.8417752386260712, "grad_norm": 1.1540358066558838, "learning_rate": 2.8041445547162994e-06, "loss": 0.1344432830810547, "step": 6041 }, { "epoch": 0.8419145823172856, "grad_norm": 0.3762575089931488, "learning_rate": 2.7993325637260738e-06, "loss": 0.0845499038696289, "step": 6042 }, { "epoch": 0.8420539260084999, "grad_norm": 0.6358121633529663, "learning_rate": 2.79452439433145e-06, "loss": 0.09679603576660156, "step": 6043 }, { "epoch": 0.8421932696997143, "grad_norm": 0.3084847927093506, "learning_rate": 2.7897200476007015e-06, "loss": 0.07917404174804688, "step": 6044 }, { "epoch": 0.8423326133909287, "grad_norm": 0.6463673710823059, "learning_rate": 2.7849195246012417e-06, "loss": 0.09175682067871094, "step": 6045 }, { "epoch": 0.8424719570821431, "grad_norm": 0.2675143778324127, "learning_rate": 2.780122826399634e-06, "loss": 0.06274604797363281, "step": 6046 }, { "epoch": 0.8426113007733574, "grad_norm": 0.4555037021636963, "learning_rate": 2.775329954061603e-06, "loss": 0.08306884765625, "step": 6047 }, { "epoch": 0.8427506444645718, "grad_norm": 0.3516727685928345, "learning_rate": 2.770540908652004e-06, "loss": 0.08041667938232422, "step": 6048 }, { "epoch": 0.8428899881557862, "grad_norm": 0.6985844373703003, "learning_rate": 2.765755691234866e-06, "loss": 0.12052536010742188, "step": 6049 }, { "epoch": 0.8430293318470006, "grad_norm": 0.3218729496002197, "learning_rate": 2.7609743028733427e-06, "loss": 0.0764617919921875, "step": 6050 }, { "epoch": 0.8431686755382151, "grad_norm": 0.5126708745956421, "learning_rate": 2.7561967446297555e-06, "loss": 0.09080219268798828, "step": 6051 }, { "epoch": 0.8433080192294294, "grad_norm": 0.6177491545677185, "learning_rate": 2.75142301756558e-06, "loss": 0.12530899047851562, "step": 6052 }, { "epoch": 0.8434473629206438, "grad_norm": 0.7910027503967285, "learning_rate": 2.7466531227414074e-06, "loss": 0.11333656311035156, "step": 6053 }, { "epoch": 0.8435867066118582, "grad_norm": 0.2803467810153961, "learning_rate": 2.7418870612170123e-06, "loss": 0.0684051513671875, "step": 6054 }, { "epoch": 0.8437260503030726, "grad_norm": 0.5629757642745972, "learning_rate": 2.737124834051301e-06, "loss": 0.11083221435546875, "step": 6055 }, { "epoch": 0.843865393994287, "grad_norm": 0.3576008379459381, "learning_rate": 2.732366442302339e-06, "loss": 0.09086227416992188, "step": 6056 }, { "epoch": 0.8440047376855013, "grad_norm": 0.3193489909172058, "learning_rate": 2.727611887027326e-06, "loss": 0.08743476867675781, "step": 6057 }, { "epoch": 0.8441440813767157, "grad_norm": 0.34056103229522705, "learning_rate": 2.722861169282611e-06, "loss": 0.08976459503173828, "step": 6058 }, { "epoch": 0.8442834250679301, "grad_norm": 0.6656012535095215, "learning_rate": 2.7181142901237077e-06, "loss": 0.12055301666259766, "step": 6059 }, { "epoch": 0.8444227687591445, "grad_norm": 0.5287493467330933, "learning_rate": 2.71337125060525e-06, "loss": 0.13140106201171875, "step": 6060 }, { "epoch": 0.8445621124503588, "grad_norm": 0.3663991391658783, "learning_rate": 2.7086320517810416e-06, "loss": 0.08819198608398438, "step": 6061 }, { "epoch": 0.8447014561415732, "grad_norm": 0.5506178140640259, "learning_rate": 2.7038966947040247e-06, "loss": 0.12643051147460938, "step": 6062 }, { "epoch": 0.8448407998327876, "grad_norm": 0.3241470754146576, "learning_rate": 2.6991651804262886e-06, "loss": 0.08289813995361328, "step": 6063 }, { "epoch": 0.844980143524002, "grad_norm": 0.35260558128356934, "learning_rate": 2.694437509999057e-06, "loss": 0.07013130187988281, "step": 6064 }, { "epoch": 0.8451194872152163, "grad_norm": 0.46374887228012085, "learning_rate": 2.6897136844727214e-06, "loss": 0.08963680267333984, "step": 6065 }, { "epoch": 0.8452588309064307, "grad_norm": 0.238925501704216, "learning_rate": 2.6849937048968056e-06, "loss": 0.06758499145507812, "step": 6066 }, { "epoch": 0.8453981745976451, "grad_norm": 0.31748783588409424, "learning_rate": 2.680277572319978e-06, "loss": 0.08249664306640625, "step": 6067 }, { "epoch": 0.8455375182888595, "grad_norm": 0.2243543565273285, "learning_rate": 2.675565287790063e-06, "loss": 0.06714248657226562, "step": 6068 }, { "epoch": 0.8456768619800739, "grad_norm": 0.45018714666366577, "learning_rate": 2.6708568523540114e-06, "loss": 0.09334468841552734, "step": 6069 }, { "epoch": 0.8458162056712882, "grad_norm": 0.21868069469928741, "learning_rate": 2.6661522670579398e-06, "loss": 0.06909561157226562, "step": 6070 }, { "epoch": 0.8459555493625026, "grad_norm": 0.24899961054325104, "learning_rate": 2.6614515329470923e-06, "loss": 0.06824970245361328, "step": 6071 }, { "epoch": 0.846094893053717, "grad_norm": 0.22221729159355164, "learning_rate": 2.656754651065869e-06, "loss": 0.061514854431152344, "step": 6072 }, { "epoch": 0.8462342367449314, "grad_norm": 0.2697114646434784, "learning_rate": 2.652061622457813e-06, "loss": 0.07717323303222656, "step": 6073 }, { "epoch": 0.8463735804361457, "grad_norm": 0.5473085045814514, "learning_rate": 2.647372448165606e-06, "loss": 0.11187553405761719, "step": 6074 }, { "epoch": 0.8465129241273601, "grad_norm": 0.30559125542640686, "learning_rate": 2.6426871292310675e-06, "loss": 0.07525825500488281, "step": 6075 }, { "epoch": 0.8466522678185745, "grad_norm": 0.6351538896560669, "learning_rate": 2.638005666695176e-06, "loss": 0.10077667236328125, "step": 6076 }, { "epoch": 0.8467916115097889, "grad_norm": 0.48891040682792664, "learning_rate": 2.6333280615980483e-06, "loss": 0.09486198425292969, "step": 6077 }, { "epoch": 0.8469309552010033, "grad_norm": 0.5667522549629211, "learning_rate": 2.6286543149789355e-06, "loss": 0.11515045166015625, "step": 6078 }, { "epoch": 0.8470702988922176, "grad_norm": 0.4492383301258087, "learning_rate": 2.6239844278762384e-06, "loss": 0.10705947875976562, "step": 6079 }, { "epoch": 0.847209642583432, "grad_norm": 0.7337267994880676, "learning_rate": 2.6193184013274975e-06, "loss": 0.12817955017089844, "step": 6080 }, { "epoch": 0.8473489862746464, "grad_norm": 0.38938257098197937, "learning_rate": 2.614656236369406e-06, "loss": 0.08687973022460938, "step": 6081 }, { "epoch": 0.8474883299658608, "grad_norm": 0.4894862174987793, "learning_rate": 2.60999793403778e-06, "loss": 0.11002159118652344, "step": 6082 }, { "epoch": 0.8476276736570751, "grad_norm": 0.369227796792984, "learning_rate": 2.605343495367596e-06, "loss": 0.08879852294921875, "step": 6083 }, { "epoch": 0.8477670173482895, "grad_norm": 0.47201672196388245, "learning_rate": 2.6006929213929576e-06, "loss": 0.114013671875, "step": 6084 }, { "epoch": 0.8479063610395039, "grad_norm": 0.4520953297615051, "learning_rate": 2.5960462131471233e-06, "loss": 0.08694267272949219, "step": 6085 }, { "epoch": 0.8480457047307183, "grad_norm": 0.4896768629550934, "learning_rate": 2.5914033716624754e-06, "loss": 0.09000873565673828, "step": 6086 }, { "epoch": 0.8481850484219327, "grad_norm": 0.3052300214767456, "learning_rate": 2.5867643979705535e-06, "loss": 0.06956005096435547, "step": 6087 }, { "epoch": 0.848324392113147, "grad_norm": 0.42583993077278137, "learning_rate": 2.582129293102038e-06, "loss": 0.07254981994628906, "step": 6088 }, { "epoch": 0.8484637358043614, "grad_norm": 0.31622254848480225, "learning_rate": 2.577498058086736e-06, "loss": 0.08102607727050781, "step": 6089 }, { "epoch": 0.8486030794955758, "grad_norm": 0.32135188579559326, "learning_rate": 2.5728706939535976e-06, "loss": 0.07416534423828125, "step": 6090 }, { "epoch": 0.8487424231867903, "grad_norm": 0.2672739624977112, "learning_rate": 2.568247201730727e-06, "loss": 0.07895469665527344, "step": 6091 }, { "epoch": 0.8488817668780047, "grad_norm": 0.32579171657562256, "learning_rate": 2.5636275824453515e-06, "loss": 0.08190727233886719, "step": 6092 }, { "epoch": 0.849021110569219, "grad_norm": 0.518416702747345, "learning_rate": 2.559011837123846e-06, "loss": 0.1014871597290039, "step": 6093 }, { "epoch": 0.8491604542604334, "grad_norm": 0.6070149540901184, "learning_rate": 2.554399966791732e-06, "loss": 0.11040782928466797, "step": 6094 }, { "epoch": 0.8492997979516478, "grad_norm": 0.26617398858070374, "learning_rate": 2.5497919724736564e-06, "loss": 0.0724630355834961, "step": 6095 }, { "epoch": 0.8494391416428622, "grad_norm": 0.44468075037002563, "learning_rate": 2.545187855193403e-06, "loss": 0.11523056030273438, "step": 6096 }, { "epoch": 0.8495784853340765, "grad_norm": 0.3417418897151947, "learning_rate": 2.5405876159739083e-06, "loss": 0.09253311157226562, "step": 6097 }, { "epoch": 0.8497178290252909, "grad_norm": 0.6098604202270508, "learning_rate": 2.535991255837247e-06, "loss": 0.1244049072265625, "step": 6098 }, { "epoch": 0.8498571727165053, "grad_norm": 0.42331188917160034, "learning_rate": 2.5313987758046164e-06, "loss": 0.10073089599609375, "step": 6099 }, { "epoch": 0.8499965164077197, "grad_norm": 0.41634684801101685, "learning_rate": 2.526810176896357e-06, "loss": 0.09232425689697266, "step": 6100 }, { "epoch": 0.850135860098934, "grad_norm": 0.48981231451034546, "learning_rate": 2.5222254601319595e-06, "loss": 0.0906972885131836, "step": 6101 }, { "epoch": 0.8502752037901484, "grad_norm": 0.3192691206932068, "learning_rate": 2.5176446265300424e-06, "loss": 0.07143211364746094, "step": 6102 }, { "epoch": 0.8504145474813628, "grad_norm": 0.6801007390022278, "learning_rate": 2.5130676771083585e-06, "loss": 0.09728527069091797, "step": 6103 }, { "epoch": 0.8505538911725772, "grad_norm": 0.3933090567588806, "learning_rate": 2.5084946128838007e-06, "loss": 0.08660697937011719, "step": 6104 }, { "epoch": 0.8506932348637916, "grad_norm": 0.4547611474990845, "learning_rate": 2.5039254348724096e-06, "loss": 0.11753273010253906, "step": 6105 }, { "epoch": 0.8508325785550059, "grad_norm": 0.5446875691413879, "learning_rate": 2.499360144089342e-06, "loss": 0.08763694763183594, "step": 6106 }, { "epoch": 0.8509719222462203, "grad_norm": 0.27666905522346497, "learning_rate": 2.4947987415489004e-06, "loss": 0.07766246795654297, "step": 6107 }, { "epoch": 0.8511112659374347, "grad_norm": 0.5309749841690063, "learning_rate": 2.490241228264527e-06, "loss": 0.09719657897949219, "step": 6108 }, { "epoch": 0.8512506096286491, "grad_norm": 0.46976763010025024, "learning_rate": 2.4856876052488032e-06, "loss": 0.09152984619140625, "step": 6109 }, { "epoch": 0.8513899533198634, "grad_norm": 0.2724052965641022, "learning_rate": 2.481137873513435e-06, "loss": 0.07703590393066406, "step": 6110 }, { "epoch": 0.8515292970110778, "grad_norm": 0.5727488994598389, "learning_rate": 2.476592034069265e-06, "loss": 0.10336875915527344, "step": 6111 }, { "epoch": 0.8516686407022922, "grad_norm": 0.23325027525424957, "learning_rate": 2.4720500879262787e-06, "loss": 0.06134986877441406, "step": 6112 }, { "epoch": 0.8518079843935066, "grad_norm": 0.551236093044281, "learning_rate": 2.4675120360935977e-06, "loss": 0.08746910095214844, "step": 6113 }, { "epoch": 0.851947328084721, "grad_norm": 0.8448876738548279, "learning_rate": 2.4629778795794623e-06, "loss": 0.11013984680175781, "step": 6114 }, { "epoch": 0.8520866717759353, "grad_norm": 0.47471508383750916, "learning_rate": 2.458447619391271e-06, "loss": 0.10681915283203125, "step": 6115 }, { "epoch": 0.8522260154671497, "grad_norm": 0.5640620589256287, "learning_rate": 2.453921256535534e-06, "loss": 0.12112998962402344, "step": 6116 }, { "epoch": 0.8523653591583641, "grad_norm": 0.40197497606277466, "learning_rate": 2.449398792017914e-06, "loss": 0.09423637390136719, "step": 6117 }, { "epoch": 0.8525047028495785, "grad_norm": 0.6447674632072449, "learning_rate": 2.4448802268431914e-06, "loss": 0.10263633728027344, "step": 6118 }, { "epoch": 0.8526440465407928, "grad_norm": 0.19194839894771576, "learning_rate": 2.440365562015292e-06, "loss": 0.06379890441894531, "step": 6119 }, { "epoch": 0.8527833902320072, "grad_norm": 0.3000054955482483, "learning_rate": 2.4358547985372806e-06, "loss": 0.077667236328125, "step": 6120 }, { "epoch": 0.8529227339232216, "grad_norm": 0.5129270553588867, "learning_rate": 2.431347937411328e-06, "loss": 0.10344505310058594, "step": 6121 }, { "epoch": 0.853062077614436, "grad_norm": 0.39717739820480347, "learning_rate": 2.426844979638763e-06, "loss": 0.09134674072265625, "step": 6122 }, { "epoch": 0.8532014213056504, "grad_norm": 0.37627261877059937, "learning_rate": 2.4223459262200422e-06, "loss": 0.08382034301757812, "step": 6123 }, { "epoch": 0.8533407649968647, "grad_norm": 0.32273003458976746, "learning_rate": 2.4178507781547577e-06, "loss": 0.08122634887695312, "step": 6124 }, { "epoch": 0.8534801086880791, "grad_norm": 0.1896367371082306, "learning_rate": 2.4133595364416174e-06, "loss": 0.0527191162109375, "step": 6125 }, { "epoch": 0.8536194523792935, "grad_norm": 0.30182042717933655, "learning_rate": 2.4088722020784828e-06, "loss": 0.07706832885742188, "step": 6126 }, { "epoch": 0.8537587960705079, "grad_norm": 0.4277908205986023, "learning_rate": 2.404388776062332e-06, "loss": 0.09140205383300781, "step": 6127 }, { "epoch": 0.8538981397617222, "grad_norm": 0.4095993638038635, "learning_rate": 2.399909259389277e-06, "loss": 0.09116363525390625, "step": 6128 }, { "epoch": 0.8540374834529366, "grad_norm": 0.3366397023200989, "learning_rate": 2.3954336530545684e-06, "loss": 0.07817244529724121, "step": 6129 }, { "epoch": 0.854176827144151, "grad_norm": 0.7705382704734802, "learning_rate": 2.3909619580525867e-06, "loss": 0.1641864776611328, "step": 6130 }, { "epoch": 0.8543161708353654, "grad_norm": 0.2510642409324646, "learning_rate": 2.3864941753768345e-06, "loss": 0.07448959350585938, "step": 6131 }, { "epoch": 0.8544555145265799, "grad_norm": 0.5485810041427612, "learning_rate": 2.3820303060199513e-06, "loss": 0.0965738296508789, "step": 6132 }, { "epoch": 0.8545948582177942, "grad_norm": 0.5430808067321777, "learning_rate": 2.3775703509737102e-06, "loss": 0.10214042663574219, "step": 6133 }, { "epoch": 0.8547342019090086, "grad_norm": 0.47783732414245605, "learning_rate": 2.3731143112290124e-06, "loss": 0.08200263977050781, "step": 6134 }, { "epoch": 0.854873545600223, "grad_norm": 0.5380401611328125, "learning_rate": 2.368662187775883e-06, "loss": 0.11745071411132812, "step": 6135 }, { "epoch": 0.8550128892914374, "grad_norm": 0.4718419909477234, "learning_rate": 2.3642139816034872e-06, "loss": 0.09151268005371094, "step": 6136 }, { "epoch": 0.8551522329826517, "grad_norm": 0.38381996750831604, "learning_rate": 2.359769693700111e-06, "loss": 0.0989227294921875, "step": 6137 }, { "epoch": 0.8552915766738661, "grad_norm": 0.34954148530960083, "learning_rate": 2.3553293250531794e-06, "loss": 0.08082962036132812, "step": 6138 }, { "epoch": 0.8554309203650805, "grad_norm": 0.4856241047382355, "learning_rate": 2.350892876649231e-06, "loss": 0.08167171478271484, "step": 6139 }, { "epoch": 0.8555702640562949, "grad_norm": 0.6339848041534424, "learning_rate": 2.3464603494739513e-06, "loss": 0.10645866394042969, "step": 6140 }, { "epoch": 0.8557096077475093, "grad_norm": 0.8167253732681274, "learning_rate": 2.3420317445121497e-06, "loss": 0.12325668334960938, "step": 6141 }, { "epoch": 0.8558489514387236, "grad_norm": 0.45094019174575806, "learning_rate": 2.3376070627477555e-06, "loss": 0.09259605407714844, "step": 6142 }, { "epoch": 0.855988295129938, "grad_norm": 0.19517521560192108, "learning_rate": 2.333186305163828e-06, "loss": 0.05629730224609375, "step": 6143 }, { "epoch": 0.8561276388211524, "grad_norm": 0.5733916163444519, "learning_rate": 2.3287694727425623e-06, "loss": 0.09826278686523438, "step": 6144 }, { "epoch": 0.8562669825123668, "grad_norm": 0.2823205888271332, "learning_rate": 2.3243565664652844e-06, "loss": 0.07054615020751953, "step": 6145 }, { "epoch": 0.8564063262035811, "grad_norm": 0.5767850279808044, "learning_rate": 2.31994758731243e-06, "loss": 0.1253185272216797, "step": 6146 }, { "epoch": 0.8565456698947955, "grad_norm": 0.4193795323371887, "learning_rate": 2.3155425362635863e-06, "loss": 0.08545112609863281, "step": 6147 }, { "epoch": 0.8566850135860099, "grad_norm": 0.3408593237400055, "learning_rate": 2.311141414297442e-06, "loss": 0.06340408325195312, "step": 6148 }, { "epoch": 0.8568243572772243, "grad_norm": 0.2660634219646454, "learning_rate": 2.3067442223918345e-06, "loss": 0.0651702880859375, "step": 6149 }, { "epoch": 0.8569637009684387, "grad_norm": 0.5275837779045105, "learning_rate": 2.3023509615237138e-06, "loss": 0.10289573669433594, "step": 6150 }, { "epoch": 0.857103044659653, "grad_norm": 0.26569780707359314, "learning_rate": 2.2979616326691658e-06, "loss": 0.06591224670410156, "step": 6151 }, { "epoch": 0.8572423883508674, "grad_norm": 0.21894851326942444, "learning_rate": 2.2935762368034075e-06, "loss": 0.06441116333007812, "step": 6152 }, { "epoch": 0.8573817320420818, "grad_norm": 0.345255047082901, "learning_rate": 2.289194774900756e-06, "loss": 0.0834512710571289, "step": 6153 }, { "epoch": 0.8575210757332962, "grad_norm": 0.23532091081142426, "learning_rate": 2.2848172479346806e-06, "loss": 0.06847763061523438, "step": 6154 }, { "epoch": 0.8576604194245105, "grad_norm": 0.5739054679870605, "learning_rate": 2.280443656877769e-06, "loss": 0.12067413330078125, "step": 6155 }, { "epoch": 0.8577997631157249, "grad_norm": 0.376862496137619, "learning_rate": 2.276074002701736e-06, "loss": 0.07921409606933594, "step": 6156 }, { "epoch": 0.8579391068069393, "grad_norm": 0.3740381896495819, "learning_rate": 2.271708286377414e-06, "loss": 0.07156181335449219, "step": 6157 }, { "epoch": 0.8580784504981537, "grad_norm": 0.46686938405036926, "learning_rate": 2.267346508874766e-06, "loss": 0.0925140380859375, "step": 6158 }, { "epoch": 0.858217794189368, "grad_norm": 0.21648165583610535, "learning_rate": 2.262988671162882e-06, "loss": 0.05639076232910156, "step": 6159 }, { "epoch": 0.8583571378805824, "grad_norm": 0.28644120693206787, "learning_rate": 2.258634774209969e-06, "loss": 0.06700897216796875, "step": 6160 }, { "epoch": 0.8584964815717968, "grad_norm": 0.891989529132843, "learning_rate": 2.2542848189833675e-06, "loss": 0.10189247131347656, "step": 6161 }, { "epoch": 0.8586358252630112, "grad_norm": 0.3060687482357025, "learning_rate": 2.249938806449539e-06, "loss": 0.06688880920410156, "step": 6162 }, { "epoch": 0.8587751689542256, "grad_norm": 0.5004172325134277, "learning_rate": 2.2455967375740716e-06, "loss": 0.1014251708984375, "step": 6163 }, { "epoch": 0.8589145126454399, "grad_norm": 0.2624199688434601, "learning_rate": 2.241258613321664e-06, "loss": 0.07732963562011719, "step": 6164 }, { "epoch": 0.8590538563366543, "grad_norm": 0.7008524537086487, "learning_rate": 2.2369244346561516e-06, "loss": 0.10513687133789062, "step": 6165 }, { "epoch": 0.8591932000278687, "grad_norm": 0.6817530989646912, "learning_rate": 2.2325942025404968e-06, "loss": 0.09541511535644531, "step": 6166 }, { "epoch": 0.8593325437190831, "grad_norm": 0.6515265107154846, "learning_rate": 2.2282679179367684e-06, "loss": 0.12012481689453125, "step": 6167 }, { "epoch": 0.8594718874102975, "grad_norm": 0.35227033495903015, "learning_rate": 2.2239455818061793e-06, "loss": 0.07697486877441406, "step": 6168 }, { "epoch": 0.8596112311015118, "grad_norm": 0.7372114658355713, "learning_rate": 2.219627195109042e-06, "loss": 0.10674285888671875, "step": 6169 }, { "epoch": 0.8597505747927262, "grad_norm": 0.3742528259754181, "learning_rate": 2.2153127588048127e-06, "loss": 0.08556556701660156, "step": 6170 }, { "epoch": 0.8598899184839406, "grad_norm": 0.6937990188598633, "learning_rate": 2.2110022738520543e-06, "loss": 0.108154296875, "step": 6171 }, { "epoch": 0.8600292621751551, "grad_norm": 0.28995242714881897, "learning_rate": 2.20669574120846e-06, "loss": 0.08116436004638672, "step": 6172 }, { "epoch": 0.8601686058663695, "grad_norm": 0.4390193223953247, "learning_rate": 2.202393161830849e-06, "loss": 0.09386062622070312, "step": 6173 }, { "epoch": 0.8603079495575838, "grad_norm": 0.3394027054309845, "learning_rate": 2.1980945366751503e-06, "loss": 0.08933067321777344, "step": 6174 }, { "epoch": 0.8604472932487982, "grad_norm": 1.2987918853759766, "learning_rate": 2.1937998666964176e-06, "loss": 0.16503620147705078, "step": 6175 }, { "epoch": 0.8605866369400126, "grad_norm": 0.4892064034938812, "learning_rate": 2.189509152848832e-06, "loss": 0.10833740234375, "step": 6176 }, { "epoch": 0.860725980631227, "grad_norm": 0.34563905000686646, "learning_rate": 2.185222396085698e-06, "loss": 0.07393074035644531, "step": 6177 }, { "epoch": 0.8608653243224413, "grad_norm": 0.3832445740699768, "learning_rate": 2.1809395973594263e-06, "loss": 0.07958221435546875, "step": 6178 }, { "epoch": 0.8610046680136557, "grad_norm": 0.22865910828113556, "learning_rate": 2.176660757621558e-06, "loss": 0.05886268615722656, "step": 6179 }, { "epoch": 0.8611440117048701, "grad_norm": 0.24472646415233612, "learning_rate": 2.1723858778227537e-06, "loss": 0.06394648551940918, "step": 6180 }, { "epoch": 0.8612833553960845, "grad_norm": 0.4410075545310974, "learning_rate": 2.1681149589128016e-06, "loss": 0.08817100524902344, "step": 6181 }, { "epoch": 0.8614226990872988, "grad_norm": 0.5140257477760315, "learning_rate": 2.1638480018405916e-06, "loss": 0.0994405746459961, "step": 6182 }, { "epoch": 0.8615620427785132, "grad_norm": 0.23163393139839172, "learning_rate": 2.15958500755415e-06, "loss": 0.06215190887451172, "step": 6183 }, { "epoch": 0.8617013864697276, "grad_norm": 0.348198264837265, "learning_rate": 2.1553259770006196e-06, "loss": 0.07444000244140625, "step": 6184 }, { "epoch": 0.861840730160942, "grad_norm": 0.4754301607608795, "learning_rate": 2.1510709111262584e-06, "loss": 0.09931755065917969, "step": 6185 }, { "epoch": 0.8619800738521564, "grad_norm": 0.5701733827590942, "learning_rate": 2.1468198108764373e-06, "loss": 0.11285781860351562, "step": 6186 }, { "epoch": 0.8621194175433707, "grad_norm": 0.25285398960113525, "learning_rate": 2.1425726771956578e-06, "loss": 0.07250022888183594, "step": 6187 }, { "epoch": 0.8622587612345851, "grad_norm": 0.6970971822738647, "learning_rate": 2.1383295110275437e-06, "loss": 0.12930011749267578, "step": 6188 }, { "epoch": 0.8623981049257995, "grad_norm": 0.7598935961723328, "learning_rate": 2.1340903133148205e-06, "loss": 0.1602611541748047, "step": 6189 }, { "epoch": 0.8625374486170139, "grad_norm": 0.3051297962665558, "learning_rate": 2.1298550849993437e-06, "loss": 0.0764760971069336, "step": 6190 }, { "epoch": 0.8626767923082282, "grad_norm": 0.6686179637908936, "learning_rate": 2.1256238270220853e-06, "loss": 0.09928512573242188, "step": 6191 }, { "epoch": 0.8628161359994426, "grad_norm": 0.373225599527359, "learning_rate": 2.1213965403231328e-06, "loss": 0.10668563842773438, "step": 6192 }, { "epoch": 0.862955479690657, "grad_norm": 0.5725539922714233, "learning_rate": 2.117173225841691e-06, "loss": 0.11554384231567383, "step": 6193 }, { "epoch": 0.8630948233818714, "grad_norm": 0.35374873876571655, "learning_rate": 2.112953884516091e-06, "loss": 0.0678253173828125, "step": 6194 }, { "epoch": 0.8632341670730858, "grad_norm": 0.267788827419281, "learning_rate": 2.1087385172837705e-06, "loss": 0.06675338745117188, "step": 6195 }, { "epoch": 0.8633735107643001, "grad_norm": 0.6978381276130676, "learning_rate": 2.1045271250812817e-06, "loss": 0.12001991271972656, "step": 6196 }, { "epoch": 0.8635128544555145, "grad_norm": 0.5822411179542542, "learning_rate": 2.100319708844307e-06, "loss": 0.118865966796875, "step": 6197 }, { "epoch": 0.8636521981467289, "grad_norm": 0.6201586723327637, "learning_rate": 2.0961162695076397e-06, "loss": 0.11176395416259766, "step": 6198 }, { "epoch": 0.8637915418379433, "grad_norm": 0.47711411118507385, "learning_rate": 2.091916808005179e-06, "loss": 0.11138343811035156, "step": 6199 }, { "epoch": 0.8639308855291576, "grad_norm": 0.5287181735038757, "learning_rate": 2.08772132526996e-06, "loss": 0.09464645385742188, "step": 6200 }, { "epoch": 0.864070229220372, "grad_norm": 0.369865357875824, "learning_rate": 2.083529822234116e-06, "loss": 0.09836196899414062, "step": 6201 }, { "epoch": 0.8642095729115864, "grad_norm": 0.40709400177001953, "learning_rate": 2.079342299828908e-06, "loss": 0.07903099060058594, "step": 6202 }, { "epoch": 0.8643489166028008, "grad_norm": 0.500076949596405, "learning_rate": 2.075158758984701e-06, "loss": 0.08227157592773438, "step": 6203 }, { "epoch": 0.8644882602940152, "grad_norm": 0.4871565103530884, "learning_rate": 2.070979200630987e-06, "loss": 0.08368682861328125, "step": 6204 }, { "epoch": 0.8646276039852295, "grad_norm": 0.5697380900382996, "learning_rate": 2.0668036256963743e-06, "loss": 0.11416244506835938, "step": 6205 }, { "epoch": 0.8647669476764439, "grad_norm": 0.22186125814914703, "learning_rate": 2.0626320351085716e-06, "loss": 0.06265544891357422, "step": 6206 }, { "epoch": 0.8649062913676583, "grad_norm": 0.3469395339488983, "learning_rate": 2.058464429794409e-06, "loss": 0.07046699523925781, "step": 6207 }, { "epoch": 0.8650456350588727, "grad_norm": 0.40573573112487793, "learning_rate": 2.054300810679839e-06, "loss": 0.08606910705566406, "step": 6208 }, { "epoch": 0.865184978750087, "grad_norm": 0.2924923300743103, "learning_rate": 2.0501411786899263e-06, "loss": 0.07188987731933594, "step": 6209 }, { "epoch": 0.8653243224413014, "grad_norm": 0.2580891251564026, "learning_rate": 2.045985534748842e-06, "loss": 0.07619667053222656, "step": 6210 }, { "epoch": 0.8654636661325158, "grad_norm": 0.30118241906166077, "learning_rate": 2.0418338797798686e-06, "loss": 0.08199119567871094, "step": 6211 }, { "epoch": 0.8656030098237303, "grad_norm": 0.4043683111667633, "learning_rate": 2.0376862147054164e-06, "loss": 0.10099411010742188, "step": 6212 }, { "epoch": 0.8657423535149447, "grad_norm": 0.4757402539253235, "learning_rate": 2.0335425404470045e-06, "loss": 0.08879566192626953, "step": 6213 }, { "epoch": 0.865881697206159, "grad_norm": 0.3641589879989624, "learning_rate": 2.0294028579252557e-06, "loss": 0.08553504943847656, "step": 6214 }, { "epoch": 0.8660210408973734, "grad_norm": 0.3024021089076996, "learning_rate": 2.025267168059919e-06, "loss": 0.07017135620117188, "step": 6215 }, { "epoch": 0.8661603845885878, "grad_norm": 0.2530957758426666, "learning_rate": 2.0211354717698437e-06, "loss": 0.06912803649902344, "step": 6216 }, { "epoch": 0.8662997282798022, "grad_norm": 0.27706608176231384, "learning_rate": 2.017007769973005e-06, "loss": 0.07220077514648438, "step": 6217 }, { "epoch": 0.8664390719710166, "grad_norm": 0.40846511721611023, "learning_rate": 2.01288406358648e-06, "loss": 0.08023834228515625, "step": 6218 }, { "epoch": 0.8665784156622309, "grad_norm": 0.543289065361023, "learning_rate": 2.00876435352646e-06, "loss": 0.09698677062988281, "step": 6219 }, { "epoch": 0.8667177593534453, "grad_norm": 0.20729734003543854, "learning_rate": 2.00464864070826e-06, "loss": 0.05886268615722656, "step": 6220 }, { "epoch": 0.8668571030446597, "grad_norm": 0.7065600752830505, "learning_rate": 2.0005369260462904e-06, "loss": 0.09293365478515625, "step": 6221 }, { "epoch": 0.8669964467358741, "grad_norm": 0.8685945868492126, "learning_rate": 1.996429210454078e-06, "loss": 0.1315479278564453, "step": 6222 }, { "epoch": 0.8671357904270884, "grad_norm": 0.688685953617096, "learning_rate": 1.9923254948442648e-06, "loss": 0.12176704406738281, "step": 6223 }, { "epoch": 0.8672751341183028, "grad_norm": 0.23695588111877441, "learning_rate": 1.98822578012861e-06, "loss": 0.060810089111328125, "step": 6224 }, { "epoch": 0.8674144778095172, "grad_norm": 0.5983558893203735, "learning_rate": 1.9841300672179662e-06, "loss": 0.10082054138183594, "step": 6225 }, { "epoch": 0.8675538215007316, "grad_norm": 0.6987488269805908, "learning_rate": 1.9800383570223157e-06, "loss": 0.12230491638183594, "step": 6226 }, { "epoch": 0.867693165191946, "grad_norm": 0.21722497045993805, "learning_rate": 1.97595065045074e-06, "loss": 0.06642532348632812, "step": 6227 }, { "epoch": 0.8678325088831603, "grad_norm": 0.36800140142440796, "learning_rate": 1.9718669484114315e-06, "loss": 0.07820510864257812, "step": 6228 }, { "epoch": 0.8679718525743747, "grad_norm": 0.26743701100349426, "learning_rate": 1.9677872518116948e-06, "loss": 0.07381343841552734, "step": 6229 }, { "epoch": 0.8681111962655891, "grad_norm": 0.401845782995224, "learning_rate": 1.963711561557955e-06, "loss": 0.08802032470703125, "step": 6230 }, { "epoch": 0.8682505399568035, "grad_norm": 0.47003045678138733, "learning_rate": 1.959639878555728e-06, "loss": 0.10158634185791016, "step": 6231 }, { "epoch": 0.8683898836480178, "grad_norm": 0.3978874385356903, "learning_rate": 1.95557220370965e-06, "loss": 0.08491897583007812, "step": 6232 }, { "epoch": 0.8685292273392322, "grad_norm": 0.36235830187797546, "learning_rate": 1.9515085379234656e-06, "loss": 0.07915687561035156, "step": 6233 }, { "epoch": 0.8686685710304466, "grad_norm": 0.5926169157028198, "learning_rate": 1.9474488821000357e-06, "loss": 0.08065223693847656, "step": 6234 }, { "epoch": 0.868807914721661, "grad_norm": 0.4906582534313202, "learning_rate": 1.943393237141311e-06, "loss": 0.10334110260009766, "step": 6235 }, { "epoch": 0.8689472584128753, "grad_norm": 0.25246039032936096, "learning_rate": 1.9393416039483724e-06, "loss": 0.06314659118652344, "step": 6236 }, { "epoch": 0.8690866021040897, "grad_norm": 0.5675008893013, "learning_rate": 1.9352939834214004e-06, "loss": 0.10373306274414062, "step": 6237 }, { "epoch": 0.8692259457953041, "grad_norm": 0.593263566493988, "learning_rate": 1.931250376459679e-06, "loss": 0.13410520553588867, "step": 6238 }, { "epoch": 0.8693652894865185, "grad_norm": 0.36081454157829285, "learning_rate": 1.9272107839616062e-06, "loss": 0.09426689147949219, "step": 6239 }, { "epoch": 0.8695046331777329, "grad_norm": 0.32264989614486694, "learning_rate": 1.923175206824688e-06, "loss": 0.08277511596679688, "step": 6240 }, { "epoch": 0.8696439768689472, "grad_norm": 0.4625852406024933, "learning_rate": 1.9191436459455406e-06, "loss": 0.09335708618164062, "step": 6241 }, { "epoch": 0.8697833205601616, "grad_norm": 0.6318019032478333, "learning_rate": 1.915116102219883e-06, "loss": 0.11864662170410156, "step": 6242 }, { "epoch": 0.869922664251376, "grad_norm": 0.39255353808403015, "learning_rate": 1.911092576542537e-06, "loss": 0.09418296813964844, "step": 6243 }, { "epoch": 0.8700620079425904, "grad_norm": 0.3702635169029236, "learning_rate": 1.9070730698074458e-06, "loss": 0.09469032287597656, "step": 6244 }, { "epoch": 0.8702013516338047, "grad_norm": 0.45386284589767456, "learning_rate": 1.9030575829076525e-06, "loss": 0.0948038101196289, "step": 6245 }, { "epoch": 0.8703406953250191, "grad_norm": 0.26332685351371765, "learning_rate": 1.8990461167353014e-06, "loss": 0.06401777267456055, "step": 6246 }, { "epoch": 0.8704800390162335, "grad_norm": 0.311318576335907, "learning_rate": 1.8950386721816549e-06, "loss": 0.08427047729492188, "step": 6247 }, { "epoch": 0.8706193827074479, "grad_norm": 0.40963122248649597, "learning_rate": 1.8910352501370677e-06, "loss": 0.07628822326660156, "step": 6248 }, { "epoch": 0.8707587263986623, "grad_norm": 0.2988976538181305, "learning_rate": 1.887035851491017e-06, "loss": 0.07598525285720825, "step": 6249 }, { "epoch": 0.8708980700898766, "grad_norm": 0.2402399182319641, "learning_rate": 1.8830404771320721e-06, "loss": 0.06158256530761719, "step": 6250 }, { "epoch": 0.871037413781091, "grad_norm": 0.39784547686576843, "learning_rate": 1.8790491279479139e-06, "loss": 0.08606243133544922, "step": 6251 }, { "epoch": 0.8711767574723055, "grad_norm": 0.28068819642066956, "learning_rate": 1.8750618048253377e-06, "loss": 0.08400154113769531, "step": 6252 }, { "epoch": 0.8713161011635199, "grad_norm": 0.3113216459751129, "learning_rate": 1.8710785086502237e-06, "loss": 0.06360626220703125, "step": 6253 }, { "epoch": 0.8714554448547343, "grad_norm": 0.41839560866355896, "learning_rate": 1.867099240307575e-06, "loss": 0.09745979309082031, "step": 6254 }, { "epoch": 0.8715947885459486, "grad_norm": 0.21026763319969177, "learning_rate": 1.8631240006814933e-06, "loss": 0.06408309936523438, "step": 6255 }, { "epoch": 0.871734132237163, "grad_norm": 0.5461362600326538, "learning_rate": 1.85915279065519e-06, "loss": 0.1246337890625, "step": 6256 }, { "epoch": 0.8718734759283774, "grad_norm": 0.42185425758361816, "learning_rate": 1.85518561111097e-06, "loss": 0.10392951965332031, "step": 6257 }, { "epoch": 0.8720128196195918, "grad_norm": 0.5524580478668213, "learning_rate": 1.85122246293026e-06, "loss": 0.1206512451171875, "step": 6258 }, { "epoch": 0.8721521633108061, "grad_norm": 0.5464361906051636, "learning_rate": 1.847263346993573e-06, "loss": 0.09935855865478516, "step": 6259 }, { "epoch": 0.8722915070020205, "grad_norm": 0.39103132486343384, "learning_rate": 1.8433082641805323e-06, "loss": 0.09817123413085938, "step": 6260 }, { "epoch": 0.8724308506932349, "grad_norm": 0.36702439188957214, "learning_rate": 1.8393572153698724e-06, "loss": 0.09551811218261719, "step": 6261 }, { "epoch": 0.8725701943844493, "grad_norm": 0.39921388030052185, "learning_rate": 1.835410201439427e-06, "loss": 0.08054161071777344, "step": 6262 }, { "epoch": 0.8727095380756636, "grad_norm": 0.494232714176178, "learning_rate": 1.83146722326613e-06, "loss": 0.12225341796875, "step": 6263 }, { "epoch": 0.872848881766878, "grad_norm": 0.38751542568206787, "learning_rate": 1.8275282817260187e-06, "loss": 0.08472251892089844, "step": 6264 }, { "epoch": 0.8729882254580924, "grad_norm": 0.6939036846160889, "learning_rate": 1.8235933776942394e-06, "loss": 0.12839508056640625, "step": 6265 }, { "epoch": 0.8731275691493068, "grad_norm": 0.5964553952217102, "learning_rate": 1.8196625120450396e-06, "loss": 0.0962982177734375, "step": 6266 }, { "epoch": 0.8732669128405212, "grad_norm": 0.2522365152835846, "learning_rate": 1.8157356856517626e-06, "loss": 0.06416702270507812, "step": 6267 }, { "epoch": 0.8734062565317355, "grad_norm": 0.2652980387210846, "learning_rate": 1.8118128993868667e-06, "loss": 0.07312965393066406, "step": 6268 }, { "epoch": 0.8735456002229499, "grad_norm": 0.23944240808486938, "learning_rate": 1.8078941541218964e-06, "loss": 0.0690765380859375, "step": 6269 }, { "epoch": 0.8736849439141643, "grad_norm": 0.4796037971973419, "learning_rate": 1.8039794507275155e-06, "loss": 0.10884475708007812, "step": 6270 }, { "epoch": 0.8738242876053787, "grad_norm": 0.39657866954803467, "learning_rate": 1.8000687900734748e-06, "loss": 0.08995819091796875, "step": 6271 }, { "epoch": 0.873963631296593, "grad_norm": 0.40571677684783936, "learning_rate": 1.796162173028637e-06, "loss": 0.08679962158203125, "step": 6272 }, { "epoch": 0.8741029749878074, "grad_norm": 0.32280442118644714, "learning_rate": 1.7922596004609682e-06, "loss": 0.07275772094726562, "step": 6273 }, { "epoch": 0.8742423186790218, "grad_norm": 0.4451875686645508, "learning_rate": 1.7883610732375278e-06, "loss": 0.10102462768554688, "step": 6274 }, { "epoch": 0.8743816623702362, "grad_norm": 0.4532744586467743, "learning_rate": 1.784466592224472e-06, "loss": 0.10247039794921875, "step": 6275 }, { "epoch": 0.8745210060614506, "grad_norm": 0.41182005405426025, "learning_rate": 1.7805761582870729e-06, "loss": 0.0906524658203125, "step": 6276 }, { "epoch": 0.8746603497526649, "grad_norm": 0.3910611867904663, "learning_rate": 1.7766897722896993e-06, "loss": 0.07802057266235352, "step": 6277 }, { "epoch": 0.8747996934438793, "grad_norm": 0.38633018732070923, "learning_rate": 1.7728074350958068e-06, "loss": 0.0752410888671875, "step": 6278 }, { "epoch": 0.8749390371350937, "grad_norm": 0.3701724708080292, "learning_rate": 1.7689291475679748e-06, "loss": 0.10185432434082031, "step": 6279 }, { "epoch": 0.8750783808263081, "grad_norm": 0.2864452600479126, "learning_rate": 1.7650549105678583e-06, "loss": 0.07678890228271484, "step": 6280 }, { "epoch": 0.8752177245175224, "grad_norm": 0.4438377320766449, "learning_rate": 1.7611847249562352e-06, "loss": 0.09981346130371094, "step": 6281 }, { "epoch": 0.8753570682087368, "grad_norm": 0.653642475605011, "learning_rate": 1.7573185915929625e-06, "loss": 0.08833503723144531, "step": 6282 }, { "epoch": 0.8754964118999512, "grad_norm": 0.4506444036960602, "learning_rate": 1.7534565113370106e-06, "loss": 0.09870147705078125, "step": 6283 }, { "epoch": 0.8756357555911656, "grad_norm": 0.2357621043920517, "learning_rate": 1.749598485046451e-06, "loss": 0.06714439392089844, "step": 6284 }, { "epoch": 0.87577509928238, "grad_norm": 0.3442234694957733, "learning_rate": 1.7457445135784423e-06, "loss": 0.08381462097167969, "step": 6285 }, { "epoch": 0.8759144429735943, "grad_norm": 0.4789642095565796, "learning_rate": 1.7418945977892488e-06, "loss": 0.10666275024414062, "step": 6286 }, { "epoch": 0.8760537866648087, "grad_norm": 0.36808258295059204, "learning_rate": 1.7380487385342371e-06, "loss": 0.06828880310058594, "step": 6287 }, { "epoch": 0.8761931303560231, "grad_norm": 0.4764224886894226, "learning_rate": 1.7342069366678705e-06, "loss": 0.11638832092285156, "step": 6288 }, { "epoch": 0.8763324740472375, "grad_norm": 0.5305808186531067, "learning_rate": 1.7303691930437062e-06, "loss": 0.0950927734375, "step": 6289 }, { "epoch": 0.8764718177384518, "grad_norm": 0.24285465478897095, "learning_rate": 1.726535508514402e-06, "loss": 0.06531906127929688, "step": 6290 }, { "epoch": 0.8766111614296662, "grad_norm": 0.6481537818908691, "learning_rate": 1.7227058839317213e-06, "loss": 0.12899255752563477, "step": 6291 }, { "epoch": 0.8767505051208806, "grad_norm": 0.31092897057533264, "learning_rate": 1.718880320146512e-06, "loss": 0.07834815979003906, "step": 6292 }, { "epoch": 0.8768898488120951, "grad_norm": 0.4491005539894104, "learning_rate": 1.7150588180087302e-06, "loss": 0.10754728317260742, "step": 6293 }, { "epoch": 0.8770291925033095, "grad_norm": 0.4724651873111725, "learning_rate": 1.7112413783674276e-06, "loss": 0.08931875228881836, "step": 6294 }, { "epoch": 0.8771685361945238, "grad_norm": 0.26519766449928284, "learning_rate": 1.7074280020707568e-06, "loss": 0.062496185302734375, "step": 6295 }, { "epoch": 0.8773078798857382, "grad_norm": 0.4439862370491028, "learning_rate": 1.7036186899659513e-06, "loss": 0.07197189331054688, "step": 6296 }, { "epoch": 0.8774472235769526, "grad_norm": 0.4077838063240051, "learning_rate": 1.6998134428993606e-06, "loss": 0.08937931060791016, "step": 6297 }, { "epoch": 0.877586567268167, "grad_norm": 0.252949059009552, "learning_rate": 1.6960122617164243e-06, "loss": 0.06935405731201172, "step": 6298 }, { "epoch": 0.8777259109593814, "grad_norm": 0.43165531754493713, "learning_rate": 1.6922151472616733e-06, "loss": 0.0875539779663086, "step": 6299 }, { "epoch": 0.8778652546505957, "grad_norm": 0.2921707332134247, "learning_rate": 1.688422100378746e-06, "loss": 0.07463264465332031, "step": 6300 }, { "epoch": 0.8780045983418101, "grad_norm": 0.8306127786636353, "learning_rate": 1.684633121910364e-06, "loss": 0.11566734313964844, "step": 6301 }, { "epoch": 0.8781439420330245, "grad_norm": 0.4203175902366638, "learning_rate": 1.6808482126983584e-06, "loss": 0.07790756225585938, "step": 6302 }, { "epoch": 0.8782832857242389, "grad_norm": 0.37196820974349976, "learning_rate": 1.6770673735836452e-06, "loss": 0.08359527587890625, "step": 6303 }, { "epoch": 0.8784226294154532, "grad_norm": 0.39255020022392273, "learning_rate": 1.6732906054062392e-06, "loss": 0.09268379211425781, "step": 6304 }, { "epoch": 0.8785619731066676, "grad_norm": 0.4745374023914337, "learning_rate": 1.6695179090052604e-06, "loss": 0.076812744140625, "step": 6305 }, { "epoch": 0.878701316797882, "grad_norm": 0.41142576932907104, "learning_rate": 1.6657492852189095e-06, "loss": 0.0684356689453125, "step": 6306 }, { "epoch": 0.8788406604890964, "grad_norm": 0.6419191956520081, "learning_rate": 1.6619847348844854e-06, "loss": 0.1318340301513672, "step": 6307 }, { "epoch": 0.8789800041803107, "grad_norm": 0.7184076905250549, "learning_rate": 1.6582242588383902e-06, "loss": 0.12021541595458984, "step": 6308 }, { "epoch": 0.8791193478715251, "grad_norm": 0.28911063075065613, "learning_rate": 1.6544678579161178e-06, "loss": 0.07973861694335938, "step": 6309 }, { "epoch": 0.8792586915627395, "grad_norm": 0.5950722098350525, "learning_rate": 1.6507155329522517e-06, "loss": 0.1248025894165039, "step": 6310 }, { "epoch": 0.8793980352539539, "grad_norm": 0.5022260546684265, "learning_rate": 1.6469672847804697e-06, "loss": 0.1122589111328125, "step": 6311 }, { "epoch": 0.8795373789451683, "grad_norm": 0.2946960926055908, "learning_rate": 1.6432231142335498e-06, "loss": 0.060614585876464844, "step": 6312 }, { "epoch": 0.8796767226363826, "grad_norm": 0.4212896525859833, "learning_rate": 1.6394830221433643e-06, "loss": 0.082183837890625, "step": 6313 }, { "epoch": 0.879816066327597, "grad_norm": 0.9693267941474915, "learning_rate": 1.635747009340871e-06, "loss": 0.15661239624023438, "step": 6314 }, { "epoch": 0.8799554100188114, "grad_norm": 0.4627176523208618, "learning_rate": 1.6320150766561283e-06, "loss": 0.0990142822265625, "step": 6315 }, { "epoch": 0.8800947537100258, "grad_norm": 0.2826659381389618, "learning_rate": 1.6282872249182923e-06, "loss": 0.08464241027832031, "step": 6316 }, { "epoch": 0.8802340974012401, "grad_norm": 0.3160756826400757, "learning_rate": 1.624563454955601e-06, "loss": 0.06738758087158203, "step": 6317 }, { "epoch": 0.8803734410924545, "grad_norm": 0.31561124324798584, "learning_rate": 1.620843767595388e-06, "loss": 0.07369232177734375, "step": 6318 }, { "epoch": 0.8805127847836689, "grad_norm": 0.4235861599445343, "learning_rate": 1.6171281636640856e-06, "loss": 0.10497665405273438, "step": 6319 }, { "epoch": 0.8806521284748833, "grad_norm": 0.3496689200401306, "learning_rate": 1.6134166439872224e-06, "loss": 0.07475948333740234, "step": 6320 }, { "epoch": 0.8807914721660977, "grad_norm": 0.5472114682197571, "learning_rate": 1.6097092093894074e-06, "loss": 0.09774208068847656, "step": 6321 }, { "epoch": 0.880930815857312, "grad_norm": 0.4223344326019287, "learning_rate": 1.606005860694344e-06, "loss": 0.07436847686767578, "step": 6322 }, { "epoch": 0.8810701595485264, "grad_norm": 0.293888121843338, "learning_rate": 1.6023065987248388e-06, "loss": 0.0741119384765625, "step": 6323 }, { "epoch": 0.8812095032397408, "grad_norm": 0.3115915060043335, "learning_rate": 1.598611424302783e-06, "loss": 0.07414817810058594, "step": 6324 }, { "epoch": 0.8813488469309552, "grad_norm": 0.39990267157554626, "learning_rate": 1.5949203382491529e-06, "loss": 0.08307456970214844, "step": 6325 }, { "epoch": 0.8814881906221695, "grad_norm": 0.3197343349456787, "learning_rate": 1.5912333413840331e-06, "loss": 0.07665061950683594, "step": 6326 }, { "epoch": 0.8816275343133839, "grad_norm": 0.39891931414604187, "learning_rate": 1.587550434526588e-06, "loss": 0.08481979370117188, "step": 6327 }, { "epoch": 0.8817668780045983, "grad_norm": 0.2505267262458801, "learning_rate": 1.5838716184950653e-06, "loss": 0.06880569458007812, "step": 6328 }, { "epoch": 0.8819062216958127, "grad_norm": 0.46227431297302246, "learning_rate": 1.5801968941068247e-06, "loss": 0.07881450653076172, "step": 6329 }, { "epoch": 0.882045565387027, "grad_norm": 0.3289506137371063, "learning_rate": 1.5765262621783062e-06, "loss": 0.08876895904541016, "step": 6330 }, { "epoch": 0.8821849090782414, "grad_norm": 0.4018467962741852, "learning_rate": 1.572859723525031e-06, "loss": 0.0957794189453125, "step": 6331 }, { "epoch": 0.8823242527694558, "grad_norm": 0.2207142412662506, "learning_rate": 1.5691972789616338e-06, "loss": 0.06149482727050781, "step": 6332 }, { "epoch": 0.8824635964606703, "grad_norm": 0.5938962697982788, "learning_rate": 1.565538929301813e-06, "loss": 0.08368301391601562, "step": 6333 }, { "epoch": 0.8826029401518847, "grad_norm": 0.4678293764591217, "learning_rate": 1.5618846753583805e-06, "loss": 0.08010482788085938, "step": 6334 }, { "epoch": 0.882742283843099, "grad_norm": 0.37277859449386597, "learning_rate": 1.558234517943218e-06, "loss": 0.08573055267333984, "step": 6335 }, { "epoch": 0.8828816275343134, "grad_norm": 0.3740997314453125, "learning_rate": 1.5545884578673165e-06, "loss": 0.08347320556640625, "step": 6336 }, { "epoch": 0.8830209712255278, "grad_norm": 0.5697695016860962, "learning_rate": 1.5509464959407438e-06, "loss": 0.10097694396972656, "step": 6337 }, { "epoch": 0.8831603149167422, "grad_norm": 0.4636383056640625, "learning_rate": 1.5473086329726638e-06, "loss": 0.09846973419189453, "step": 6338 }, { "epoch": 0.8832996586079566, "grad_norm": 0.4265225827693939, "learning_rate": 1.543674869771319e-06, "loss": 0.0861673355102539, "step": 6339 }, { "epoch": 0.8834390022991709, "grad_norm": 0.46911922097206116, "learning_rate": 1.540045207144052e-06, "loss": 0.09244251251220703, "step": 6340 }, { "epoch": 0.8835783459903853, "grad_norm": 0.26285481452941895, "learning_rate": 1.5364196458972957e-06, "loss": 0.07189369201660156, "step": 6341 }, { "epoch": 0.8837176896815997, "grad_norm": 0.5140864253044128, "learning_rate": 1.5327981868365638e-06, "loss": 0.10577774047851562, "step": 6342 }, { "epoch": 0.8838570333728141, "grad_norm": 0.45155325531959534, "learning_rate": 1.529180830766459e-06, "loss": 0.08824634552001953, "step": 6343 }, { "epoch": 0.8839963770640284, "grad_norm": 0.3038387894630432, "learning_rate": 1.5255675784906764e-06, "loss": 0.07589530944824219, "step": 6344 }, { "epoch": 0.8841357207552428, "grad_norm": 0.6385385990142822, "learning_rate": 1.5219584308120028e-06, "loss": 0.10736274719238281, "step": 6345 }, { "epoch": 0.8842750644464572, "grad_norm": 0.37171927094459534, "learning_rate": 1.518353388532301e-06, "loss": 0.06946468353271484, "step": 6346 }, { "epoch": 0.8844144081376716, "grad_norm": 0.317028284072876, "learning_rate": 1.5147524524525392e-06, "loss": 0.07025146484375, "step": 6347 }, { "epoch": 0.884553751828886, "grad_norm": 0.2616197168827057, "learning_rate": 1.511155623372751e-06, "loss": 0.07137203216552734, "step": 6348 }, { "epoch": 0.8846930955201003, "grad_norm": 0.40096405148506165, "learning_rate": 1.507562902092079e-06, "loss": 0.09584236145019531, "step": 6349 }, { "epoch": 0.8848324392113147, "grad_norm": 0.6763895750045776, "learning_rate": 1.5039742894087384e-06, "loss": 0.10794448852539062, "step": 6350 }, { "epoch": 0.8849717829025291, "grad_norm": 0.34371688961982727, "learning_rate": 1.50038978612004e-06, "loss": 0.07867646217346191, "step": 6351 }, { "epoch": 0.8851111265937435, "grad_norm": 0.5387987494468689, "learning_rate": 1.4968093930223804e-06, "loss": 0.09865760803222656, "step": 6352 }, { "epoch": 0.8852504702849578, "grad_norm": 0.2825624942779541, "learning_rate": 1.4932331109112387e-06, "loss": 0.07334613800048828, "step": 6353 }, { "epoch": 0.8853898139761722, "grad_norm": 0.34986135363578796, "learning_rate": 1.489660940581179e-06, "loss": 0.07717323303222656, "step": 6354 }, { "epoch": 0.8855291576673866, "grad_norm": 0.38700202107429504, "learning_rate": 1.4860928828258604e-06, "loss": 0.0963287353515625, "step": 6355 }, { "epoch": 0.885668501358601, "grad_norm": 0.35238394141197205, "learning_rate": 1.4825289384380282e-06, "loss": 0.07713699340820312, "step": 6356 }, { "epoch": 0.8858078450498154, "grad_norm": 0.20589211583137512, "learning_rate": 1.4789691082095004e-06, "loss": 0.07008171081542969, "step": 6357 }, { "epoch": 0.8859471887410297, "grad_norm": 0.5352255702018738, "learning_rate": 1.4754133929311975e-06, "loss": 0.11149215698242188, "step": 6358 }, { "epoch": 0.8860865324322441, "grad_norm": 0.4348975718021393, "learning_rate": 1.4718617933931146e-06, "loss": 0.08794784545898438, "step": 6359 }, { "epoch": 0.8862258761234585, "grad_norm": 0.291655033826828, "learning_rate": 1.4683143103843355e-06, "loss": 0.061901092529296875, "step": 6360 }, { "epoch": 0.8863652198146729, "grad_norm": 0.27522122859954834, "learning_rate": 1.464770944693028e-06, "loss": 0.07447481155395508, "step": 6361 }, { "epoch": 0.8865045635058872, "grad_norm": 0.2338055819272995, "learning_rate": 1.4612316971064555e-06, "loss": 0.055794715881347656, "step": 6362 }, { "epoch": 0.8866439071971016, "grad_norm": 0.9842904210090637, "learning_rate": 1.4576965684109534e-06, "loss": 0.14532089233398438, "step": 6363 }, { "epoch": 0.886783250888316, "grad_norm": 0.3685941696166992, "learning_rate": 1.4541655593919402e-06, "loss": 0.10296058654785156, "step": 6364 }, { "epoch": 0.8869225945795304, "grad_norm": 0.5249600410461426, "learning_rate": 1.4506386708339325e-06, "loss": 0.10117912292480469, "step": 6365 }, { "epoch": 0.8870619382707448, "grad_norm": 0.4400736391544342, "learning_rate": 1.4471159035205262e-06, "loss": 0.09916877746582031, "step": 6366 }, { "epoch": 0.8872012819619591, "grad_norm": 0.4385639429092407, "learning_rate": 1.4435972582343948e-06, "loss": 0.09350395202636719, "step": 6367 }, { "epoch": 0.8873406256531735, "grad_norm": 0.35537055134773254, "learning_rate": 1.4400827357573043e-06, "loss": 0.08539390563964844, "step": 6368 }, { "epoch": 0.8874799693443879, "grad_norm": 0.31984764337539673, "learning_rate": 1.4365723368700968e-06, "loss": 0.08342933654785156, "step": 6369 }, { "epoch": 0.8876193130356023, "grad_norm": 0.22788922488689423, "learning_rate": 1.4330660623527081e-06, "loss": 0.06122398376464844, "step": 6370 }, { "epoch": 0.8877586567268166, "grad_norm": 0.42257726192474365, "learning_rate": 1.4295639129841466e-06, "loss": 0.08043670654296875, "step": 6371 }, { "epoch": 0.887898000418031, "grad_norm": 0.44300416111946106, "learning_rate": 1.4260658895425162e-06, "loss": 0.06927108764648438, "step": 6372 }, { "epoch": 0.8880373441092455, "grad_norm": 0.32648053765296936, "learning_rate": 1.4225719928049953e-06, "loss": 0.07955360412597656, "step": 6373 }, { "epoch": 0.8881766878004599, "grad_norm": 0.4176311790943146, "learning_rate": 1.4190822235478496e-06, "loss": 0.09214019775390625, "step": 6374 }, { "epoch": 0.8883160314916743, "grad_norm": 0.28784963488578796, "learning_rate": 1.415596582546419e-06, "loss": 0.080810546875, "step": 6375 }, { "epoch": 0.8884553751828886, "grad_norm": 0.2369752675294876, "learning_rate": 1.4121150705751396e-06, "loss": 0.07083320617675781, "step": 6376 }, { "epoch": 0.888594718874103, "grad_norm": 0.2049647718667984, "learning_rate": 1.4086376884075282e-06, "loss": 0.054579734802246094, "step": 6377 }, { "epoch": 0.8887340625653174, "grad_norm": 0.30273738503456116, "learning_rate": 1.4051644368161688e-06, "loss": 0.06805419921875, "step": 6378 }, { "epoch": 0.8888734062565318, "grad_norm": 0.3864225447177887, "learning_rate": 1.4016953165727487e-06, "loss": 0.08341598510742188, "step": 6379 }, { "epoch": 0.8890127499477462, "grad_norm": 0.28347402811050415, "learning_rate": 1.398230328448018e-06, "loss": 0.07316207885742188, "step": 6380 }, { "epoch": 0.8891520936389605, "grad_norm": 0.5013516545295715, "learning_rate": 1.3947694732118278e-06, "loss": 0.08783721923828125, "step": 6381 }, { "epoch": 0.8892914373301749, "grad_norm": 0.368417352437973, "learning_rate": 1.3913127516330916e-06, "loss": 0.07451248168945312, "step": 6382 }, { "epoch": 0.8894307810213893, "grad_norm": 0.4261535406112671, "learning_rate": 1.3878601644798173e-06, "loss": 0.10359954833984375, "step": 6383 }, { "epoch": 0.8895701247126037, "grad_norm": 0.3200541138648987, "learning_rate": 1.3844117125190982e-06, "loss": 0.07808494567871094, "step": 6384 }, { "epoch": 0.889709468403818, "grad_norm": 0.3023897111415863, "learning_rate": 1.3809673965170923e-06, "loss": 0.07169914245605469, "step": 6385 }, { "epoch": 0.8898488120950324, "grad_norm": 0.7536265850067139, "learning_rate": 1.3775272172390497e-06, "loss": 0.12587738037109375, "step": 6386 }, { "epoch": 0.8899881557862468, "grad_norm": 0.43995535373687744, "learning_rate": 1.3740911754493014e-06, "loss": 0.09344100952148438, "step": 6387 }, { "epoch": 0.8901274994774612, "grad_norm": 0.3706681728363037, "learning_rate": 1.3706592719112588e-06, "loss": 0.0832977294921875, "step": 6388 }, { "epoch": 0.8902668431686755, "grad_norm": 0.33370307087898254, "learning_rate": 1.3672315073874098e-06, "loss": 0.08833885192871094, "step": 6389 }, { "epoch": 0.8904061868598899, "grad_norm": 0.5277245044708252, "learning_rate": 1.3638078826393296e-06, "loss": 0.08568191528320312, "step": 6390 }, { "epoch": 0.8905455305511043, "grad_norm": 0.3230781555175781, "learning_rate": 1.3603883984276656e-06, "loss": 0.07376670837402344, "step": 6391 }, { "epoch": 0.8906848742423187, "grad_norm": 0.25533026456832886, "learning_rate": 1.3569730555121452e-06, "loss": 0.06451034545898438, "step": 6392 }, { "epoch": 0.8908242179335331, "grad_norm": 0.29474076628685, "learning_rate": 1.353561854651586e-06, "loss": 0.07569503784179688, "step": 6393 }, { "epoch": 0.8909635616247474, "grad_norm": 0.7196462154388428, "learning_rate": 1.3501547966038775e-06, "loss": 0.10285568237304688, "step": 6394 }, { "epoch": 0.8911029053159618, "grad_norm": 0.41512438654899597, "learning_rate": 1.3467518821259963e-06, "loss": 0.09024667739868164, "step": 6395 }, { "epoch": 0.8912422490071762, "grad_norm": 0.3708645701408386, "learning_rate": 1.3433531119739794e-06, "loss": 0.08479118347167969, "step": 6396 }, { "epoch": 0.8913815926983906, "grad_norm": 0.6879599690437317, "learning_rate": 1.3399584869029613e-06, "loss": 0.1258697509765625, "step": 6397 }, { "epoch": 0.891520936389605, "grad_norm": 0.43867599964141846, "learning_rate": 1.3365680076671561e-06, "loss": 0.07941675186157227, "step": 6398 }, { "epoch": 0.8916602800808193, "grad_norm": 0.3768528699874878, "learning_rate": 1.3331816750198412e-06, "loss": 0.07877159118652344, "step": 6399 }, { "epoch": 0.8917996237720337, "grad_norm": 0.2934498190879822, "learning_rate": 1.3297994897133927e-06, "loss": 0.07696533203125, "step": 6400 }, { "epoch": 0.8919389674632481, "grad_norm": 0.23889079689979553, "learning_rate": 1.326421452499247e-06, "loss": 0.06939268112182617, "step": 6401 }, { "epoch": 0.8920783111544625, "grad_norm": 0.3615988492965698, "learning_rate": 1.3230475641279306e-06, "loss": 0.0897369384765625, "step": 6402 }, { "epoch": 0.8922176548456768, "grad_norm": 0.3492756187915802, "learning_rate": 1.3196778253490417e-06, "loss": 0.0834197998046875, "step": 6403 }, { "epoch": 0.8923569985368912, "grad_norm": 0.5397752523422241, "learning_rate": 1.3163122369112591e-06, "loss": 0.11254501342773438, "step": 6404 }, { "epoch": 0.8924963422281056, "grad_norm": 0.44906291365623474, "learning_rate": 1.312950799562347e-06, "loss": 0.08768463134765625, "step": 6405 }, { "epoch": 0.89263568591932, "grad_norm": 0.3079161047935486, "learning_rate": 1.3095935140491323e-06, "loss": 0.07628440856933594, "step": 6406 }, { "epoch": 0.8927750296105343, "grad_norm": 0.3130105435848236, "learning_rate": 1.3062403811175272e-06, "loss": 0.07228851318359375, "step": 6407 }, { "epoch": 0.8929143733017487, "grad_norm": 0.2311532348394394, "learning_rate": 1.302891401512525e-06, "loss": 0.06795310974121094, "step": 6408 }, { "epoch": 0.8930537169929631, "grad_norm": 0.5303028225898743, "learning_rate": 1.2995465759781944e-06, "loss": 0.09942054748535156, "step": 6409 }, { "epoch": 0.8931930606841775, "grad_norm": 0.3070272207260132, "learning_rate": 1.2962059052576703e-06, "loss": 0.06454277038574219, "step": 6410 }, { "epoch": 0.8933324043753919, "grad_norm": 0.4686877429485321, "learning_rate": 1.2928693900931856e-06, "loss": 0.08547210693359375, "step": 6411 }, { "epoch": 0.8934717480666062, "grad_norm": 0.3089117407798767, "learning_rate": 1.2895370312260247e-06, "loss": 0.08093643188476562, "step": 6412 }, { "epoch": 0.8936110917578207, "grad_norm": 0.3711654245853424, "learning_rate": 1.2862088293965736e-06, "loss": 0.07990455627441406, "step": 6413 }, { "epoch": 0.8937504354490351, "grad_norm": 0.376597136259079, "learning_rate": 1.2828847853442738e-06, "loss": 0.07566642761230469, "step": 6414 }, { "epoch": 0.8938897791402495, "grad_norm": 0.30630528926849365, "learning_rate": 1.2795648998076572e-06, "loss": 0.08663558959960938, "step": 6415 }, { "epoch": 0.8940291228314639, "grad_norm": 0.2340475171804428, "learning_rate": 1.2762491735243264e-06, "loss": 0.06133604049682617, "step": 6416 }, { "epoch": 0.8941684665226782, "grad_norm": 0.2934882640838623, "learning_rate": 1.2729376072309597e-06, "loss": 0.070220947265625, "step": 6417 }, { "epoch": 0.8943078102138926, "grad_norm": 0.25584396719932556, "learning_rate": 1.2696302016633078e-06, "loss": 0.07174873352050781, "step": 6418 }, { "epoch": 0.894447153905107, "grad_norm": 0.5982056260108948, "learning_rate": 1.266326957556201e-06, "loss": 0.0820159912109375, "step": 6419 }, { "epoch": 0.8945864975963214, "grad_norm": 0.480836421251297, "learning_rate": 1.2630278756435522e-06, "loss": 0.10312271118164062, "step": 6420 }, { "epoch": 0.8947258412875357, "grad_norm": 0.46907317638397217, "learning_rate": 1.2597329566583372e-06, "loss": 0.07863807678222656, "step": 6421 }, { "epoch": 0.8948651849787501, "grad_norm": 0.8326483368873596, "learning_rate": 1.2564422013326083e-06, "loss": 0.1148519515991211, "step": 6422 }, { "epoch": 0.8950045286699645, "grad_norm": 0.33113378286361694, "learning_rate": 1.2531556103974984e-06, "loss": 0.079345703125, "step": 6423 }, { "epoch": 0.8951438723611789, "grad_norm": 0.41896703839302063, "learning_rate": 1.249873184583219e-06, "loss": 0.08963680267333984, "step": 6424 }, { "epoch": 0.8952832160523932, "grad_norm": 0.31311658024787903, "learning_rate": 1.2465949246190422e-06, "loss": 0.07894611358642578, "step": 6425 }, { "epoch": 0.8954225597436076, "grad_norm": 0.4299217462539673, "learning_rate": 1.2433208312333255e-06, "loss": 0.0848855972290039, "step": 6426 }, { "epoch": 0.895561903434822, "grad_norm": 0.6932771801948547, "learning_rate": 1.2400509051535026e-06, "loss": 0.1132349967956543, "step": 6427 }, { "epoch": 0.8957012471260364, "grad_norm": 0.28347328305244446, "learning_rate": 1.2367851471060654e-06, "loss": 0.07630443572998047, "step": 6428 }, { "epoch": 0.8958405908172508, "grad_norm": 0.2832571268081665, "learning_rate": 1.2335235578165983e-06, "loss": 0.07648468017578125, "step": 6429 }, { "epoch": 0.8959799345084651, "grad_norm": 0.44226112961769104, "learning_rate": 1.2302661380097547e-06, "loss": 0.10201454162597656, "step": 6430 }, { "epoch": 0.8961192781996795, "grad_norm": 0.4169379472732544, "learning_rate": 1.227012888409249e-06, "loss": 0.08756446838378906, "step": 6431 }, { "epoch": 0.8962586218908939, "grad_norm": 0.5970325469970703, "learning_rate": 1.2237638097378902e-06, "loss": 0.10465049743652344, "step": 6432 }, { "epoch": 0.8963979655821083, "grad_norm": 0.3429519236087799, "learning_rate": 1.2205189027175402e-06, "loss": 0.08566665649414062, "step": 6433 }, { "epoch": 0.8965373092733226, "grad_norm": 0.7479395270347595, "learning_rate": 1.2172781680691515e-06, "loss": 0.10310554504394531, "step": 6434 }, { "epoch": 0.896676652964537, "grad_norm": 0.48586395382881165, "learning_rate": 1.2140416065127324e-06, "loss": 0.09621620178222656, "step": 6435 }, { "epoch": 0.8968159966557514, "grad_norm": 0.4328007102012634, "learning_rate": 1.2108092187673791e-06, "loss": 0.10470008850097656, "step": 6436 }, { "epoch": 0.8969553403469658, "grad_norm": 0.4776395857334137, "learning_rate": 1.2075810055512549e-06, "loss": 0.10046195983886719, "step": 6437 }, { "epoch": 0.8970946840381802, "grad_norm": 0.33546799421310425, "learning_rate": 1.2043569675815924e-06, "loss": 0.07385635375976562, "step": 6438 }, { "epoch": 0.8972340277293945, "grad_norm": 0.47450485825538635, "learning_rate": 1.201137105574699e-06, "loss": 0.0881948471069336, "step": 6439 }, { "epoch": 0.8973733714206089, "grad_norm": 0.7836313247680664, "learning_rate": 1.1979214202459532e-06, "loss": 0.12908363342285156, "step": 6440 }, { "epoch": 0.8975127151118233, "grad_norm": 0.5593071579933167, "learning_rate": 1.1947099123098126e-06, "loss": 0.11519432067871094, "step": 6441 }, { "epoch": 0.8976520588030377, "grad_norm": 0.41252776980400085, "learning_rate": 1.1915025824797976e-06, "loss": 0.09378814697265625, "step": 6442 }, { "epoch": 0.897791402494252, "grad_norm": 0.3600638210773468, "learning_rate": 1.1882994314685003e-06, "loss": 0.08592414855957031, "step": 6443 }, { "epoch": 0.8979307461854664, "grad_norm": 0.23847205936908722, "learning_rate": 1.1851004599875915e-06, "loss": 0.06051445007324219, "step": 6444 }, { "epoch": 0.8980700898766808, "grad_norm": 0.34454572200775146, "learning_rate": 1.1819056687478114e-06, "loss": 0.08388137817382812, "step": 6445 }, { "epoch": 0.8982094335678952, "grad_norm": 0.5135355591773987, "learning_rate": 1.1787150584589657e-06, "loss": 0.10358238220214844, "step": 6446 }, { "epoch": 0.8983487772591096, "grad_norm": 0.5596545338630676, "learning_rate": 1.1755286298299339e-06, "loss": 0.08795928955078125, "step": 6447 }, { "epoch": 0.8984881209503239, "grad_norm": 0.4202527105808258, "learning_rate": 1.1723463835686765e-06, "loss": 0.08997249603271484, "step": 6448 }, { "epoch": 0.8986274646415383, "grad_norm": 0.4047374129295349, "learning_rate": 1.169168320382208e-06, "loss": 0.09187507629394531, "step": 6449 }, { "epoch": 0.8987668083327527, "grad_norm": 0.5031136274337769, "learning_rate": 1.165994440976621e-06, "loss": 0.08526802062988281, "step": 6450 }, { "epoch": 0.8989061520239671, "grad_norm": 0.3582023084163666, "learning_rate": 1.162824746057083e-06, "loss": 0.06972312927246094, "step": 6451 }, { "epoch": 0.8990454957151814, "grad_norm": 0.38129180669784546, "learning_rate": 1.159659236327828e-06, "loss": 0.07792091369628906, "step": 6452 }, { "epoch": 0.8991848394063959, "grad_norm": 0.2468128204345703, "learning_rate": 1.1564979124921582e-06, "loss": 0.06743526458740234, "step": 6453 }, { "epoch": 0.8993241830976103, "grad_norm": 0.7499858736991882, "learning_rate": 1.153340775252445e-06, "loss": 0.11701011657714844, "step": 6454 }, { "epoch": 0.8994635267888247, "grad_norm": 0.6271414756774902, "learning_rate": 1.1501878253101362e-06, "loss": 0.12547969818115234, "step": 6455 }, { "epoch": 0.8996028704800391, "grad_norm": 0.5412514805793762, "learning_rate": 1.1470390633657468e-06, "loss": 0.09924697875976562, "step": 6456 }, { "epoch": 0.8997422141712534, "grad_norm": 0.3636331260204315, "learning_rate": 1.1438944901188532e-06, "loss": 0.091461181640625, "step": 6457 }, { "epoch": 0.8998815578624678, "grad_norm": 0.4750295877456665, "learning_rate": 1.1407541062681138e-06, "loss": 0.07520675659179688, "step": 6458 }, { "epoch": 0.9000209015536822, "grad_norm": 0.8362365961074829, "learning_rate": 1.1376179125112508e-06, "loss": 0.13773345947265625, "step": 6459 }, { "epoch": 0.9001602452448966, "grad_norm": 0.5754382610321045, "learning_rate": 1.1344859095450468e-06, "loss": 0.09871292114257812, "step": 6460 }, { "epoch": 0.900299588936111, "grad_norm": 0.712861180305481, "learning_rate": 1.1313580980653671e-06, "loss": 0.10795021057128906, "step": 6461 }, { "epoch": 0.9004389326273253, "grad_norm": 0.6808586716651917, "learning_rate": 1.1282344787671429e-06, "loss": 0.12317085266113281, "step": 6462 }, { "epoch": 0.9005782763185397, "grad_norm": 0.312374085187912, "learning_rate": 1.1251150523443676e-06, "loss": 0.07724952697753906, "step": 6463 }, { "epoch": 0.9007176200097541, "grad_norm": 0.316883385181427, "learning_rate": 1.121999819490105e-06, "loss": 0.09416007995605469, "step": 6464 }, { "epoch": 0.9008569637009685, "grad_norm": 0.23273472487926483, "learning_rate": 1.1188887808964878e-06, "loss": 0.05534648895263672, "step": 6465 }, { "epoch": 0.9009963073921828, "grad_norm": 0.4550366997718811, "learning_rate": 1.1157819372547252e-06, "loss": 0.09100723266601562, "step": 6466 }, { "epoch": 0.9011356510833972, "grad_norm": 0.43512532114982605, "learning_rate": 1.1126792892550786e-06, "loss": 0.07414722442626953, "step": 6467 }, { "epoch": 0.9012749947746116, "grad_norm": 0.5720616579055786, "learning_rate": 1.10958083758689e-06, "loss": 0.11523056030273438, "step": 6468 }, { "epoch": 0.901414338465826, "grad_norm": 0.3631727695465088, "learning_rate": 1.1064865829385662e-06, "loss": 0.08643436431884766, "step": 6469 }, { "epoch": 0.9015536821570403, "grad_norm": 0.3908413052558899, "learning_rate": 1.1033965259975777e-06, "loss": 0.07838821411132812, "step": 6470 }, { "epoch": 0.9016930258482547, "grad_norm": 0.3303401470184326, "learning_rate": 1.1003106674504592e-06, "loss": 0.09259796142578125, "step": 6471 }, { "epoch": 0.9018323695394691, "grad_norm": 0.4576283395290375, "learning_rate": 1.0972290079828252e-06, "loss": 0.08506393432617188, "step": 6472 }, { "epoch": 0.9019717132306835, "grad_norm": 0.2140074372291565, "learning_rate": 1.094151548279352e-06, "loss": 0.0700082778930664, "step": 6473 }, { "epoch": 0.9021110569218979, "grad_norm": 0.2155413031578064, "learning_rate": 1.0910782890237747e-06, "loss": 0.05397987365722656, "step": 6474 }, { "epoch": 0.9022504006131122, "grad_norm": 0.5524152517318726, "learning_rate": 1.088009230898901e-06, "loss": 0.1094512939453125, "step": 6475 }, { "epoch": 0.9023897443043266, "grad_norm": 0.39625462889671326, "learning_rate": 1.0849443745866095e-06, "loss": 0.09475898742675781, "step": 6476 }, { "epoch": 0.902529087995541, "grad_norm": 0.408661425113678, "learning_rate": 1.0818837207678423e-06, "loss": 0.08189964294433594, "step": 6477 }, { "epoch": 0.9026684316867554, "grad_norm": 0.49468615651130676, "learning_rate": 1.0788272701226e-06, "loss": 0.10625457763671875, "step": 6478 }, { "epoch": 0.9028077753779697, "grad_norm": 0.39627891778945923, "learning_rate": 1.0757750233299658e-06, "loss": 0.09277153015136719, "step": 6479 }, { "epoch": 0.9029471190691841, "grad_norm": 0.24497127532958984, "learning_rate": 1.0727269810680685e-06, "loss": 0.07392692565917969, "step": 6480 }, { "epoch": 0.9030864627603985, "grad_norm": 0.3497641682624817, "learning_rate": 1.0696831440141242e-06, "loss": 0.06962203979492188, "step": 6481 }, { "epoch": 0.9032258064516129, "grad_norm": 0.5436011552810669, "learning_rate": 1.0666435128443942e-06, "loss": 0.11316108703613281, "step": 6482 }, { "epoch": 0.9033651501428273, "grad_norm": 0.3385253846645355, "learning_rate": 1.0636080882342203e-06, "loss": 0.07769584655761719, "step": 6483 }, { "epoch": 0.9035044938340416, "grad_norm": 0.5331652760505676, "learning_rate": 1.0605768708580078e-06, "loss": 0.11466217041015625, "step": 6484 }, { "epoch": 0.903643837525256, "grad_norm": 0.3398861885070801, "learning_rate": 1.0575498613892199e-06, "loss": 0.0869140625, "step": 6485 }, { "epoch": 0.9037831812164704, "grad_norm": 0.6810885667800903, "learning_rate": 1.0545270605003855e-06, "loss": 0.11503410339355469, "step": 6486 }, { "epoch": 0.9039225249076848, "grad_norm": 0.4192269444465637, "learning_rate": 1.0515084688631071e-06, "loss": 0.10396385192871094, "step": 6487 }, { "epoch": 0.9040618685988991, "grad_norm": 0.38606685400009155, "learning_rate": 1.0484940871480464e-06, "loss": 0.07985305786132812, "step": 6488 }, { "epoch": 0.9042012122901135, "grad_norm": 0.32324695587158203, "learning_rate": 1.0454839160249274e-06, "loss": 0.08354759216308594, "step": 6489 }, { "epoch": 0.9043405559813279, "grad_norm": 0.5439515709877014, "learning_rate": 1.0424779561625465e-06, "loss": 0.11631011962890625, "step": 6490 }, { "epoch": 0.9044798996725423, "grad_norm": 0.38752853870391846, "learning_rate": 1.0394762082287557e-06, "loss": 0.0877838134765625, "step": 6491 }, { "epoch": 0.9046192433637567, "grad_norm": 0.275966078042984, "learning_rate": 1.036478672890473e-06, "loss": 0.07407951354980469, "step": 6492 }, { "epoch": 0.904758587054971, "grad_norm": 0.40706509351730347, "learning_rate": 1.0334853508136854e-06, "loss": 0.08558464050292969, "step": 6493 }, { "epoch": 0.9048979307461855, "grad_norm": 0.48518505692481995, "learning_rate": 1.030496242663439e-06, "loss": 0.08606147766113281, "step": 6494 }, { "epoch": 0.9050372744373999, "grad_norm": 0.26346778869628906, "learning_rate": 1.0275113491038512e-06, "loss": 0.058875083923339844, "step": 6495 }, { "epoch": 0.9051766181286143, "grad_norm": 0.4266607463359833, "learning_rate": 1.0245306707980873e-06, "loss": 0.08748149871826172, "step": 6496 }, { "epoch": 0.9053159618198287, "grad_norm": 0.33037081360816956, "learning_rate": 1.021554208408393e-06, "loss": 0.06938838958740234, "step": 6497 }, { "epoch": 0.905455305511043, "grad_norm": 0.5838533043861389, "learning_rate": 1.018581962596068e-06, "loss": 0.12118339538574219, "step": 6498 }, { "epoch": 0.9055946492022574, "grad_norm": 0.3080866038799286, "learning_rate": 1.0156139340214755e-06, "loss": 0.08479690551757812, "step": 6499 }, { "epoch": 0.9057339928934718, "grad_norm": 0.3781090974807739, "learning_rate": 1.01265012334405e-06, "loss": 0.0875406265258789, "step": 6500 }, { "epoch": 0.9058733365846862, "grad_norm": 0.6796485781669617, "learning_rate": 1.009690531222276e-06, "loss": 0.15683364868164062, "step": 6501 }, { "epoch": 0.9060126802759005, "grad_norm": 0.2567310035228729, "learning_rate": 1.0067351583137119e-06, "loss": 0.07499122619628906, "step": 6502 }, { "epoch": 0.9061520239671149, "grad_norm": 0.2932274043560028, "learning_rate": 1.0037840052749682e-06, "loss": 0.07372474670410156, "step": 6503 }, { "epoch": 0.9062913676583293, "grad_norm": 0.2446686029434204, "learning_rate": 1.000837072761729e-06, "loss": 0.06362724304199219, "step": 6504 }, { "epoch": 0.9064307113495437, "grad_norm": 0.33956632018089294, "learning_rate": 9.978943614287374e-07, "loss": 0.07677841186523438, "step": 6505 }, { "epoch": 0.906570055040758, "grad_norm": 0.3824218511581421, "learning_rate": 9.949558719297924e-07, "loss": 0.07726478576660156, "step": 6506 }, { "epoch": 0.9067093987319724, "grad_norm": 0.37389788031578064, "learning_rate": 9.920216049177566e-07, "loss": 0.0780935287475586, "step": 6507 }, { "epoch": 0.9068487424231868, "grad_norm": 0.27759623527526855, "learning_rate": 9.890915610445617e-07, "loss": 0.07737159729003906, "step": 6508 }, { "epoch": 0.9069880861144012, "grad_norm": 0.5292997360229492, "learning_rate": 9.861657409611958e-07, "loss": 0.08637237548828125, "step": 6509 }, { "epoch": 0.9071274298056156, "grad_norm": 0.4656522572040558, "learning_rate": 9.832441453177099e-07, "loss": 0.09535694122314453, "step": 6510 }, { "epoch": 0.9072667734968299, "grad_norm": 0.22645217180252075, "learning_rate": 9.803267747632162e-07, "loss": 0.055855751037597656, "step": 6511 }, { "epoch": 0.9074061171880443, "grad_norm": 0.20605604350566864, "learning_rate": 9.774136299458825e-07, "loss": 0.06304645538330078, "step": 6512 }, { "epoch": 0.9075454608792587, "grad_norm": 0.2640606760978699, "learning_rate": 9.745047115129513e-07, "loss": 0.077880859375, "step": 6513 }, { "epoch": 0.9076848045704731, "grad_norm": 0.3148421049118042, "learning_rate": 9.716000201107102e-07, "loss": 0.07644271850585938, "step": 6514 }, { "epoch": 0.9078241482616874, "grad_norm": 0.5422933101654053, "learning_rate": 9.686995563845204e-07, "loss": 0.09826850891113281, "step": 6515 }, { "epoch": 0.9079634919529018, "grad_norm": 0.7735921740531921, "learning_rate": 9.658033209787998e-07, "loss": 0.11243438720703125, "step": 6516 }, { "epoch": 0.9081028356441162, "grad_norm": 0.32186219096183777, "learning_rate": 9.629113145370228e-07, "loss": 0.08183002471923828, "step": 6517 }, { "epoch": 0.9082421793353306, "grad_norm": 0.6657729744911194, "learning_rate": 9.60023537701724e-07, "loss": 0.13293075561523438, "step": 6518 }, { "epoch": 0.908381523026545, "grad_norm": 0.20923347771167755, "learning_rate": 9.57139991114504e-07, "loss": 0.0635833740234375, "step": 6519 }, { "epoch": 0.9085208667177593, "grad_norm": 0.3430497348308563, "learning_rate": 9.542606754160277e-07, "loss": 0.0715036392211914, "step": 6520 }, { "epoch": 0.9086602104089737, "grad_norm": 0.37958282232284546, "learning_rate": 9.513855912460057e-07, "loss": 0.08574342727661133, "step": 6521 }, { "epoch": 0.9087995541001881, "grad_norm": 0.45350345969200134, "learning_rate": 9.485147392432159e-07, "loss": 0.08557701110839844, "step": 6522 }, { "epoch": 0.9089388977914025, "grad_norm": 0.4867132902145386, "learning_rate": 9.456481200454992e-07, "loss": 0.0965728759765625, "step": 6523 }, { "epoch": 0.9090782414826168, "grad_norm": 0.4705277383327484, "learning_rate": 9.427857342897528e-07, "loss": 0.09188461303710938, "step": 6524 }, { "epoch": 0.9092175851738312, "grad_norm": 0.26971274614334106, "learning_rate": 9.399275826119325e-07, "loss": 0.07405662536621094, "step": 6525 }, { "epoch": 0.9093569288650456, "grad_norm": 0.32735133171081543, "learning_rate": 9.370736656470525e-07, "loss": 0.08984375, "step": 6526 }, { "epoch": 0.90949627255626, "grad_norm": 0.24433533847332, "learning_rate": 9.342239840291967e-07, "loss": 0.061412811279296875, "step": 6527 }, { "epoch": 0.9096356162474744, "grad_norm": 0.2714102566242218, "learning_rate": 9.313785383914852e-07, "loss": 0.07314586639404297, "step": 6528 }, { "epoch": 0.9097749599386887, "grad_norm": 0.3460380434989929, "learning_rate": 9.285373293661193e-07, "loss": 0.07423973083496094, "step": 6529 }, { "epoch": 0.9099143036299031, "grad_norm": 0.47802403569221497, "learning_rate": 9.257003575843537e-07, "loss": 0.09429073333740234, "step": 6530 }, { "epoch": 0.9100536473211175, "grad_norm": 0.6600710153579712, "learning_rate": 9.228676236764911e-07, "loss": 0.11326217651367188, "step": 6531 }, { "epoch": 0.9101929910123319, "grad_norm": 0.304179847240448, "learning_rate": 9.200391282719079e-07, "loss": 0.07192230224609375, "step": 6532 }, { "epoch": 0.9103323347035462, "grad_norm": 0.2895108163356781, "learning_rate": 9.172148719990237e-07, "loss": 0.07944869995117188, "step": 6533 }, { "epoch": 0.9104716783947607, "grad_norm": 0.4352632761001587, "learning_rate": 9.143948554853299e-07, "loss": 0.09868431091308594, "step": 6534 }, { "epoch": 0.9106110220859751, "grad_norm": 0.5349122285842896, "learning_rate": 9.115790793573653e-07, "loss": 0.108612060546875, "step": 6535 }, { "epoch": 0.9107503657771895, "grad_norm": 0.30581212043762207, "learning_rate": 9.08767544240734e-07, "loss": 0.08356475830078125, "step": 6536 }, { "epoch": 0.9108897094684039, "grad_norm": 0.6149179339408875, "learning_rate": 9.059602507600962e-07, "loss": 0.10852622985839844, "step": 6537 }, { "epoch": 0.9110290531596182, "grad_norm": 0.40292200446128845, "learning_rate": 9.031571995391664e-07, "loss": 0.09441184997558594, "step": 6538 }, { "epoch": 0.9111683968508326, "grad_norm": 0.4705089032649994, "learning_rate": 9.003583912007152e-07, "loss": 0.105224609375, "step": 6539 }, { "epoch": 0.911307740542047, "grad_norm": 0.3757724165916443, "learning_rate": 8.975638263665787e-07, "loss": 0.09456825256347656, "step": 6540 }, { "epoch": 0.9114470842332614, "grad_norm": 0.3695238530635834, "learning_rate": 8.947735056576468e-07, "loss": 0.09360694885253906, "step": 6541 }, { "epoch": 0.9115864279244758, "grad_norm": 0.4511104226112366, "learning_rate": 8.919874296938569e-07, "loss": 0.0967559814453125, "step": 6542 }, { "epoch": 0.9117257716156901, "grad_norm": 0.2884587347507477, "learning_rate": 8.892055990942228e-07, "loss": 0.07970237731933594, "step": 6543 }, { "epoch": 0.9118651153069045, "grad_norm": 0.3478052020072937, "learning_rate": 8.864280144767923e-07, "loss": 0.07880020141601562, "step": 6544 }, { "epoch": 0.9120044589981189, "grad_norm": 0.4460470378398895, "learning_rate": 8.836546764586895e-07, "loss": 0.07903194427490234, "step": 6545 }, { "epoch": 0.9121438026893333, "grad_norm": 0.4114014804363251, "learning_rate": 8.808855856560838e-07, "loss": 0.10237884521484375, "step": 6546 }, { "epoch": 0.9122831463805476, "grad_norm": 0.2861233651638031, "learning_rate": 8.781207426842031e-07, "loss": 0.06640911102294922, "step": 6547 }, { "epoch": 0.912422490071762, "grad_norm": 0.4342323839664459, "learning_rate": 8.753601481573337e-07, "loss": 0.0813751220703125, "step": 6548 }, { "epoch": 0.9125618337629764, "grad_norm": 0.40865620970726013, "learning_rate": 8.726038026888184e-07, "loss": 0.09746932983398438, "step": 6549 }, { "epoch": 0.9127011774541908, "grad_norm": 0.2707406282424927, "learning_rate": 8.698517068910495e-07, "loss": 0.07116508483886719, "step": 6550 }, { "epoch": 0.9128405211454051, "grad_norm": 0.5059632658958435, "learning_rate": 8.671038613754846e-07, "loss": 0.10668182373046875, "step": 6551 }, { "epoch": 0.9129798648366195, "grad_norm": 0.4219966232776642, "learning_rate": 8.643602667526307e-07, "loss": 0.0897216796875, "step": 6552 }, { "epoch": 0.9131192085278339, "grad_norm": 0.5468851923942566, "learning_rate": 8.616209236320517e-07, "loss": 0.08156299591064453, "step": 6553 }, { "epoch": 0.9132585522190483, "grad_norm": 0.28750577569007874, "learning_rate": 8.588858326223625e-07, "loss": 0.05863666534423828, "step": 6554 }, { "epoch": 0.9133978959102627, "grad_norm": 0.5559219121932983, "learning_rate": 8.561549943312442e-07, "loss": 0.09984970092773438, "step": 6555 }, { "epoch": 0.913537239601477, "grad_norm": 0.3423694968223572, "learning_rate": 8.534284093654288e-07, "loss": 0.08151626586914062, "step": 6556 }, { "epoch": 0.9136765832926914, "grad_norm": 0.32039275765419006, "learning_rate": 8.507060783306919e-07, "loss": 0.07769775390625, "step": 6557 }, { "epoch": 0.9138159269839058, "grad_norm": 0.20852826535701752, "learning_rate": 8.479880018318831e-07, "loss": 0.058948516845703125, "step": 6558 }, { "epoch": 0.9139552706751202, "grad_norm": 0.360268235206604, "learning_rate": 8.452741804728904e-07, "loss": 0.08536529541015625, "step": 6559 }, { "epoch": 0.9140946143663345, "grad_norm": 0.42852532863616943, "learning_rate": 8.425646148566624e-07, "loss": 0.09309864044189453, "step": 6560 }, { "epoch": 0.9142339580575489, "grad_norm": 0.603644609451294, "learning_rate": 8.398593055852067e-07, "loss": 0.09510612487792969, "step": 6561 }, { "epoch": 0.9143733017487633, "grad_norm": 0.23036816716194153, "learning_rate": 8.371582532595823e-07, "loss": 0.06501007080078125, "step": 6562 }, { "epoch": 0.9145126454399777, "grad_norm": 0.6221429109573364, "learning_rate": 8.34461458479896e-07, "loss": 0.0954437255859375, "step": 6563 }, { "epoch": 0.9146519891311921, "grad_norm": 0.7127260565757751, "learning_rate": 8.317689218453196e-07, "loss": 0.10004425048828125, "step": 6564 }, { "epoch": 0.9147913328224064, "grad_norm": 0.33584344387054443, "learning_rate": 8.290806439540677e-07, "loss": 0.08370208740234375, "step": 6565 }, { "epoch": 0.9149306765136208, "grad_norm": 0.2507660984992981, "learning_rate": 8.263966254034206e-07, "loss": 0.06017112731933594, "step": 6566 }, { "epoch": 0.9150700202048352, "grad_norm": 0.3566182553768158, "learning_rate": 8.237168667897011e-07, "loss": 0.08274078369140625, "step": 6567 }, { "epoch": 0.9152093638960496, "grad_norm": 0.2487594038248062, "learning_rate": 8.210413687082885e-07, "loss": 0.06760787963867188, "step": 6568 }, { "epoch": 0.9153487075872639, "grad_norm": 0.213240385055542, "learning_rate": 8.183701317536253e-07, "loss": 0.05800342559814453, "step": 6569 }, { "epoch": 0.9154880512784783, "grad_norm": 0.2985682487487793, "learning_rate": 8.157031565191941e-07, "loss": 0.07796287536621094, "step": 6570 }, { "epoch": 0.9156273949696927, "grad_norm": 0.6000349521636963, "learning_rate": 8.130404435975348e-07, "loss": 0.09581947326660156, "step": 6571 }, { "epoch": 0.9157667386609071, "grad_norm": 0.3141961097717285, "learning_rate": 8.103819935802426e-07, "loss": 0.08697319030761719, "step": 6572 }, { "epoch": 0.9159060823521215, "grad_norm": 0.7443259358406067, "learning_rate": 8.077278070579675e-07, "loss": 0.1261310577392578, "step": 6573 }, { "epoch": 0.9160454260433359, "grad_norm": 0.42007237672805786, "learning_rate": 8.050778846204066e-07, "loss": 0.0881195068359375, "step": 6574 }, { "epoch": 0.9161847697345503, "grad_norm": 0.8759194612503052, "learning_rate": 8.024322268563112e-07, "loss": 0.17675399780273438, "step": 6575 }, { "epoch": 0.9163241134257647, "grad_norm": 0.6751231551170349, "learning_rate": 7.997908343534844e-07, "loss": 0.089111328125, "step": 6576 }, { "epoch": 0.9164634571169791, "grad_norm": 0.8199658989906311, "learning_rate": 7.971537076987901e-07, "loss": 0.09866905212402344, "step": 6577 }, { "epoch": 0.9166028008081935, "grad_norm": 0.29679831862449646, "learning_rate": 7.945208474781307e-07, "loss": 0.0630502700805664, "step": 6578 }, { "epoch": 0.9167421444994078, "grad_norm": 0.40699079632759094, "learning_rate": 7.918922542764717e-07, "loss": 0.09285354614257812, "step": 6579 }, { "epoch": 0.9168814881906222, "grad_norm": 0.3590869605541229, "learning_rate": 7.89267928677826e-07, "loss": 0.08063507080078125, "step": 6580 }, { "epoch": 0.9170208318818366, "grad_norm": 0.5593793988227844, "learning_rate": 7.866478712652581e-07, "loss": 0.1119527816772461, "step": 6581 }, { "epoch": 0.917160175573051, "grad_norm": 0.6665065288543701, "learning_rate": 7.840320826208825e-07, "loss": 0.1184549331665039, "step": 6582 }, { "epoch": 0.9172995192642653, "grad_norm": 0.27801838517189026, "learning_rate": 7.81420563325872e-07, "loss": 0.06329822540283203, "step": 6583 }, { "epoch": 0.9174388629554797, "grad_norm": 0.3614061772823334, "learning_rate": 7.788133139604448e-07, "loss": 0.08771896362304688, "step": 6584 }, { "epoch": 0.9175782066466941, "grad_norm": 0.2301931530237198, "learning_rate": 7.76210335103873e-07, "loss": 0.06080055236816406, "step": 6585 }, { "epoch": 0.9177175503379085, "grad_norm": 0.37533503770828247, "learning_rate": 7.736116273344763e-07, "loss": 0.07808113098144531, "step": 6586 }, { "epoch": 0.9178568940291228, "grad_norm": 0.45541253685951233, "learning_rate": 7.710171912296305e-07, "loss": 0.08830451965332031, "step": 6587 }, { "epoch": 0.9179962377203372, "grad_norm": 0.5162289142608643, "learning_rate": 7.684270273657612e-07, "loss": 0.09355926513671875, "step": 6588 }, { "epoch": 0.9181355814115516, "grad_norm": 0.5937817692756653, "learning_rate": 7.658411363183393e-07, "loss": 0.1232309341430664, "step": 6589 }, { "epoch": 0.918274925102766, "grad_norm": 0.4314119517803192, "learning_rate": 7.632595186618963e-07, "loss": 0.09695911407470703, "step": 6590 }, { "epoch": 0.9184142687939804, "grad_norm": 0.5070757865905762, "learning_rate": 7.606821749700045e-07, "loss": 0.08881378173828125, "step": 6591 }, { "epoch": 0.9185536124851947, "grad_norm": 0.2695625126361847, "learning_rate": 7.581091058152923e-07, "loss": 0.06934928894042969, "step": 6592 }, { "epoch": 0.9186929561764091, "grad_norm": 0.34121841192245483, "learning_rate": 7.55540311769436e-07, "loss": 0.08825492858886719, "step": 6593 }, { "epoch": 0.9188322998676235, "grad_norm": 0.37000909447669983, "learning_rate": 7.529757934031634e-07, "loss": 0.0848236083984375, "step": 6594 }, { "epoch": 0.9189716435588379, "grad_norm": 0.7696034908294678, "learning_rate": 7.504155512862587e-07, "loss": 0.11981010437011719, "step": 6595 }, { "epoch": 0.9191109872500522, "grad_norm": 0.33245086669921875, "learning_rate": 7.478595859875381e-07, "loss": 0.08704757690429688, "step": 6596 }, { "epoch": 0.9192503309412666, "grad_norm": 0.4866340756416321, "learning_rate": 7.453078980748829e-07, "loss": 0.09759283065795898, "step": 6597 }, { "epoch": 0.919389674632481, "grad_norm": 0.3704664409160614, "learning_rate": 7.427604881152239e-07, "loss": 0.08462715148925781, "step": 6598 }, { "epoch": 0.9195290183236954, "grad_norm": 0.7151444554328918, "learning_rate": 7.402173566745308e-07, "loss": 0.12769508361816406, "step": 6599 }, { "epoch": 0.9196683620149098, "grad_norm": 0.31409314274787903, "learning_rate": 7.376785043178314e-07, "loss": 0.06514549255371094, "step": 6600 }, { "epoch": 0.9198077057061241, "grad_norm": 0.34350916743278503, "learning_rate": 7.35143931609208e-07, "loss": 0.07901954650878906, "step": 6601 }, { "epoch": 0.9199470493973385, "grad_norm": 0.2665935456752777, "learning_rate": 7.326136391117766e-07, "loss": 0.07143783569335938, "step": 6602 }, { "epoch": 0.9200863930885529, "grad_norm": 0.3795723617076874, "learning_rate": 7.3008762738771e-07, "loss": 0.0830841064453125, "step": 6603 }, { "epoch": 0.9202257367797673, "grad_norm": 0.2776455879211426, "learning_rate": 7.275658969982324e-07, "loss": 0.07346248626708984, "step": 6604 }, { "epoch": 0.9203650804709816, "grad_norm": 0.4205428659915924, "learning_rate": 7.250484485036202e-07, "loss": 0.09348106384277344, "step": 6605 }, { "epoch": 0.920504424162196, "grad_norm": 0.2679228186607361, "learning_rate": 7.225352824631859e-07, "loss": 0.06442070007324219, "step": 6606 }, { "epoch": 0.9206437678534104, "grad_norm": 0.42654886841773987, "learning_rate": 7.200263994352985e-07, "loss": 0.08454322814941406, "step": 6607 }, { "epoch": 0.9207831115446248, "grad_norm": 0.36222341656684875, "learning_rate": 7.175217999773765e-07, "loss": 0.0687551498413086, "step": 6608 }, { "epoch": 0.9209224552358392, "grad_norm": 0.4494735896587372, "learning_rate": 7.150214846458859e-07, "loss": 0.07894515991210938, "step": 6609 }, { "epoch": 0.9210617989270535, "grad_norm": 0.5438541173934937, "learning_rate": 7.125254539963356e-07, "loss": 0.10592079162597656, "step": 6610 }, { "epoch": 0.9212011426182679, "grad_norm": 0.17529365420341492, "learning_rate": 7.100337085832909e-07, "loss": 0.05667686462402344, "step": 6611 }, { "epoch": 0.9213404863094823, "grad_norm": 0.3781813979148865, "learning_rate": 7.075462489603557e-07, "loss": 0.08823299407958984, "step": 6612 }, { "epoch": 0.9214798300006967, "grad_norm": 0.37499719858169556, "learning_rate": 7.050630756801924e-07, "loss": 0.08816289901733398, "step": 6613 }, { "epoch": 0.9216191736919112, "grad_norm": 0.34485799074172974, "learning_rate": 7.025841892945018e-07, "loss": 0.07259178161621094, "step": 6614 }, { "epoch": 0.9217585173831255, "grad_norm": 0.18768461048603058, "learning_rate": 7.001095903540345e-07, "loss": 0.05909156799316406, "step": 6615 }, { "epoch": 0.9218978610743399, "grad_norm": 0.5603020191192627, "learning_rate": 6.976392794085973e-07, "loss": 0.11426544189453125, "step": 6616 }, { "epoch": 0.9220372047655543, "grad_norm": 0.3104931712150574, "learning_rate": 6.951732570070291e-07, "loss": 0.08048248291015625, "step": 6617 }, { "epoch": 0.9221765484567687, "grad_norm": 0.5593743324279785, "learning_rate": 6.92711523697227e-07, "loss": 0.10905838012695312, "step": 6618 }, { "epoch": 0.922315892147983, "grad_norm": 0.5056818127632141, "learning_rate": 6.902540800261292e-07, "loss": 0.1168522834777832, "step": 6619 }, { "epoch": 0.9224552358391974, "grad_norm": 0.34235528111457825, "learning_rate": 6.878009265397301e-07, "loss": 0.07930946350097656, "step": 6620 }, { "epoch": 0.9225945795304118, "grad_norm": 0.5320193767547607, "learning_rate": 6.853520637830557e-07, "loss": 0.11658096313476562, "step": 6621 }, { "epoch": 0.9227339232216262, "grad_norm": 0.6496424674987793, "learning_rate": 6.829074923001955e-07, "loss": 0.12418460845947266, "step": 6622 }, { "epoch": 0.9228732669128406, "grad_norm": 0.385430246591568, "learning_rate": 6.804672126342704e-07, "loss": 0.09539985656738281, "step": 6623 }, { "epoch": 0.9230126106040549, "grad_norm": 0.3858891427516937, "learning_rate": 6.780312253274624e-07, "loss": 0.08365726470947266, "step": 6624 }, { "epoch": 0.9231519542952693, "grad_norm": 0.41163238883018494, "learning_rate": 6.755995309209851e-07, "loss": 0.09324836730957031, "step": 6625 }, { "epoch": 0.9232912979864837, "grad_norm": 0.2914195656776428, "learning_rate": 6.731721299551064e-07, "loss": 0.06652259826660156, "step": 6626 }, { "epoch": 0.9234306416776981, "grad_norm": 0.3604721426963806, "learning_rate": 6.707490229691483e-07, "loss": 0.0698084831237793, "step": 6627 }, { "epoch": 0.9235699853689124, "grad_norm": 0.3797692060470581, "learning_rate": 6.683302105014577e-07, "loss": 0.08304214477539062, "step": 6628 }, { "epoch": 0.9237093290601268, "grad_norm": 0.4249309003353119, "learning_rate": 6.65915693089445e-07, "loss": 0.09428596496582031, "step": 6629 }, { "epoch": 0.9238486727513412, "grad_norm": 0.2175358086824417, "learning_rate": 6.635054712695632e-07, "loss": 0.05502510070800781, "step": 6630 }, { "epoch": 0.9239880164425556, "grad_norm": 0.32141923904418945, "learning_rate": 6.610995455773017e-07, "loss": 0.0782012939453125, "step": 6631 }, { "epoch": 0.92412736013377, "grad_norm": 0.5654904842376709, "learning_rate": 6.586979165472107e-07, "loss": 0.11055564880371094, "step": 6632 }, { "epoch": 0.9242667038249843, "grad_norm": 0.5399138331413269, "learning_rate": 6.563005847128701e-07, "loss": 0.10218429565429688, "step": 6633 }, { "epoch": 0.9244060475161987, "grad_norm": 0.35986974835395813, "learning_rate": 6.539075506069181e-07, "loss": 0.08703899383544922, "step": 6634 }, { "epoch": 0.9245453912074131, "grad_norm": 0.4902538061141968, "learning_rate": 6.515188147610274e-07, "loss": 0.10908699035644531, "step": 6635 }, { "epoch": 0.9246847348986275, "grad_norm": 0.47024884819984436, "learning_rate": 6.491343777059245e-07, "loss": 0.11682510375976562, "step": 6636 }, { "epoch": 0.9248240785898418, "grad_norm": 0.3190925419330597, "learning_rate": 6.467542399713744e-07, "loss": 0.08687400817871094, "step": 6637 }, { "epoch": 0.9249634222810562, "grad_norm": 0.37971243262290955, "learning_rate": 6.443784020861921e-07, "loss": 0.08101654052734375, "step": 6638 }, { "epoch": 0.9251027659722706, "grad_norm": 0.5773796439170837, "learning_rate": 6.420068645782285e-07, "loss": 0.08248615264892578, "step": 6639 }, { "epoch": 0.925242109663485, "grad_norm": 0.49288609623908997, "learning_rate": 6.396396279743911e-07, "loss": 0.10174751281738281, "step": 6640 }, { "epoch": 0.9253814533546993, "grad_norm": 0.33727696537971497, "learning_rate": 6.372766928006236e-07, "loss": 0.07999420166015625, "step": 6641 }, { "epoch": 0.9255207970459137, "grad_norm": 0.5195273160934448, "learning_rate": 6.349180595819171e-07, "loss": 0.08247184753417969, "step": 6642 }, { "epoch": 0.9256601407371281, "grad_norm": 0.2792443633079529, "learning_rate": 6.325637288423059e-07, "loss": 0.08041191101074219, "step": 6643 }, { "epoch": 0.9257994844283425, "grad_norm": 0.344217449426651, "learning_rate": 6.302137011048648e-07, "loss": 0.08887672424316406, "step": 6644 }, { "epoch": 0.9259388281195569, "grad_norm": 0.2827705442905426, "learning_rate": 6.278679768917229e-07, "loss": 0.06989479064941406, "step": 6645 }, { "epoch": 0.9260781718107712, "grad_norm": 0.38775885105133057, "learning_rate": 6.25526556724041e-07, "loss": 0.08710765838623047, "step": 6646 }, { "epoch": 0.9262175155019856, "grad_norm": 0.5543617010116577, "learning_rate": 6.231894411220319e-07, "loss": 0.10077857971191406, "step": 6647 }, { "epoch": 0.9263568591932, "grad_norm": 0.5051101446151733, "learning_rate": 6.208566306049513e-07, "loss": 0.09556198120117188, "step": 6648 }, { "epoch": 0.9264962028844144, "grad_norm": 0.3866461515426636, "learning_rate": 6.185281256910936e-07, "loss": 0.09577178955078125, "step": 6649 }, { "epoch": 0.9266355465756287, "grad_norm": 0.21760503947734833, "learning_rate": 6.162039268977981e-07, "loss": 0.059696197509765625, "step": 6650 }, { "epoch": 0.9267748902668431, "grad_norm": 0.562396764755249, "learning_rate": 6.138840347414498e-07, "loss": 0.10908699035644531, "step": 6651 }, { "epoch": 0.9269142339580575, "grad_norm": 0.3615497648715973, "learning_rate": 6.115684497374786e-07, "loss": 0.0806131362915039, "step": 6652 }, { "epoch": 0.9270535776492719, "grad_norm": 0.44118282198905945, "learning_rate": 6.092571724003527e-07, "loss": 0.08130264282226562, "step": 6653 }, { "epoch": 0.9271929213404864, "grad_norm": 0.395177960395813, "learning_rate": 6.069502032435814e-07, "loss": 0.08608627319335938, "step": 6654 }, { "epoch": 0.9273322650317007, "grad_norm": 0.25216373801231384, "learning_rate": 6.04647542779726e-07, "loss": 0.07448005676269531, "step": 6655 }, { "epoch": 0.9274716087229151, "grad_norm": 0.6371353268623352, "learning_rate": 6.023491915203839e-07, "loss": 0.10519790649414062, "step": 6656 }, { "epoch": 0.9276109524141295, "grad_norm": 0.3372279405593872, "learning_rate": 6.000551499761953e-07, "loss": 0.08106422424316406, "step": 6657 }, { "epoch": 0.9277502961053439, "grad_norm": 0.6367672681808472, "learning_rate": 5.977654186568438e-07, "loss": 0.11925888061523438, "step": 6658 }, { "epoch": 0.9278896397965583, "grad_norm": 0.3356008529663086, "learning_rate": 5.954799980710579e-07, "loss": 0.06897258758544922, "step": 6659 }, { "epoch": 0.9280289834877726, "grad_norm": 0.3450528383255005, "learning_rate": 5.931988887266005e-07, "loss": 0.07631921768188477, "step": 6660 }, { "epoch": 0.928168327178987, "grad_norm": 0.5043894052505493, "learning_rate": 5.90922091130286e-07, "loss": 0.09424209594726562, "step": 6661 }, { "epoch": 0.9283076708702014, "grad_norm": 0.4736936092376709, "learning_rate": 5.886496057879676e-07, "loss": 0.10711097717285156, "step": 6662 }, { "epoch": 0.9284470145614158, "grad_norm": 0.5441661477088928, "learning_rate": 5.863814332045347e-07, "loss": 0.09080123901367188, "step": 6663 }, { "epoch": 0.9285863582526301, "grad_norm": 0.44081375002861023, "learning_rate": 5.841175738839311e-07, "loss": 0.1002349853515625, "step": 6664 }, { "epoch": 0.9287257019438445, "grad_norm": 0.48935723304748535, "learning_rate": 5.818580283291276e-07, "loss": 0.09604644775390625, "step": 6665 }, { "epoch": 0.9288650456350589, "grad_norm": 0.31389591097831726, "learning_rate": 5.796027970421492e-07, "loss": 0.0885152816772461, "step": 6666 }, { "epoch": 0.9290043893262733, "grad_norm": 0.34090209007263184, "learning_rate": 5.773518805240508e-07, "loss": 0.08242607116699219, "step": 6667 }, { "epoch": 0.9291437330174876, "grad_norm": 0.3995712697505951, "learning_rate": 5.75105279274939e-07, "loss": 0.07183647155761719, "step": 6668 }, { "epoch": 0.929283076708702, "grad_norm": 0.32118910551071167, "learning_rate": 5.728629937939568e-07, "loss": 0.07657337188720703, "step": 6669 }, { "epoch": 0.9294224203999164, "grad_norm": 0.52314293384552, "learning_rate": 5.706250245792899e-07, "loss": 0.07912826538085938, "step": 6670 }, { "epoch": 0.9295617640911308, "grad_norm": 0.3489578366279602, "learning_rate": 5.683913721281586e-07, "loss": 0.08083343505859375, "step": 6671 }, { "epoch": 0.9297011077823452, "grad_norm": 0.46099725365638733, "learning_rate": 5.661620369368348e-07, "loss": 0.09641265869140625, "step": 6672 }, { "epoch": 0.9298404514735595, "grad_norm": 0.4630115330219269, "learning_rate": 5.639370195006266e-07, "loss": 0.08951950073242188, "step": 6673 }, { "epoch": 0.9299797951647739, "grad_norm": 0.6780856251716614, "learning_rate": 5.617163203138765e-07, "loss": 0.11866950988769531, "step": 6674 }, { "epoch": 0.9301191388559883, "grad_norm": 0.516190230846405, "learning_rate": 5.594999398699785e-07, "loss": 0.08120203018188477, "step": 6675 }, { "epoch": 0.9302584825472027, "grad_norm": 0.3080327808856964, "learning_rate": 5.572878786613589e-07, "loss": 0.06950187683105469, "step": 6676 }, { "epoch": 0.930397826238417, "grad_norm": 0.5138849020004272, "learning_rate": 5.55080137179489e-07, "loss": 0.10778236389160156, "step": 6677 }, { "epoch": 0.9305371699296314, "grad_norm": 0.3800298869609833, "learning_rate": 5.528767159148762e-07, "loss": 0.0932769775390625, "step": 6678 }, { "epoch": 0.9306765136208458, "grad_norm": 0.3422374725341797, "learning_rate": 5.506776153570714e-07, "loss": 0.07755470275878906, "step": 6679 }, { "epoch": 0.9308158573120602, "grad_norm": 0.3924712538719177, "learning_rate": 5.484828359946682e-07, "loss": 0.09271240234375, "step": 6680 }, { "epoch": 0.9309552010032746, "grad_norm": 0.2326449304819107, "learning_rate": 5.462923783152918e-07, "loss": 0.06902503967285156, "step": 6681 }, { "epoch": 0.9310945446944889, "grad_norm": 0.28155168890953064, "learning_rate": 5.44106242805611e-07, "loss": 0.07817268371582031, "step": 6682 }, { "epoch": 0.9312338883857033, "grad_norm": 0.23045183718204498, "learning_rate": 5.419244299513371e-07, "loss": 0.06684207916259766, "step": 6683 }, { "epoch": 0.9313732320769177, "grad_norm": 0.25650131702423096, "learning_rate": 5.397469402372224e-07, "loss": 0.06693553924560547, "step": 6684 }, { "epoch": 0.9315125757681321, "grad_norm": 0.4884472191333771, "learning_rate": 5.375737741470511e-07, "loss": 0.09341812133789062, "step": 6685 }, { "epoch": 0.9316519194593464, "grad_norm": 0.19392023980617523, "learning_rate": 5.354049321636523e-07, "loss": 0.0544891357421875, "step": 6686 }, { "epoch": 0.9317912631505608, "grad_norm": 0.5219374299049377, "learning_rate": 5.332404147688919e-07, "loss": 0.08742904663085938, "step": 6687 }, { "epoch": 0.9319306068417752, "grad_norm": 0.37558475136756897, "learning_rate": 5.310802224436806e-07, "loss": 0.09094047546386719, "step": 6688 }, { "epoch": 0.9320699505329896, "grad_norm": 0.5372804999351501, "learning_rate": 5.28924355667959e-07, "loss": 0.088897705078125, "step": 6689 }, { "epoch": 0.932209294224204, "grad_norm": 0.608688235282898, "learning_rate": 5.267728149207152e-07, "loss": 0.12084197998046875, "step": 6690 }, { "epoch": 0.9323486379154183, "grad_norm": 0.5047572255134583, "learning_rate": 5.246256006799689e-07, "loss": 0.10957527160644531, "step": 6691 }, { "epoch": 0.9324879816066327, "grad_norm": 0.33723902702331543, "learning_rate": 5.22482713422785e-07, "loss": 0.07510757446289062, "step": 6692 }, { "epoch": 0.9326273252978471, "grad_norm": 0.3445902168750763, "learning_rate": 5.203441536252607e-07, "loss": 0.0868844985961914, "step": 6693 }, { "epoch": 0.9327666689890615, "grad_norm": 0.4786708950996399, "learning_rate": 5.182099217625381e-07, "loss": 0.10385608673095703, "step": 6694 }, { "epoch": 0.932906012680276, "grad_norm": 0.4463580846786499, "learning_rate": 5.160800183087955e-07, "loss": 0.07473182678222656, "step": 6695 }, { "epoch": 0.9330453563714903, "grad_norm": 0.2888081669807434, "learning_rate": 5.139544437372457e-07, "loss": 0.06249237060546875, "step": 6696 }, { "epoch": 0.9331847000627047, "grad_norm": 0.3051902949810028, "learning_rate": 5.118331985201441e-07, "loss": 0.08365345001220703, "step": 6697 }, { "epoch": 0.9333240437539191, "grad_norm": 0.3894302546977997, "learning_rate": 5.097162831287872e-07, "loss": 0.08506298065185547, "step": 6698 }, { "epoch": 0.9334633874451335, "grad_norm": 0.680584728717804, "learning_rate": 5.076036980334964e-07, "loss": 0.08860015869140625, "step": 6699 }, { "epoch": 0.9336027311363478, "grad_norm": 0.28569626808166504, "learning_rate": 5.054954437036475e-07, "loss": 0.0767364501953125, "step": 6700 }, { "epoch": 0.9337420748275622, "grad_norm": 0.35863956809043884, "learning_rate": 5.033915206076456e-07, "loss": 0.0759124755859375, "step": 6701 }, { "epoch": 0.9338814185187766, "grad_norm": 0.4638131558895111, "learning_rate": 5.012919292129303e-07, "loss": 0.09745597839355469, "step": 6702 }, { "epoch": 0.934020762209991, "grad_norm": 0.4212966561317444, "learning_rate": 4.991966699859863e-07, "loss": 0.09281730651855469, "step": 6703 }, { "epoch": 0.9341601059012054, "grad_norm": 0.5626142024993896, "learning_rate": 4.971057433923298e-07, "loss": 0.09919357299804688, "step": 6704 }, { "epoch": 0.9342994495924197, "grad_norm": 0.43106403946876526, "learning_rate": 4.950191498965207e-07, "loss": 0.07739830017089844, "step": 6705 }, { "epoch": 0.9344387932836341, "grad_norm": 0.4531414806842804, "learning_rate": 4.929368899621479e-07, "loss": 0.08102798461914062, "step": 6706 }, { "epoch": 0.9345781369748485, "grad_norm": 0.3744693100452423, "learning_rate": 4.908589640518458e-07, "loss": 0.09240055084228516, "step": 6707 }, { "epoch": 0.9347174806660629, "grad_norm": 0.7656886577606201, "learning_rate": 4.887853726272785e-07, "loss": 0.13350677490234375, "step": 6708 }, { "epoch": 0.9348568243572772, "grad_norm": 0.376514196395874, "learning_rate": 4.867161161491551e-07, "loss": 0.07762527465820312, "step": 6709 }, { "epoch": 0.9349961680484916, "grad_norm": 0.2048104703426361, "learning_rate": 4.846511950772148e-07, "loss": 0.06313323974609375, "step": 6710 }, { "epoch": 0.935135511739706, "grad_norm": 0.39675194025039673, "learning_rate": 4.825906098702348e-07, "loss": 0.09535980224609375, "step": 6711 }, { "epoch": 0.9352748554309204, "grad_norm": 0.37547704577445984, "learning_rate": 4.805343609860314e-07, "loss": 0.09307098388671875, "step": 6712 }, { "epoch": 0.9354141991221347, "grad_norm": 0.32007312774658203, "learning_rate": 4.784824488814588e-07, "loss": 0.07244300842285156, "step": 6713 }, { "epoch": 0.9355535428133491, "grad_norm": 0.3219676613807678, "learning_rate": 4.7643487401239917e-07, "loss": 0.09319114685058594, "step": 6714 }, { "epoch": 0.9356928865045635, "grad_norm": 0.6558754444122314, "learning_rate": 4.7439163683377975e-07, "loss": 0.11772727966308594, "step": 6715 }, { "epoch": 0.9358322301957779, "grad_norm": 0.7904849052429199, "learning_rate": 4.723527377995618e-07, "loss": 0.13175582885742188, "step": 6716 }, { "epoch": 0.9359715738869923, "grad_norm": 0.5510526895523071, "learning_rate": 4.7031817736274297e-07, "loss": 0.09984874725341797, "step": 6717 }, { "epoch": 0.9361109175782066, "grad_norm": 0.26091140508651733, "learning_rate": 4.68287955975355e-07, "loss": 0.06839561462402344, "step": 6718 }, { "epoch": 0.936250261269421, "grad_norm": 0.24571335315704346, "learning_rate": 4.662620740884638e-07, "loss": 0.06809425354003906, "step": 6719 }, { "epoch": 0.9363896049606354, "grad_norm": 0.8018134236335754, "learning_rate": 4.642405321521803e-07, "loss": 0.11262702941894531, "step": 6720 }, { "epoch": 0.9365289486518498, "grad_norm": 0.6890111565589905, "learning_rate": 4.622233306156387e-07, "loss": 0.09411048889160156, "step": 6721 }, { "epoch": 0.9366682923430641, "grad_norm": 0.3756306767463684, "learning_rate": 4.6021046992702046e-07, "loss": 0.07724571228027344, "step": 6722 }, { "epoch": 0.9368076360342785, "grad_norm": 0.3384687602519989, "learning_rate": 4.5820195053353445e-07, "loss": 0.07736015319824219, "step": 6723 }, { "epoch": 0.9369469797254929, "grad_norm": 0.22445981204509735, "learning_rate": 4.561977728814282e-07, "loss": 0.05788135528564453, "step": 6724 }, { "epoch": 0.9370863234167073, "grad_norm": 0.49907374382019043, "learning_rate": 4.541979374159833e-07, "loss": 0.10427284240722656, "step": 6725 }, { "epoch": 0.9372256671079217, "grad_norm": 0.27394819259643555, "learning_rate": 4.522024445815176e-07, "loss": 0.06667280197143555, "step": 6726 }, { "epoch": 0.937365010799136, "grad_norm": 0.361652672290802, "learning_rate": 4.502112948213899e-07, "loss": 0.07003211975097656, "step": 6727 }, { "epoch": 0.9375043544903504, "grad_norm": 0.31202757358551025, "learning_rate": 4.482244885779774e-07, "loss": 0.08358573913574219, "step": 6728 }, { "epoch": 0.9376436981815648, "grad_norm": 0.47156238555908203, "learning_rate": 4.462420262927114e-07, "loss": 0.10611915588378906, "step": 6729 }, { "epoch": 0.9377830418727792, "grad_norm": 0.23631687462329865, "learning_rate": 4.442639084060463e-07, "loss": 0.05667877197265625, "step": 6730 }, { "epoch": 0.9379223855639935, "grad_norm": 0.38064736127853394, "learning_rate": 4.422901353574771e-07, "loss": 0.08551788330078125, "step": 6731 }, { "epoch": 0.9380617292552079, "grad_norm": 0.5650538802146912, "learning_rate": 4.403207075855265e-07, "loss": 0.0947718620300293, "step": 6732 }, { "epoch": 0.9382010729464223, "grad_norm": 0.4957156479358673, "learning_rate": 4.3835562552776434e-07, "loss": 0.09737992286682129, "step": 6733 }, { "epoch": 0.9383404166376367, "grad_norm": 0.6571094989776611, "learning_rate": 4.3639488962077923e-07, "loss": 0.11236286163330078, "step": 6734 }, { "epoch": 0.9384797603288512, "grad_norm": 0.3780471682548523, "learning_rate": 4.3443850030020497e-07, "loss": 0.07454299926757812, "step": 6735 }, { "epoch": 0.9386191040200655, "grad_norm": 0.479097843170166, "learning_rate": 4.324864580007071e-07, "loss": 0.10063743591308594, "step": 6736 }, { "epoch": 0.9387584477112799, "grad_norm": 0.5277379155158997, "learning_rate": 4.305387631559854e-07, "loss": 0.09780406951904297, "step": 6737 }, { "epoch": 0.9388977914024943, "grad_norm": 0.3923250734806061, "learning_rate": 4.2859541619877154e-07, "loss": 0.07928085327148438, "step": 6738 }, { "epoch": 0.9390371350937087, "grad_norm": 0.3509894609451294, "learning_rate": 4.2665641756083344e-07, "loss": 0.09309768676757812, "step": 6739 }, { "epoch": 0.939176478784923, "grad_norm": 0.5545886158943176, "learning_rate": 4.247217676729709e-07, "loss": 0.11981391906738281, "step": 6740 }, { "epoch": 0.9393158224761374, "grad_norm": 0.2218812257051468, "learning_rate": 4.227914669650224e-07, "loss": 0.06036949157714844, "step": 6741 }, { "epoch": 0.9394551661673518, "grad_norm": 0.7205094695091248, "learning_rate": 4.2086551586585144e-07, "loss": 0.13799285888671875, "step": 6742 }, { "epoch": 0.9395945098585662, "grad_norm": 0.5799086689949036, "learning_rate": 4.1894391480336694e-07, "loss": 0.10459136962890625, "step": 6743 }, { "epoch": 0.9397338535497806, "grad_norm": 0.17685124278068542, "learning_rate": 4.1702666420450064e-07, "loss": 0.05365562438964844, "step": 6744 }, { "epoch": 0.9398731972409949, "grad_norm": 0.3525027334690094, "learning_rate": 4.15113764495223e-07, "loss": 0.08859920501708984, "step": 6745 }, { "epoch": 0.9400125409322093, "grad_norm": 0.5000352263450623, "learning_rate": 4.1320521610053624e-07, "loss": 0.09018325805664062, "step": 6746 }, { "epoch": 0.9401518846234237, "grad_norm": 0.6123389005661011, "learning_rate": 4.113010194444744e-07, "loss": 0.12257766723632812, "step": 6747 }, { "epoch": 0.9402912283146381, "grad_norm": 0.29779455065727234, "learning_rate": 4.094011749501103e-07, "loss": 0.06926155090332031, "step": 6748 }, { "epoch": 0.9404305720058524, "grad_norm": 0.248356431722641, "learning_rate": 4.075056830395441e-07, "loss": 0.0585484504699707, "step": 6749 }, { "epoch": 0.9405699156970668, "grad_norm": 0.31627774238586426, "learning_rate": 4.056145441339099e-07, "loss": 0.07529830932617188, "step": 6750 }, { "epoch": 0.9407092593882812, "grad_norm": 0.3172943592071533, "learning_rate": 4.037277586533761e-07, "loss": 0.0702676773071289, "step": 6751 }, { "epoch": 0.9408486030794956, "grad_norm": 0.25905534625053406, "learning_rate": 4.018453270171474e-07, "loss": 0.06768226623535156, "step": 6752 }, { "epoch": 0.94098794677071, "grad_norm": 0.2098427563905716, "learning_rate": 3.9996724964344924e-07, "loss": 0.050777435302734375, "step": 6753 }, { "epoch": 0.9411272904619243, "grad_norm": 0.7403552532196045, "learning_rate": 3.980935269495545e-07, "loss": 0.11927223205566406, "step": 6754 }, { "epoch": 0.9412666341531387, "grad_norm": 0.5926846265792847, "learning_rate": 3.9622415935175683e-07, "loss": 0.11301231384277344, "step": 6755 }, { "epoch": 0.9414059778443531, "grad_norm": 0.23571304976940155, "learning_rate": 3.943591472653929e-07, "loss": 0.06699562072753906, "step": 6756 }, { "epoch": 0.9415453215355675, "grad_norm": 0.33697178959846497, "learning_rate": 3.92498491104818e-07, "loss": 0.06890869140625, "step": 6757 }, { "epoch": 0.9416846652267818, "grad_norm": 0.286998987197876, "learning_rate": 3.906421912834324e-07, "loss": 0.08141326904296875, "step": 6758 }, { "epoch": 0.9418240089179962, "grad_norm": 0.5283071994781494, "learning_rate": 3.887902482136663e-07, "loss": 0.09610557556152344, "step": 6759 }, { "epoch": 0.9419633526092106, "grad_norm": 0.4568100869655609, "learning_rate": 3.8694266230697053e-07, "loss": 0.08820056915283203, "step": 6760 }, { "epoch": 0.942102696300425, "grad_norm": 0.2876872420310974, "learning_rate": 3.850994339738434e-07, "loss": 0.07981109619140625, "step": 6761 }, { "epoch": 0.9422420399916394, "grad_norm": 0.5309542417526245, "learning_rate": 3.8326056362380846e-07, "loss": 0.0925445556640625, "step": 6762 }, { "epoch": 0.9423813836828537, "grad_norm": 0.34627702832221985, "learning_rate": 3.814260516654145e-07, "loss": 0.07609367370605469, "step": 6763 }, { "epoch": 0.9425207273740681, "grad_norm": 0.4071417450904846, "learning_rate": 3.795958985062553e-07, "loss": 0.07770729064941406, "step": 6764 }, { "epoch": 0.9426600710652825, "grad_norm": 0.5057757496833801, "learning_rate": 3.777701045529436e-07, "loss": 0.08545494079589844, "step": 6765 }, { "epoch": 0.9427994147564969, "grad_norm": 0.30591559410095215, "learning_rate": 3.759486702111348e-07, "loss": 0.07672119140625, "step": 6766 }, { "epoch": 0.9429387584477112, "grad_norm": 0.2544185221195221, "learning_rate": 3.7413159588550295e-07, "loss": 0.0711965560913086, "step": 6767 }, { "epoch": 0.9430781021389256, "grad_norm": 0.4555186629295349, "learning_rate": 3.723188819797652e-07, "loss": 0.08280754089355469, "step": 6768 }, { "epoch": 0.94321744583014, "grad_norm": 0.40748047828674316, "learning_rate": 3.7051052889666596e-07, "loss": 0.08355522155761719, "step": 6769 }, { "epoch": 0.9433567895213544, "grad_norm": 0.19781652092933655, "learning_rate": 3.6870653703797943e-07, "loss": 0.06423664093017578, "step": 6770 }, { "epoch": 0.9434961332125688, "grad_norm": 0.29825934767723083, "learning_rate": 3.6690690680450723e-07, "loss": 0.07646465301513672, "step": 6771 }, { "epoch": 0.9436354769037831, "grad_norm": 0.46692174673080444, "learning_rate": 3.6511163859608957e-07, "loss": 0.083953857421875, "step": 6772 }, { "epoch": 0.9437748205949975, "grad_norm": 0.3504297137260437, "learning_rate": 3.6332073281159394e-07, "loss": 0.05820512771606445, "step": 6773 }, { "epoch": 0.9439141642862119, "grad_norm": 0.3720932900905609, "learning_rate": 3.6153418984891996e-07, "loss": 0.08486652374267578, "step": 6774 }, { "epoch": 0.9440535079774264, "grad_norm": 0.24168632924556732, "learning_rate": 3.597520101049945e-07, "loss": 0.0670480728149414, "step": 6775 }, { "epoch": 0.9441928516686408, "grad_norm": 0.43814966082572937, "learning_rate": 3.579741939757764e-07, "loss": 0.08017730712890625, "step": 6776 }, { "epoch": 0.9443321953598551, "grad_norm": 0.33266839385032654, "learning_rate": 3.5620074185626075e-07, "loss": 0.09765338897705078, "step": 6777 }, { "epoch": 0.9444715390510695, "grad_norm": 0.387081116437912, "learning_rate": 3.544316541404613e-07, "loss": 0.07778739929199219, "step": 6778 }, { "epoch": 0.9446108827422839, "grad_norm": 0.30255016684532166, "learning_rate": 3.526669312214326e-07, "loss": 0.07753181457519531, "step": 6779 }, { "epoch": 0.9447502264334983, "grad_norm": 0.2982569932937622, "learning_rate": 3.5090657349125647e-07, "loss": 0.07930564880371094, "step": 6780 }, { "epoch": 0.9448895701247126, "grad_norm": 0.5392690300941467, "learning_rate": 3.491505813410445e-07, "loss": 0.10487604141235352, "step": 6781 }, { "epoch": 0.945028913815927, "grad_norm": 0.521966278553009, "learning_rate": 3.473989551609358e-07, "loss": 0.09293174743652344, "step": 6782 }, { "epoch": 0.9451682575071414, "grad_norm": 0.24884885549545288, "learning_rate": 3.4565169534010123e-07, "loss": 0.06592941284179688, "step": 6783 }, { "epoch": 0.9453076011983558, "grad_norm": 0.4902370870113373, "learning_rate": 3.439088022667458e-07, "loss": 0.08084869384765625, "step": 6784 }, { "epoch": 0.9454469448895702, "grad_norm": 0.47224441170692444, "learning_rate": 3.421702763280976e-07, "loss": 0.09177112579345703, "step": 6785 }, { "epoch": 0.9455862885807845, "grad_norm": 0.3201057016849518, "learning_rate": 3.4043611791041874e-07, "loss": 0.07168960571289062, "step": 6786 }, { "epoch": 0.9457256322719989, "grad_norm": 0.216248020529747, "learning_rate": 3.387063273989966e-07, "loss": 0.06376266479492188, "step": 6787 }, { "epoch": 0.9458649759632133, "grad_norm": 0.37640872597694397, "learning_rate": 3.3698090517815696e-07, "loss": 0.08612728118896484, "step": 6788 }, { "epoch": 0.9460043196544277, "grad_norm": 0.812937319278717, "learning_rate": 3.352598516312422e-07, "loss": 0.13990020751953125, "step": 6789 }, { "epoch": 0.946143663345642, "grad_norm": 0.47826844453811646, "learning_rate": 3.3354316714063527e-07, "loss": 0.09345436096191406, "step": 6790 }, { "epoch": 0.9462830070368564, "grad_norm": 0.2711336612701416, "learning_rate": 3.318308520877489e-07, "loss": 0.07067680358886719, "step": 6791 }, { "epoch": 0.9464223507280708, "grad_norm": 0.4487355947494507, "learning_rate": 3.301229068530098e-07, "loss": 0.09762763977050781, "step": 6792 }, { "epoch": 0.9465616944192852, "grad_norm": 0.3978444039821625, "learning_rate": 3.2841933181589234e-07, "loss": 0.07670211791992188, "step": 6793 }, { "epoch": 0.9467010381104995, "grad_norm": 0.4419565498828888, "learning_rate": 3.26720127354887e-07, "loss": 0.08962249755859375, "step": 6794 }, { "epoch": 0.9468403818017139, "grad_norm": 0.18686196208000183, "learning_rate": 3.250252938475229e-07, "loss": 0.06031608581542969, "step": 6795 }, { "epoch": 0.9469797254929283, "grad_norm": 0.3549850881099701, "learning_rate": 3.2333483167035217e-07, "loss": 0.07960891723632812, "step": 6796 }, { "epoch": 0.9471190691841427, "grad_norm": 0.4807749390602112, "learning_rate": 3.216487411989544e-07, "loss": 0.08895015716552734, "step": 6797 }, { "epoch": 0.9472584128753571, "grad_norm": 0.2885565459728241, "learning_rate": 3.19967022807941e-07, "loss": 0.0731344223022461, "step": 6798 }, { "epoch": 0.9473977565665714, "grad_norm": 0.4322466254234314, "learning_rate": 3.182896768709531e-07, "loss": 0.1116037368774414, "step": 6799 }, { "epoch": 0.9475371002577858, "grad_norm": 0.32523635029792786, "learning_rate": 3.166167037606571e-07, "loss": 0.08742141723632812, "step": 6800 }, { "epoch": 0.9476764439490002, "grad_norm": 0.8179355263710022, "learning_rate": 3.1494810384875343e-07, "loss": 0.13885116577148438, "step": 6801 }, { "epoch": 0.9478157876402146, "grad_norm": 0.38150760531425476, "learning_rate": 3.132838775059632e-07, "loss": 0.08342647552490234, "step": 6802 }, { "epoch": 0.947955131331429, "grad_norm": 0.6485220789909363, "learning_rate": 3.116240251020375e-07, "loss": 0.12058830261230469, "step": 6803 }, { "epoch": 0.9480944750226433, "grad_norm": 0.34870508313179016, "learning_rate": 3.0996854700575896e-07, "loss": 0.0844268798828125, "step": 6804 }, { "epoch": 0.9482338187138577, "grad_norm": 0.48343950510025024, "learning_rate": 3.083174435849423e-07, "loss": 0.08814430236816406, "step": 6805 }, { "epoch": 0.9483731624050721, "grad_norm": 0.5554733276367188, "learning_rate": 3.0667071520641857e-07, "loss": 0.11525154113769531, "step": 6806 }, { "epoch": 0.9485125060962865, "grad_norm": 0.2892366647720337, "learning_rate": 3.05028362236055e-07, "loss": 0.0774850845336914, "step": 6807 }, { "epoch": 0.9486518497875008, "grad_norm": 0.32199710607528687, "learning_rate": 3.033903850387465e-07, "loss": 0.07614517211914062, "step": 6808 }, { "epoch": 0.9487911934787152, "grad_norm": 0.3980802595615387, "learning_rate": 3.01756783978413e-07, "loss": 0.0816202163696289, "step": 6809 }, { "epoch": 0.9489305371699296, "grad_norm": 0.28441715240478516, "learning_rate": 3.001275594180042e-07, "loss": 0.0677957534790039, "step": 6810 }, { "epoch": 0.949069880861144, "grad_norm": 0.3296477794647217, "learning_rate": 2.9850271171949495e-07, "loss": 0.07610130310058594, "step": 6811 }, { "epoch": 0.9492092245523583, "grad_norm": 0.5645394921302795, "learning_rate": 2.968822412438921e-07, "loss": 0.0933837890625, "step": 6812 }, { "epoch": 0.9493485682435727, "grad_norm": 0.37131574749946594, "learning_rate": 2.9526614835122314e-07, "loss": 0.08467864990234375, "step": 6813 }, { "epoch": 0.9494879119347871, "grad_norm": 0.22064082324504852, "learning_rate": 2.936544334005498e-07, "loss": 0.054790496826171875, "step": 6814 }, { "epoch": 0.9496272556260016, "grad_norm": 0.5203630924224854, "learning_rate": 2.920470967499589e-07, "loss": 0.0867919921875, "step": 6815 }, { "epoch": 0.949766599317216, "grad_norm": 0.3252600431442261, "learning_rate": 2.9044413875656266e-07, "loss": 0.07757568359375, "step": 6816 }, { "epoch": 0.9499059430084303, "grad_norm": 0.47037890553474426, "learning_rate": 2.8884555977650277e-07, "loss": 0.09355545043945312, "step": 6817 }, { "epoch": 0.9500452866996447, "grad_norm": 0.4034830331802368, "learning_rate": 2.8725136016494404e-07, "loss": 0.08321380615234375, "step": 6818 }, { "epoch": 0.9501846303908591, "grad_norm": 0.290571928024292, "learning_rate": 2.856615402760832e-07, "loss": 0.07594108581542969, "step": 6819 }, { "epoch": 0.9503239740820735, "grad_norm": 0.3489253520965576, "learning_rate": 2.8407610046314425e-07, "loss": 0.07201194763183594, "step": 6820 }, { "epoch": 0.9504633177732879, "grad_norm": 0.26834601163864136, "learning_rate": 2.824950410783722e-07, "loss": 0.0742497444152832, "step": 6821 }, { "epoch": 0.9506026614645022, "grad_norm": 0.34987637400627136, "learning_rate": 2.8091836247304603e-07, "loss": 0.085662841796875, "step": 6822 }, { "epoch": 0.9507420051557166, "grad_norm": 0.3450709581375122, "learning_rate": 2.7934606499746106e-07, "loss": 0.08451986312866211, "step": 6823 }, { "epoch": 0.950881348846931, "grad_norm": 0.4613151550292969, "learning_rate": 2.7777814900095344e-07, "loss": 0.10283851623535156, "step": 6824 }, { "epoch": 0.9510206925381454, "grad_norm": 0.33864906430244446, "learning_rate": 2.7621461483187563e-07, "loss": 0.07989501953125, "step": 6825 }, { "epoch": 0.9511600362293597, "grad_norm": 0.5372464656829834, "learning_rate": 2.7465546283760526e-07, "loss": 0.09317207336425781, "step": 6826 }, { "epoch": 0.9512993799205741, "grad_norm": 0.32699865102767944, "learning_rate": 2.731006933645586e-07, "loss": 0.07907390594482422, "step": 6827 }, { "epoch": 0.9514387236117885, "grad_norm": 0.2476688027381897, "learning_rate": 2.7155030675816153e-07, "loss": 0.06430625915527344, "step": 6828 }, { "epoch": 0.9515780673030029, "grad_norm": 0.23487898707389832, "learning_rate": 2.7000430336287855e-07, "loss": 0.07407283782958984, "step": 6829 }, { "epoch": 0.9517174109942172, "grad_norm": 0.632836639881134, "learning_rate": 2.684626835221971e-07, "loss": 0.10158824920654297, "step": 6830 }, { "epoch": 0.9518567546854316, "grad_norm": 0.782696545124054, "learning_rate": 2.669254475786276e-07, "loss": 0.12182998657226562, "step": 6831 }, { "epoch": 0.951996098376646, "grad_norm": 0.25316810607910156, "learning_rate": 2.6539259587371026e-07, "loss": 0.07417678833007812, "step": 6832 }, { "epoch": 0.9521354420678604, "grad_norm": 0.6412493586540222, "learning_rate": 2.638641287480104e-07, "loss": 0.10609626770019531, "step": 6833 }, { "epoch": 0.9522747857590748, "grad_norm": 0.4874313771724701, "learning_rate": 2.6234004654111854e-07, "loss": 0.09543275833129883, "step": 6834 }, { "epoch": 0.9524141294502891, "grad_norm": 0.6039149761199951, "learning_rate": 2.6082034959164613e-07, "loss": 0.11219596862792969, "step": 6835 }, { "epoch": 0.9525534731415035, "grad_norm": 0.3766322731971741, "learning_rate": 2.5930503823724086e-07, "loss": 0.09813117980957031, "step": 6836 }, { "epoch": 0.9526928168327179, "grad_norm": 0.7744880318641663, "learning_rate": 2.577941128145689e-07, "loss": 0.12305259704589844, "step": 6837 }, { "epoch": 0.9528321605239323, "grad_norm": 0.6358051896095276, "learning_rate": 2.562875736593218e-07, "loss": 0.11170005798339844, "step": 6838 }, { "epoch": 0.9529715042151466, "grad_norm": 0.33489784598350525, "learning_rate": 2.547854211062206e-07, "loss": 0.07693099975585938, "step": 6839 }, { "epoch": 0.953110847906361, "grad_norm": 0.2715928852558136, "learning_rate": 2.532876554890051e-07, "loss": 0.0626983642578125, "step": 6840 }, { "epoch": 0.9532501915975754, "grad_norm": 0.5176957845687866, "learning_rate": 2.5179427714045136e-07, "loss": 0.08530426025390625, "step": 6841 }, { "epoch": 0.9533895352887898, "grad_norm": 0.5964969396591187, "learning_rate": 2.5030528639234717e-07, "loss": 0.08558845520019531, "step": 6842 }, { "epoch": 0.9535288789800042, "grad_norm": 0.3663705885410309, "learning_rate": 2.488206835755147e-07, "loss": 0.07744073867797852, "step": 6843 }, { "epoch": 0.9536682226712185, "grad_norm": 0.505815863609314, "learning_rate": 2.4734046901980114e-07, "loss": 0.11165618896484375, "step": 6844 }, { "epoch": 0.9538075663624329, "grad_norm": 0.655243456363678, "learning_rate": 2.4586464305407454e-07, "loss": 0.1357421875, "step": 6845 }, { "epoch": 0.9539469100536473, "grad_norm": 0.6102237105369568, "learning_rate": 2.443932060062282e-07, "loss": 0.10542774200439453, "step": 6846 }, { "epoch": 0.9540862537448617, "grad_norm": 0.5520254373550415, "learning_rate": 2.429261582031828e-07, "loss": 0.10726737976074219, "step": 6847 }, { "epoch": 0.954225597436076, "grad_norm": 0.6612182259559631, "learning_rate": 2.4146349997088646e-07, "loss": 0.1026763916015625, "step": 6848 }, { "epoch": 0.9543649411272904, "grad_norm": 0.2598809599876404, "learning_rate": 2.400052316343038e-07, "loss": 0.07125663757324219, "step": 6849 }, { "epoch": 0.9545042848185048, "grad_norm": 0.3196321725845337, "learning_rate": 2.385513535174289e-07, "loss": 0.09238624572753906, "step": 6850 }, { "epoch": 0.9546436285097192, "grad_norm": 0.34134194254875183, "learning_rate": 2.3710186594328333e-07, "loss": 0.08108663558959961, "step": 6851 }, { "epoch": 0.9547829722009336, "grad_norm": 0.28955012559890747, "learning_rate": 2.3565676923390734e-07, "loss": 0.07465744018554688, "step": 6852 }, { "epoch": 0.9549223158921479, "grad_norm": 0.7093157172203064, "learning_rate": 2.3421606371037075e-07, "loss": 0.081024169921875, "step": 6853 }, { "epoch": 0.9550616595833623, "grad_norm": 0.5355100035667419, "learning_rate": 2.3277974969276417e-07, "loss": 0.10746383666992188, "step": 6854 }, { "epoch": 0.9552010032745768, "grad_norm": 0.3892204463481903, "learning_rate": 2.3134782750020347e-07, "loss": 0.0892801284790039, "step": 6855 }, { "epoch": 0.9553403469657912, "grad_norm": 0.2920844852924347, "learning_rate": 2.2992029745082966e-07, "loss": 0.07392501831054688, "step": 6856 }, { "epoch": 0.9554796906570056, "grad_norm": 0.2566264569759369, "learning_rate": 2.2849715986180688e-07, "loss": 0.06862640380859375, "step": 6857 }, { "epoch": 0.9556190343482199, "grad_norm": 0.7659746408462524, "learning_rate": 2.2707841504932438e-07, "loss": 0.14964866638183594, "step": 6858 }, { "epoch": 0.9557583780394343, "grad_norm": 0.5824476480484009, "learning_rate": 2.2566406332859449e-07, "loss": 0.12337684631347656, "step": 6859 }, { "epoch": 0.9558977217306487, "grad_norm": 0.4194205403327942, "learning_rate": 2.242541050138547e-07, "loss": 0.07881641387939453, "step": 6860 }, { "epoch": 0.9560370654218631, "grad_norm": 0.5804741382598877, "learning_rate": 2.2284854041836335e-07, "loss": 0.1168060302734375, "step": 6861 }, { "epoch": 0.9561764091130774, "grad_norm": 0.3460279405117035, "learning_rate": 2.214473698544084e-07, "loss": 0.0838165283203125, "step": 6862 }, { "epoch": 0.9563157528042918, "grad_norm": 0.5177403092384338, "learning_rate": 2.2005059363329196e-07, "loss": 0.08751773834228516, "step": 6863 }, { "epoch": 0.9564550964955062, "grad_norm": 0.5308154225349426, "learning_rate": 2.1865821206535243e-07, "loss": 0.1054840087890625, "step": 6864 }, { "epoch": 0.9565944401867206, "grad_norm": 0.6004311442375183, "learning_rate": 2.1727022545994237e-07, "loss": 0.09744071960449219, "step": 6865 }, { "epoch": 0.956733783877935, "grad_norm": 0.46168020367622375, "learning_rate": 2.1588663412544174e-07, "loss": 0.08893585205078125, "step": 6866 }, { "epoch": 0.9568731275691493, "grad_norm": 0.7353544235229492, "learning_rate": 2.1450743836925136e-07, "loss": 0.11985588073730469, "step": 6867 }, { "epoch": 0.9570124712603637, "grad_norm": 0.25411537289619446, "learning_rate": 2.1313263849779498e-07, "loss": 0.07079029083251953, "step": 6868 }, { "epoch": 0.9571518149515781, "grad_norm": 0.4066571891307831, "learning_rate": 2.1176223481652824e-07, "loss": 0.092071533203125, "step": 6869 }, { "epoch": 0.9572911586427925, "grad_norm": 0.6468311548233032, "learning_rate": 2.1039622762991874e-07, "loss": 0.10663414001464844, "step": 6870 }, { "epoch": 0.9574305023340068, "grad_norm": 0.36678484082221985, "learning_rate": 2.0903461724146146e-07, "loss": 0.08179473876953125, "step": 6871 }, { "epoch": 0.9575698460252212, "grad_norm": 0.26636847853660583, "learning_rate": 2.0767740395367886e-07, "loss": 0.06438827514648438, "step": 6872 }, { "epoch": 0.9577091897164356, "grad_norm": 0.631134569644928, "learning_rate": 2.0632458806810974e-07, "loss": 0.1323070526123047, "step": 6873 }, { "epoch": 0.95784853340765, "grad_norm": 0.3878439962863922, "learning_rate": 2.0497616988532032e-07, "loss": 0.08051109313964844, "step": 6874 }, { "epoch": 0.9579878770988643, "grad_norm": 0.37732556462287903, "learning_rate": 2.0363214970489763e-07, "loss": 0.08594512939453125, "step": 6875 }, { "epoch": 0.9581272207900787, "grad_norm": 0.2774856686592102, "learning_rate": 2.0229252782545171e-07, "loss": 0.07093238830566406, "step": 6876 }, { "epoch": 0.9582665644812931, "grad_norm": 0.5948699712753296, "learning_rate": 2.0095730454461781e-07, "loss": 0.10755538940429688, "step": 6877 }, { "epoch": 0.9584059081725075, "grad_norm": 0.5790180563926697, "learning_rate": 1.9962648015904972e-07, "loss": 0.10915184020996094, "step": 6878 }, { "epoch": 0.9585452518637219, "grad_norm": 0.7054398655891418, "learning_rate": 1.9830005496442873e-07, "loss": 0.12053871154785156, "step": 6879 }, { "epoch": 0.9586845955549362, "grad_norm": 0.40910354256629944, "learning_rate": 1.969780292554546e-07, "loss": 0.0960693359375, "step": 6880 }, { "epoch": 0.9588239392461506, "grad_norm": 0.41224390268325806, "learning_rate": 1.9566040332585246e-07, "loss": 0.09960746765136719, "step": 6881 }, { "epoch": 0.958963282937365, "grad_norm": 0.26176291704177856, "learning_rate": 1.9434717746836805e-07, "loss": 0.07081985473632812, "step": 6882 }, { "epoch": 0.9591026266285794, "grad_norm": 0.44478657841682434, "learning_rate": 1.9303835197476804e-07, "loss": 0.08673095703125, "step": 6883 }, { "epoch": 0.9592419703197937, "grad_norm": 0.5774131417274475, "learning_rate": 1.917339271358465e-07, "loss": 0.10587501525878906, "step": 6884 }, { "epoch": 0.9593813140110081, "grad_norm": 0.30005010962486267, "learning_rate": 1.9043390324141597e-07, "loss": 0.07809829711914062, "step": 6885 }, { "epoch": 0.9595206577022225, "grad_norm": 0.39908862113952637, "learning_rate": 1.8913828058031436e-07, "loss": 0.08977985382080078, "step": 6886 }, { "epoch": 0.9596600013934369, "grad_norm": 0.31218811869621277, "learning_rate": 1.878470594403936e-07, "loss": 0.077484130859375, "step": 6887 }, { "epoch": 0.9597993450846513, "grad_norm": 0.20497344434261322, "learning_rate": 1.865602401085398e-07, "loss": 0.060100555419921875, "step": 6888 }, { "epoch": 0.9599386887758656, "grad_norm": 0.6906037926673889, "learning_rate": 1.852778228706509e-07, "loss": 0.10911750793457031, "step": 6889 }, { "epoch": 0.96007803246708, "grad_norm": 0.29766973853111267, "learning_rate": 1.8399980801165006e-07, "loss": 0.07554864883422852, "step": 6890 }, { "epoch": 0.9602173761582944, "grad_norm": 0.24392370879650116, "learning_rate": 1.8272619581549022e-07, "loss": 0.06373119354248047, "step": 6891 }, { "epoch": 0.9603567198495088, "grad_norm": 0.37702521681785583, "learning_rate": 1.8145698656512943e-07, "loss": 0.08230781555175781, "step": 6892 }, { "epoch": 0.9604960635407231, "grad_norm": 0.3420012891292572, "learning_rate": 1.8019218054256216e-07, "loss": 0.07683563232421875, "step": 6893 }, { "epoch": 0.9606354072319375, "grad_norm": 0.31027016043663025, "learning_rate": 1.7893177802879692e-07, "loss": 0.06746292114257812, "step": 6894 }, { "epoch": 0.9607747509231519, "grad_norm": 0.533224880695343, "learning_rate": 1.776757793038697e-07, "loss": 0.08683204650878906, "step": 6895 }, { "epoch": 0.9609140946143664, "grad_norm": 0.26800382137298584, "learning_rate": 1.7642418464683287e-07, "loss": 0.07262229919433594, "step": 6896 }, { "epoch": 0.9610534383055808, "grad_norm": 0.2904371917247772, "learning_rate": 1.7517699433576173e-07, "loss": 0.07321834564208984, "step": 6897 }, { "epoch": 0.9611927819967951, "grad_norm": 0.3037748336791992, "learning_rate": 1.7393420864775467e-07, "loss": 0.08370590209960938, "step": 6898 }, { "epoch": 0.9613321256880095, "grad_norm": 0.4292869567871094, "learning_rate": 1.7269582785892858e-07, "loss": 0.10738945007324219, "step": 6899 }, { "epoch": 0.9614714693792239, "grad_norm": 0.28763148188591003, "learning_rate": 1.7146185224442557e-07, "loss": 0.06386375427246094, "step": 6900 }, { "epoch": 0.9616108130704383, "grad_norm": 0.5367462038993835, "learning_rate": 1.7023228207840637e-07, "loss": 0.1044921875, "step": 6901 }, { "epoch": 0.9617501567616527, "grad_norm": 0.42610830068588257, "learning_rate": 1.6900711763405242e-07, "loss": 0.09033393859863281, "step": 6902 }, { "epoch": 0.961889500452867, "grad_norm": 0.5485749840736389, "learning_rate": 1.67786359183566e-07, "loss": 0.1175689697265625, "step": 6903 }, { "epoch": 0.9620288441440814, "grad_norm": 0.37856075167655945, "learning_rate": 1.665700069981746e-07, "loss": 0.08183479309082031, "step": 6904 }, { "epoch": 0.9621681878352958, "grad_norm": 0.3508590757846832, "learning_rate": 1.6535806134812427e-07, "loss": 0.06269598007202148, "step": 6905 }, { "epoch": 0.9623075315265102, "grad_norm": 0.3254605233669281, "learning_rate": 1.6415052250267738e-07, "loss": 0.06852245330810547, "step": 6906 }, { "epoch": 0.9624468752177245, "grad_norm": 0.40261203050613403, "learning_rate": 1.62947390730126e-07, "loss": 0.08795356750488281, "step": 6907 }, { "epoch": 0.9625862189089389, "grad_norm": 0.48366087675094604, "learning_rate": 1.617486662977763e-07, "loss": 0.09106731414794922, "step": 6908 }, { "epoch": 0.9627255626001533, "grad_norm": 0.3535245954990387, "learning_rate": 1.6055434947195746e-07, "loss": 0.08889389038085938, "step": 6909 }, { "epoch": 0.9628649062913677, "grad_norm": 0.3078405559062958, "learning_rate": 1.5936444051801947e-07, "loss": 0.07130241394042969, "step": 6910 }, { "epoch": 0.963004249982582, "grad_norm": 0.4257046580314636, "learning_rate": 1.5817893970033305e-07, "loss": 0.09334564208984375, "step": 6911 }, { "epoch": 0.9631435936737964, "grad_norm": 0.4736168384552002, "learning_rate": 1.5699784728229196e-07, "loss": 0.08681297302246094, "step": 6912 }, { "epoch": 0.9632829373650108, "grad_norm": 0.5674563050270081, "learning_rate": 1.5582116352630626e-07, "loss": 0.08890533447265625, "step": 6913 }, { "epoch": 0.9634222810562252, "grad_norm": 0.45424142479896545, "learning_rate": 1.5464888869380468e-07, "loss": 0.08562278747558594, "step": 6914 }, { "epoch": 0.9635616247474396, "grad_norm": 0.7406771779060364, "learning_rate": 1.5348102304524548e-07, "loss": 0.13175678253173828, "step": 6915 }, { "epoch": 0.9637009684386539, "grad_norm": 0.3858097493648529, "learning_rate": 1.523175668400989e-07, "loss": 0.0803537368774414, "step": 6916 }, { "epoch": 0.9638403121298683, "grad_norm": 0.3306906521320343, "learning_rate": 1.511585203368582e-07, "loss": 0.08117866516113281, "step": 6917 }, { "epoch": 0.9639796558210827, "grad_norm": 0.3828243017196655, "learning_rate": 1.5000388379303732e-07, "loss": 0.06913185119628906, "step": 6918 }, { "epoch": 0.9641189995122971, "grad_norm": 0.4043271243572235, "learning_rate": 1.4885365746516889e-07, "loss": 0.08068084716796875, "step": 6919 }, { "epoch": 0.9642583432035114, "grad_norm": 0.2773694097995758, "learning_rate": 1.477078416088107e-07, "loss": 0.06998443603515625, "step": 6920 }, { "epoch": 0.9643976868947258, "grad_norm": 0.746692419052124, "learning_rate": 1.465664364785324e-07, "loss": 0.10826492309570312, "step": 6921 }, { "epoch": 0.9645370305859402, "grad_norm": 0.6899163126945496, "learning_rate": 1.454294423279312e-07, "loss": 0.12300872802734375, "step": 6922 }, { "epoch": 0.9646763742771546, "grad_norm": 0.5337240695953369, "learning_rate": 1.4429685940962278e-07, "loss": 0.11635398864746094, "step": 6923 }, { "epoch": 0.964815717968369, "grad_norm": 0.7090213298797607, "learning_rate": 1.4316868797523697e-07, "loss": 0.10947799682617188, "step": 6924 }, { "epoch": 0.9649550616595833, "grad_norm": 0.22038039565086365, "learning_rate": 1.420449282754288e-07, "loss": 0.061023712158203125, "step": 6925 }, { "epoch": 0.9650944053507977, "grad_norm": 0.5109238028526306, "learning_rate": 1.4092558055987193e-07, "loss": 0.10279083251953125, "step": 6926 }, { "epoch": 0.9652337490420121, "grad_norm": 0.27294161915779114, "learning_rate": 1.3981064507726295e-07, "loss": 0.07410430908203125, "step": 6927 }, { "epoch": 0.9653730927332265, "grad_norm": 0.2468087524175644, "learning_rate": 1.387001220753126e-07, "loss": 0.06290435791015625, "step": 6928 }, { "epoch": 0.9655124364244408, "grad_norm": 0.520156979560852, "learning_rate": 1.3759401180075239e-07, "loss": 0.07934188842773438, "step": 6929 }, { "epoch": 0.9656517801156552, "grad_norm": 0.7201396226882935, "learning_rate": 1.3649231449933686e-07, "loss": 0.14395713806152344, "step": 6930 }, { "epoch": 0.9657911238068696, "grad_norm": 0.4271526038646698, "learning_rate": 1.3539503041583913e-07, "loss": 0.09188652038574219, "step": 6931 }, { "epoch": 0.965930467498084, "grad_norm": 0.481665700674057, "learning_rate": 1.3430215979404638e-07, "loss": 0.10725021362304688, "step": 6932 }, { "epoch": 0.9660698111892984, "grad_norm": 0.6855002045631409, "learning_rate": 1.3321370287677328e-07, "loss": 0.11236763000488281, "step": 6933 }, { "epoch": 0.9662091548805127, "grad_norm": 0.5029910802841187, "learning_rate": 1.321296599058508e-07, "loss": 0.10214042663574219, "step": 6934 }, { "epoch": 0.9663484985717271, "grad_norm": 0.7182873487472534, "learning_rate": 1.310500311221241e-07, "loss": 0.08667182922363281, "step": 6935 }, { "epoch": 0.9664878422629416, "grad_norm": 1.0259249210357666, "learning_rate": 1.2997481676546576e-07, "loss": 0.11879348754882812, "step": 6936 }, { "epoch": 0.966627185954156, "grad_norm": 0.5986254215240479, "learning_rate": 1.2890401707476242e-07, "loss": 0.12112808227539062, "step": 6937 }, { "epoch": 0.9667665296453704, "grad_norm": 0.5382232666015625, "learning_rate": 1.2783763228792156e-07, "loss": 0.09486103057861328, "step": 6938 }, { "epoch": 0.9669058733365847, "grad_norm": 0.6517792344093323, "learning_rate": 1.2677566264186925e-07, "loss": 0.11126899719238281, "step": 6939 }, { "epoch": 0.9670452170277991, "grad_norm": 0.22945058345794678, "learning_rate": 1.2571810837255228e-07, "loss": 0.05762887001037598, "step": 6940 }, { "epoch": 0.9671845607190135, "grad_norm": 0.4503476023674011, "learning_rate": 1.246649697149338e-07, "loss": 0.0834808349609375, "step": 6941 }, { "epoch": 0.9673239044102279, "grad_norm": 0.41739916801452637, "learning_rate": 1.2361624690299557e-07, "loss": 0.08879852294921875, "step": 6942 }, { "epoch": 0.9674632481014422, "grad_norm": 0.38998496532440186, "learning_rate": 1.225719401697445e-07, "loss": 0.08797454833984375, "step": 6943 }, { "epoch": 0.9676025917926566, "grad_norm": 0.45006734132766724, "learning_rate": 1.2153204974719722e-07, "loss": 0.06791305541992188, "step": 6944 }, { "epoch": 0.967741935483871, "grad_norm": 0.37120741605758667, "learning_rate": 1.2049657586639786e-07, "loss": 0.07333755493164062, "step": 6945 }, { "epoch": 0.9678812791750854, "grad_norm": 0.48564016819000244, "learning_rate": 1.194655187574001e-07, "loss": 0.07888603210449219, "step": 6946 }, { "epoch": 0.9680206228662998, "grad_norm": 0.705226719379425, "learning_rate": 1.1843887864928294e-07, "loss": 0.11122679710388184, "step": 6947 }, { "epoch": 0.9681599665575141, "grad_norm": 0.44755661487579346, "learning_rate": 1.1741665577014393e-07, "loss": 0.077423095703125, "step": 6948 }, { "epoch": 0.9682993102487285, "grad_norm": 0.6297992467880249, "learning_rate": 1.163988503470992e-07, "loss": 0.11162757873535156, "step": 6949 }, { "epoch": 0.9684386539399429, "grad_norm": 0.5026305913925171, "learning_rate": 1.153854626062767e-07, "loss": 0.10066604614257812, "step": 6950 }, { "epoch": 0.9685779976311573, "grad_norm": 0.5859058499336243, "learning_rate": 1.1437649277283191e-07, "loss": 0.10268115997314453, "step": 6951 }, { "epoch": 0.9687173413223716, "grad_norm": 0.25809770822525024, "learning_rate": 1.133719410709344e-07, "loss": 0.05733203887939453, "step": 6952 }, { "epoch": 0.968856685013586, "grad_norm": 0.2935693860054016, "learning_rate": 1.1237180772377233e-07, "loss": 0.07291603088378906, "step": 6953 }, { "epoch": 0.9689960287048004, "grad_norm": 0.42765146493911743, "learning_rate": 1.1137609295355234e-07, "loss": 0.09762382507324219, "step": 6954 }, { "epoch": 0.9691353723960148, "grad_norm": 0.6709672212600708, "learning_rate": 1.1038479698149752e-07, "loss": 0.08826065063476562, "step": 6955 }, { "epoch": 0.9692747160872291, "grad_norm": 0.2507948875427246, "learning_rate": 1.093979200278561e-07, "loss": 0.07392692565917969, "step": 6956 }, { "epoch": 0.9694140597784435, "grad_norm": 0.44603589177131653, "learning_rate": 1.0841546231188382e-07, "loss": 0.09691047668457031, "step": 6957 }, { "epoch": 0.9695534034696579, "grad_norm": 0.3231179118156433, "learning_rate": 1.0743742405186385e-07, "loss": 0.07532978057861328, "step": 6958 }, { "epoch": 0.9696927471608723, "grad_norm": 0.3740464746952057, "learning_rate": 1.0646380546509572e-07, "loss": 0.09311771392822266, "step": 6959 }, { "epoch": 0.9698320908520867, "grad_norm": 0.46767061948776245, "learning_rate": 1.054946067678908e-07, "loss": 0.08648872375488281, "step": 6960 }, { "epoch": 0.969971434543301, "grad_norm": 0.35739776492118835, "learning_rate": 1.0452982817558577e-07, "loss": 0.08187484741210938, "step": 6961 }, { "epoch": 0.9701107782345154, "grad_norm": 0.3432977497577667, "learning_rate": 1.0356946990253137e-07, "loss": 0.07628822326660156, "step": 6962 }, { "epoch": 0.9702501219257298, "grad_norm": 0.2109745889902115, "learning_rate": 1.0261353216209691e-07, "loss": 0.06323432922363281, "step": 6963 }, { "epoch": 0.9703894656169442, "grad_norm": 0.46725064516067505, "learning_rate": 1.0166201516667029e-07, "loss": 0.09242820739746094, "step": 6964 }, { "epoch": 0.9705288093081585, "grad_norm": 0.47335028648376465, "learning_rate": 1.0071491912766018e-07, "loss": 0.09148979187011719, "step": 6965 }, { "epoch": 0.9706681529993729, "grad_norm": 0.3854447901248932, "learning_rate": 9.977224425548271e-08, "loss": 0.09890365600585938, "step": 6966 }, { "epoch": 0.9708074966905873, "grad_norm": 0.30972781777381897, "learning_rate": 9.883399075958589e-08, "loss": 0.07860946655273438, "step": 6967 }, { "epoch": 0.9709468403818017, "grad_norm": 0.31210437417030334, "learning_rate": 9.790015884842297e-08, "loss": 0.0736083984375, "step": 6968 }, { "epoch": 0.9710861840730161, "grad_norm": 0.2317143976688385, "learning_rate": 9.697074872947242e-08, "loss": 0.06217765808105469, "step": 6969 }, { "epoch": 0.9712255277642304, "grad_norm": 0.44886812567710876, "learning_rate": 9.604576060922687e-08, "loss": 0.08819389343261719, "step": 6970 }, { "epoch": 0.9713648714554448, "grad_norm": 0.5474607348442078, "learning_rate": 9.51251946931997e-08, "loss": 0.09588050842285156, "step": 6971 }, { "epoch": 0.9715042151466592, "grad_norm": 0.38972100615501404, "learning_rate": 9.420905118591617e-08, "loss": 0.08313179016113281, "step": 6972 }, { "epoch": 0.9716435588378736, "grad_norm": 0.2803761065006256, "learning_rate": 9.32973302909268e-08, "loss": 0.0762186050415039, "step": 6973 }, { "epoch": 0.9717829025290879, "grad_norm": 0.2522527277469635, "learning_rate": 9.239003221079179e-08, "loss": 0.06608009338378906, "step": 6974 }, { "epoch": 0.9719222462203023, "grad_norm": 0.38832926750183105, "learning_rate": 9.148715714709433e-08, "loss": 0.0764017105102539, "step": 6975 }, { "epoch": 0.9720615899115168, "grad_norm": 0.551490068435669, "learning_rate": 9.058870530042952e-08, "loss": 0.09401130676269531, "step": 6976 }, { "epoch": 0.9722009336027312, "grad_norm": 0.5891680717468262, "learning_rate": 8.969467687041766e-08, "loss": 0.11402702331542969, "step": 6977 }, { "epoch": 0.9723402772939456, "grad_norm": 0.40731725096702576, "learning_rate": 8.880507205568656e-08, "loss": 0.09615612030029297, "step": 6978 }, { "epoch": 0.9724796209851599, "grad_norm": 0.385280579328537, "learning_rate": 8.791989105388699e-08, "loss": 0.08094978332519531, "step": 6979 }, { "epoch": 0.9726189646763743, "grad_norm": 0.5281490087509155, "learning_rate": 8.70391340616883e-08, "loss": 0.13261032104492188, "step": 6980 }, { "epoch": 0.9727583083675887, "grad_norm": 0.668847382068634, "learning_rate": 8.616280127477395e-08, "loss": 0.10655689239501953, "step": 6981 }, { "epoch": 0.9728976520588031, "grad_norm": 0.5761567950248718, "learning_rate": 8.529089288784376e-08, "loss": 0.08992767333984375, "step": 6982 }, { "epoch": 0.9730369957500175, "grad_norm": 0.5830804705619812, "learning_rate": 8.442340909461832e-08, "loss": 0.10387802124023438, "step": 6983 }, { "epoch": 0.9731763394412318, "grad_norm": 0.3646184802055359, "learning_rate": 8.356035008783014e-08, "loss": 0.08025169372558594, "step": 6984 }, { "epoch": 0.9733156831324462, "grad_norm": 0.39931198954582214, "learning_rate": 8.270171605923027e-08, "loss": 0.07467174530029297, "step": 6985 }, { "epoch": 0.9734550268236606, "grad_norm": 0.6173413395881653, "learning_rate": 8.184750719959278e-08, "loss": 0.09091377258300781, "step": 6986 }, { "epoch": 0.973594370514875, "grad_norm": 0.41381680965423584, "learning_rate": 8.099772369869696e-08, "loss": 0.0939178466796875, "step": 6987 }, { "epoch": 0.9737337142060893, "grad_norm": 0.34412848949432373, "learning_rate": 8.015236574534957e-08, "loss": 0.08616828918457031, "step": 6988 }, { "epoch": 0.9738730578973037, "grad_norm": 0.41478678584098816, "learning_rate": 7.931143352736925e-08, "loss": 0.09993362426757812, "step": 6989 }, { "epoch": 0.9740124015885181, "grad_norm": 0.43842586874961853, "learning_rate": 7.8474927231591e-08, "loss": 0.07330989837646484, "step": 6990 }, { "epoch": 0.9741517452797325, "grad_norm": 0.41159167885780334, "learning_rate": 7.764284704386837e-08, "loss": 0.07631874084472656, "step": 6991 }, { "epoch": 0.9742910889709469, "grad_norm": 0.23821572959423065, "learning_rate": 7.681519314906904e-08, "loss": 0.06789970397949219, "step": 6992 }, { "epoch": 0.9744304326621612, "grad_norm": 0.5896190404891968, "learning_rate": 7.599196573107925e-08, "loss": 0.09914016723632812, "step": 6993 }, { "epoch": 0.9745697763533756, "grad_norm": 0.3216404318809509, "learning_rate": 7.517316497280158e-08, "loss": 0.07890510559082031, "step": 6994 }, { "epoch": 0.97470912004459, "grad_norm": 0.31259527802467346, "learning_rate": 7.435879105615718e-08, "loss": 0.08390045166015625, "step": 6995 }, { "epoch": 0.9748484637358044, "grad_norm": 0.47789040207862854, "learning_rate": 7.354884416207686e-08, "loss": 0.10335922241210938, "step": 6996 }, { "epoch": 0.9749878074270187, "grad_norm": 0.2896292805671692, "learning_rate": 7.274332447051668e-08, "loss": 0.06148719787597656, "step": 6997 }, { "epoch": 0.9751271511182331, "grad_norm": 0.5316546559333801, "learning_rate": 7.194223216044238e-08, "loss": 0.10186767578125, "step": 6998 }, { "epoch": 0.9752664948094475, "grad_norm": 0.39263755083084106, "learning_rate": 7.114556740983824e-08, "loss": 0.09210681915283203, "step": 6999 }, { "epoch": 0.9754058385006619, "grad_norm": 0.3270455002784729, "learning_rate": 7.035333039570492e-08, "loss": 0.08107757568359375, "step": 7000 }, { "epoch": 0.9755451821918762, "grad_norm": 0.46385714411735535, "learning_rate": 6.956552129406158e-08, "loss": 0.08921623229980469, "step": 7001 }, { "epoch": 0.9756845258830906, "grad_norm": 0.5873894095420837, "learning_rate": 6.878214027993935e-08, "loss": 0.10069561004638672, "step": 7002 }, { "epoch": 0.975823869574305, "grad_norm": 0.4363866150379181, "learning_rate": 6.800318752738788e-08, "loss": 0.10394477844238281, "step": 7003 }, { "epoch": 0.9759632132655194, "grad_norm": 0.6883102655410767, "learning_rate": 6.722866320947319e-08, "loss": 0.10693740844726562, "step": 7004 }, { "epoch": 0.9761025569567338, "grad_norm": 0.3237481117248535, "learning_rate": 6.64585674982754e-08, "loss": 0.07271003723144531, "step": 7005 }, { "epoch": 0.9762419006479481, "grad_norm": 0.6260445713996887, "learning_rate": 6.569290056489542e-08, "loss": 0.10754013061523438, "step": 7006 }, { "epoch": 0.9763812443391625, "grad_norm": 0.38082200288772583, "learning_rate": 6.493166257944384e-08, "loss": 0.07172489166259766, "step": 7007 }, { "epoch": 0.9765205880303769, "grad_norm": 0.37063854932785034, "learning_rate": 6.417485371105204e-08, "loss": 0.07721710205078125, "step": 7008 }, { "epoch": 0.9766599317215913, "grad_norm": 0.26536422967910767, "learning_rate": 6.342247412786329e-08, "loss": 0.07077693939208984, "step": 7009 }, { "epoch": 0.9767992754128056, "grad_norm": 0.4128875732421875, "learning_rate": 6.267452399704387e-08, "loss": 0.09178829193115234, "step": 7010 }, { "epoch": 0.97693861910402, "grad_norm": 0.3108527362346649, "learning_rate": 6.193100348476533e-08, "loss": 0.060512542724609375, "step": 7011 }, { "epoch": 0.9770779627952344, "grad_norm": 0.7662290334701538, "learning_rate": 6.11919127562266e-08, "loss": 0.11783027648925781, "step": 7012 }, { "epoch": 0.9772173064864488, "grad_norm": 0.3592942953109741, "learning_rate": 6.045725197563413e-08, "loss": 0.07145118713378906, "step": 7013 }, { "epoch": 0.9773566501776632, "grad_norm": 0.46367043256759644, "learning_rate": 5.972702130621067e-08, "loss": 0.0959005355834961, "step": 7014 }, { "epoch": 0.9774959938688775, "grad_norm": 0.665275514125824, "learning_rate": 5.900122091019977e-08, "loss": 0.10320186614990234, "step": 7015 }, { "epoch": 0.977635337560092, "grad_norm": 0.7338187098503113, "learning_rate": 5.827985094885691e-08, "loss": 0.11705780029296875, "step": 7016 }, { "epoch": 0.9777746812513064, "grad_norm": 0.38060909509658813, "learning_rate": 5.756291158245386e-08, "loss": 0.08435726165771484, "step": 7017 }, { "epoch": 0.9779140249425208, "grad_norm": 0.4880305826663971, "learning_rate": 5.6850402970278774e-08, "loss": 0.09760475158691406, "step": 7018 }, { "epoch": 0.9780533686337352, "grad_norm": 0.2773900628089905, "learning_rate": 5.6142325270633904e-08, "loss": 0.06842613220214844, "step": 7019 }, { "epoch": 0.9781927123249495, "grad_norm": 0.7557621598243713, "learning_rate": 5.543867864083785e-08, "loss": 0.13771915435791016, "step": 7020 }, { "epoch": 0.9783320560161639, "grad_norm": 0.2838873565196991, "learning_rate": 5.473946323722556e-08, "loss": 0.07871818542480469, "step": 7021 }, { "epoch": 0.9784713997073783, "grad_norm": 0.3506816029548645, "learning_rate": 5.404467921514611e-08, "loss": 0.0852508544921875, "step": 7022 }, { "epoch": 0.9786107433985927, "grad_norm": 0.4458865523338318, "learning_rate": 5.335432672896712e-08, "loss": 0.09770965576171875, "step": 7023 }, { "epoch": 0.978750087089807, "grad_norm": 0.31380510330200195, "learning_rate": 5.26684059320659e-08, "loss": 0.07405471801757812, "step": 7024 }, { "epoch": 0.9788894307810214, "grad_norm": 0.2875730097293854, "learning_rate": 5.198691697683833e-08, "loss": 0.06359291076660156, "step": 7025 }, { "epoch": 0.9790287744722358, "grad_norm": 0.2847253680229187, "learning_rate": 5.130986001469884e-08, "loss": 0.06658935546875, "step": 7026 }, { "epoch": 0.9791681181634502, "grad_norm": 0.6496864557266235, "learning_rate": 5.0637235196071555e-08, "loss": 0.12758827209472656, "step": 7027 }, { "epoch": 0.9793074618546646, "grad_norm": 0.27724024653434753, "learning_rate": 4.996904267039693e-08, "loss": 0.06882858276367188, "step": 7028 }, { "epoch": 0.9794468055458789, "grad_norm": 0.3606931269168854, "learning_rate": 4.9305282586136206e-08, "loss": 0.0836334228515625, "step": 7029 }, { "epoch": 0.9795861492370933, "grad_norm": 0.7000800371170044, "learning_rate": 4.864595509076031e-08, "loss": 0.10184764862060547, "step": 7030 }, { "epoch": 0.9797254929283077, "grad_norm": 0.4327346384525299, "learning_rate": 4.799106033075429e-08, "loss": 0.09087944030761719, "step": 7031 }, { "epoch": 0.9798648366195221, "grad_norm": 0.3313043713569641, "learning_rate": 4.734059845162175e-08, "loss": 0.0842437744140625, "step": 7032 }, { "epoch": 0.9800041803107364, "grad_norm": 0.2857504189014435, "learning_rate": 4.669456959788265e-08, "loss": 0.0750732421875, "step": 7033 }, { "epoch": 0.9801435240019508, "grad_norm": 0.23580637574195862, "learning_rate": 4.6052973913068844e-08, "loss": 0.07046031951904297, "step": 7034 }, { "epoch": 0.9802828676931652, "grad_norm": 0.45814013481140137, "learning_rate": 4.541581153972852e-08, "loss": 0.09259319305419922, "step": 7035 }, { "epoch": 0.9804222113843796, "grad_norm": 0.24139456450939178, "learning_rate": 4.478308261942177e-08, "loss": 0.06894397735595703, "step": 7036 }, { "epoch": 0.980561555075594, "grad_norm": 0.3430197536945343, "learning_rate": 4.415478729272949e-08, "loss": 0.07888317108154297, "step": 7037 }, { "epoch": 0.9807008987668083, "grad_norm": 0.3584829866886139, "learning_rate": 4.353092569924444e-08, "loss": 0.07424736022949219, "step": 7038 }, { "epoch": 0.9808402424580227, "grad_norm": 0.3139784038066864, "learning_rate": 4.2911497977573545e-08, "loss": 0.07613945007324219, "step": 7039 }, { "epoch": 0.9809795861492371, "grad_norm": 0.6756899952888489, "learning_rate": 4.2296504265340044e-08, "loss": 0.09279060363769531, "step": 7040 }, { "epoch": 0.9811189298404515, "grad_norm": 0.4448457956314087, "learning_rate": 4.1685944699181304e-08, "loss": 0.08668327331542969, "step": 7041 }, { "epoch": 0.9812582735316658, "grad_norm": 0.3114352524280548, "learning_rate": 4.107981941474881e-08, "loss": 0.07505226135253906, "step": 7042 }, { "epoch": 0.9813976172228802, "grad_norm": 0.2929372787475586, "learning_rate": 4.04781285467104e-08, "loss": 0.0663747787475586, "step": 7043 }, { "epoch": 0.9815369609140946, "grad_norm": 0.38055741786956787, "learning_rate": 3.9880872228748034e-08, "loss": 0.07517147064208984, "step": 7044 }, { "epoch": 0.981676304605309, "grad_norm": 0.5596561431884766, "learning_rate": 3.928805059356e-08, "loss": 0.09222793579101562, "step": 7045 }, { "epoch": 0.9818156482965233, "grad_norm": 0.24990394711494446, "learning_rate": 3.869966377285428e-08, "loss": 0.07199287414550781, "step": 7046 }, { "epoch": 0.9819549919877377, "grad_norm": 0.3485788106918335, "learning_rate": 3.8115711897359634e-08, "loss": 0.07266807556152344, "step": 7047 }, { "epoch": 0.9820943356789521, "grad_norm": 0.5842549204826355, "learning_rate": 3.753619509681672e-08, "loss": 0.101531982421875, "step": 7048 }, { "epoch": 0.9822336793701665, "grad_norm": 0.1712196171283722, "learning_rate": 3.696111349998255e-08, "loss": 0.05659675598144531, "step": 7049 }, { "epoch": 0.9823730230613809, "grad_norm": 0.4386426508426666, "learning_rate": 3.6390467234621585e-08, "loss": 0.10433197021484375, "step": 7050 }, { "epoch": 0.9825123667525952, "grad_norm": 0.33179235458374023, "learning_rate": 3.582425642752352e-08, "loss": 0.0924997329711914, "step": 7051 }, { "epoch": 0.9826517104438096, "grad_norm": 0.3954107165336609, "learning_rate": 3.52624812044855e-08, "loss": 0.08159446716308594, "step": 7052 }, { "epoch": 0.982791054135024, "grad_norm": 0.5345085859298706, "learning_rate": 3.470514169032102e-08, "loss": 0.08497428894042969, "step": 7053 }, { "epoch": 0.9829303978262384, "grad_norm": 0.29185932874679565, "learning_rate": 3.4152238008859915e-08, "loss": 0.07545089721679688, "step": 7054 }, { "epoch": 0.9830697415174527, "grad_norm": 0.3108827471733093, "learning_rate": 3.360377028294171e-08, "loss": 0.08596038818359375, "step": 7055 }, { "epoch": 0.9832090852086672, "grad_norm": 0.5638288855552673, "learning_rate": 3.305973863442669e-08, "loss": 0.08717155456542969, "step": 7056 }, { "epoch": 0.9833484288998816, "grad_norm": 0.3348013758659363, "learning_rate": 3.252014318418484e-08, "loss": 0.07555675506591797, "step": 7057 }, { "epoch": 0.983487772591096, "grad_norm": 0.2989587187767029, "learning_rate": 3.198498405210027e-08, "loss": 0.0787515640258789, "step": 7058 }, { "epoch": 0.9836271162823104, "grad_norm": 0.4358947277069092, "learning_rate": 3.145426135707563e-08, "loss": 0.0921783447265625, "step": 7059 }, { "epoch": 0.9837664599735247, "grad_norm": 0.2488207370042801, "learning_rate": 3.092797521702551e-08, "loss": 0.07392692565917969, "step": 7060 }, { "epoch": 0.9839058036647391, "grad_norm": 0.27952393889427185, "learning_rate": 3.040612574887636e-08, "loss": 0.06281471252441406, "step": 7061 }, { "epoch": 0.9840451473559535, "grad_norm": 0.46728116273880005, "learning_rate": 2.9888713068573215e-08, "loss": 0.09207344055175781, "step": 7062 }, { "epoch": 0.9841844910471679, "grad_norm": 0.7059627771377563, "learning_rate": 2.9375737291070795e-08, "loss": 0.1212167739868164, "step": 7063 }, { "epoch": 0.9843238347383823, "grad_norm": 0.4854995310306549, "learning_rate": 2.8867198530344587e-08, "loss": 0.09108161926269531, "step": 7064 }, { "epoch": 0.9844631784295966, "grad_norm": 0.21682116389274597, "learning_rate": 2.836309689937755e-08, "loss": 0.05723285675048828, "step": 7065 }, { "epoch": 0.984602522120811, "grad_norm": 0.33756276965141296, "learning_rate": 2.7863432510168988e-08, "loss": 0.07539939880371094, "step": 7066 }, { "epoch": 0.9847418658120254, "grad_norm": 0.514140248298645, "learning_rate": 2.7368205473734532e-08, "loss": 0.09599113464355469, "step": 7067 }, { "epoch": 0.9848812095032398, "grad_norm": 0.4274795651435852, "learning_rate": 2.6877415900103955e-08, "loss": 0.07802772521972656, "step": 7068 }, { "epoch": 0.9850205531944541, "grad_norm": 0.3368118107318878, "learning_rate": 2.6391063898314474e-08, "loss": 0.08564567565917969, "step": 7069 }, { "epoch": 0.9851598968856685, "grad_norm": 0.4978190064430237, "learning_rate": 2.5909149576428537e-08, "loss": 0.1035308837890625, "step": 7070 }, { "epoch": 0.9852992405768829, "grad_norm": 0.9163516759872437, "learning_rate": 2.5431673041509396e-08, "loss": 0.12821292877197266, "step": 7071 }, { "epoch": 0.9854385842680973, "grad_norm": 0.25151243805885315, "learning_rate": 2.495863439964774e-08, "loss": 0.07075381278991699, "step": 7072 }, { "epoch": 0.9855779279593117, "grad_norm": 0.3855830729007721, "learning_rate": 2.44900337559395e-08, "loss": 0.07012176513671875, "step": 7073 }, { "epoch": 0.985717271650526, "grad_norm": 0.6811702847480774, "learning_rate": 2.4025871214496954e-08, "loss": 0.12108612060546875, "step": 7074 }, { "epoch": 0.9858566153417404, "grad_norm": 0.44098055362701416, "learning_rate": 2.3566146878446495e-08, "loss": 0.08254623413085938, "step": 7075 }, { "epoch": 0.9859959590329548, "grad_norm": 0.6748990416526794, "learning_rate": 2.311086084992864e-08, "loss": 0.10392284393310547, "step": 7076 }, { "epoch": 0.9861353027241692, "grad_norm": 0.46223184466362, "learning_rate": 2.2660013230098032e-08, "loss": 0.09841346740722656, "step": 7077 }, { "epoch": 0.9862746464153835, "grad_norm": 0.2507038712501526, "learning_rate": 2.2213604119121214e-08, "loss": 0.06400299072265625, "step": 7078 }, { "epoch": 0.9864139901065979, "grad_norm": 0.5277556777000427, "learning_rate": 2.1771633616181066e-08, "loss": 0.10231876373291016, "step": 7079 }, { "epoch": 0.9865533337978123, "grad_norm": 0.27427002787590027, "learning_rate": 2.1334101819472375e-08, "loss": 0.07265853881835938, "step": 7080 }, { "epoch": 0.9866926774890267, "grad_norm": 0.4197658598423004, "learning_rate": 2.0901008826206272e-08, "loss": 0.07384395599365234, "step": 7081 }, { "epoch": 0.986832021180241, "grad_norm": 0.7394188642501831, "learning_rate": 2.047235473260578e-08, "loss": 0.10232353210449219, "step": 7082 }, { "epoch": 0.9869713648714554, "grad_norm": 0.37359046936035156, "learning_rate": 2.004813963390584e-08, "loss": 0.09153366088867188, "step": 7083 }, { "epoch": 0.9871107085626698, "grad_norm": 0.4021826386451721, "learning_rate": 1.9628363624362155e-08, "loss": 0.08776664733886719, "step": 7084 }, { "epoch": 0.9872500522538842, "grad_norm": 0.3616896867752075, "learning_rate": 1.921302679723569e-08, "loss": 0.07695770263671875, "step": 7085 }, { "epoch": 0.9873893959450986, "grad_norm": 0.33554959297180176, "learning_rate": 1.8802129244803735e-08, "loss": 0.08467292785644531, "step": 7086 }, { "epoch": 0.9875287396363129, "grad_norm": 0.4439508616924286, "learning_rate": 1.839567105836215e-08, "loss": 0.10301589965820312, "step": 7087 }, { "epoch": 0.9876680833275273, "grad_norm": 0.38142141699790955, "learning_rate": 1.7993652328214263e-08, "loss": 0.0683135986328125, "step": 7088 }, { "epoch": 0.9878074270187417, "grad_norm": 0.6153779625892639, "learning_rate": 1.7596073143677505e-08, "loss": 0.10979461669921875, "step": 7089 }, { "epoch": 0.9879467707099561, "grad_norm": 0.4699292480945587, "learning_rate": 1.720293359309011e-08, "loss": 0.08847618103027344, "step": 7090 }, { "epoch": 0.9880861144011704, "grad_norm": 0.5092649459838867, "learning_rate": 1.681423376379554e-08, "loss": 0.09864997863769531, "step": 7091 }, { "epoch": 0.9882254580923848, "grad_norm": 0.30132609605789185, "learning_rate": 1.6429973742153606e-08, "loss": 0.08182907104492188, "step": 7092 }, { "epoch": 0.9883648017835992, "grad_norm": 0.2984306216239929, "learning_rate": 1.6050153613538234e-08, "loss": 0.0735321044921875, "step": 7093 }, { "epoch": 0.9885041454748136, "grad_norm": 0.6130729913711548, "learning_rate": 1.567477346233748e-08, "loss": 0.11347103118896484, "step": 7094 }, { "epoch": 0.988643489166028, "grad_norm": 0.37042757868766785, "learning_rate": 1.5303833371953514e-08, "loss": 0.08262252807617188, "step": 7095 }, { "epoch": 0.9887828328572423, "grad_norm": 0.2572517395019531, "learning_rate": 1.4937333424798194e-08, "loss": 0.07491302490234375, "step": 7096 }, { "epoch": 0.9889221765484568, "grad_norm": 0.23394711315631866, "learning_rate": 1.457527370230194e-08, "loss": 0.0747365951538086, "step": 7097 }, { "epoch": 0.9890615202396712, "grad_norm": 0.5420699715614319, "learning_rate": 1.4217654284904846e-08, "loss": 0.09369659423828125, "step": 7098 }, { "epoch": 0.9892008639308856, "grad_norm": 0.44607287645339966, "learning_rate": 1.3864475252058918e-08, "loss": 0.0908660888671875, "step": 7099 }, { "epoch": 0.9893402076221, "grad_norm": 0.4076556861400604, "learning_rate": 1.3515736682239156e-08, "loss": 0.09599685668945312, "step": 7100 }, { "epoch": 0.9894795513133143, "grad_norm": 0.4245949983596802, "learning_rate": 1.3171438652921365e-08, "loss": 0.08612251281738281, "step": 7101 }, { "epoch": 0.9896188950045287, "grad_norm": 0.46599850058555603, "learning_rate": 1.2831581240602131e-08, "loss": 0.09012413024902344, "step": 7102 }, { "epoch": 0.9897582386957431, "grad_norm": 0.6813873648643494, "learning_rate": 1.2496164520792165e-08, "loss": 0.12961578369140625, "step": 7103 }, { "epoch": 0.9898975823869575, "grad_norm": 0.30176764726638794, "learning_rate": 1.2165188568011854e-08, "loss": 0.08404541015625, "step": 7104 }, { "epoch": 0.9900369260781718, "grad_norm": 0.47852760553359985, "learning_rate": 1.1838653455797933e-08, "loss": 0.08382797241210938, "step": 7105 }, { "epoch": 0.9901762697693862, "grad_norm": 0.3482668399810791, "learning_rate": 1.1516559256694594e-08, "loss": 0.08401203155517578, "step": 7106 }, { "epoch": 0.9903156134606006, "grad_norm": 0.5583547949790955, "learning_rate": 1.1198906042269032e-08, "loss": 0.11604690551757812, "step": 7107 }, { "epoch": 0.990454957151815, "grad_norm": 0.32300105690956116, "learning_rate": 1.0885693883093685e-08, "loss": 0.06453895568847656, "step": 7108 }, { "epoch": 0.9905943008430294, "grad_norm": 0.4005473554134369, "learning_rate": 1.0576922848759552e-08, "loss": 0.07660126686096191, "step": 7109 }, { "epoch": 0.9907336445342437, "grad_norm": 0.3001633286476135, "learning_rate": 1.0272593007865094e-08, "loss": 0.07167816162109375, "step": 7110 }, { "epoch": 0.9908729882254581, "grad_norm": 0.35614943504333496, "learning_rate": 9.972704428027335e-09, "loss": 0.07771682739257812, "step": 7111 }, { "epoch": 0.9910123319166725, "grad_norm": 0.3928835988044739, "learning_rate": 9.677257175875199e-09, "loss": 0.08368492126464844, "step": 7112 }, { "epoch": 0.9911516756078869, "grad_norm": 0.3923397660255432, "learning_rate": 9.38625131704951e-09, "loss": 0.08047294616699219, "step": 7113 }, { "epoch": 0.9912910192991012, "grad_norm": 0.4559655487537384, "learning_rate": 9.099686916205219e-09, "loss": 0.10528564453125, "step": 7114 }, { "epoch": 0.9914303629903156, "grad_norm": 0.3142716884613037, "learning_rate": 8.817564037009174e-09, "loss": 0.09048271179199219, "step": 7115 }, { "epoch": 0.99156970668153, "grad_norm": 0.6222264766693115, "learning_rate": 8.539882742146788e-09, "loss": 0.1301126480102539, "step": 7116 }, { "epoch": 0.9917090503727444, "grad_norm": 0.3332425057888031, "learning_rate": 8.266643093306492e-09, "loss": 0.07227134704589844, "step": 7117 }, { "epoch": 0.9918483940639587, "grad_norm": 0.4975399374961853, "learning_rate": 7.997845151201944e-09, "loss": 0.09442520141601562, "step": 7118 }, { "epoch": 0.9919877377551731, "grad_norm": 0.5111963152885437, "learning_rate": 7.733488975549819e-09, "loss": 0.08022308349609375, "step": 7119 }, { "epoch": 0.9921270814463875, "grad_norm": 0.30542102456092834, "learning_rate": 7.473574625085355e-09, "loss": 0.07353878021240234, "step": 7120 }, { "epoch": 0.9922664251376019, "grad_norm": 0.5740736722946167, "learning_rate": 7.218102157555695e-09, "loss": 0.10625457763671875, "step": 7121 }, { "epoch": 0.9924057688288163, "grad_norm": 0.2600778341293335, "learning_rate": 6.967071629719879e-09, "loss": 0.07552909851074219, "step": 7122 }, { "epoch": 0.9925451125200306, "grad_norm": 0.3793385624885559, "learning_rate": 6.720483097353292e-09, "loss": 0.08315372467041016, "step": 7123 }, { "epoch": 0.992684456211245, "grad_norm": 0.42303621768951416, "learning_rate": 6.478336615238779e-09, "loss": 0.08804702758789062, "step": 7124 }, { "epoch": 0.9928237999024594, "grad_norm": 0.38640889525413513, "learning_rate": 6.240632237179967e-09, "loss": 0.080322265625, "step": 7125 }, { "epoch": 0.9929631435936738, "grad_norm": 0.31561338901519775, "learning_rate": 6.0073700159879455e-09, "loss": 0.08009529113769531, "step": 7126 }, { "epoch": 0.9931024872848881, "grad_norm": 0.3362888991832733, "learning_rate": 5.778550003485706e-09, "loss": 0.08336448669433594, "step": 7127 }, { "epoch": 0.9932418309761025, "grad_norm": 0.42725372314453125, "learning_rate": 5.5541722505148e-09, "loss": 0.09050273895263672, "step": 7128 }, { "epoch": 0.9933811746673169, "grad_norm": 0.3879239559173584, "learning_rate": 5.334236806926462e-09, "loss": 0.09314918518066406, "step": 7129 }, { "epoch": 0.9935205183585313, "grad_norm": 0.3044133484363556, "learning_rate": 5.11874372158383e-09, "loss": 0.0707387924194336, "step": 7130 }, { "epoch": 0.9936598620497457, "grad_norm": 0.2373647540807724, "learning_rate": 4.907693042366379e-09, "loss": 0.06121635437011719, "step": 7131 }, { "epoch": 0.99379920574096, "grad_norm": 0.7730281949043274, "learning_rate": 4.70108481616327e-09, "loss": 0.11113739013671875, "step": 7132 }, { "epoch": 0.9939385494321744, "grad_norm": 0.3169896602630615, "learning_rate": 4.498919088877784e-09, "loss": 0.08699417114257812, "step": 7133 }, { "epoch": 0.9940778931233888, "grad_norm": 0.5029552578926086, "learning_rate": 4.301195905427324e-09, "loss": 0.08768463134765625, "step": 7134 }, { "epoch": 0.9942172368146032, "grad_norm": 0.6178176403045654, "learning_rate": 4.107915309743416e-09, "loss": 0.11826133728027344, "step": 7135 }, { "epoch": 0.9943565805058175, "grad_norm": 0.26564306020736694, "learning_rate": 3.919077344765043e-09, "loss": 0.0717763900756836, "step": 7136 }, { "epoch": 0.994495924197032, "grad_norm": 0.5027791261672974, "learning_rate": 3.734682052449756e-09, "loss": 0.10825157165527344, "step": 7137 }, { "epoch": 0.9946352678882464, "grad_norm": 0.5332139134407043, "learning_rate": 3.5547294737670046e-09, "loss": 0.10050201416015625, "step": 7138 }, { "epoch": 0.9947746115794608, "grad_norm": 0.4502524137496948, "learning_rate": 3.3792196486959193e-09, "loss": 0.09029960632324219, "step": 7139 }, { "epoch": 0.9949139552706752, "grad_norm": 0.588246762752533, "learning_rate": 3.2081526162319744e-09, "loss": 0.119659423828125, "step": 7140 }, { "epoch": 0.9950532989618895, "grad_norm": 0.5456023812294006, "learning_rate": 3.0415284143803234e-09, "loss": 0.09521961212158203, "step": 7141 }, { "epoch": 0.9951926426531039, "grad_norm": 0.5365489721298218, "learning_rate": 2.879347080164685e-09, "loss": 0.0863037109375, "step": 7142 }, { "epoch": 0.9953319863443183, "grad_norm": 0.48316916823387146, "learning_rate": 2.721608649614016e-09, "loss": 0.08998489379882812, "step": 7143 }, { "epoch": 0.9954713300355327, "grad_norm": 0.6606982350349426, "learning_rate": 2.5683131577780574e-09, "loss": 0.10013771057128906, "step": 7144 }, { "epoch": 0.995610673726747, "grad_norm": 0.22666063904762268, "learning_rate": 2.4194606387140106e-09, "loss": 0.06587696075439453, "step": 7145 }, { "epoch": 0.9957500174179614, "grad_norm": 0.27906206250190735, "learning_rate": 2.275051125490979e-09, "loss": 0.07310104370117188, "step": 7146 }, { "epoch": 0.9958893611091758, "grad_norm": 0.3456067740917206, "learning_rate": 2.1350846501988487e-09, "loss": 0.07865715026855469, "step": 7147 }, { "epoch": 0.9960287048003902, "grad_norm": 0.3336813747882843, "learning_rate": 1.9995612439305255e-09, "loss": 0.06795501708984375, "step": 7148 }, { "epoch": 0.9961680484916046, "grad_norm": 0.716064453125, "learning_rate": 1.8684809367974786e-09, "loss": 0.1285228729248047, "step": 7149 }, { "epoch": 0.9963073921828189, "grad_norm": 0.6322847008705139, "learning_rate": 1.741843757920858e-09, "loss": 0.12994861602783203, "step": 7150 }, { "epoch": 0.9964467358740333, "grad_norm": 0.3212963938713074, "learning_rate": 1.6196497354403761e-09, "loss": 0.08428096771240234, "step": 7151 }, { "epoch": 0.9965860795652477, "grad_norm": 0.32924893498420715, "learning_rate": 1.5018988965032067e-09, "loss": 0.07504653930664062, "step": 7152 }, { "epoch": 0.9967254232564621, "grad_norm": 0.3272203505039215, "learning_rate": 1.3885912672706447e-09, "loss": 0.07892417907714844, "step": 7153 }, { "epoch": 0.9968647669476765, "grad_norm": 0.3179364502429962, "learning_rate": 1.279726872918108e-09, "loss": 0.07855224609375, "step": 7154 }, { "epoch": 0.9970041106388908, "grad_norm": 0.3892156481742859, "learning_rate": 1.1753057376306942e-09, "loss": 0.08907508850097656, "step": 7155 }, { "epoch": 0.9971434543301052, "grad_norm": 0.462395578622818, "learning_rate": 1.0753278846076242e-09, "loss": 0.10816287994384766, "step": 7156 }, { "epoch": 0.9972827980213196, "grad_norm": 0.25442808866500854, "learning_rate": 9.79793336066681e-10, "loss": 0.06982994079589844, "step": 7157 }, { "epoch": 0.997422141712534, "grad_norm": 0.4455505609512329, "learning_rate": 8.887021132286677e-10, "loss": 0.08577728271484375, "step": 7158 }, { "epoch": 0.9975614854037483, "grad_norm": 0.5211091041564941, "learning_rate": 8.020542363329497e-10, "loss": 0.1019287109375, "step": 7159 }, { "epoch": 0.9977008290949627, "grad_norm": 0.5843652486801147, "learning_rate": 7.198497246307945e-10, "loss": 0.10460472106933594, "step": 7160 }, { "epoch": 0.9978401727861771, "grad_norm": 0.35286614298820496, "learning_rate": 6.420885963875912e-10, "loss": 0.07641410827636719, "step": 7161 }, { "epoch": 0.9979795164773915, "grad_norm": 0.6045248508453369, "learning_rate": 5.687708688806304e-10, "loss": 0.10607051849365234, "step": 7162 }, { "epoch": 0.9981188601686058, "grad_norm": 0.5210958123207092, "learning_rate": 4.99896558396884e-10, "loss": 0.10666561126708984, "step": 7163 }, { "epoch": 0.9982582038598202, "grad_norm": 0.41777899861335754, "learning_rate": 4.3546568023966616e-10, "loss": 0.08942031860351562, "step": 7164 }, { "epoch": 0.9983975475510346, "grad_norm": 0.1885034143924713, "learning_rate": 3.754782487241926e-10, "loss": 0.05783271789550781, "step": 7165 }, { "epoch": 0.998536891242249, "grad_norm": 0.3365144729614258, "learning_rate": 3.1993427717758043e-10, "loss": 0.07682609558105469, "step": 7166 }, { "epoch": 0.9986762349334634, "grad_norm": 0.3610086739063263, "learning_rate": 2.688337779410688e-10, "loss": 0.07254791259765625, "step": 7167 }, { "epoch": 0.9988155786246777, "grad_norm": 0.18554919958114624, "learning_rate": 2.2217676237001883e-10, "loss": 0.052176475524902344, "step": 7168 }, { "epoch": 0.9989549223158921, "grad_norm": 0.7821488976478577, "learning_rate": 1.7996324082725224e-10, "loss": 0.14793014526367188, "step": 7169 }, { "epoch": 0.9990942660071065, "grad_norm": 1.0546404123306274, "learning_rate": 1.4219322269193313e-10, "loss": 0.14149093627929688, "step": 7170 }, { "epoch": 0.9992336096983209, "grad_norm": 0.34660789370536804, "learning_rate": 1.0886671635956803e-10, "loss": 0.08515548706054688, "step": 7171 }, { "epoch": 0.9993729533895352, "grad_norm": 0.23722459375858307, "learning_rate": 7.998372922868313e-11, "loss": 0.06633567810058594, "step": 7172 }, { "epoch": 0.9995122970807496, "grad_norm": 0.6010448932647705, "learning_rate": 5.554426772080845e-11, "loss": 0.10683441162109375, "step": 7173 }, { "epoch": 0.999651640771964, "grad_norm": 0.356526643037796, "learning_rate": 3.554833726493456e-11, "loss": 0.0800333023071289, "step": 7174 }, { "epoch": 0.9997909844631784, "grad_norm": 0.5378692150115967, "learning_rate": 1.999594230417401e-11, "loss": 0.09458732604980469, "step": 7175 }, { "epoch": 0.9999303281543928, "grad_norm": 0.48508283495903015, "learning_rate": 8.887086291320401e-12, "loss": 0.10258102416992188, "step": 7176 }, { "epoch": 1.0, "grad_norm": 1.1569210290908813, "learning_rate": 2.2217716955097445e-12, "loss": 0.14859390258789062, "step": 7177 }, { "epoch": 1.0, "step": 7177, "total_flos": 5.326107668090192e+19, "train_loss": 0.10390519324295099, "train_runtime": 71823.5105, "train_samples_per_second": 25.579, "train_steps_per_second": 0.1 } ], "logging_steps": 1.0, "max_steps": 7177, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.326107668090192e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }