{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14399308833176008, "eval_steps": 500, "global_step": 108000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.6665386728103716e-05, "grad_norm": 10.5, "learning_rate": 1.266599114713882e-09, "loss": 1.0378957748413087, "num_tokens": 2386476.0, "step": 20 }, { "epoch": 5.333077345620743e-05, "grad_norm": 9.75, "learning_rate": 2.5998613407284946e-09, "loss": 1.0462759017944336, "num_tokens": 5034833.0, "step": 40 }, { "epoch": 7.999616018431115e-05, "grad_norm": 9.375, "learning_rate": 3.933123566743107e-09, "loss": 1.036106491088867, "num_tokens": 7442595.0, "step": 60 }, { "epoch": 0.00010666154691241487, "grad_norm": 10.125, "learning_rate": 5.26638579275772e-09, "loss": 1.058732509613037, "num_tokens": 9742278.0, "step": 80 }, { "epoch": 0.0001333269336405186, "grad_norm": 10.0, "learning_rate": 6.599648018772333e-09, "loss": 1.0622886657714843, "num_tokens": 12040516.0, "step": 100 }, { "epoch": 0.0001599923203686223, "grad_norm": 8.625, "learning_rate": 7.932910244786945e-09, "loss": 1.0076488494873046, "num_tokens": 14466854.0, "step": 120 }, { "epoch": 0.00018665770709672602, "grad_norm": 8.875, "learning_rate": 9.266172470801559e-09, "loss": 1.0777412414550782, "num_tokens": 16823161.0, "step": 140 }, { "epoch": 0.00021332309382482973, "grad_norm": 9.5, "learning_rate": 1.0599434696816171e-08, "loss": 1.0367786407470703, "num_tokens": 19106328.0, "step": 160 }, { "epoch": 0.00023998848055293347, "grad_norm": 8.4375, "learning_rate": 1.1932696922830783e-08, "loss": 1.0064573287963867, "num_tokens": 21670130.0, "step": 180 }, { "epoch": 0.0002666538672810372, "grad_norm": 11.0, "learning_rate": 1.3265959148845397e-08, "loss": 1.0498308181762694, "num_tokens": 24315138.0, "step": 200 }, { "epoch": 0.00029331925400914087, "grad_norm": 9.6875, "learning_rate": 1.4599221374860008e-08, "loss": 1.0239522933959961, "num_tokens": 26637388.0, "step": 220 }, { "epoch": 0.0003199846407372446, "grad_norm": 9.5, "learning_rate": 1.5932483600874623e-08, "loss": 1.0335540771484375, "num_tokens": 29045414.0, "step": 240 }, { "epoch": 0.00034665002746534835, "grad_norm": 9.75, "learning_rate": 1.7265745826889232e-08, "loss": 1.002272605895996, "num_tokens": 31452947.0, "step": 260 }, { "epoch": 0.00037331541419345204, "grad_norm": 9.625, "learning_rate": 1.8599008052903847e-08, "loss": 1.026328945159912, "num_tokens": 33545723.0, "step": 280 }, { "epoch": 0.0003999808009215558, "grad_norm": 9.625, "learning_rate": 1.993227027891846e-08, "loss": 1.0547853469848634, "num_tokens": 36106006.0, "step": 300 }, { "epoch": 0.00042664618764965946, "grad_norm": 9.5, "learning_rate": 2.1265532504933072e-08, "loss": 1.0599270820617677, "num_tokens": 38538633.0, "step": 320 }, { "epoch": 0.0004533115743777632, "grad_norm": 10.625, "learning_rate": 2.2598794730947687e-08, "loss": 1.061457061767578, "num_tokens": 40865912.0, "step": 340 }, { "epoch": 0.00047997696110586694, "grad_norm": 8.8125, "learning_rate": 2.39320569569623e-08, "loss": 1.043738555908203, "num_tokens": 43462864.0, "step": 360 }, { "epoch": 0.0005066423478339707, "grad_norm": 8.875, "learning_rate": 2.5265319182976908e-08, "loss": 1.0403480529785156, "num_tokens": 45891556.0, "step": 380 }, { "epoch": 0.0005333077345620744, "grad_norm": 7.8125, "learning_rate": 2.659858140899152e-08, "loss": 1.0314786911010743, "num_tokens": 48346846.0, "step": 400 }, { "epoch": 0.0005599731212901781, "grad_norm": 9.375, "learning_rate": 2.7931843635006136e-08, "loss": 1.046822738647461, "num_tokens": 50687096.0, "step": 420 }, { "epoch": 0.0005866385080182817, "grad_norm": 9.8125, "learning_rate": 2.9265105861020748e-08, "loss": 1.0244486808776856, "num_tokens": 53174665.0, "step": 440 }, { "epoch": 0.0006133038947463855, "grad_norm": 10.125, "learning_rate": 3.059836808703536e-08, "loss": 1.0535762786865235, "num_tokens": 55761839.0, "step": 460 }, { "epoch": 0.0006399692814744892, "grad_norm": 10.25, "learning_rate": 3.193163031304997e-08, "loss": 1.0535314559936524, "num_tokens": 58552424.0, "step": 480 }, { "epoch": 0.0006666346682025929, "grad_norm": 9.1875, "learning_rate": 3.3264892539064585e-08, "loss": 1.0430606842041015, "num_tokens": 60882925.0, "step": 500 }, { "epoch": 0.0006933000549306967, "grad_norm": 10.5625, "learning_rate": 3.45981547650792e-08, "loss": 1.041437530517578, "num_tokens": 63237812.0, "step": 520 }, { "epoch": 0.0007199654416588004, "grad_norm": 9.5, "learning_rate": 3.593141699109381e-08, "loss": 1.0482681274414063, "num_tokens": 65693409.0, "step": 540 }, { "epoch": 0.0007466308283869041, "grad_norm": 9.75, "learning_rate": 3.7264679217108425e-08, "loss": 1.0185321807861327, "num_tokens": 68164586.0, "step": 560 }, { "epoch": 0.0007732962151150078, "grad_norm": 8.6875, "learning_rate": 3.8597941443123033e-08, "loss": 1.0575294494628906, "num_tokens": 70542948.0, "step": 580 }, { "epoch": 0.0007999616018431116, "grad_norm": 8.8125, "learning_rate": 3.993120366913765e-08, "loss": 1.0618281364440918, "num_tokens": 73005526.0, "step": 600 }, { "epoch": 0.0008266269885712152, "grad_norm": 9.3125, "learning_rate": 4.126446589515226e-08, "loss": 1.0395849227905274, "num_tokens": 75483051.0, "step": 620 }, { "epoch": 0.0008532923752993189, "grad_norm": 8.5625, "learning_rate": 4.259772812116688e-08, "loss": 1.0205188751220704, "num_tokens": 77824695.0, "step": 640 }, { "epoch": 0.0008799577620274227, "grad_norm": 10.4375, "learning_rate": 4.393099034718149e-08, "loss": 1.0532617568969727, "num_tokens": 80297210.0, "step": 660 }, { "epoch": 0.0009066231487555264, "grad_norm": 8.5625, "learning_rate": 4.5264252573196104e-08, "loss": 1.0337120056152345, "num_tokens": 82649502.0, "step": 680 }, { "epoch": 0.0009332885354836301, "grad_norm": 9.5625, "learning_rate": 4.659751479921071e-08, "loss": 1.025763702392578, "num_tokens": 85090008.0, "step": 700 }, { "epoch": 0.0009599539222117339, "grad_norm": 9.1875, "learning_rate": 4.793077702522532e-08, "loss": 1.0669082641601562, "num_tokens": 87746792.0, "step": 720 }, { "epoch": 0.0009866193089398375, "grad_norm": 9.625, "learning_rate": 4.926403925123994e-08, "loss": 1.0468431472778321, "num_tokens": 90159925.0, "step": 740 }, { "epoch": 0.0010132846956679414, "grad_norm": 9.25, "learning_rate": 5.0597301477254546e-08, "loss": 1.0536123275756837, "num_tokens": 92589650.0, "step": 760 }, { "epoch": 0.001039950082396045, "grad_norm": 9.75, "learning_rate": 5.193056370326916e-08, "loss": 1.0188938140869142, "num_tokens": 95058218.0, "step": 780 }, { "epoch": 0.0010666154691241487, "grad_norm": 11.1875, "learning_rate": 5.326382592928378e-08, "loss": 1.0735032081604003, "num_tokens": 97588491.0, "step": 800 }, { "epoch": 0.0010932808558522524, "grad_norm": 9.4375, "learning_rate": 5.459708815529839e-08, "loss": 1.0131439208984374, "num_tokens": 99903281.0, "step": 820 }, { "epoch": 0.0011199462425803561, "grad_norm": 8.8125, "learning_rate": 5.5930350381313e-08, "loss": 1.037496566772461, "num_tokens": 102101390.0, "step": 840 }, { "epoch": 0.0011466116293084598, "grad_norm": 9.3125, "learning_rate": 5.726361260732762e-08, "loss": 1.0562919616699218, "num_tokens": 104528935.0, "step": 860 }, { "epoch": 0.0011732770160365635, "grad_norm": 8.1875, "learning_rate": 5.8596874833342226e-08, "loss": 1.0082353591918944, "num_tokens": 107104548.0, "step": 880 }, { "epoch": 0.0011999424027646674, "grad_norm": 9.4375, "learning_rate": 5.993013705935684e-08, "loss": 1.0233242988586426, "num_tokens": 109495432.0, "step": 900 }, { "epoch": 0.001226607789492771, "grad_norm": 10.5625, "learning_rate": 6.126339928537145e-08, "loss": 1.0626348495483398, "num_tokens": 111954996.0, "step": 920 }, { "epoch": 0.0012532731762208748, "grad_norm": 10.4375, "learning_rate": 6.259666151138606e-08, "loss": 1.0437286376953125, "num_tokens": 114297525.0, "step": 940 }, { "epoch": 0.0012799385629489784, "grad_norm": 9.875, "learning_rate": 6.392992373740067e-08, "loss": 1.038482666015625, "num_tokens": 116546981.0, "step": 960 }, { "epoch": 0.0013066039496770821, "grad_norm": 8.875, "learning_rate": 6.526318596341529e-08, "loss": 1.0363761901855468, "num_tokens": 118945836.0, "step": 980 }, { "epoch": 0.0013332693364051858, "grad_norm": 9.75, "learning_rate": 6.65964481894299e-08, "loss": 1.042701530456543, "num_tokens": 121384745.0, "step": 1000 }, { "epoch": 0.0013599347231332895, "grad_norm": 9.375, "learning_rate": 6.792971041544452e-08, "loss": 1.0713863372802734, "num_tokens": 123892722.0, "step": 1020 }, { "epoch": 0.0013866001098613934, "grad_norm": 9.75, "learning_rate": 6.926297264145913e-08, "loss": 1.0278459548950196, "num_tokens": 126353323.0, "step": 1040 }, { "epoch": 0.001413265496589497, "grad_norm": 9.6875, "learning_rate": 7.059623486747374e-08, "loss": 1.0657243728637695, "num_tokens": 128603753.0, "step": 1060 }, { "epoch": 0.0014399308833176008, "grad_norm": 10.875, "learning_rate": 7.192949709348835e-08, "loss": 1.037228775024414, "num_tokens": 131003166.0, "step": 1080 }, { "epoch": 0.0014665962700457045, "grad_norm": 9.9375, "learning_rate": 7.326275931950296e-08, "loss": 1.0316571235656737, "num_tokens": 133589960.0, "step": 1100 }, { "epoch": 0.0014932616567738081, "grad_norm": 8.9375, "learning_rate": 7.459602154551758e-08, "loss": 1.038112258911133, "num_tokens": 136001349.0, "step": 1120 }, { "epoch": 0.0015199270435019118, "grad_norm": 10.5, "learning_rate": 7.59292837715322e-08, "loss": 1.0522043228149414, "num_tokens": 138396691.0, "step": 1140 }, { "epoch": 0.0015465924302300155, "grad_norm": 9.9375, "learning_rate": 7.726254599754681e-08, "loss": 1.052998161315918, "num_tokens": 140801872.0, "step": 1160 }, { "epoch": 0.0015732578169581194, "grad_norm": 9.125, "learning_rate": 7.859580822356142e-08, "loss": 1.0279390335083007, "num_tokens": 143208854.0, "step": 1180 }, { "epoch": 0.0015999232036862231, "grad_norm": 8.6875, "learning_rate": 7.992907044957603e-08, "loss": 1.0181364059448241, "num_tokens": 145652616.0, "step": 1200 }, { "epoch": 0.0016265885904143268, "grad_norm": 9.75, "learning_rate": 8.126233267559064e-08, "loss": 1.0548544883728028, "num_tokens": 147994503.0, "step": 1220 }, { "epoch": 0.0016532539771424305, "grad_norm": 9.75, "learning_rate": 8.259559490160526e-08, "loss": 1.0366222381591796, "num_tokens": 150380216.0, "step": 1240 }, { "epoch": 0.0016799193638705342, "grad_norm": 9.5625, "learning_rate": 8.392885712761987e-08, "loss": 1.0116182327270509, "num_tokens": 152815663.0, "step": 1260 }, { "epoch": 0.0017065847505986379, "grad_norm": 8.75, "learning_rate": 8.526211935363448e-08, "loss": 1.014047908782959, "num_tokens": 155190090.0, "step": 1280 }, { "epoch": 0.0017332501373267418, "grad_norm": 8.75, "learning_rate": 8.659538157964909e-08, "loss": 1.0731770515441894, "num_tokens": 157730936.0, "step": 1300 }, { "epoch": 0.0017599155240548454, "grad_norm": 9.25, "learning_rate": 8.79286438056637e-08, "loss": 1.009451675415039, "num_tokens": 160072798.0, "step": 1320 }, { "epoch": 0.0017865809107829491, "grad_norm": 8.9375, "learning_rate": 8.926190603167832e-08, "loss": 1.0482282638549805, "num_tokens": 162698450.0, "step": 1340 }, { "epoch": 0.0018132462975110528, "grad_norm": 9.625, "learning_rate": 9.059516825769293e-08, "loss": 1.0410526275634766, "num_tokens": 165230264.0, "step": 1360 }, { "epoch": 0.0018399116842391565, "grad_norm": 9.25, "learning_rate": 9.192843048370753e-08, "loss": 1.0357679367065429, "num_tokens": 167526832.0, "step": 1380 }, { "epoch": 0.0018665770709672602, "grad_norm": 9.75, "learning_rate": 9.326169270972217e-08, "loss": 1.0310096740722656, "num_tokens": 170030979.0, "step": 1400 }, { "epoch": 0.0018932424576953639, "grad_norm": 9.0, "learning_rate": 9.459495493573678e-08, "loss": 1.0277082443237304, "num_tokens": 172342065.0, "step": 1420 }, { "epoch": 0.0019199078444234678, "grad_norm": 8.6875, "learning_rate": 9.592821716175139e-08, "loss": 1.0393661499023437, "num_tokens": 174937109.0, "step": 1440 }, { "epoch": 0.0019465732311515715, "grad_norm": 9.4375, "learning_rate": 9.7261479387766e-08, "loss": 1.033489990234375, "num_tokens": 177242325.0, "step": 1460 }, { "epoch": 0.001973238617879675, "grad_norm": 10.8125, "learning_rate": 9.85947416137806e-08, "loss": 1.0367914199829102, "num_tokens": 179771095.0, "step": 1480 }, { "epoch": 0.001999904004607779, "grad_norm": 8.3125, "learning_rate": 9.992800383979523e-08, "loss": 1.0197120666503907, "num_tokens": 182283888.0, "step": 1500 }, { "epoch": 0.0020265693913358827, "grad_norm": 9.3125, "learning_rate": 1.0126126606580984e-07, "loss": 1.0408472061157226, "num_tokens": 184756923.0, "step": 1520 }, { "epoch": 0.002053234778063986, "grad_norm": 9.8125, "learning_rate": 1.0259452829182444e-07, "loss": 1.020615577697754, "num_tokens": 187207427.0, "step": 1540 }, { "epoch": 0.00207990016479209, "grad_norm": 9.6875, "learning_rate": 1.0392779051783905e-07, "loss": 1.0326616287231445, "num_tokens": 189439923.0, "step": 1560 }, { "epoch": 0.0021065655515201936, "grad_norm": 9.5625, "learning_rate": 1.0526105274385366e-07, "loss": 1.0133373260498046, "num_tokens": 191539176.0, "step": 1580 }, { "epoch": 0.0021332309382482975, "grad_norm": 9.75, "learning_rate": 1.0659431496986828e-07, "loss": 1.0492067337036133, "num_tokens": 194095781.0, "step": 1600 }, { "epoch": 0.002159896324976401, "grad_norm": 9.0, "learning_rate": 1.0792757719588289e-07, "loss": 1.039297866821289, "num_tokens": 196531268.0, "step": 1620 }, { "epoch": 0.002186561711704505, "grad_norm": 8.9375, "learning_rate": 1.092608394218975e-07, "loss": 1.0345168113708496, "num_tokens": 198883037.0, "step": 1640 }, { "epoch": 0.0022132270984326088, "grad_norm": 9.0, "learning_rate": 1.1059410164791211e-07, "loss": 1.0589424133300782, "num_tokens": 201334054.0, "step": 1660 }, { "epoch": 0.0022398924851607122, "grad_norm": 9.0, "learning_rate": 1.1192736387392673e-07, "loss": 1.0425819396972655, "num_tokens": 203883985.0, "step": 1680 }, { "epoch": 0.002266557871888816, "grad_norm": 9.75, "learning_rate": 1.1326062609994134e-07, "loss": 1.0313312530517578, "num_tokens": 206602614.0, "step": 1700 }, { "epoch": 0.0022932232586169196, "grad_norm": 8.5, "learning_rate": 1.1459388832595595e-07, "loss": 1.0216485977172851, "num_tokens": 208977603.0, "step": 1720 }, { "epoch": 0.0023198886453450235, "grad_norm": 8.75, "learning_rate": 1.1592715055197057e-07, "loss": 1.042165470123291, "num_tokens": 211598495.0, "step": 1740 }, { "epoch": 0.002346554032073127, "grad_norm": 8.125, "learning_rate": 1.172604127779852e-07, "loss": 1.0144313812255858, "num_tokens": 214206514.0, "step": 1760 }, { "epoch": 0.002373219418801231, "grad_norm": 8.75, "learning_rate": 1.185936750039998e-07, "loss": 1.0199343681335449, "num_tokens": 216519900.0, "step": 1780 }, { "epoch": 0.0023998848055293348, "grad_norm": 9.5, "learning_rate": 1.199269372300144e-07, "loss": 1.0445449829101563, "num_tokens": 218950093.0, "step": 1800 }, { "epoch": 0.0024265501922574382, "grad_norm": 9.6875, "learning_rate": 1.2126019945602904e-07, "loss": 1.049318504333496, "num_tokens": 221633729.0, "step": 1820 }, { "epoch": 0.002453215578985542, "grad_norm": 10.125, "learning_rate": 1.2259346168204363e-07, "loss": 1.028379249572754, "num_tokens": 224076221.0, "step": 1840 }, { "epoch": 0.0024798809657136456, "grad_norm": 8.4375, "learning_rate": 1.2392672390805825e-07, "loss": 1.0204413414001465, "num_tokens": 226544616.0, "step": 1860 }, { "epoch": 0.0025065463524417495, "grad_norm": 8.1875, "learning_rate": 1.2525998613407285e-07, "loss": 1.01558198928833, "num_tokens": 229000573.0, "step": 1880 }, { "epoch": 0.002533211739169853, "grad_norm": 9.4375, "learning_rate": 1.2659324836008747e-07, "loss": 1.0443326950073242, "num_tokens": 231288917.0, "step": 1900 }, { "epoch": 0.002559877125897957, "grad_norm": 9.4375, "learning_rate": 1.279265105861021e-07, "loss": 1.0481420516967774, "num_tokens": 233579958.0, "step": 1920 }, { "epoch": 0.002586542512626061, "grad_norm": 9.75, "learning_rate": 1.292597728121167e-07, "loss": 1.0132708549499512, "num_tokens": 236130490.0, "step": 1940 }, { "epoch": 0.0026132078993541643, "grad_norm": 10.1875, "learning_rate": 1.305930350381313e-07, "loss": 0.9910290718078614, "num_tokens": 238468891.0, "step": 1960 }, { "epoch": 0.002639873286082268, "grad_norm": 7.625, "learning_rate": 1.319262972641459e-07, "loss": 1.044781494140625, "num_tokens": 240943503.0, "step": 1980 }, { "epoch": 0.0026665386728103716, "grad_norm": 10.0625, "learning_rate": 1.3325955949016053e-07, "loss": 1.037123680114746, "num_tokens": 243430808.0, "step": 2000 }, { "epoch": 0.0026932040595384755, "grad_norm": 9.5, "learning_rate": 1.3459282171617515e-07, "loss": 1.0232873916625977, "num_tokens": 245963608.0, "step": 2020 }, { "epoch": 0.002719869446266579, "grad_norm": 9.0, "learning_rate": 1.3592608394218975e-07, "loss": 1.0178335189819336, "num_tokens": 248327303.0, "step": 2040 }, { "epoch": 0.002746534832994683, "grad_norm": 10.1875, "learning_rate": 1.3725934616820437e-07, "loss": 0.9839044570922851, "num_tokens": 250745939.0, "step": 2060 }, { "epoch": 0.002773200219722787, "grad_norm": 8.3125, "learning_rate": 1.38592608394219e-07, "loss": 0.9965020179748535, "num_tokens": 253226980.0, "step": 2080 }, { "epoch": 0.0027998656064508903, "grad_norm": 9.875, "learning_rate": 1.399258706202336e-07, "loss": 1.0142908096313477, "num_tokens": 255491949.0, "step": 2100 }, { "epoch": 0.002826530993178994, "grad_norm": 8.6875, "learning_rate": 1.412591328462482e-07, "loss": 1.0431265830993652, "num_tokens": 257799654.0, "step": 2120 }, { "epoch": 0.0028531963799070977, "grad_norm": 10.25, "learning_rate": 1.4259239507226283e-07, "loss": 1.0448129653930665, "num_tokens": 260194187.0, "step": 2140 }, { "epoch": 0.0028798617666352016, "grad_norm": 9.3125, "learning_rate": 1.4392565729827745e-07, "loss": 0.9835843086242676, "num_tokens": 262630096.0, "step": 2160 }, { "epoch": 0.002906527153363305, "grad_norm": 9.25, "learning_rate": 1.4525891952429205e-07, "loss": 1.0212353706359862, "num_tokens": 264847558.0, "step": 2180 }, { "epoch": 0.002933192540091409, "grad_norm": 9.125, "learning_rate": 1.4659218175030667e-07, "loss": 1.0493291854858398, "num_tokens": 267069866.0, "step": 2200 }, { "epoch": 0.002959857926819513, "grad_norm": 8.375, "learning_rate": 1.4792544397632127e-07, "loss": 1.051121139526367, "num_tokens": 269581298.0, "step": 2220 }, { "epoch": 0.0029865233135476163, "grad_norm": 9.125, "learning_rate": 1.492587062023359e-07, "loss": 1.0580559730529786, "num_tokens": 272202806.0, "step": 2240 }, { "epoch": 0.00301318870027572, "grad_norm": 9.75, "learning_rate": 1.505919684283505e-07, "loss": 1.038565444946289, "num_tokens": 274498812.0, "step": 2260 }, { "epoch": 0.0030398540870038237, "grad_norm": 9.1875, "learning_rate": 1.519252306543651e-07, "loss": 1.0276926040649415, "num_tokens": 277047636.0, "step": 2280 }, { "epoch": 0.0030665194737319276, "grad_norm": 10.8125, "learning_rate": 1.5325849288037973e-07, "loss": 1.034402847290039, "num_tokens": 279528637.0, "step": 2300 }, { "epoch": 0.003093184860460031, "grad_norm": 8.375, "learning_rate": 1.5459175510639432e-07, "loss": 0.9923318862915039, "num_tokens": 281943962.0, "step": 2320 }, { "epoch": 0.003119850247188135, "grad_norm": 9.125, "learning_rate": 1.5592501733240895e-07, "loss": 1.0207632064819336, "num_tokens": 284448641.0, "step": 2340 }, { "epoch": 0.003146515633916239, "grad_norm": 8.6875, "learning_rate": 1.5725827955842357e-07, "loss": 1.0014940261840821, "num_tokens": 287064647.0, "step": 2360 }, { "epoch": 0.0031731810206443423, "grad_norm": 8.375, "learning_rate": 1.5859154178443816e-07, "loss": 1.031901741027832, "num_tokens": 289708054.0, "step": 2380 }, { "epoch": 0.0031998464073724462, "grad_norm": 10.125, "learning_rate": 1.5992480401045279e-07, "loss": 1.0157596588134765, "num_tokens": 292055563.0, "step": 2400 }, { "epoch": 0.0032265117941005497, "grad_norm": 9.1875, "learning_rate": 1.6125806623646738e-07, "loss": 0.9791703224182129, "num_tokens": 294191787.0, "step": 2420 }, { "epoch": 0.0032531771808286536, "grad_norm": 9.125, "learning_rate": 1.62591328462482e-07, "loss": 1.0224925041198731, "num_tokens": 296646762.0, "step": 2440 }, { "epoch": 0.003279842567556757, "grad_norm": 8.875, "learning_rate": 1.6392459068849662e-07, "loss": 1.0014867782592773, "num_tokens": 299198850.0, "step": 2460 }, { "epoch": 0.003306507954284861, "grad_norm": 8.6875, "learning_rate": 1.6525785291451122e-07, "loss": 1.0327044486999513, "num_tokens": 301721167.0, "step": 2480 }, { "epoch": 0.003333173341012965, "grad_norm": 8.25, "learning_rate": 1.6659111514052584e-07, "loss": 1.0073762893676759, "num_tokens": 304434581.0, "step": 2500 }, { "epoch": 0.0033598387277410683, "grad_norm": 8.4375, "learning_rate": 1.6792437736654044e-07, "loss": 1.0193467140197754, "num_tokens": 307019204.0, "step": 2520 }, { "epoch": 0.0033865041144691722, "grad_norm": 9.375, "learning_rate": 1.6925763959255506e-07, "loss": 0.990755271911621, "num_tokens": 309397148.0, "step": 2540 }, { "epoch": 0.0034131695011972757, "grad_norm": 9.3125, "learning_rate": 1.7059090181856968e-07, "loss": 0.9950632095336914, "num_tokens": 311681936.0, "step": 2560 }, { "epoch": 0.0034398348879253796, "grad_norm": 9.625, "learning_rate": 1.7192416404458433e-07, "loss": 1.026517105102539, "num_tokens": 314152685.0, "step": 2580 }, { "epoch": 0.0034665002746534835, "grad_norm": 9.125, "learning_rate": 1.7325742627059893e-07, "loss": 1.0268559455871582, "num_tokens": 316720371.0, "step": 2600 }, { "epoch": 0.003493165661381587, "grad_norm": 9.875, "learning_rate": 1.7459068849661355e-07, "loss": 1.0623804092407227, "num_tokens": 318949535.0, "step": 2620 }, { "epoch": 0.003519831048109691, "grad_norm": 9.75, "learning_rate": 1.7592395072262814e-07, "loss": 1.0125499725341798, "num_tokens": 321392259.0, "step": 2640 }, { "epoch": 0.0035464964348377944, "grad_norm": 9.1875, "learning_rate": 1.7725721294864277e-07, "loss": 1.0223895072937013, "num_tokens": 323808951.0, "step": 2660 }, { "epoch": 0.0035731618215658983, "grad_norm": 8.4375, "learning_rate": 1.785904751746574e-07, "loss": 1.0229875564575195, "num_tokens": 326348213.0, "step": 2680 }, { "epoch": 0.0035998272082940017, "grad_norm": 9.5, "learning_rate": 1.7992373740067198e-07, "loss": 1.0217164993286132, "num_tokens": 328669467.0, "step": 2700 }, { "epoch": 0.0036264925950221056, "grad_norm": 8.8125, "learning_rate": 1.812569996266866e-07, "loss": 1.0094970703125, "num_tokens": 330953672.0, "step": 2720 }, { "epoch": 0.0036531579817502095, "grad_norm": 9.0, "learning_rate": 1.825902618527012e-07, "loss": 1.0204289436340332, "num_tokens": 333662709.0, "step": 2740 }, { "epoch": 0.003679823368478313, "grad_norm": 8.125, "learning_rate": 1.8392352407871582e-07, "loss": 1.024044132232666, "num_tokens": 335983079.0, "step": 2760 }, { "epoch": 0.003706488755206417, "grad_norm": 8.75, "learning_rate": 1.8525678630473045e-07, "loss": 1.0306682586669922, "num_tokens": 338382893.0, "step": 2780 }, { "epoch": 0.0037331541419345204, "grad_norm": 7.75, "learning_rate": 1.8659004853074504e-07, "loss": 1.009512710571289, "num_tokens": 340911518.0, "step": 2800 }, { "epoch": 0.0037598195286626243, "grad_norm": 9.4375, "learning_rate": 1.8792331075675966e-07, "loss": 0.9991010665893555, "num_tokens": 343330970.0, "step": 2820 }, { "epoch": 0.0037864849153907277, "grad_norm": 7.59375, "learning_rate": 1.8925657298277426e-07, "loss": 0.9996419906616211, "num_tokens": 345885265.0, "step": 2840 }, { "epoch": 0.0038131503021188316, "grad_norm": 9.5, "learning_rate": 1.9058983520878888e-07, "loss": 1.0269476890563964, "num_tokens": 348329654.0, "step": 2860 }, { "epoch": 0.0038398156888469356, "grad_norm": 8.125, "learning_rate": 1.919230974348035e-07, "loss": 1.0100475311279298, "num_tokens": 350877662.0, "step": 2880 }, { "epoch": 0.003866481075575039, "grad_norm": 8.6875, "learning_rate": 1.932563596608181e-07, "loss": 1.0317946434020997, "num_tokens": 353203773.0, "step": 2900 }, { "epoch": 0.003893146462303143, "grad_norm": 7.9375, "learning_rate": 1.9458962188683272e-07, "loss": 0.9728310585021973, "num_tokens": 355670051.0, "step": 2920 }, { "epoch": 0.003919811849031247, "grad_norm": 9.0, "learning_rate": 1.9592288411284732e-07, "loss": 1.014097309112549, "num_tokens": 358026197.0, "step": 2940 }, { "epoch": 0.00394647723575935, "grad_norm": 8.625, "learning_rate": 1.9725614633886194e-07, "loss": 1.022810935974121, "num_tokens": 360438985.0, "step": 2960 }, { "epoch": 0.003973142622487454, "grad_norm": 9.0, "learning_rate": 1.9858940856487656e-07, "loss": 1.000529670715332, "num_tokens": 362906300.0, "step": 2980 }, { "epoch": 0.003999808009215558, "grad_norm": 7.46875, "learning_rate": 1.9992267079089116e-07, "loss": 1.0024723052978515, "num_tokens": 365130921.0, "step": 3000 }, { "epoch": 0.004026473395943662, "grad_norm": 8.75, "learning_rate": 2.0125593301690578e-07, "loss": 1.0052038192749024, "num_tokens": 367457960.0, "step": 3020 }, { "epoch": 0.0040531387826717655, "grad_norm": 9.125, "learning_rate": 2.0258919524292037e-07, "loss": 0.9921468734741211, "num_tokens": 369804149.0, "step": 3040 }, { "epoch": 0.0040798041693998685, "grad_norm": 9.0, "learning_rate": 2.03922457468935e-07, "loss": 1.024406337738037, "num_tokens": 372316204.0, "step": 3060 }, { "epoch": 0.004106469556127972, "grad_norm": 9.6875, "learning_rate": 2.0525571969494962e-07, "loss": 1.033228874206543, "num_tokens": 374676288.0, "step": 3080 }, { "epoch": 0.004133134942856076, "grad_norm": 8.3125, "learning_rate": 2.0658898192096421e-07, "loss": 0.9954107284545899, "num_tokens": 377090951.0, "step": 3100 }, { "epoch": 0.00415980032958418, "grad_norm": 9.1875, "learning_rate": 2.0792224414697884e-07, "loss": 0.9928818702697754, "num_tokens": 379639962.0, "step": 3120 }, { "epoch": 0.004186465716312283, "grad_norm": 8.5625, "learning_rate": 2.0925550637299346e-07, "loss": 0.9901020050048828, "num_tokens": 382263373.0, "step": 3140 }, { "epoch": 0.004213131103040387, "grad_norm": 9.3125, "learning_rate": 2.1058876859900805e-07, "loss": 0.9745883941650391, "num_tokens": 384641713.0, "step": 3160 }, { "epoch": 0.004239796489768491, "grad_norm": 9.375, "learning_rate": 2.1192203082502268e-07, "loss": 0.9926258087158203, "num_tokens": 386918270.0, "step": 3180 }, { "epoch": 0.004266461876496595, "grad_norm": 9.5, "learning_rate": 2.1325529305103727e-07, "loss": 1.0098329544067384, "num_tokens": 389216267.0, "step": 3200 }, { "epoch": 0.004293127263224699, "grad_norm": 9.375, "learning_rate": 2.145885552770519e-07, "loss": 0.9948061943054199, "num_tokens": 391373332.0, "step": 3220 }, { "epoch": 0.004319792649952802, "grad_norm": 7.59375, "learning_rate": 2.1592181750306652e-07, "loss": 0.9930863380432129, "num_tokens": 393777370.0, "step": 3240 }, { "epoch": 0.004346458036680906, "grad_norm": 8.8125, "learning_rate": 2.1725507972908114e-07, "loss": 0.9974763870239258, "num_tokens": 396134484.0, "step": 3260 }, { "epoch": 0.00437312342340901, "grad_norm": 8.0625, "learning_rate": 2.1858834195509576e-07, "loss": 1.023780345916748, "num_tokens": 398666339.0, "step": 3280 }, { "epoch": 0.004399788810137114, "grad_norm": 8.875, "learning_rate": 2.1992160418111038e-07, "loss": 0.9830857276916504, "num_tokens": 401202468.0, "step": 3300 }, { "epoch": 0.0044264541968652175, "grad_norm": 8.5, "learning_rate": 2.2125486640712498e-07, "loss": 0.9859627723693848, "num_tokens": 403740068.0, "step": 3320 }, { "epoch": 0.0044531195835933205, "grad_norm": 9.5, "learning_rate": 2.225881286331396e-07, "loss": 0.9911046981811523, "num_tokens": 405979372.0, "step": 3340 }, { "epoch": 0.0044797849703214244, "grad_norm": 8.8125, "learning_rate": 2.239213908591542e-07, "loss": 0.9971678733825684, "num_tokens": 408386302.0, "step": 3360 }, { "epoch": 0.004506450357049528, "grad_norm": 8.75, "learning_rate": 2.2525465308516882e-07, "loss": 1.0064891815185546, "num_tokens": 410824133.0, "step": 3380 }, { "epoch": 0.004533115743777632, "grad_norm": 7.9375, "learning_rate": 2.2658791531118344e-07, "loss": 0.9958887100219727, "num_tokens": 413262199.0, "step": 3400 }, { "epoch": 0.004559781130505736, "grad_norm": 8.6875, "learning_rate": 2.2792117753719804e-07, "loss": 1.0003417015075684, "num_tokens": 415601169.0, "step": 3420 }, { "epoch": 0.004586446517233839, "grad_norm": 9.1875, "learning_rate": 2.2925443976321266e-07, "loss": 1.005995273590088, "num_tokens": 418027014.0, "step": 3440 }, { "epoch": 0.004613111903961943, "grad_norm": 8.75, "learning_rate": 2.3058770198922725e-07, "loss": 0.9825302124023437, "num_tokens": 420483673.0, "step": 3460 }, { "epoch": 0.004639777290690047, "grad_norm": 9.125, "learning_rate": 2.3192096421524188e-07, "loss": 0.9895725250244141, "num_tokens": 423037172.0, "step": 3480 }, { "epoch": 0.004666442677418151, "grad_norm": 8.375, "learning_rate": 2.332542264412565e-07, "loss": 1.0188589096069336, "num_tokens": 425458008.0, "step": 3500 }, { "epoch": 0.004693108064146254, "grad_norm": 8.75, "learning_rate": 2.345874886672711e-07, "loss": 0.991973876953125, "num_tokens": 427943660.0, "step": 3520 }, { "epoch": 0.004719773450874358, "grad_norm": 8.125, "learning_rate": 2.3592075089328572e-07, "loss": 0.9630790710449219, "num_tokens": 430436824.0, "step": 3540 }, { "epoch": 0.004746438837602462, "grad_norm": 8.125, "learning_rate": 2.3725401311930034e-07, "loss": 1.0003299713134766, "num_tokens": 433003325.0, "step": 3560 }, { "epoch": 0.004773104224330566, "grad_norm": 8.375, "learning_rate": 2.3858727534531493e-07, "loss": 1.0054038047790528, "num_tokens": 435509091.0, "step": 3580 }, { "epoch": 0.0047997696110586695, "grad_norm": 8.25, "learning_rate": 2.3992053757132953e-07, "loss": 0.9945505142211915, "num_tokens": 437960920.0, "step": 3600 }, { "epoch": 0.004826434997786773, "grad_norm": 7.96875, "learning_rate": 2.412537997973442e-07, "loss": 0.9867237091064454, "num_tokens": 440485120.0, "step": 3620 }, { "epoch": 0.0048531003845148765, "grad_norm": 8.5625, "learning_rate": 2.4258706202335877e-07, "loss": 0.9656665802001954, "num_tokens": 442955898.0, "step": 3640 }, { "epoch": 0.00487976577124298, "grad_norm": 8.6875, "learning_rate": 2.4392032424937337e-07, "loss": 0.999897575378418, "num_tokens": 445454795.0, "step": 3660 }, { "epoch": 0.004906431157971084, "grad_norm": 8.8125, "learning_rate": 2.45253586475388e-07, "loss": 0.9745407104492188, "num_tokens": 447817765.0, "step": 3680 }, { "epoch": 0.004933096544699188, "grad_norm": 8.3125, "learning_rate": 2.465868487014026e-07, "loss": 0.9961479187011719, "num_tokens": 450376528.0, "step": 3700 }, { "epoch": 0.004959761931427291, "grad_norm": 8.3125, "learning_rate": 2.479201109274172e-07, "loss": 0.9714190483093261, "num_tokens": 452610708.0, "step": 3720 }, { "epoch": 0.004986427318155395, "grad_norm": 8.6875, "learning_rate": 2.492533731534318e-07, "loss": 0.958547306060791, "num_tokens": 454836537.0, "step": 3740 }, { "epoch": 0.005013092704883499, "grad_norm": 8.125, "learning_rate": 2.5058663537944645e-07, "loss": 0.9671206474304199, "num_tokens": 457413084.0, "step": 3760 }, { "epoch": 0.005039758091611603, "grad_norm": 7.90625, "learning_rate": 2.5191989760546105e-07, "loss": 0.9956648826599122, "num_tokens": 459895784.0, "step": 3780 }, { "epoch": 0.005066423478339706, "grad_norm": 9.1875, "learning_rate": 2.5325315983147564e-07, "loss": 0.9415251731872558, "num_tokens": 462240493.0, "step": 3800 }, { "epoch": 0.00509308886506781, "grad_norm": 7.3125, "learning_rate": 2.545864220574903e-07, "loss": 0.9676140785217285, "num_tokens": 464512797.0, "step": 3820 }, { "epoch": 0.005119754251795914, "grad_norm": 8.625, "learning_rate": 2.559196842835049e-07, "loss": 0.9689323425292968, "num_tokens": 466978947.0, "step": 3840 }, { "epoch": 0.005146419638524018, "grad_norm": 7.46875, "learning_rate": 2.572529465095195e-07, "loss": 0.9640571594238281, "num_tokens": 469082052.0, "step": 3860 }, { "epoch": 0.005173085025252122, "grad_norm": 8.0, "learning_rate": 2.5858620873553413e-07, "loss": 0.9790838241577149, "num_tokens": 471471334.0, "step": 3880 }, { "epoch": 0.005199750411980225, "grad_norm": 7.8125, "learning_rate": 2.5991947096154873e-07, "loss": 0.9860439300537109, "num_tokens": 473679412.0, "step": 3900 }, { "epoch": 0.0052264157987083285, "grad_norm": 8.0625, "learning_rate": 2.612527331875633e-07, "loss": 0.9582957267761231, "num_tokens": 475974736.0, "step": 3920 }, { "epoch": 0.005253081185436432, "grad_norm": 7.90625, "learning_rate": 2.6258599541357797e-07, "loss": 0.9921460151672363, "num_tokens": 478517915.0, "step": 3940 }, { "epoch": 0.005279746572164536, "grad_norm": 7.71875, "learning_rate": 2.6391925763959257e-07, "loss": 0.9887395858764648, "num_tokens": 481096362.0, "step": 3960 }, { "epoch": 0.00530641195889264, "grad_norm": 7.4375, "learning_rate": 2.652525198656072e-07, "loss": 0.9579450607299804, "num_tokens": 483705649.0, "step": 3980 }, { "epoch": 0.005333077345620743, "grad_norm": 7.875, "learning_rate": 2.665857820916218e-07, "loss": 0.9738332748413085, "num_tokens": 486082660.0, "step": 4000 }, { "epoch": 0.005359742732348847, "grad_norm": 7.59375, "learning_rate": 2.679190443176364e-07, "loss": 0.9818484306335449, "num_tokens": 488812433.0, "step": 4020 }, { "epoch": 0.005386408119076951, "grad_norm": 8.0625, "learning_rate": 2.6925230654365106e-07, "loss": 0.9716938018798829, "num_tokens": 491110052.0, "step": 4040 }, { "epoch": 0.005413073505805055, "grad_norm": 7.9375, "learning_rate": 2.7058556876966565e-07, "loss": 0.9632538795471192, "num_tokens": 493433723.0, "step": 4060 }, { "epoch": 0.005439738892533158, "grad_norm": 9.1875, "learning_rate": 2.7191883099568025e-07, "loss": 0.9873666763305664, "num_tokens": 495914810.0, "step": 4080 }, { "epoch": 0.005466404279261262, "grad_norm": 8.9375, "learning_rate": 2.732520932216949e-07, "loss": 0.9664394378662109, "num_tokens": 498040721.0, "step": 4100 }, { "epoch": 0.005493069665989366, "grad_norm": 7.71875, "learning_rate": 2.745853554477095e-07, "loss": 0.9695411682128906, "num_tokens": 500365264.0, "step": 4120 }, { "epoch": 0.00551973505271747, "grad_norm": 7.15625, "learning_rate": 2.759186176737241e-07, "loss": 0.973029899597168, "num_tokens": 503032612.0, "step": 4140 }, { "epoch": 0.005546400439445574, "grad_norm": 7.46875, "learning_rate": 2.772518798997387e-07, "loss": 0.9625458717346191, "num_tokens": 505544115.0, "step": 4160 }, { "epoch": 0.005573065826173677, "grad_norm": 9.1875, "learning_rate": 2.7858514212575333e-07, "loss": 0.9847710609436036, "num_tokens": 508100860.0, "step": 4180 }, { "epoch": 0.0055997312129017806, "grad_norm": 10.125, "learning_rate": 2.7991840435176793e-07, "loss": 0.9676617622375489, "num_tokens": 510705839.0, "step": 4200 }, { "epoch": 0.0056263965996298845, "grad_norm": 9.0, "learning_rate": 2.812516665777825e-07, "loss": 0.9626630783081055, "num_tokens": 513195897.0, "step": 4220 }, { "epoch": 0.005653061986357988, "grad_norm": 7.96875, "learning_rate": 2.8258492880379717e-07, "loss": 0.9706341743469238, "num_tokens": 515694618.0, "step": 4240 }, { "epoch": 0.005679727373086092, "grad_norm": 7.40625, "learning_rate": 2.8391819102981177e-07, "loss": 0.9699504852294922, "num_tokens": 518297816.0, "step": 4260 }, { "epoch": 0.005706392759814195, "grad_norm": 7.96875, "learning_rate": 2.8525145325582636e-07, "loss": 0.9704195976257324, "num_tokens": 520714792.0, "step": 4280 }, { "epoch": 0.005733058146542299, "grad_norm": 8.4375, "learning_rate": 2.86584715481841e-07, "loss": 0.9863205909729004, "num_tokens": 523051545.0, "step": 4300 }, { "epoch": 0.005759723533270403, "grad_norm": 8.25, "learning_rate": 2.879179777078556e-07, "loss": 0.996036434173584, "num_tokens": 525421053.0, "step": 4320 }, { "epoch": 0.005786388919998507, "grad_norm": 8.0625, "learning_rate": 2.892512399338702e-07, "loss": 0.969749927520752, "num_tokens": 527767444.0, "step": 4340 }, { "epoch": 0.00581305430672661, "grad_norm": 9.1875, "learning_rate": 2.905845021598848e-07, "loss": 0.9826925277709961, "num_tokens": 530285207.0, "step": 4360 }, { "epoch": 0.005839719693454714, "grad_norm": 9.0, "learning_rate": 2.9191776438589945e-07, "loss": 0.9835399627685547, "num_tokens": 532783440.0, "step": 4380 }, { "epoch": 0.005866385080182818, "grad_norm": 6.90625, "learning_rate": 2.9325102661191404e-07, "loss": 0.9662948608398437, "num_tokens": 535226167.0, "step": 4400 }, { "epoch": 0.005893050466910922, "grad_norm": 7.59375, "learning_rate": 2.9458428883792864e-07, "loss": 0.9578766822814941, "num_tokens": 537642667.0, "step": 4420 }, { "epoch": 0.005919715853639026, "grad_norm": 7.65625, "learning_rate": 2.959175510639433e-07, "loss": 0.956723403930664, "num_tokens": 539883201.0, "step": 4440 }, { "epoch": 0.005946381240367129, "grad_norm": 7.28125, "learning_rate": 2.972508132899579e-07, "loss": 0.9692272186279297, "num_tokens": 542426319.0, "step": 4460 }, { "epoch": 0.005973046627095233, "grad_norm": 7.40625, "learning_rate": 2.985840755159725e-07, "loss": 0.9454196929931641, "num_tokens": 544783232.0, "step": 4480 }, { "epoch": 0.0059997120138233365, "grad_norm": 8.125, "learning_rate": 2.9991733774198713e-07, "loss": 0.999168586730957, "num_tokens": 547248197.0, "step": 4500 }, { "epoch": 0.00602637740055144, "grad_norm": 7.46875, "learning_rate": 3.012505999680017e-07, "loss": 0.9971094131469727, "num_tokens": 549897844.0, "step": 4520 }, { "epoch": 0.006053042787279544, "grad_norm": 8.4375, "learning_rate": 3.025838621940163e-07, "loss": 0.9815726280212402, "num_tokens": 552328136.0, "step": 4540 }, { "epoch": 0.006079708174007647, "grad_norm": 8.3125, "learning_rate": 3.0391712442003097e-07, "loss": 1.006311798095703, "num_tokens": 554885062.0, "step": 4560 }, { "epoch": 0.006106373560735751, "grad_norm": 8.0, "learning_rate": 3.0525038664604556e-07, "loss": 0.9664499282836914, "num_tokens": 557251895.0, "step": 4580 }, { "epoch": 0.006133038947463855, "grad_norm": 7.15625, "learning_rate": 3.0658364887206016e-07, "loss": 0.9640503883361816, "num_tokens": 559497696.0, "step": 4600 }, { "epoch": 0.006159704334191959, "grad_norm": 7.03125, "learning_rate": 3.079169110980748e-07, "loss": 0.9520719528198243, "num_tokens": 562132144.0, "step": 4620 }, { "epoch": 0.006186369720920062, "grad_norm": 7.1875, "learning_rate": 3.092501733240894e-07, "loss": 0.9826997756958008, "num_tokens": 564681616.0, "step": 4640 }, { "epoch": 0.006213035107648166, "grad_norm": 8.1875, "learning_rate": 3.1058343555010405e-07, "loss": 0.9662124633789062, "num_tokens": 567055007.0, "step": 4660 }, { "epoch": 0.00623970049437627, "grad_norm": 8.8125, "learning_rate": 3.1191669777611865e-07, "loss": 0.9941400527954102, "num_tokens": 569572631.0, "step": 4680 }, { "epoch": 0.006266365881104374, "grad_norm": 7.65625, "learning_rate": 3.1324996000213324e-07, "loss": 0.9693017959594726, "num_tokens": 572086825.0, "step": 4700 }, { "epoch": 0.006293031267832478, "grad_norm": 7.5625, "learning_rate": 3.145832222281479e-07, "loss": 0.9569571495056153, "num_tokens": 574546631.0, "step": 4720 }, { "epoch": 0.006319696654560581, "grad_norm": 7.6875, "learning_rate": 3.159164844541625e-07, "loss": 0.9653665542602539, "num_tokens": 576975309.0, "step": 4740 }, { "epoch": 0.006346362041288685, "grad_norm": 7.4375, "learning_rate": 3.172497466801771e-07, "loss": 0.9836759567260742, "num_tokens": 579368238.0, "step": 4760 }, { "epoch": 0.0063730274280167885, "grad_norm": 8.375, "learning_rate": 3.185830089061917e-07, "loss": 0.970465087890625, "num_tokens": 581657888.0, "step": 4780 }, { "epoch": 0.0063996928147448924, "grad_norm": 7.84375, "learning_rate": 3.199162711322063e-07, "loss": 0.9438470840454102, "num_tokens": 584247907.0, "step": 4800 }, { "epoch": 0.006426358201472996, "grad_norm": 6.75, "learning_rate": 3.212495333582209e-07, "loss": 0.9684782028198242, "num_tokens": 586510474.0, "step": 4820 }, { "epoch": 0.006453023588201099, "grad_norm": 6.9375, "learning_rate": 3.225827955842355e-07, "loss": 0.9684048652648926, "num_tokens": 589044630.0, "step": 4840 }, { "epoch": 0.006479688974929203, "grad_norm": 8.1875, "learning_rate": 3.2391605781025017e-07, "loss": 0.973388671875, "num_tokens": 591517954.0, "step": 4860 }, { "epoch": 0.006506354361657307, "grad_norm": 6.875, "learning_rate": 3.2524932003626476e-07, "loss": 0.962765121459961, "num_tokens": 593918813.0, "step": 4880 }, { "epoch": 0.006533019748385411, "grad_norm": 7.96875, "learning_rate": 3.2658258226227936e-07, "loss": 0.9573356628417968, "num_tokens": 596494256.0, "step": 4900 }, { "epoch": 0.006559685135113514, "grad_norm": 7.5625, "learning_rate": 3.27915844488294e-07, "loss": 0.9629076004028321, "num_tokens": 598853285.0, "step": 4920 }, { "epoch": 0.006586350521841618, "grad_norm": 7.9375, "learning_rate": 3.292491067143086e-07, "loss": 0.9736644744873046, "num_tokens": 601312077.0, "step": 4940 }, { "epoch": 0.006613015908569722, "grad_norm": 6.46875, "learning_rate": 3.305823689403232e-07, "loss": 0.9529596328735351, "num_tokens": 603651479.0, "step": 4960 }, { "epoch": 0.006639681295297826, "grad_norm": 6.875, "learning_rate": 3.3191563116633785e-07, "loss": 0.9575572967529297, "num_tokens": 605875775.0, "step": 4980 }, { "epoch": 0.00666634668202593, "grad_norm": 7.0, "learning_rate": 3.3324889339235244e-07, "loss": 0.98643798828125, "num_tokens": 608518456.0, "step": 5000 }, { "epoch": 0.006693012068754033, "grad_norm": 8.0625, "learning_rate": 3.3458215561836704e-07, "loss": 0.9182318687438965, "num_tokens": 610677851.0, "step": 5020 }, { "epoch": 0.006719677455482137, "grad_norm": 8.0625, "learning_rate": 3.3591541784438163e-07, "loss": 0.9435154914855957, "num_tokens": 612992317.0, "step": 5040 }, { "epoch": 0.006746342842210241, "grad_norm": 9.0, "learning_rate": 3.372486800703963e-07, "loss": 0.9611308097839355, "num_tokens": 615519964.0, "step": 5060 }, { "epoch": 0.0067730082289383445, "grad_norm": 7.46875, "learning_rate": 3.385819422964109e-07, "loss": 0.9565324783325195, "num_tokens": 617962929.0, "step": 5080 }, { "epoch": 0.006799673615666448, "grad_norm": 7.40625, "learning_rate": 3.3991520452242547e-07, "loss": 0.979229736328125, "num_tokens": 620464354.0, "step": 5100 }, { "epoch": 0.006826339002394551, "grad_norm": 7.09375, "learning_rate": 3.412484667484401e-07, "loss": 0.9430905342102051, "num_tokens": 622781370.0, "step": 5120 }, { "epoch": 0.006853004389122655, "grad_norm": 7.1875, "learning_rate": 3.425817289744547e-07, "loss": 0.9431401252746582, "num_tokens": 625154683.0, "step": 5140 }, { "epoch": 0.006879669775850759, "grad_norm": 6.46875, "learning_rate": 3.439149912004693e-07, "loss": 0.9681482315063477, "num_tokens": 627644007.0, "step": 5160 }, { "epoch": 0.006906335162578863, "grad_norm": 7.40625, "learning_rate": 3.4524825342648396e-07, "loss": 0.9422245025634766, "num_tokens": 630034311.0, "step": 5180 }, { "epoch": 0.006933000549306967, "grad_norm": 7.375, "learning_rate": 3.4658151565249856e-07, "loss": 0.9606742858886719, "num_tokens": 632381435.0, "step": 5200 }, { "epoch": 0.00695966593603507, "grad_norm": 6.5, "learning_rate": 3.4791477787851315e-07, "loss": 0.9747447967529297, "num_tokens": 634785004.0, "step": 5220 }, { "epoch": 0.006986331322763174, "grad_norm": 6.9375, "learning_rate": 3.4924804010452775e-07, "loss": 0.9463565826416016, "num_tokens": 637312902.0, "step": 5240 }, { "epoch": 0.007012996709491278, "grad_norm": 7.75, "learning_rate": 3.505813023305424e-07, "loss": 0.9352779388427734, "num_tokens": 639710978.0, "step": 5260 }, { "epoch": 0.007039662096219382, "grad_norm": 7.625, "learning_rate": 3.51914564556557e-07, "loss": 0.972838306427002, "num_tokens": 641885325.0, "step": 5280 }, { "epoch": 0.007066327482947485, "grad_norm": 6.5625, "learning_rate": 3.532478267825716e-07, "loss": 0.9225208282470703, "num_tokens": 644157153.0, "step": 5300 }, { "epoch": 0.007092992869675589, "grad_norm": 8.0625, "learning_rate": 3.5458108900858624e-07, "loss": 0.9581082344055176, "num_tokens": 646451514.0, "step": 5320 }, { "epoch": 0.007119658256403693, "grad_norm": 7.0625, "learning_rate": 3.5591435123460083e-07, "loss": 0.9502447128295899, "num_tokens": 648915137.0, "step": 5340 }, { "epoch": 0.0071463236431317965, "grad_norm": 6.90625, "learning_rate": 3.5724761346061543e-07, "loss": 0.934903907775879, "num_tokens": 651283330.0, "step": 5360 }, { "epoch": 0.0071729890298599, "grad_norm": 6.96875, "learning_rate": 3.585808756866301e-07, "loss": 0.9410161972045898, "num_tokens": 653691490.0, "step": 5380 }, { "epoch": 0.0071996544165880035, "grad_norm": 7.09375, "learning_rate": 3.5991413791264467e-07, "loss": 0.9669290542602539, "num_tokens": 656306668.0, "step": 5400 }, { "epoch": 0.007226319803316107, "grad_norm": 6.34375, "learning_rate": 3.6124740013865927e-07, "loss": 0.9123322486877441, "num_tokens": 658805795.0, "step": 5420 }, { "epoch": 0.007252985190044211, "grad_norm": 7.1875, "learning_rate": 3.6258066236467386e-07, "loss": 0.9398767471313476, "num_tokens": 661533915.0, "step": 5440 }, { "epoch": 0.007279650576772315, "grad_norm": 7.0625, "learning_rate": 3.639139245906885e-07, "loss": 0.9659612655639649, "num_tokens": 664238899.0, "step": 5460 }, { "epoch": 0.007306315963500419, "grad_norm": 6.53125, "learning_rate": 3.652471868167031e-07, "loss": 0.9467973709106445, "num_tokens": 666530426.0, "step": 5480 }, { "epoch": 0.007332981350228522, "grad_norm": 7.5625, "learning_rate": 3.665804490427177e-07, "loss": 0.9426327705383301, "num_tokens": 669047778.0, "step": 5500 }, { "epoch": 0.007359646736956626, "grad_norm": 6.5, "learning_rate": 3.6791371126873235e-07, "loss": 0.9321221351623535, "num_tokens": 671564528.0, "step": 5520 }, { "epoch": 0.00738631212368473, "grad_norm": 6.4375, "learning_rate": 3.6924697349474695e-07, "loss": 0.9241449356079101, "num_tokens": 673906783.0, "step": 5540 }, { "epoch": 0.007412977510412834, "grad_norm": 7.0625, "learning_rate": 3.7058023572076154e-07, "loss": 0.9423864364624024, "num_tokens": 676329867.0, "step": 5560 }, { "epoch": 0.007439642897140937, "grad_norm": 6.0625, "learning_rate": 3.719134979467762e-07, "loss": 0.917717170715332, "num_tokens": 678899815.0, "step": 5580 }, { "epoch": 0.007466308283869041, "grad_norm": 7.5, "learning_rate": 3.732467601727908e-07, "loss": 0.9235759735107422, "num_tokens": 681269353.0, "step": 5600 }, { "epoch": 0.007492973670597145, "grad_norm": 6.71875, "learning_rate": 3.745800223988055e-07, "loss": 0.9349365234375, "num_tokens": 683468695.0, "step": 5620 }, { "epoch": 0.0075196390573252486, "grad_norm": 6.90625, "learning_rate": 3.759132846248201e-07, "loss": 0.9422449111938477, "num_tokens": 685781279.0, "step": 5640 }, { "epoch": 0.0075463044440533525, "grad_norm": 7.25, "learning_rate": 3.772465468508347e-07, "loss": 0.9308782577514648, "num_tokens": 688189423.0, "step": 5660 }, { "epoch": 0.0075729698307814555, "grad_norm": 6.65625, "learning_rate": 3.785798090768493e-07, "loss": 0.931591796875, "num_tokens": 690480238.0, "step": 5680 }, { "epoch": 0.007599635217509559, "grad_norm": 6.375, "learning_rate": 3.799130713028639e-07, "loss": 0.9325260162353516, "num_tokens": 693053902.0, "step": 5700 }, { "epoch": 0.007626300604237663, "grad_norm": 6.875, "learning_rate": 3.812463335288785e-07, "loss": 0.9505001068115234, "num_tokens": 695768288.0, "step": 5720 }, { "epoch": 0.007652965990965767, "grad_norm": 6.8125, "learning_rate": 3.825795957548931e-07, "loss": 0.9563799858093261, "num_tokens": 698113576.0, "step": 5740 }, { "epoch": 0.007679631377693871, "grad_norm": 6.09375, "learning_rate": 3.8391285798090776e-07, "loss": 0.9023025512695313, "num_tokens": 700380962.0, "step": 5760 }, { "epoch": 0.007706296764421974, "grad_norm": 7.84375, "learning_rate": 3.8524612020692236e-07, "loss": 0.9318767547607422, "num_tokens": 702882737.0, "step": 5780 }, { "epoch": 0.007732962151150078, "grad_norm": 7.78125, "learning_rate": 3.8657938243293696e-07, "loss": 0.9333745956420898, "num_tokens": 705342210.0, "step": 5800 }, { "epoch": 0.007759627537878182, "grad_norm": 6.21875, "learning_rate": 3.879126446589516e-07, "loss": 0.9448786735534668, "num_tokens": 707812554.0, "step": 5820 }, { "epoch": 0.007786292924606286, "grad_norm": 6.46875, "learning_rate": 3.892459068849662e-07, "loss": 0.9095854759216309, "num_tokens": 710168439.0, "step": 5840 }, { "epoch": 0.007812958311334389, "grad_norm": 6.46875, "learning_rate": 3.905791691109808e-07, "loss": 0.8990493774414062, "num_tokens": 712577026.0, "step": 5860 }, { "epoch": 0.007839623698062494, "grad_norm": 6.75, "learning_rate": 3.919124313369954e-07, "loss": 0.9313816070556641, "num_tokens": 715397235.0, "step": 5880 }, { "epoch": 0.007866289084790597, "grad_norm": 6.21875, "learning_rate": 3.9324569356301004e-07, "loss": 0.8708738327026367, "num_tokens": 717709894.0, "step": 5900 }, { "epoch": 0.0078929544715187, "grad_norm": 5.9375, "learning_rate": 3.9457895578902463e-07, "loss": 0.9510887145996094, "num_tokens": 720010035.0, "step": 5920 }, { "epoch": 0.007919619858246804, "grad_norm": 6.625, "learning_rate": 3.9591221801503923e-07, "loss": 0.9338432312011719, "num_tokens": 722519017.0, "step": 5940 }, { "epoch": 0.007946285244974908, "grad_norm": 6.1875, "learning_rate": 3.972454802410539e-07, "loss": 0.905040168762207, "num_tokens": 724971736.0, "step": 5960 }, { "epoch": 0.007972950631703012, "grad_norm": 6.09375, "learning_rate": 3.985787424670685e-07, "loss": 0.9308603286743165, "num_tokens": 727602185.0, "step": 5980 }, { "epoch": 0.007999616018431115, "grad_norm": 5.5, "learning_rate": 3.9991200469308307e-07, "loss": 0.9128826141357422, "num_tokens": 730262271.0, "step": 6000 }, { "epoch": 0.008026281405159218, "grad_norm": 6.4375, "learning_rate": 4.012452669190977e-07, "loss": 0.9449222564697266, "num_tokens": 732775129.0, "step": 6020 }, { "epoch": 0.008052946791887323, "grad_norm": 6.5, "learning_rate": 4.025785291451123e-07, "loss": 0.9248483657836915, "num_tokens": 735087986.0, "step": 6040 }, { "epoch": 0.008079612178615426, "grad_norm": 6.4375, "learning_rate": 4.039117913711269e-07, "loss": 0.8974367141723633, "num_tokens": 737370658.0, "step": 6060 }, { "epoch": 0.008106277565343531, "grad_norm": 6.46875, "learning_rate": 4.052450535971415e-07, "loss": 0.9357335090637207, "num_tokens": 739845534.0, "step": 6080 }, { "epoch": 0.008132942952071634, "grad_norm": 5.90625, "learning_rate": 4.0657831582315615e-07, "loss": 0.9022478103637696, "num_tokens": 742394211.0, "step": 6100 }, { "epoch": 0.008159608338799737, "grad_norm": 5.90625, "learning_rate": 4.0791157804917075e-07, "loss": 0.9189371109008789, "num_tokens": 744884961.0, "step": 6120 }, { "epoch": 0.008186273725527842, "grad_norm": 5.8125, "learning_rate": 4.0924484027518535e-07, "loss": 0.880592155456543, "num_tokens": 747495059.0, "step": 6140 }, { "epoch": 0.008212939112255945, "grad_norm": 5.90625, "learning_rate": 4.105781025012e-07, "loss": 0.907773780822754, "num_tokens": 749832166.0, "step": 6160 }, { "epoch": 0.00823960449898405, "grad_norm": 5.875, "learning_rate": 4.119113647272146e-07, "loss": 0.9014801979064941, "num_tokens": 752129776.0, "step": 6180 }, { "epoch": 0.008266269885712153, "grad_norm": 5.90625, "learning_rate": 4.132446269532292e-07, "loss": 0.915550422668457, "num_tokens": 754561672.0, "step": 6200 }, { "epoch": 0.008292935272440256, "grad_norm": 6.25, "learning_rate": 4.1457788917924383e-07, "loss": 0.8943172454833984, "num_tokens": 757045674.0, "step": 6220 }, { "epoch": 0.00831960065916836, "grad_norm": 5.65625, "learning_rate": 4.1591115140525843e-07, "loss": 0.9294287681579589, "num_tokens": 759624784.0, "step": 6240 }, { "epoch": 0.008346266045896463, "grad_norm": 5.9375, "learning_rate": 4.17244413631273e-07, "loss": 0.9283315658569335, "num_tokens": 761917275.0, "step": 6260 }, { "epoch": 0.008372931432624567, "grad_norm": 6.0, "learning_rate": 4.185776758572876e-07, "loss": 0.9253885269165039, "num_tokens": 764209708.0, "step": 6280 }, { "epoch": 0.008399596819352671, "grad_norm": 5.84375, "learning_rate": 4.1991093808330227e-07, "loss": 0.8997428894042969, "num_tokens": 766583720.0, "step": 6300 }, { "epoch": 0.008426262206080774, "grad_norm": 6.8125, "learning_rate": 4.2124420030931687e-07, "loss": 0.9100629806518554, "num_tokens": 768986149.0, "step": 6320 }, { "epoch": 0.008452927592808879, "grad_norm": 5.96875, "learning_rate": 4.2257746253533146e-07, "loss": 0.8885984420776367, "num_tokens": 771296938.0, "step": 6340 }, { "epoch": 0.008479592979536982, "grad_norm": 5.8125, "learning_rate": 4.239107247613461e-07, "loss": 0.8913467407226563, "num_tokens": 773693479.0, "step": 6360 }, { "epoch": 0.008506258366265085, "grad_norm": 5.5, "learning_rate": 4.252439869873607e-07, "loss": 0.8897605895996094, "num_tokens": 776038672.0, "step": 6380 }, { "epoch": 0.00853292375299319, "grad_norm": 6.375, "learning_rate": 4.265772492133753e-07, "loss": 0.9023760795593262, "num_tokens": 778546010.0, "step": 6400 }, { "epoch": 0.008559589139721293, "grad_norm": 5.0, "learning_rate": 4.2791051143938995e-07, "loss": 0.8991652488708496, "num_tokens": 780939762.0, "step": 6420 }, { "epoch": 0.008586254526449398, "grad_norm": 6.03125, "learning_rate": 4.2924377366540455e-07, "loss": 0.8991971969604492, "num_tokens": 783673559.0, "step": 6440 }, { "epoch": 0.0086129199131775, "grad_norm": 6.15625, "learning_rate": 4.3057703589141914e-07, "loss": 0.9161497116088867, "num_tokens": 786278367.0, "step": 6460 }, { "epoch": 0.008639585299905604, "grad_norm": 6.125, "learning_rate": 4.3191029811743374e-07, "loss": 0.8725687026977539, "num_tokens": 788690984.0, "step": 6480 }, { "epoch": 0.008666250686633709, "grad_norm": 6.59375, "learning_rate": 4.332435603434484e-07, "loss": 0.8871957778930664, "num_tokens": 791290267.0, "step": 6500 }, { "epoch": 0.008692916073361812, "grad_norm": 5.625, "learning_rate": 4.34576822569463e-07, "loss": 0.878775691986084, "num_tokens": 793930432.0, "step": 6520 }, { "epoch": 0.008719581460089916, "grad_norm": 6.4375, "learning_rate": 4.359100847954776e-07, "loss": 0.902864933013916, "num_tokens": 796551162.0, "step": 6540 }, { "epoch": 0.00874624684681802, "grad_norm": 5.3125, "learning_rate": 4.372433470214922e-07, "loss": 0.8566383361816406, "num_tokens": 798810517.0, "step": 6560 }, { "epoch": 0.008772912233546122, "grad_norm": 5.71875, "learning_rate": 4.385766092475068e-07, "loss": 0.8943666458129883, "num_tokens": 801432377.0, "step": 6580 }, { "epoch": 0.008799577620274227, "grad_norm": 5.4375, "learning_rate": 4.399098714735214e-07, "loss": 0.8897884368896485, "num_tokens": 803955783.0, "step": 6600 }, { "epoch": 0.00882624300700233, "grad_norm": 5.3125, "learning_rate": 4.4124313369953606e-07, "loss": 0.9015933990478515, "num_tokens": 806526572.0, "step": 6620 }, { "epoch": 0.008852908393730435, "grad_norm": 5.375, "learning_rate": 4.4257639592555066e-07, "loss": 0.9153587341308593, "num_tokens": 808951848.0, "step": 6640 }, { "epoch": 0.008879573780458538, "grad_norm": 5.0, "learning_rate": 4.4390965815156526e-07, "loss": 0.8879348754882812, "num_tokens": 811416392.0, "step": 6660 }, { "epoch": 0.008906239167186641, "grad_norm": 5.96875, "learning_rate": 4.452429203775799e-07, "loss": 0.8857749938964844, "num_tokens": 813775755.0, "step": 6680 }, { "epoch": 0.008932904553914746, "grad_norm": 5.03125, "learning_rate": 4.465761826035945e-07, "loss": 0.8681654930114746, "num_tokens": 816235389.0, "step": 6700 }, { "epoch": 0.008959569940642849, "grad_norm": 5.5, "learning_rate": 4.479094448296091e-07, "loss": 0.8852704048156739, "num_tokens": 818619728.0, "step": 6720 }, { "epoch": 0.008986235327370954, "grad_norm": 5.28125, "learning_rate": 4.492427070556237e-07, "loss": 0.8621146202087402, "num_tokens": 821086489.0, "step": 6740 }, { "epoch": 0.009012900714099057, "grad_norm": 5.5625, "learning_rate": 4.5057596928163834e-07, "loss": 0.9096282958984375, "num_tokens": 823375370.0, "step": 6760 }, { "epoch": 0.00903956610082716, "grad_norm": 5.40625, "learning_rate": 4.5190923150765294e-07, "loss": 0.8932401657104492, "num_tokens": 825793077.0, "step": 6780 }, { "epoch": 0.009066231487555265, "grad_norm": 5.46875, "learning_rate": 4.5324249373366753e-07, "loss": 0.8844699859619141, "num_tokens": 828170211.0, "step": 6800 }, { "epoch": 0.009092896874283368, "grad_norm": 4.71875, "learning_rate": 4.545757559596822e-07, "loss": 0.8772954940795898, "num_tokens": 830596283.0, "step": 6820 }, { "epoch": 0.009119562261011472, "grad_norm": 5.28125, "learning_rate": 4.559090181856968e-07, "loss": 0.8903511047363282, "num_tokens": 832902512.0, "step": 6840 }, { "epoch": 0.009146227647739575, "grad_norm": 4.6875, "learning_rate": 4.5724228041171137e-07, "loss": 0.8846453666687012, "num_tokens": 835364193.0, "step": 6860 }, { "epoch": 0.009172893034467678, "grad_norm": 5.21875, "learning_rate": 4.58575542637726e-07, "loss": 0.8742074012756348, "num_tokens": 837715842.0, "step": 6880 }, { "epoch": 0.009199558421195783, "grad_norm": 5.25, "learning_rate": 4.599088048637406e-07, "loss": 0.877764892578125, "num_tokens": 840070866.0, "step": 6900 }, { "epoch": 0.009226223807923886, "grad_norm": 5.40625, "learning_rate": 4.612420670897552e-07, "loss": 0.880133056640625, "num_tokens": 842641949.0, "step": 6920 }, { "epoch": 0.00925288919465199, "grad_norm": 4.8125, "learning_rate": 4.625753293157698e-07, "loss": 0.8791229248046875, "num_tokens": 845150875.0, "step": 6940 }, { "epoch": 0.009279554581380094, "grad_norm": 4.625, "learning_rate": 4.6390859154178446e-07, "loss": 0.8592386245727539, "num_tokens": 847701536.0, "step": 6960 }, { "epoch": 0.009306219968108197, "grad_norm": 5.3125, "learning_rate": 4.652418537677991e-07, "loss": 0.8811845779418945, "num_tokens": 850139344.0, "step": 6980 }, { "epoch": 0.009332885354836302, "grad_norm": 5.4375, "learning_rate": 4.6657511599381375e-07, "loss": 0.8783363342285156, "num_tokens": 852710168.0, "step": 7000 }, { "epoch": 0.009359550741564405, "grad_norm": 5.53125, "learning_rate": 4.6790837821982835e-07, "loss": 0.9298919677734375, "num_tokens": 855335625.0, "step": 7020 }, { "epoch": 0.009386216128292508, "grad_norm": 4.90625, "learning_rate": 4.6924164044584294e-07, "loss": 0.8767964363098144, "num_tokens": 857981295.0, "step": 7040 }, { "epoch": 0.009412881515020613, "grad_norm": 5.40625, "learning_rate": 4.705749026718576e-07, "loss": 0.8729633331298828, "num_tokens": 860197684.0, "step": 7060 }, { "epoch": 0.009439546901748716, "grad_norm": 5.125, "learning_rate": 4.719081648978722e-07, "loss": 0.8928216934204102, "num_tokens": 862796933.0, "step": 7080 }, { "epoch": 0.00946621228847682, "grad_norm": 5.21875, "learning_rate": 4.732414271238868e-07, "loss": 0.8730875015258789, "num_tokens": 865269329.0, "step": 7100 }, { "epoch": 0.009492877675204923, "grad_norm": 4.78125, "learning_rate": 4.745746893499014e-07, "loss": 0.8748949050903321, "num_tokens": 867730352.0, "step": 7120 }, { "epoch": 0.009519543061933027, "grad_norm": 4.75, "learning_rate": 4.7590795157591603e-07, "loss": 0.8798394203186035, "num_tokens": 870227208.0, "step": 7140 }, { "epoch": 0.009546208448661131, "grad_norm": 4.8125, "learning_rate": 4.772412138019306e-07, "loss": 0.862841796875, "num_tokens": 872579052.0, "step": 7160 }, { "epoch": 0.009572873835389234, "grad_norm": 5.0, "learning_rate": 4.785744760279452e-07, "loss": 0.8604619979858399, "num_tokens": 875150718.0, "step": 7180 }, { "epoch": 0.009599539222117339, "grad_norm": 4.78125, "learning_rate": 4.799077382539598e-07, "loss": 0.8541357040405273, "num_tokens": 877493373.0, "step": 7200 }, { "epoch": 0.009626204608845442, "grad_norm": 4.46875, "learning_rate": 4.812410004799744e-07, "loss": 0.8791914939880371, "num_tokens": 879812590.0, "step": 7220 }, { "epoch": 0.009652869995573545, "grad_norm": 4.53125, "learning_rate": 4.825742627059891e-07, "loss": 0.8721774101257325, "num_tokens": 882284034.0, "step": 7240 }, { "epoch": 0.00967953538230165, "grad_norm": 5.375, "learning_rate": 4.839075249320037e-07, "loss": 0.8760224342346191, "num_tokens": 884717961.0, "step": 7260 }, { "epoch": 0.009706200769029753, "grad_norm": 4.9375, "learning_rate": 4.852407871580183e-07, "loss": 0.8682772636413574, "num_tokens": 887024332.0, "step": 7280 }, { "epoch": 0.009732866155757858, "grad_norm": 4.71875, "learning_rate": 4.865740493840329e-07, "loss": 0.8586809158325195, "num_tokens": 889540883.0, "step": 7300 }, { "epoch": 0.00975953154248596, "grad_norm": 4.84375, "learning_rate": 4.879073116100475e-07, "loss": 0.8505172729492188, "num_tokens": 891728351.0, "step": 7320 }, { "epoch": 0.009786196929214064, "grad_norm": 4.71875, "learning_rate": 4.892405738360621e-07, "loss": 0.8502351760864257, "num_tokens": 894084920.0, "step": 7340 }, { "epoch": 0.009812862315942169, "grad_norm": 5.59375, "learning_rate": 4.905738360620768e-07, "loss": 0.8689682960510254, "num_tokens": 896555090.0, "step": 7360 }, { "epoch": 0.009839527702670272, "grad_norm": 5.0625, "learning_rate": 4.919070982880914e-07, "loss": 0.8532674789428711, "num_tokens": 899191499.0, "step": 7380 }, { "epoch": 0.009866193089398376, "grad_norm": 4.25, "learning_rate": 4.93240360514106e-07, "loss": 0.8683614730834961, "num_tokens": 901737284.0, "step": 7400 }, { "epoch": 0.00989285847612648, "grad_norm": 4.59375, "learning_rate": 4.945736227401206e-07, "loss": 0.8426225662231446, "num_tokens": 904074851.0, "step": 7420 }, { "epoch": 0.009919523862854582, "grad_norm": 4.6875, "learning_rate": 4.959068849661352e-07, "loss": 0.881045150756836, "num_tokens": 906498249.0, "step": 7440 }, { "epoch": 0.009946189249582687, "grad_norm": 5.4375, "learning_rate": 4.972401471921498e-07, "loss": 0.8611955642700195, "num_tokens": 908783386.0, "step": 7460 }, { "epoch": 0.00997285463631079, "grad_norm": 5.15625, "learning_rate": 4.985734094181644e-07, "loss": 0.8735305786132812, "num_tokens": 911171029.0, "step": 7480 }, { "epoch": 0.009999520023038893, "grad_norm": 5.0, "learning_rate": 4.999066716441791e-07, "loss": 0.8694980621337891, "num_tokens": 913423347.0, "step": 7500 }, { "epoch": 0.010026185409766998, "grad_norm": 4.6875, "learning_rate": 5.012399338701937e-07, "loss": 0.844780158996582, "num_tokens": 915828150.0, "step": 7520 }, { "epoch": 0.010052850796495101, "grad_norm": 4.625, "learning_rate": 5.025731960962083e-07, "loss": 0.8640545845031739, "num_tokens": 918536560.0, "step": 7540 }, { "epoch": 0.010079516183223206, "grad_norm": 5.0, "learning_rate": 5.039064583222229e-07, "loss": 0.8455503463745118, "num_tokens": 921054332.0, "step": 7560 }, { "epoch": 0.010106181569951309, "grad_norm": 5.21875, "learning_rate": 5.052397205482374e-07, "loss": 0.894550895690918, "num_tokens": 923516038.0, "step": 7580 }, { "epoch": 0.010132846956679412, "grad_norm": 4.9375, "learning_rate": 5.06572982774252e-07, "loss": 0.8788552284240723, "num_tokens": 926012860.0, "step": 7600 }, { "epoch": 0.010159512343407517, "grad_norm": 4.96875, "learning_rate": 5.079062450002667e-07, "loss": 0.858818244934082, "num_tokens": 928431517.0, "step": 7620 }, { "epoch": 0.01018617773013562, "grad_norm": 4.46875, "learning_rate": 5.092395072262813e-07, "loss": 0.8553915023803711, "num_tokens": 931028056.0, "step": 7640 }, { "epoch": 0.010212843116863725, "grad_norm": 3.03125, "learning_rate": 5.105727694522959e-07, "loss": 0.8548051834106445, "num_tokens": 933523996.0, "step": 7660 }, { "epoch": 0.010239508503591828, "grad_norm": 5.15625, "learning_rate": 5.119060316783105e-07, "loss": 0.8647537231445312, "num_tokens": 936121878.0, "step": 7680 }, { "epoch": 0.01026617389031993, "grad_norm": 4.5625, "learning_rate": 5.132392939043251e-07, "loss": 0.8373430252075196, "num_tokens": 938593469.0, "step": 7700 }, { "epoch": 0.010292839277048035, "grad_norm": 4.15625, "learning_rate": 5.145725561303397e-07, "loss": 0.8592494010925293, "num_tokens": 940818432.0, "step": 7720 }, { "epoch": 0.010319504663776138, "grad_norm": 4.6875, "learning_rate": 5.159058183563543e-07, "loss": 0.8755224227905274, "num_tokens": 943245202.0, "step": 7740 }, { "epoch": 0.010346170050504243, "grad_norm": 5.03125, "learning_rate": 5.17239080582369e-07, "loss": 0.8652084350585938, "num_tokens": 945775890.0, "step": 7760 }, { "epoch": 0.010372835437232346, "grad_norm": 4.65625, "learning_rate": 5.185723428083836e-07, "loss": 0.8517812728881836, "num_tokens": 948380126.0, "step": 7780 }, { "epoch": 0.01039950082396045, "grad_norm": 4.125, "learning_rate": 5.199056050343982e-07, "loss": 0.839174461364746, "num_tokens": 950706642.0, "step": 7800 }, { "epoch": 0.010426166210688554, "grad_norm": 4.59375, "learning_rate": 5.212388672604128e-07, "loss": 0.8637144088745117, "num_tokens": 953041261.0, "step": 7820 }, { "epoch": 0.010452831597416657, "grad_norm": 4.40625, "learning_rate": 5.225721294864274e-07, "loss": 0.8314404487609863, "num_tokens": 955489222.0, "step": 7840 }, { "epoch": 0.010479496984144762, "grad_norm": 5.46875, "learning_rate": 5.23905391712442e-07, "loss": 0.8349652290344238, "num_tokens": 957925644.0, "step": 7860 }, { "epoch": 0.010506162370872865, "grad_norm": 5.0625, "learning_rate": 5.252386539384566e-07, "loss": 0.8764775276184082, "num_tokens": 960546038.0, "step": 7880 }, { "epoch": 0.010532827757600968, "grad_norm": 4.09375, "learning_rate": 5.265719161644713e-07, "loss": 0.8482358932495118, "num_tokens": 963101675.0, "step": 7900 }, { "epoch": 0.010559493144329073, "grad_norm": 4.625, "learning_rate": 5.279051783904859e-07, "loss": 0.8471168518066406, "num_tokens": 965546038.0, "step": 7920 }, { "epoch": 0.010586158531057176, "grad_norm": 4.71875, "learning_rate": 5.292384406165005e-07, "loss": 0.8505868911743164, "num_tokens": 967892942.0, "step": 7940 }, { "epoch": 0.01061282391778528, "grad_norm": 4.09375, "learning_rate": 5.305717028425151e-07, "loss": 0.8334033012390136, "num_tokens": 970415324.0, "step": 7960 }, { "epoch": 0.010639489304513383, "grad_norm": 4.4375, "learning_rate": 5.319049650685297e-07, "loss": 0.8320198059082031, "num_tokens": 972693342.0, "step": 7980 }, { "epoch": 0.010666154691241487, "grad_norm": 4.5, "learning_rate": 5.332382272945443e-07, "loss": 0.8387496948242188, "num_tokens": 975193874.0, "step": 8000 }, { "epoch": 0.010692820077969591, "grad_norm": 4.96875, "learning_rate": 5.34571489520559e-07, "loss": 0.8463296890258789, "num_tokens": 977696272.0, "step": 8020 }, { "epoch": 0.010719485464697694, "grad_norm": 4.4375, "learning_rate": 5.359047517465736e-07, "loss": 0.8195672988891601, "num_tokens": 980245182.0, "step": 8040 }, { "epoch": 0.010746150851425797, "grad_norm": 4.09375, "learning_rate": 5.372380139725882e-07, "loss": 0.8649040222167969, "num_tokens": 982909105.0, "step": 8060 }, { "epoch": 0.010772816238153902, "grad_norm": 4.71875, "learning_rate": 5.385712761986028e-07, "loss": 0.8529966354370118, "num_tokens": 985479301.0, "step": 8080 }, { "epoch": 0.010799481624882005, "grad_norm": 4.78125, "learning_rate": 5.399045384246174e-07, "loss": 0.8432211875915527, "num_tokens": 987984249.0, "step": 8100 }, { "epoch": 0.01082614701161011, "grad_norm": 4.75, "learning_rate": 5.41237800650632e-07, "loss": 0.8413124084472656, "num_tokens": 990598759.0, "step": 8120 }, { "epoch": 0.010852812398338213, "grad_norm": 4.46875, "learning_rate": 5.425710628766466e-07, "loss": 0.8442449569702148, "num_tokens": 993124093.0, "step": 8140 }, { "epoch": 0.010879477785066316, "grad_norm": 4.65625, "learning_rate": 5.439043251026613e-07, "loss": 0.855975341796875, "num_tokens": 995521356.0, "step": 8160 }, { "epoch": 0.01090614317179442, "grad_norm": 4.3125, "learning_rate": 5.452375873286758e-07, "loss": 0.8206426620483398, "num_tokens": 998074151.0, "step": 8180 }, { "epoch": 0.010932808558522524, "grad_norm": 4.71875, "learning_rate": 5.465708495546904e-07, "loss": 0.8527069091796875, "num_tokens": 1000617556.0, "step": 8200 }, { "epoch": 0.010959473945250629, "grad_norm": 3.765625, "learning_rate": 5.47904111780705e-07, "loss": 0.8218093872070312, "num_tokens": 1002977465.0, "step": 8220 }, { "epoch": 0.010986139331978732, "grad_norm": 3.765625, "learning_rate": 5.492373740067196e-07, "loss": 0.8376995086669922, "num_tokens": 1005487379.0, "step": 8240 }, { "epoch": 0.011012804718706835, "grad_norm": 4.34375, "learning_rate": 5.505706362327342e-07, "loss": 0.8302839279174805, "num_tokens": 1007944804.0, "step": 8260 }, { "epoch": 0.01103947010543494, "grad_norm": 4.21875, "learning_rate": 5.519038984587489e-07, "loss": 0.8350641250610351, "num_tokens": 1010303571.0, "step": 8280 }, { "epoch": 0.011066135492163042, "grad_norm": 4.0625, "learning_rate": 5.532371606847635e-07, "loss": 0.824098014831543, "num_tokens": 1012633386.0, "step": 8300 }, { "epoch": 0.011092800878891147, "grad_norm": 4.65625, "learning_rate": 5.545704229107781e-07, "loss": 0.8551748275756836, "num_tokens": 1014954176.0, "step": 8320 }, { "epoch": 0.01111946626561925, "grad_norm": 4.09375, "learning_rate": 5.559036851367928e-07, "loss": 0.843202018737793, "num_tokens": 1017255488.0, "step": 8340 }, { "epoch": 0.011146131652347353, "grad_norm": 4.15625, "learning_rate": 5.572369473628074e-07, "loss": 0.8457742691040039, "num_tokens": 1019825435.0, "step": 8360 }, { "epoch": 0.011172797039075458, "grad_norm": 4.46875, "learning_rate": 5.58570209588822e-07, "loss": 0.8237207412719727, "num_tokens": 1022255971.0, "step": 8380 }, { "epoch": 0.011199462425803561, "grad_norm": 4.1875, "learning_rate": 5.599034718148366e-07, "loss": 0.8187740325927735, "num_tokens": 1024596599.0, "step": 8400 }, { "epoch": 0.011226127812531666, "grad_norm": 4.4375, "learning_rate": 5.612367340408512e-07, "loss": 0.8193572998046875, "num_tokens": 1026929444.0, "step": 8420 }, { "epoch": 0.011252793199259769, "grad_norm": 3.84375, "learning_rate": 5.625699962668658e-07, "loss": 0.8372261047363281, "num_tokens": 1029459057.0, "step": 8440 }, { "epoch": 0.011279458585987872, "grad_norm": 4.65625, "learning_rate": 5.639032584928805e-07, "loss": 0.8411383628845215, "num_tokens": 1031763811.0, "step": 8460 }, { "epoch": 0.011306123972715977, "grad_norm": 4.21875, "learning_rate": 5.652365207188951e-07, "loss": 0.8584859848022461, "num_tokens": 1034340641.0, "step": 8480 }, { "epoch": 0.01133278935944408, "grad_norm": 4.125, "learning_rate": 5.665697829449097e-07, "loss": 0.8306212425231934, "num_tokens": 1036709596.0, "step": 8500 }, { "epoch": 0.011359454746172185, "grad_norm": 3.953125, "learning_rate": 5.679030451709243e-07, "loss": 0.8168930053710938, "num_tokens": 1039162962.0, "step": 8520 }, { "epoch": 0.011386120132900288, "grad_norm": 3.84375, "learning_rate": 5.692363073969389e-07, "loss": 0.8296215057373046, "num_tokens": 1041686086.0, "step": 8540 }, { "epoch": 0.01141278551962839, "grad_norm": 4.03125, "learning_rate": 5.705695696229535e-07, "loss": 0.8371697425842285, "num_tokens": 1044143714.0, "step": 8560 }, { "epoch": 0.011439450906356495, "grad_norm": 4.21875, "learning_rate": 5.719028318489681e-07, "loss": 0.8332053184509277, "num_tokens": 1046433979.0, "step": 8580 }, { "epoch": 0.011466116293084598, "grad_norm": 4.8125, "learning_rate": 5.732360940749828e-07, "loss": 0.8365188598632812, "num_tokens": 1048928839.0, "step": 8600 }, { "epoch": 0.011492781679812703, "grad_norm": 4.3125, "learning_rate": 5.745693563009974e-07, "loss": 0.8285781860351562, "num_tokens": 1051392427.0, "step": 8620 }, { "epoch": 0.011519447066540806, "grad_norm": 4.0625, "learning_rate": 5.75902618527012e-07, "loss": 0.8377013206481934, "num_tokens": 1054009320.0, "step": 8640 }, { "epoch": 0.01154611245326891, "grad_norm": 3.90625, "learning_rate": 5.772358807530266e-07, "loss": 0.8636884689331055, "num_tokens": 1056479740.0, "step": 8660 }, { "epoch": 0.011572777839997014, "grad_norm": 4.0625, "learning_rate": 5.785691429790412e-07, "loss": 0.8345742225646973, "num_tokens": 1058870741.0, "step": 8680 }, { "epoch": 0.011599443226725117, "grad_norm": 4.1875, "learning_rate": 5.799024052050558e-07, "loss": 0.8473046302795411, "num_tokens": 1061478437.0, "step": 8700 }, { "epoch": 0.01162610861345322, "grad_norm": 3.921875, "learning_rate": 5.812356674310704e-07, "loss": 0.8374380111694336, "num_tokens": 1063915063.0, "step": 8720 }, { "epoch": 0.011652774000181325, "grad_norm": 4.46875, "learning_rate": 5.825689296570851e-07, "loss": 0.8333610534667969, "num_tokens": 1066515873.0, "step": 8740 }, { "epoch": 0.011679439386909428, "grad_norm": 4.96875, "learning_rate": 5.839021918830997e-07, "loss": 0.82457275390625, "num_tokens": 1069059733.0, "step": 8760 }, { "epoch": 0.011706104773637533, "grad_norm": 3.703125, "learning_rate": 5.852354541091142e-07, "loss": 0.8107315063476562, "num_tokens": 1071595780.0, "step": 8780 }, { "epoch": 0.011732770160365636, "grad_norm": 3.890625, "learning_rate": 5.865687163351288e-07, "loss": 0.8580812454223633, "num_tokens": 1073980696.0, "step": 8800 }, { "epoch": 0.011759435547093739, "grad_norm": 3.71875, "learning_rate": 5.879019785611434e-07, "loss": 0.8481468200683594, "num_tokens": 1076412392.0, "step": 8820 }, { "epoch": 0.011786100933821844, "grad_norm": 4.1875, "learning_rate": 5.89235240787158e-07, "loss": 0.8069692611694336, "num_tokens": 1078831483.0, "step": 8840 }, { "epoch": 0.011812766320549947, "grad_norm": 4.15625, "learning_rate": 5.905685030131727e-07, "loss": 0.8421249389648438, "num_tokens": 1081269507.0, "step": 8860 }, { "epoch": 0.011839431707278051, "grad_norm": 3.671875, "learning_rate": 5.919017652391873e-07, "loss": 0.829189395904541, "num_tokens": 1083649877.0, "step": 8880 }, { "epoch": 0.011866097094006154, "grad_norm": 5.1875, "learning_rate": 5.932350274652019e-07, "loss": 0.8503017425537109, "num_tokens": 1085969031.0, "step": 8900 }, { "epoch": 0.011892762480734257, "grad_norm": 3.671875, "learning_rate": 5.945682896912165e-07, "loss": 0.8196663856506348, "num_tokens": 1088439797.0, "step": 8920 }, { "epoch": 0.011919427867462362, "grad_norm": 3.90625, "learning_rate": 5.959015519172311e-07, "loss": 0.8273536682128906, "num_tokens": 1090986079.0, "step": 8940 }, { "epoch": 0.011946093254190465, "grad_norm": 3.625, "learning_rate": 5.972348141432457e-07, "loss": 0.8282415390014648, "num_tokens": 1093406278.0, "step": 8960 }, { "epoch": 0.01197275864091857, "grad_norm": 3.609375, "learning_rate": 5.985680763692603e-07, "loss": 0.8308365821838379, "num_tokens": 1095962783.0, "step": 8980 }, { "epoch": 0.011999424027646673, "grad_norm": 3.828125, "learning_rate": 5.99901338595275e-07, "loss": 0.8077051162719726, "num_tokens": 1098364832.0, "step": 9000 }, { "epoch": 0.012026089414374776, "grad_norm": 4.09375, "learning_rate": 6.012346008212896e-07, "loss": 0.8224486351013184, "num_tokens": 1100622994.0, "step": 9020 }, { "epoch": 0.01205275480110288, "grad_norm": 4.28125, "learning_rate": 6.025678630473042e-07, "loss": 0.811423397064209, "num_tokens": 1103237995.0, "step": 9040 }, { "epoch": 0.012079420187830984, "grad_norm": 3.78125, "learning_rate": 6.039011252733188e-07, "loss": 0.853298282623291, "num_tokens": 1105838440.0, "step": 9060 }, { "epoch": 0.012106085574559089, "grad_norm": 4.40625, "learning_rate": 6.052343874993334e-07, "loss": 0.8343170166015625, "num_tokens": 1108264887.0, "step": 9080 }, { "epoch": 0.012132750961287192, "grad_norm": 3.96875, "learning_rate": 6.06567649725348e-07, "loss": 0.8342461585998535, "num_tokens": 1110674769.0, "step": 9100 }, { "epoch": 0.012159416348015295, "grad_norm": 4.09375, "learning_rate": 6.079009119513627e-07, "loss": 0.8227170944213867, "num_tokens": 1113126296.0, "step": 9120 }, { "epoch": 0.0121860817347434, "grad_norm": 4.65625, "learning_rate": 6.092341741773773e-07, "loss": 0.8310941696166992, "num_tokens": 1115490729.0, "step": 9140 }, { "epoch": 0.012212747121471502, "grad_norm": 3.828125, "learning_rate": 6.105674364033919e-07, "loss": 0.8119984626770019, "num_tokens": 1118065947.0, "step": 9160 }, { "epoch": 0.012239412508199607, "grad_norm": 3.953125, "learning_rate": 6.119006986294065e-07, "loss": 0.8515683174133301, "num_tokens": 1120632259.0, "step": 9180 }, { "epoch": 0.01226607789492771, "grad_norm": 3.75, "learning_rate": 6.132339608554211e-07, "loss": 0.8081449508666992, "num_tokens": 1122831742.0, "step": 9200 }, { "epoch": 0.012292743281655813, "grad_norm": 4.375, "learning_rate": 6.145672230814357e-07, "loss": 0.8173588752746582, "num_tokens": 1125173494.0, "step": 9220 }, { "epoch": 0.012319408668383918, "grad_norm": 4.34375, "learning_rate": 6.159004853074503e-07, "loss": 0.8471095085144043, "num_tokens": 1127652643.0, "step": 9240 }, { "epoch": 0.012346074055112021, "grad_norm": 4.125, "learning_rate": 6.17233747533465e-07, "loss": 0.8245231628417968, "num_tokens": 1130149436.0, "step": 9260 }, { "epoch": 0.012372739441840124, "grad_norm": 3.609375, "learning_rate": 6.185670097594796e-07, "loss": 0.8112277030944824, "num_tokens": 1132672535.0, "step": 9280 }, { "epoch": 0.012399404828568229, "grad_norm": 4.5625, "learning_rate": 6.199002719854942e-07, "loss": 0.8399072647094726, "num_tokens": 1135046030.0, "step": 9300 }, { "epoch": 0.012426070215296332, "grad_norm": 4.15625, "learning_rate": 6.212335342115088e-07, "loss": 0.8231882095336914, "num_tokens": 1137558876.0, "step": 9320 }, { "epoch": 0.012452735602024437, "grad_norm": 4.125, "learning_rate": 6.225667964375233e-07, "loss": 0.8399515151977539, "num_tokens": 1139881120.0, "step": 9340 }, { "epoch": 0.01247940098875254, "grad_norm": 4.40625, "learning_rate": 6.239000586635379e-07, "loss": 0.80125732421875, "num_tokens": 1142145096.0, "step": 9360 }, { "epoch": 0.012506066375480643, "grad_norm": 4.09375, "learning_rate": 6.252333208895526e-07, "loss": 0.8212223052978516, "num_tokens": 1144550855.0, "step": 9380 }, { "epoch": 0.012532731762208748, "grad_norm": 4.15625, "learning_rate": 6.265665831155672e-07, "loss": 0.8243355751037598, "num_tokens": 1146987201.0, "step": 9400 }, { "epoch": 0.01255939714893685, "grad_norm": 4.59375, "learning_rate": 6.278998453415818e-07, "loss": 0.8306315422058106, "num_tokens": 1149366979.0, "step": 9420 }, { "epoch": 0.012586062535664955, "grad_norm": 4.09375, "learning_rate": 6.292331075675964e-07, "loss": 0.8116741180419922, "num_tokens": 1152002525.0, "step": 9440 }, { "epoch": 0.012612727922393058, "grad_norm": 3.859375, "learning_rate": 6.305663697936111e-07, "loss": 0.8248636245727539, "num_tokens": 1154451384.0, "step": 9460 }, { "epoch": 0.012639393309121161, "grad_norm": 4.34375, "learning_rate": 6.318996320196256e-07, "loss": 0.845060157775879, "num_tokens": 1157013225.0, "step": 9480 }, { "epoch": 0.012666058695849266, "grad_norm": 3.921875, "learning_rate": 6.332328942456403e-07, "loss": 0.8389139175415039, "num_tokens": 1159672265.0, "step": 9500 }, { "epoch": 0.01269272408257737, "grad_norm": 4.5, "learning_rate": 6.345661564716549e-07, "loss": 0.8375086784362793, "num_tokens": 1162050449.0, "step": 9520 }, { "epoch": 0.012719389469305474, "grad_norm": 4.15625, "learning_rate": 6.358994186976695e-07, "loss": 0.8484991073608399, "num_tokens": 1164615555.0, "step": 9540 }, { "epoch": 0.012746054856033577, "grad_norm": 3.53125, "learning_rate": 6.372326809236841e-07, "loss": 0.8104243278503418, "num_tokens": 1167120530.0, "step": 9560 }, { "epoch": 0.01277272024276168, "grad_norm": 4.46875, "learning_rate": 6.385659431496988e-07, "loss": 0.8079850196838378, "num_tokens": 1169679105.0, "step": 9580 }, { "epoch": 0.012799385629489785, "grad_norm": 3.390625, "learning_rate": 6.398992053757133e-07, "loss": 0.7943625926971436, "num_tokens": 1171980113.0, "step": 9600 }, { "epoch": 0.012826051016217888, "grad_norm": 4.4375, "learning_rate": 6.41232467601728e-07, "loss": 0.8180845260620118, "num_tokens": 1174491737.0, "step": 9620 }, { "epoch": 0.012852716402945993, "grad_norm": 4.46875, "learning_rate": 6.425657298277425e-07, "loss": 0.8241122245788575, "num_tokens": 1176948044.0, "step": 9640 }, { "epoch": 0.012879381789674096, "grad_norm": 3.46875, "learning_rate": 6.438989920537572e-07, "loss": 0.8090435028076172, "num_tokens": 1179597639.0, "step": 9660 }, { "epoch": 0.012906047176402199, "grad_norm": 3.484375, "learning_rate": 6.452322542797718e-07, "loss": 0.8166584968566895, "num_tokens": 1181871626.0, "step": 9680 }, { "epoch": 0.012932712563130304, "grad_norm": 3.890625, "learning_rate": 6.465655165057865e-07, "loss": 0.8154104232788086, "num_tokens": 1184020747.0, "step": 9700 }, { "epoch": 0.012959377949858407, "grad_norm": 3.75, "learning_rate": 6.47898778731801e-07, "loss": 0.8258517265319825, "num_tokens": 1186383356.0, "step": 9720 }, { "epoch": 0.012986043336586511, "grad_norm": 4.09375, "learning_rate": 6.492320409578157e-07, "loss": 0.8206949234008789, "num_tokens": 1188880937.0, "step": 9740 }, { "epoch": 0.013012708723314614, "grad_norm": 4.0, "learning_rate": 6.505653031838302e-07, "loss": 0.8358908653259277, "num_tokens": 1191638251.0, "step": 9760 }, { "epoch": 0.013039374110042717, "grad_norm": 3.75, "learning_rate": 6.518985654098449e-07, "loss": 0.8197385787963867, "num_tokens": 1194042447.0, "step": 9780 }, { "epoch": 0.013066039496770822, "grad_norm": 3.953125, "learning_rate": 6.532318276358595e-07, "loss": 0.8501688957214355, "num_tokens": 1196391514.0, "step": 9800 }, { "epoch": 0.013092704883498925, "grad_norm": 3.65625, "learning_rate": 6.545650898618741e-07, "loss": 0.8218921661376953, "num_tokens": 1198791800.0, "step": 9820 }, { "epoch": 0.013119370270227028, "grad_norm": 4.15625, "learning_rate": 6.558983520878887e-07, "loss": 0.8181506156921386, "num_tokens": 1201270981.0, "step": 9840 }, { "epoch": 0.013146035656955133, "grad_norm": 3.96875, "learning_rate": 6.572316143139034e-07, "loss": 0.8347586631774903, "num_tokens": 1203781155.0, "step": 9860 }, { "epoch": 0.013172701043683236, "grad_norm": 3.84375, "learning_rate": 6.585648765399179e-07, "loss": 0.8368364334106445, "num_tokens": 1206450603.0, "step": 9880 }, { "epoch": 0.01319936643041134, "grad_norm": 3.921875, "learning_rate": 6.598981387659326e-07, "loss": 0.8279842376708985, "num_tokens": 1209115032.0, "step": 9900 }, { "epoch": 0.013226031817139444, "grad_norm": 3.890625, "learning_rate": 6.612314009919472e-07, "loss": 0.8356925964355468, "num_tokens": 1211779607.0, "step": 9920 }, { "epoch": 0.013252697203867547, "grad_norm": 3.84375, "learning_rate": 6.625646632179617e-07, "loss": 0.8339454650878906, "num_tokens": 1214517211.0, "step": 9940 }, { "epoch": 0.013279362590595652, "grad_norm": 3.59375, "learning_rate": 6.638979254439763e-07, "loss": 0.8087945938110351, "num_tokens": 1216765024.0, "step": 9960 }, { "epoch": 0.013306027977323755, "grad_norm": 3.828125, "learning_rate": 6.65231187669991e-07, "loss": 0.8071343421936035, "num_tokens": 1219189418.0, "step": 9980 }, { "epoch": 0.01333269336405186, "grad_norm": 4.0, "learning_rate": 6.665644498960055e-07, "loss": 0.8082025527954102, "num_tokens": 1221500342.0, "step": 10000 }, { "epoch": 0.013359358750779963, "grad_norm": 3.5625, "learning_rate": 6.678977121220202e-07, "loss": 0.8178438186645508, "num_tokens": 1224234417.0, "step": 10020 }, { "epoch": 0.013386024137508066, "grad_norm": 3.46875, "learning_rate": 6.692309743480347e-07, "loss": 0.8003694534301757, "num_tokens": 1226649485.0, "step": 10040 }, { "epoch": 0.01341268952423617, "grad_norm": 4.375, "learning_rate": 6.705642365740494e-07, "loss": 0.8220190048217774, "num_tokens": 1229176336.0, "step": 10060 }, { "epoch": 0.013439354910964273, "grad_norm": 3.828125, "learning_rate": 6.71897498800064e-07, "loss": 0.8105052947998047, "num_tokens": 1231684273.0, "step": 10080 }, { "epoch": 0.013466020297692378, "grad_norm": 3.671875, "learning_rate": 6.732307610260787e-07, "loss": 0.802791690826416, "num_tokens": 1234043287.0, "step": 10100 }, { "epoch": 0.013492685684420481, "grad_norm": 3.921875, "learning_rate": 6.745640232520932e-07, "loss": 0.8221502304077148, "num_tokens": 1236616285.0, "step": 10120 }, { "epoch": 0.013519351071148584, "grad_norm": 3.625, "learning_rate": 6.758972854781079e-07, "loss": 0.7944392681121826, "num_tokens": 1238972000.0, "step": 10140 }, { "epoch": 0.013546016457876689, "grad_norm": 3.875, "learning_rate": 6.772305477041224e-07, "loss": 0.800814151763916, "num_tokens": 1241235742.0, "step": 10160 }, { "epoch": 0.013572681844604792, "grad_norm": 4.15625, "learning_rate": 6.785638099301371e-07, "loss": 0.827815055847168, "num_tokens": 1243808536.0, "step": 10180 }, { "epoch": 0.013599347231332897, "grad_norm": 3.671875, "learning_rate": 6.798970721561517e-07, "loss": 0.827209758758545, "num_tokens": 1246371401.0, "step": 10200 }, { "epoch": 0.013626012618061, "grad_norm": 3.65625, "learning_rate": 6.812303343821663e-07, "loss": 0.8198887825012207, "num_tokens": 1248944486.0, "step": 10220 }, { "epoch": 0.013652678004789103, "grad_norm": 3.703125, "learning_rate": 6.825635966081809e-07, "loss": 0.8198467254638672, "num_tokens": 1251277742.0, "step": 10240 }, { "epoch": 0.013679343391517208, "grad_norm": 3.796875, "learning_rate": 6.838968588341956e-07, "loss": 0.7853316783905029, "num_tokens": 1253505682.0, "step": 10260 }, { "epoch": 0.01370600877824531, "grad_norm": 3.625, "learning_rate": 6.852301210602101e-07, "loss": 0.8137887001037598, "num_tokens": 1255887176.0, "step": 10280 }, { "epoch": 0.013732674164973415, "grad_norm": 3.65625, "learning_rate": 6.865633832862248e-07, "loss": 0.7989689826965332, "num_tokens": 1258483744.0, "step": 10300 }, { "epoch": 0.013759339551701518, "grad_norm": 3.4375, "learning_rate": 6.878966455122394e-07, "loss": 0.809956169128418, "num_tokens": 1260929718.0, "step": 10320 }, { "epoch": 0.013786004938429621, "grad_norm": 3.6875, "learning_rate": 6.89229907738254e-07, "loss": 0.815289306640625, "num_tokens": 1263397356.0, "step": 10340 }, { "epoch": 0.013812670325157726, "grad_norm": 3.765625, "learning_rate": 6.905631699642687e-07, "loss": 0.8165367126464844, "num_tokens": 1265738458.0, "step": 10360 }, { "epoch": 0.01383933571188583, "grad_norm": 3.5, "learning_rate": 6.918964321902833e-07, "loss": 0.8076818466186524, "num_tokens": 1268258832.0, "step": 10380 }, { "epoch": 0.013866001098613934, "grad_norm": 3.421875, "learning_rate": 6.932296944162979e-07, "loss": 0.7999887466430664, "num_tokens": 1270836717.0, "step": 10400 }, { "epoch": 0.013892666485342037, "grad_norm": 3.40625, "learning_rate": 6.945629566423125e-07, "loss": 0.8237237930297852, "num_tokens": 1273090065.0, "step": 10420 }, { "epoch": 0.01391933187207014, "grad_norm": 3.203125, "learning_rate": 6.958962188683272e-07, "loss": 0.8114664077758789, "num_tokens": 1275410680.0, "step": 10440 }, { "epoch": 0.013945997258798245, "grad_norm": 3.953125, "learning_rate": 6.972294810943417e-07, "loss": 0.8289201736450196, "num_tokens": 1277852980.0, "step": 10460 }, { "epoch": 0.013972662645526348, "grad_norm": 3.59375, "learning_rate": 6.985627433203564e-07, "loss": 0.8142255783081055, "num_tokens": 1280593338.0, "step": 10480 }, { "epoch": 0.013999328032254451, "grad_norm": 4.65625, "learning_rate": 6.99896005546371e-07, "loss": 0.7779539108276368, "num_tokens": 1282940249.0, "step": 10500 }, { "epoch": 0.014025993418982556, "grad_norm": 3.875, "learning_rate": 7.012292677723855e-07, "loss": 0.8142606735229492, "num_tokens": 1285605599.0, "step": 10520 }, { "epoch": 0.014052658805710659, "grad_norm": 4.0, "learning_rate": 7.025625299984001e-07, "loss": 0.7993472576141357, "num_tokens": 1288053981.0, "step": 10540 }, { "epoch": 0.014079324192438764, "grad_norm": 3.59375, "learning_rate": 7.038957922244148e-07, "loss": 0.8119115829467773, "num_tokens": 1290509809.0, "step": 10560 }, { "epoch": 0.014105989579166867, "grad_norm": 3.640625, "learning_rate": 7.052290544504293e-07, "loss": 0.7881217002868652, "num_tokens": 1293084674.0, "step": 10580 }, { "epoch": 0.01413265496589497, "grad_norm": 3.796875, "learning_rate": 7.06562316676444e-07, "loss": 0.7849487781524658, "num_tokens": 1295351102.0, "step": 10600 }, { "epoch": 0.014159320352623074, "grad_norm": 3.703125, "learning_rate": 7.078955789024585e-07, "loss": 0.8286909103393555, "num_tokens": 1297648674.0, "step": 10620 }, { "epoch": 0.014185985739351177, "grad_norm": 3.671875, "learning_rate": 7.092288411284732e-07, "loss": 0.8305426597595215, "num_tokens": 1300255897.0, "step": 10640 }, { "epoch": 0.014212651126079282, "grad_norm": 3.390625, "learning_rate": 7.105621033544878e-07, "loss": 0.8088605880737305, "num_tokens": 1302600283.0, "step": 10660 }, { "epoch": 0.014239316512807385, "grad_norm": 4.4375, "learning_rate": 7.118953655805025e-07, "loss": 0.8009950637817382, "num_tokens": 1304896498.0, "step": 10680 }, { "epoch": 0.014265981899535488, "grad_norm": 3.953125, "learning_rate": 7.13228627806517e-07, "loss": 0.8010103225708007, "num_tokens": 1307515663.0, "step": 10700 }, { "epoch": 0.014292647286263593, "grad_norm": 3.3125, "learning_rate": 7.145618900325317e-07, "loss": 0.8067817687988281, "num_tokens": 1309874841.0, "step": 10720 }, { "epoch": 0.014319312672991696, "grad_norm": 4.125, "learning_rate": 7.158951522585462e-07, "loss": 0.8217216491699219, "num_tokens": 1312451519.0, "step": 10740 }, { "epoch": 0.0143459780597198, "grad_norm": 4.625, "learning_rate": 7.172284144845609e-07, "loss": 0.8052752494812012, "num_tokens": 1314759360.0, "step": 10760 }, { "epoch": 0.014372643446447904, "grad_norm": 4.09375, "learning_rate": 7.185616767105755e-07, "loss": 0.8054489135742188, "num_tokens": 1317066672.0, "step": 10780 }, { "epoch": 0.014399308833176007, "grad_norm": 4.34375, "learning_rate": 7.198949389365902e-07, "loss": 0.8133197784423828, "num_tokens": 1319380868.0, "step": 10800 }, { "epoch": 0.014425974219904112, "grad_norm": 3.21875, "learning_rate": 7.212282011626047e-07, "loss": 0.8084254264831543, "num_tokens": 1321735212.0, "step": 10820 }, { "epoch": 0.014452639606632215, "grad_norm": 3.734375, "learning_rate": 7.225614633886194e-07, "loss": 0.7850629329681397, "num_tokens": 1324096718.0, "step": 10840 }, { "epoch": 0.01447930499336032, "grad_norm": 4.125, "learning_rate": 7.238947256146339e-07, "loss": 0.826406192779541, "num_tokens": 1326404889.0, "step": 10860 }, { "epoch": 0.014505970380088423, "grad_norm": 3.84375, "learning_rate": 7.252279878406486e-07, "loss": 0.7985017776489258, "num_tokens": 1328773456.0, "step": 10880 }, { "epoch": 0.014532635766816526, "grad_norm": 4.03125, "learning_rate": 7.265612500666632e-07, "loss": 0.8013053894042969, "num_tokens": 1331298835.0, "step": 10900 }, { "epoch": 0.01455930115354463, "grad_norm": 3.125, "learning_rate": 7.278945122926778e-07, "loss": 0.7941848754882812, "num_tokens": 1333737031.0, "step": 10920 }, { "epoch": 0.014585966540272733, "grad_norm": 3.984375, "learning_rate": 7.292277745186924e-07, "loss": 0.7998844623565674, "num_tokens": 1335916652.0, "step": 10940 }, { "epoch": 0.014612631927000838, "grad_norm": 4.28125, "learning_rate": 7.305610367447071e-07, "loss": 0.8221041679382324, "num_tokens": 1338340997.0, "step": 10960 }, { "epoch": 0.014639297313728941, "grad_norm": 3.609375, "learning_rate": 7.318942989707216e-07, "loss": 0.7954744338989258, "num_tokens": 1340931977.0, "step": 10980 }, { "epoch": 0.014665962700457044, "grad_norm": 3.515625, "learning_rate": 7.332275611967363e-07, "loss": 0.8177481651306152, "num_tokens": 1343479973.0, "step": 11000 }, { "epoch": 0.014692628087185149, "grad_norm": 3.640625, "learning_rate": 7.345608234227509e-07, "loss": 0.7947704315185546, "num_tokens": 1345805591.0, "step": 11020 }, { "epoch": 0.014719293473913252, "grad_norm": 3.34375, "learning_rate": 7.358940856487655e-07, "loss": 0.8250353813171387, "num_tokens": 1348258889.0, "step": 11040 }, { "epoch": 0.014745958860641355, "grad_norm": 3.59375, "learning_rate": 7.372273478747801e-07, "loss": 0.8199075698852539, "num_tokens": 1350718930.0, "step": 11060 }, { "epoch": 0.01477262424736946, "grad_norm": 3.65625, "learning_rate": 7.385606101007948e-07, "loss": 0.8130535125732422, "num_tokens": 1353336356.0, "step": 11080 }, { "epoch": 0.014799289634097563, "grad_norm": 3.28125, "learning_rate": 7.398938723268092e-07, "loss": 0.8164287567138672, "num_tokens": 1355949288.0, "step": 11100 }, { "epoch": 0.014825955020825668, "grad_norm": 3.578125, "learning_rate": 7.41227134552824e-07, "loss": 0.7999509334564209, "num_tokens": 1358286568.0, "step": 11120 }, { "epoch": 0.01485262040755377, "grad_norm": 3.578125, "learning_rate": 7.425603967788384e-07, "loss": 0.8141899108886719, "num_tokens": 1360718425.0, "step": 11140 }, { "epoch": 0.014879285794281874, "grad_norm": 3.40625, "learning_rate": 7.438936590048531e-07, "loss": 0.80404052734375, "num_tokens": 1363110256.0, "step": 11160 }, { "epoch": 0.014905951181009978, "grad_norm": 3.65625, "learning_rate": 7.452269212308677e-07, "loss": 0.8035571098327636, "num_tokens": 1365509161.0, "step": 11180 }, { "epoch": 0.014932616567738081, "grad_norm": 3.46875, "learning_rate": 7.465601834568824e-07, "loss": 0.7910733222961426, "num_tokens": 1367838852.0, "step": 11200 }, { "epoch": 0.014959281954466186, "grad_norm": 3.421875, "learning_rate": 7.478934456828969e-07, "loss": 0.8152192115783692, "num_tokens": 1370213332.0, "step": 11220 }, { "epoch": 0.01498594734119429, "grad_norm": 3.5625, "learning_rate": 7.492267079089116e-07, "loss": 0.8093388557434082, "num_tokens": 1372731235.0, "step": 11240 }, { "epoch": 0.015012612727922392, "grad_norm": 3.625, "learning_rate": 7.505599701349261e-07, "loss": 0.8080844879150391, "num_tokens": 1375082895.0, "step": 11260 }, { "epoch": 0.015039278114650497, "grad_norm": 3.578125, "learning_rate": 7.518932323609408e-07, "loss": 0.7883134365081788, "num_tokens": 1377484614.0, "step": 11280 }, { "epoch": 0.0150659435013786, "grad_norm": 3.625, "learning_rate": 7.532264945869554e-07, "loss": 0.8316627502441406, "num_tokens": 1379772030.0, "step": 11300 }, { "epoch": 0.015092608888106705, "grad_norm": 3.640625, "learning_rate": 7.5455975681297e-07, "loss": 0.8273283004760742, "num_tokens": 1382278961.0, "step": 11320 }, { "epoch": 0.015119274274834808, "grad_norm": 3.9375, "learning_rate": 7.558930190389846e-07, "loss": 0.8037694931030274, "num_tokens": 1384624611.0, "step": 11340 }, { "epoch": 0.015145939661562911, "grad_norm": 3.59375, "learning_rate": 7.572262812649993e-07, "loss": 0.8186422348022461, "num_tokens": 1386964350.0, "step": 11360 }, { "epoch": 0.015172605048291016, "grad_norm": 4.3125, "learning_rate": 7.585595434910138e-07, "loss": 0.7924356460571289, "num_tokens": 1389289978.0, "step": 11380 }, { "epoch": 0.015199270435019119, "grad_norm": 3.375, "learning_rate": 7.598928057170285e-07, "loss": 0.8074846267700195, "num_tokens": 1391637711.0, "step": 11400 }, { "epoch": 0.015225935821747224, "grad_norm": 3.40625, "learning_rate": 7.612260679430431e-07, "loss": 0.8020513534545899, "num_tokens": 1394279865.0, "step": 11420 }, { "epoch": 0.015252601208475327, "grad_norm": 3.90625, "learning_rate": 7.625593301690577e-07, "loss": 0.8203277587890625, "num_tokens": 1396622454.0, "step": 11440 }, { "epoch": 0.01527926659520343, "grad_norm": 3.140625, "learning_rate": 7.638925923950723e-07, "loss": 0.8167587280273437, "num_tokens": 1399034837.0, "step": 11460 }, { "epoch": 0.015305931981931534, "grad_norm": 2.9375, "learning_rate": 7.65225854621087e-07, "loss": 0.8027131080627441, "num_tokens": 1401740105.0, "step": 11480 }, { "epoch": 0.015332597368659637, "grad_norm": 3.46875, "learning_rate": 7.665591168471015e-07, "loss": 0.7996034622192383, "num_tokens": 1404263731.0, "step": 11500 }, { "epoch": 0.015359262755387742, "grad_norm": 3.75, "learning_rate": 7.678923790731162e-07, "loss": 0.8000609397888183, "num_tokens": 1406816347.0, "step": 11520 }, { "epoch": 0.015385928142115845, "grad_norm": 3.421875, "learning_rate": 7.692256412991307e-07, "loss": 0.7920145034790039, "num_tokens": 1408776695.0, "step": 11540 }, { "epoch": 0.015412593528843948, "grad_norm": 3.71875, "learning_rate": 7.705589035251454e-07, "loss": 0.8026524543762207, "num_tokens": 1411163110.0, "step": 11560 }, { "epoch": 0.015439258915572053, "grad_norm": 3.90625, "learning_rate": 7.7189216575116e-07, "loss": 0.8048521041870117, "num_tokens": 1413491539.0, "step": 11580 }, { "epoch": 0.015465924302300156, "grad_norm": 3.828125, "learning_rate": 7.732254279771747e-07, "loss": 0.8117103576660156, "num_tokens": 1415841211.0, "step": 11600 }, { "epoch": 0.015492589689028259, "grad_norm": 3.609375, "learning_rate": 7.745586902031892e-07, "loss": 0.7993291854858399, "num_tokens": 1418270262.0, "step": 11620 }, { "epoch": 0.015519255075756364, "grad_norm": 3.4375, "learning_rate": 7.758919524292039e-07, "loss": 0.807945442199707, "num_tokens": 1420693661.0, "step": 11640 }, { "epoch": 0.015545920462484467, "grad_norm": 3.6875, "learning_rate": 7.772252146552183e-07, "loss": 0.7916358947753906, "num_tokens": 1423206129.0, "step": 11660 }, { "epoch": 0.015572585849212572, "grad_norm": 3.703125, "learning_rate": 7.78558476881233e-07, "loss": 0.7966407775878906, "num_tokens": 1425607623.0, "step": 11680 }, { "epoch": 0.015599251235940675, "grad_norm": 3.296875, "learning_rate": 7.798917391072476e-07, "loss": 0.7914084434509278, "num_tokens": 1428238262.0, "step": 11700 }, { "epoch": 0.015625916622668778, "grad_norm": 3.8125, "learning_rate": 7.812250013332622e-07, "loss": 0.7980436325073242, "num_tokens": 1430804097.0, "step": 11720 }, { "epoch": 0.01565258200939688, "grad_norm": 3.71875, "learning_rate": 7.825582635592769e-07, "loss": 0.8095009803771973, "num_tokens": 1433255127.0, "step": 11740 }, { "epoch": 0.015679247396124987, "grad_norm": 3.75, "learning_rate": 7.838915257852915e-07, "loss": 0.8047734260559082, "num_tokens": 1435798910.0, "step": 11760 }, { "epoch": 0.01570591278285309, "grad_norm": 3.984375, "learning_rate": 7.852247880113062e-07, "loss": 0.7911839008331298, "num_tokens": 1438276677.0, "step": 11780 }, { "epoch": 0.015732578169581193, "grad_norm": 3.078125, "learning_rate": 7.865580502373207e-07, "loss": 0.8086902618408203, "num_tokens": 1440713186.0, "step": 11800 }, { "epoch": 0.015759243556309296, "grad_norm": 3.453125, "learning_rate": 7.878913124633354e-07, "loss": 0.8080696105957031, "num_tokens": 1443132525.0, "step": 11820 }, { "epoch": 0.0157859089430374, "grad_norm": 3.046875, "learning_rate": 7.892245746893499e-07, "loss": 0.8000141143798828, "num_tokens": 1445533646.0, "step": 11840 }, { "epoch": 0.015812574329765506, "grad_norm": 3.296875, "learning_rate": 7.905578369153646e-07, "loss": 0.7957226753234863, "num_tokens": 1447957818.0, "step": 11860 }, { "epoch": 0.01583923971649361, "grad_norm": 4.09375, "learning_rate": 7.918910991413792e-07, "loss": 0.8210739135742188, "num_tokens": 1450559168.0, "step": 11880 }, { "epoch": 0.015865905103221712, "grad_norm": 4.5, "learning_rate": 7.932243613673938e-07, "loss": 0.8010047912597656, "num_tokens": 1452744453.0, "step": 11900 }, { "epoch": 0.015892570489949815, "grad_norm": 3.5625, "learning_rate": 7.945576235934084e-07, "loss": 0.7926475524902343, "num_tokens": 1455283376.0, "step": 11920 }, { "epoch": 0.015919235876677918, "grad_norm": 3.265625, "learning_rate": 7.958908858194231e-07, "loss": 0.7953871726989746, "num_tokens": 1457592766.0, "step": 11940 }, { "epoch": 0.015945901263406025, "grad_norm": 3.421875, "learning_rate": 7.972241480454376e-07, "loss": 0.7912233352661133, "num_tokens": 1460078785.0, "step": 11960 }, { "epoch": 0.015972566650134128, "grad_norm": 3.65625, "learning_rate": 7.985574102714523e-07, "loss": 0.7915338516235352, "num_tokens": 1462446348.0, "step": 11980 }, { "epoch": 0.01599923203686223, "grad_norm": 3.203125, "learning_rate": 7.998906724974669e-07, "loss": 0.7966867446899414, "num_tokens": 1464784918.0, "step": 12000 }, { "epoch": 0.016025897423590334, "grad_norm": 3.59375, "learning_rate": 8.012239347234815e-07, "loss": 0.8220578193664551, "num_tokens": 1467483905.0, "step": 12020 }, { "epoch": 0.016052562810318437, "grad_norm": 3.453125, "learning_rate": 8.025571969494961e-07, "loss": 0.8048059463500976, "num_tokens": 1469729056.0, "step": 12040 }, { "epoch": 0.016079228197046543, "grad_norm": 3.609375, "learning_rate": 8.038904591755108e-07, "loss": 0.7832933902740479, "num_tokens": 1472276460.0, "step": 12060 }, { "epoch": 0.016105893583774646, "grad_norm": 3.265625, "learning_rate": 8.052237214015253e-07, "loss": 0.7917715072631836, "num_tokens": 1474461899.0, "step": 12080 }, { "epoch": 0.01613255897050275, "grad_norm": 3.40625, "learning_rate": 8.0655698362754e-07, "loss": 0.8167739868164062, "num_tokens": 1477284914.0, "step": 12100 }, { "epoch": 0.016159224357230852, "grad_norm": 3.34375, "learning_rate": 8.078902458535545e-07, "loss": 0.7852533340454102, "num_tokens": 1479900355.0, "step": 12120 }, { "epoch": 0.016185889743958955, "grad_norm": 3.453125, "learning_rate": 8.092235080795692e-07, "loss": 0.7929277420043945, "num_tokens": 1482282359.0, "step": 12140 }, { "epoch": 0.016212555130687062, "grad_norm": 3.0, "learning_rate": 8.105567703055838e-07, "loss": 0.7823873519897461, "num_tokens": 1484569484.0, "step": 12160 }, { "epoch": 0.016239220517415165, "grad_norm": 3.234375, "learning_rate": 8.118900325315985e-07, "loss": 0.7624360084533691, "num_tokens": 1487091444.0, "step": 12180 }, { "epoch": 0.016265885904143268, "grad_norm": 3.625, "learning_rate": 8.13223294757613e-07, "loss": 0.798007869720459, "num_tokens": 1489784727.0, "step": 12200 }, { "epoch": 0.01629255129087137, "grad_norm": 3.640625, "learning_rate": 8.145565569836277e-07, "loss": 0.7658060550689697, "num_tokens": 1492221744.0, "step": 12220 }, { "epoch": 0.016319216677599474, "grad_norm": 3.078125, "learning_rate": 8.158898192096422e-07, "loss": 0.7824455261230469, "num_tokens": 1494784740.0, "step": 12240 }, { "epoch": 0.01634588206432758, "grad_norm": 3.484375, "learning_rate": 8.172230814356569e-07, "loss": 0.7934853553771972, "num_tokens": 1497283522.0, "step": 12260 }, { "epoch": 0.016372547451055684, "grad_norm": 3.21875, "learning_rate": 8.185563436616714e-07, "loss": 0.8011421203613281, "num_tokens": 1499878874.0, "step": 12280 }, { "epoch": 0.016399212837783787, "grad_norm": 2.6875, "learning_rate": 8.19889605887686e-07, "loss": 0.8074872970581055, "num_tokens": 1502310199.0, "step": 12300 }, { "epoch": 0.01642587822451189, "grad_norm": 3.109375, "learning_rate": 8.212228681137006e-07, "loss": 0.7885695457458496, "num_tokens": 1504540832.0, "step": 12320 }, { "epoch": 0.016452543611239993, "grad_norm": 3.734375, "learning_rate": 8.225561303397153e-07, "loss": 0.7732903480529785, "num_tokens": 1506808104.0, "step": 12340 }, { "epoch": 0.0164792089979681, "grad_norm": 2.59375, "learning_rate": 8.238893925657298e-07, "loss": 0.7790070533752441, "num_tokens": 1509340605.0, "step": 12360 }, { "epoch": 0.016505874384696202, "grad_norm": 3.71875, "learning_rate": 8.252226547917445e-07, "loss": 0.8115950584411621, "num_tokens": 1511948938.0, "step": 12380 }, { "epoch": 0.016532539771424305, "grad_norm": 2.6875, "learning_rate": 8.265559170177591e-07, "loss": 0.7750681877136231, "num_tokens": 1514342257.0, "step": 12400 }, { "epoch": 0.01655920515815241, "grad_norm": 3.40625, "learning_rate": 8.278891792437737e-07, "loss": 0.7694839477539063, "num_tokens": 1516877997.0, "step": 12420 }, { "epoch": 0.01658587054488051, "grad_norm": 3.59375, "learning_rate": 8.292224414697883e-07, "loss": 0.8009750366210937, "num_tokens": 1519477847.0, "step": 12440 }, { "epoch": 0.016612535931608618, "grad_norm": 3.203125, "learning_rate": 8.30555703695803e-07, "loss": 0.7814492225646973, "num_tokens": 1521835847.0, "step": 12460 }, { "epoch": 0.01663920131833672, "grad_norm": 3.234375, "learning_rate": 8.318889659218175e-07, "loss": 0.8049396514892578, "num_tokens": 1524371529.0, "step": 12480 }, { "epoch": 0.016665866705064824, "grad_norm": 3.515625, "learning_rate": 8.332222281478322e-07, "loss": 0.7762056350708008, "num_tokens": 1526892685.0, "step": 12500 }, { "epoch": 0.016692532091792927, "grad_norm": 3.46875, "learning_rate": 8.345554903738468e-07, "loss": 0.8045568466186523, "num_tokens": 1529324855.0, "step": 12520 }, { "epoch": 0.01671919747852103, "grad_norm": 4.0625, "learning_rate": 8.358887525998614e-07, "loss": 0.7940521240234375, "num_tokens": 1532072064.0, "step": 12540 }, { "epoch": 0.016745862865249133, "grad_norm": 3.09375, "learning_rate": 8.37222014825876e-07, "loss": 0.7848413467407227, "num_tokens": 1534516103.0, "step": 12560 }, { "epoch": 0.01677252825197724, "grad_norm": 3.296875, "learning_rate": 8.385552770518907e-07, "loss": 0.8135161399841309, "num_tokens": 1537052121.0, "step": 12580 }, { "epoch": 0.016799193638705343, "grad_norm": 3.453125, "learning_rate": 8.398885392779052e-07, "loss": 0.7980330944061279, "num_tokens": 1539373582.0, "step": 12600 }, { "epoch": 0.016825859025433446, "grad_norm": 3.6875, "learning_rate": 8.412218015039199e-07, "loss": 0.7941969871520996, "num_tokens": 1541792223.0, "step": 12620 }, { "epoch": 0.01685252441216155, "grad_norm": 3.015625, "learning_rate": 8.425550637299344e-07, "loss": 0.7830109596252441, "num_tokens": 1544229317.0, "step": 12640 }, { "epoch": 0.01687918979888965, "grad_norm": 3.109375, "learning_rate": 8.438883259559491e-07, "loss": 0.7747329235076904, "num_tokens": 1546366004.0, "step": 12660 }, { "epoch": 0.016905855185617758, "grad_norm": 3.5, "learning_rate": 8.452215881819637e-07, "loss": 0.7859937667846679, "num_tokens": 1549000484.0, "step": 12680 }, { "epoch": 0.01693252057234586, "grad_norm": 3.25, "learning_rate": 8.465548504079784e-07, "loss": 0.805703067779541, "num_tokens": 1551464375.0, "step": 12700 }, { "epoch": 0.016959185959073964, "grad_norm": 3.765625, "learning_rate": 8.478881126339929e-07, "loss": 0.7593299865722656, "num_tokens": 1554034890.0, "step": 12720 }, { "epoch": 0.016985851345802067, "grad_norm": 3.28125, "learning_rate": 8.492213748600076e-07, "loss": 0.7885156154632569, "num_tokens": 1556563928.0, "step": 12740 }, { "epoch": 0.01701251673253017, "grad_norm": 3.359375, "learning_rate": 8.505546370860221e-07, "loss": 0.7871211051940918, "num_tokens": 1558950950.0, "step": 12760 }, { "epoch": 0.017039182119258277, "grad_norm": 3.4375, "learning_rate": 8.518878993120368e-07, "loss": 0.7782999992370605, "num_tokens": 1561288092.0, "step": 12780 }, { "epoch": 0.01706584750598638, "grad_norm": 3.109375, "learning_rate": 8.532211615380514e-07, "loss": 0.7897027015686036, "num_tokens": 1563706763.0, "step": 12800 }, { "epoch": 0.017092512892714483, "grad_norm": 3.328125, "learning_rate": 8.54554423764066e-07, "loss": 0.7759355545043946, "num_tokens": 1566273650.0, "step": 12820 }, { "epoch": 0.017119178279442586, "grad_norm": 3.328125, "learning_rate": 8.558876859900805e-07, "loss": 0.7807538032531738, "num_tokens": 1568718993.0, "step": 12840 }, { "epoch": 0.01714584366617069, "grad_norm": 3.59375, "learning_rate": 8.572209482160953e-07, "loss": 0.767389965057373, "num_tokens": 1571159951.0, "step": 12860 }, { "epoch": 0.017172509052898795, "grad_norm": 3.234375, "learning_rate": 8.585542104421097e-07, "loss": 0.7841579437255859, "num_tokens": 1573593109.0, "step": 12880 }, { "epoch": 0.0171991744396269, "grad_norm": 3.09375, "learning_rate": 8.598874726681244e-07, "loss": 0.7842621803283691, "num_tokens": 1576285001.0, "step": 12900 }, { "epoch": 0.017225839826355, "grad_norm": 2.765625, "learning_rate": 8.61220734894139e-07, "loss": 0.7478207588195801, "num_tokens": 1578724912.0, "step": 12920 }, { "epoch": 0.017252505213083105, "grad_norm": 2.84375, "learning_rate": 8.625539971201536e-07, "loss": 0.7939078330993652, "num_tokens": 1580950451.0, "step": 12940 }, { "epoch": 0.017279170599811208, "grad_norm": 2.78125, "learning_rate": 8.638872593461682e-07, "loss": 0.7768584728240967, "num_tokens": 1583282927.0, "step": 12960 }, { "epoch": 0.017305835986539314, "grad_norm": 3.25, "learning_rate": 8.652205215721829e-07, "loss": 0.7793038368225098, "num_tokens": 1585757295.0, "step": 12980 }, { "epoch": 0.017332501373267417, "grad_norm": 3.453125, "learning_rate": 8.665537837981974e-07, "loss": 0.7709847450256347, "num_tokens": 1588221004.0, "step": 13000 }, { "epoch": 0.01735916675999552, "grad_norm": 3.296875, "learning_rate": 8.678870460242121e-07, "loss": 0.772464656829834, "num_tokens": 1590647865.0, "step": 13020 }, { "epoch": 0.017385832146723623, "grad_norm": 3.03125, "learning_rate": 8.692203082502266e-07, "loss": 0.7748284339904785, "num_tokens": 1593158986.0, "step": 13040 }, { "epoch": 0.017412497533451726, "grad_norm": 2.796875, "learning_rate": 8.705535704762413e-07, "loss": 0.7914755344390869, "num_tokens": 1595615849.0, "step": 13060 }, { "epoch": 0.017439162920179833, "grad_norm": 3.0625, "learning_rate": 8.71886832702256e-07, "loss": 0.7824360847473144, "num_tokens": 1597907536.0, "step": 13080 }, { "epoch": 0.017465828306907936, "grad_norm": 3.609375, "learning_rate": 8.732200949282706e-07, "loss": 0.8062070846557617, "num_tokens": 1600518887.0, "step": 13100 }, { "epoch": 0.01749249369363604, "grad_norm": 3.40625, "learning_rate": 8.745533571542852e-07, "loss": 0.7810359954833984, "num_tokens": 1603149985.0, "step": 13120 }, { "epoch": 0.017519159080364142, "grad_norm": 2.765625, "learning_rate": 8.758866193802998e-07, "loss": 0.8068899154663086, "num_tokens": 1605492874.0, "step": 13140 }, { "epoch": 0.017545824467092245, "grad_norm": 3.8125, "learning_rate": 8.772198816063145e-07, "loss": 0.780000114440918, "num_tokens": 1607955674.0, "step": 13160 }, { "epoch": 0.01757248985382035, "grad_norm": 2.890625, "learning_rate": 8.78553143832329e-07, "loss": 0.7848965644836425, "num_tokens": 1610396156.0, "step": 13180 }, { "epoch": 0.017599155240548454, "grad_norm": 2.921875, "learning_rate": 8.798864060583437e-07, "loss": 0.7539694786071778, "num_tokens": 1612780962.0, "step": 13200 }, { "epoch": 0.017625820627276557, "grad_norm": 3.5, "learning_rate": 8.812196682843582e-07, "loss": 0.7874592304229736, "num_tokens": 1615474425.0, "step": 13220 }, { "epoch": 0.01765248601400466, "grad_norm": 3.0, "learning_rate": 8.825529305103729e-07, "loss": 0.7799440383911133, "num_tokens": 1617870297.0, "step": 13240 }, { "epoch": 0.017679151400732764, "grad_norm": 2.5625, "learning_rate": 8.838861927363875e-07, "loss": 0.7960165023803711, "num_tokens": 1620559884.0, "step": 13260 }, { "epoch": 0.01770581678746087, "grad_norm": 3.234375, "learning_rate": 8.852194549624022e-07, "loss": 0.7633747100830078, "num_tokens": 1623103680.0, "step": 13280 }, { "epoch": 0.017732482174188973, "grad_norm": 3.671875, "learning_rate": 8.865527171884167e-07, "loss": 0.7873907089233398, "num_tokens": 1625395655.0, "step": 13300 }, { "epoch": 0.017759147560917076, "grad_norm": 3.40625, "learning_rate": 8.878859794144314e-07, "loss": 0.7752806186676026, "num_tokens": 1627871915.0, "step": 13320 }, { "epoch": 0.01778581294764518, "grad_norm": 2.921875, "learning_rate": 8.892192416404459e-07, "loss": 0.7658371448516845, "num_tokens": 1630237136.0, "step": 13340 }, { "epoch": 0.017812478334373282, "grad_norm": 2.875, "learning_rate": 8.905525038664606e-07, "loss": 0.7885994911193848, "num_tokens": 1632774912.0, "step": 13360 }, { "epoch": 0.01783914372110139, "grad_norm": 2.703125, "learning_rate": 8.918857660924752e-07, "loss": 0.7532847404479981, "num_tokens": 1635177707.0, "step": 13380 }, { "epoch": 0.01786580910782949, "grad_norm": 2.765625, "learning_rate": 8.932190283184898e-07, "loss": 0.7610866546630859, "num_tokens": 1637655977.0, "step": 13400 }, { "epoch": 0.017892474494557595, "grad_norm": 2.828125, "learning_rate": 8.945522905445044e-07, "loss": 0.7692450523376465, "num_tokens": 1640035468.0, "step": 13420 }, { "epoch": 0.017919139881285698, "grad_norm": 2.953125, "learning_rate": 8.958855527705191e-07, "loss": 0.7713186740875244, "num_tokens": 1642593959.0, "step": 13440 }, { "epoch": 0.0179458052680138, "grad_norm": 2.828125, "learning_rate": 8.972188149965335e-07, "loss": 0.7812939643859863, "num_tokens": 1644854249.0, "step": 13460 }, { "epoch": 0.017972470654741907, "grad_norm": 3.125, "learning_rate": 8.985520772225482e-07, "loss": 0.7553634166717529, "num_tokens": 1647232404.0, "step": 13480 }, { "epoch": 0.01799913604147001, "grad_norm": 3.25, "learning_rate": 8.998853394485628e-07, "loss": 0.7827413082122803, "num_tokens": 1649680356.0, "step": 13500 }, { "epoch": 0.018025801428198113, "grad_norm": 3.140625, "learning_rate": 9.012186016745774e-07, "loss": 0.7749487876892089, "num_tokens": 1652185041.0, "step": 13520 }, { "epoch": 0.018052466814926216, "grad_norm": 2.71875, "learning_rate": 9.02551863900592e-07, "loss": 0.7789804935455322, "num_tokens": 1654652991.0, "step": 13540 }, { "epoch": 0.01807913220165432, "grad_norm": 3.109375, "learning_rate": 9.038851261266067e-07, "loss": 0.7778005123138427, "num_tokens": 1657090261.0, "step": 13560 }, { "epoch": 0.018105797588382426, "grad_norm": 2.828125, "learning_rate": 9.052183883526212e-07, "loss": 0.7909440994262695, "num_tokens": 1659439189.0, "step": 13580 }, { "epoch": 0.01813246297511053, "grad_norm": 3.4375, "learning_rate": 9.065516505786359e-07, "loss": 0.7769979476928711, "num_tokens": 1661756056.0, "step": 13600 }, { "epoch": 0.018159128361838632, "grad_norm": 2.78125, "learning_rate": 9.078849128046504e-07, "loss": 0.7722264289855957, "num_tokens": 1664086038.0, "step": 13620 }, { "epoch": 0.018185793748566735, "grad_norm": 3.078125, "learning_rate": 9.092181750306651e-07, "loss": 0.7523816585540771, "num_tokens": 1666580678.0, "step": 13640 }, { "epoch": 0.018212459135294838, "grad_norm": 2.765625, "learning_rate": 9.105514372566797e-07, "loss": 0.7645846843719483, "num_tokens": 1668838742.0, "step": 13660 }, { "epoch": 0.018239124522022945, "grad_norm": 3.109375, "learning_rate": 9.118846994826944e-07, "loss": 0.8016141891479492, "num_tokens": 1671182674.0, "step": 13680 }, { "epoch": 0.018265789908751048, "grad_norm": 3.09375, "learning_rate": 9.132179617087089e-07, "loss": 0.7651591300964355, "num_tokens": 1673374271.0, "step": 13700 }, { "epoch": 0.01829245529547915, "grad_norm": 2.546875, "learning_rate": 9.145512239347236e-07, "loss": 0.7684417724609375, "num_tokens": 1675832455.0, "step": 13720 }, { "epoch": 0.018319120682207254, "grad_norm": 3.171875, "learning_rate": 9.158844861607381e-07, "loss": 0.7700359344482421, "num_tokens": 1678312950.0, "step": 13740 }, { "epoch": 0.018345786068935357, "grad_norm": 2.84375, "learning_rate": 9.172177483867528e-07, "loss": 0.763557767868042, "num_tokens": 1680819323.0, "step": 13760 }, { "epoch": 0.01837245145566346, "grad_norm": 2.8125, "learning_rate": 9.185510106127674e-07, "loss": 0.7606728076934814, "num_tokens": 1683292167.0, "step": 13780 }, { "epoch": 0.018399116842391566, "grad_norm": 2.828125, "learning_rate": 9.19884272838782e-07, "loss": 0.7583596229553222, "num_tokens": 1685599302.0, "step": 13800 }, { "epoch": 0.01842578222911967, "grad_norm": 2.703125, "learning_rate": 9.212175350647966e-07, "loss": 0.7717523097991943, "num_tokens": 1687995623.0, "step": 13820 }, { "epoch": 0.018452447615847772, "grad_norm": 3.21875, "learning_rate": 9.225507972908113e-07, "loss": 0.760980749130249, "num_tokens": 1690335336.0, "step": 13840 }, { "epoch": 0.018479113002575875, "grad_norm": 3.1875, "learning_rate": 9.238840595168258e-07, "loss": 0.7584730625152588, "num_tokens": 1692925474.0, "step": 13860 }, { "epoch": 0.01850577838930398, "grad_norm": 2.75, "learning_rate": 9.252173217428405e-07, "loss": 0.7731157779693604, "num_tokens": 1695387142.0, "step": 13880 }, { "epoch": 0.018532443776032085, "grad_norm": 3.015625, "learning_rate": 9.265505839688551e-07, "loss": 0.7595084190368653, "num_tokens": 1697696347.0, "step": 13900 }, { "epoch": 0.018559109162760188, "grad_norm": 2.8125, "learning_rate": 9.278838461948697e-07, "loss": 0.7551045894622803, "num_tokens": 1700095631.0, "step": 13920 }, { "epoch": 0.01858577454948829, "grad_norm": 3.109375, "learning_rate": 9.292171084208843e-07, "loss": 0.7540676593780518, "num_tokens": 1702601676.0, "step": 13940 }, { "epoch": 0.018612439936216394, "grad_norm": 2.8125, "learning_rate": 9.30550370646899e-07, "loss": 0.7460041046142578, "num_tokens": 1705065847.0, "step": 13960 }, { "epoch": 0.018639105322944497, "grad_norm": 2.6875, "learning_rate": 9.318836328729135e-07, "loss": 0.7396134376525879, "num_tokens": 1707396441.0, "step": 13980 }, { "epoch": 0.018665770709672604, "grad_norm": 2.515625, "learning_rate": 9.332168950989282e-07, "loss": 0.7632912158966064, "num_tokens": 1709817155.0, "step": 14000 }, { "epoch": 0.018692436096400707, "grad_norm": 2.671875, "learning_rate": 9.345501573249426e-07, "loss": 0.7709494590759277, "num_tokens": 1712064395.0, "step": 14020 }, { "epoch": 0.01871910148312881, "grad_norm": 2.96875, "learning_rate": 9.358834195509573e-07, "loss": 0.752315616607666, "num_tokens": 1714365729.0, "step": 14040 }, { "epoch": 0.018745766869856913, "grad_norm": 3.171875, "learning_rate": 9.372166817769719e-07, "loss": 0.7640792846679687, "num_tokens": 1716589167.0, "step": 14060 }, { "epoch": 0.018772432256585016, "grad_norm": 2.8125, "learning_rate": 9.385499440029866e-07, "loss": 0.7506708145141602, "num_tokens": 1719002930.0, "step": 14080 }, { "epoch": 0.018799097643313122, "grad_norm": 3.28125, "learning_rate": 9.398832062290011e-07, "loss": 0.7624103546142578, "num_tokens": 1721591943.0, "step": 14100 }, { "epoch": 0.018825763030041225, "grad_norm": 3.078125, "learning_rate": 9.412164684550158e-07, "loss": 0.7459580421447753, "num_tokens": 1724160102.0, "step": 14120 }, { "epoch": 0.01885242841676933, "grad_norm": 2.9375, "learning_rate": 9.425497306810303e-07, "loss": 0.7569424629211425, "num_tokens": 1726744508.0, "step": 14140 }, { "epoch": 0.01887909380349743, "grad_norm": 3.0625, "learning_rate": 9.43882992907045e-07, "loss": 0.7558628559112549, "num_tokens": 1729091337.0, "step": 14160 }, { "epoch": 0.018905759190225534, "grad_norm": 3.296875, "learning_rate": 9.452162551330596e-07, "loss": 0.7451479434967041, "num_tokens": 1731393083.0, "step": 14180 }, { "epoch": 0.01893242457695364, "grad_norm": 2.78125, "learning_rate": 9.465495173590743e-07, "loss": 0.7244653701782227, "num_tokens": 1733551149.0, "step": 14200 }, { "epoch": 0.018959089963681744, "grad_norm": 2.6875, "learning_rate": 9.478827795850888e-07, "loss": 0.7559180736541748, "num_tokens": 1736012946.0, "step": 14220 }, { "epoch": 0.018985755350409847, "grad_norm": 2.953125, "learning_rate": 9.492160418111035e-07, "loss": 0.7658005237579346, "num_tokens": 1738351054.0, "step": 14240 }, { "epoch": 0.01901242073713795, "grad_norm": 2.65625, "learning_rate": 9.50549304037118e-07, "loss": 0.7448724746704102, "num_tokens": 1740728080.0, "step": 14260 }, { "epoch": 0.019039086123866053, "grad_norm": 2.6875, "learning_rate": 9.518825662631327e-07, "loss": 0.7645900249481201, "num_tokens": 1743166842.0, "step": 14280 }, { "epoch": 0.01906575151059416, "grad_norm": 3.515625, "learning_rate": 9.532158284891473e-07, "loss": 0.7633172035217285, "num_tokens": 1745698901.0, "step": 14300 }, { "epoch": 0.019092416897322263, "grad_norm": 2.640625, "learning_rate": 9.54549090715162e-07, "loss": 0.7950784683227539, "num_tokens": 1748203368.0, "step": 14320 }, { "epoch": 0.019119082284050366, "grad_norm": 3.421875, "learning_rate": 9.558823529411764e-07, "loss": 0.7538244724273682, "num_tokens": 1750762163.0, "step": 14340 }, { "epoch": 0.01914574767077847, "grad_norm": 2.8125, "learning_rate": 9.57215615167191e-07, "loss": 0.7653398990631104, "num_tokens": 1753361766.0, "step": 14360 }, { "epoch": 0.01917241305750657, "grad_norm": 2.71875, "learning_rate": 9.585488773932058e-07, "loss": 0.7472916603088379, "num_tokens": 1755905883.0, "step": 14380 }, { "epoch": 0.019199078444234678, "grad_norm": 2.90625, "learning_rate": 9.598821396192205e-07, "loss": 0.7192000865936279, "num_tokens": 1758150338.0, "step": 14400 }, { "epoch": 0.01922574383096278, "grad_norm": 3.375, "learning_rate": 9.61215401845235e-07, "loss": 0.7450064182281494, "num_tokens": 1760575722.0, "step": 14420 }, { "epoch": 0.019252409217690884, "grad_norm": 3.046875, "learning_rate": 9.625486640712497e-07, "loss": 0.7398876190185547, "num_tokens": 1762880761.0, "step": 14440 }, { "epoch": 0.019279074604418987, "grad_norm": 2.625, "learning_rate": 9.638819262972644e-07, "loss": 0.7522184371948242, "num_tokens": 1765318624.0, "step": 14460 }, { "epoch": 0.01930573999114709, "grad_norm": 2.359375, "learning_rate": 9.652151885232789e-07, "loss": 0.7355149745941162, "num_tokens": 1767587701.0, "step": 14480 }, { "epoch": 0.019332405377875197, "grad_norm": 2.34375, "learning_rate": 9.665484507492936e-07, "loss": 0.7496234893798828, "num_tokens": 1769943333.0, "step": 14500 }, { "epoch": 0.0193590707646033, "grad_norm": 2.890625, "learning_rate": 9.67881712975308e-07, "loss": 0.7369945526123047, "num_tokens": 1772276124.0, "step": 14520 }, { "epoch": 0.019385736151331403, "grad_norm": 3.109375, "learning_rate": 9.692149752013228e-07, "loss": 0.780247974395752, "num_tokens": 1774748450.0, "step": 14540 }, { "epoch": 0.019412401538059506, "grad_norm": 3.0625, "learning_rate": 9.705482374273373e-07, "loss": 0.7537062644958497, "num_tokens": 1777366256.0, "step": 14560 }, { "epoch": 0.01943906692478761, "grad_norm": 2.390625, "learning_rate": 9.71881499653352e-07, "loss": 0.7539607524871826, "num_tokens": 1779710454.0, "step": 14580 }, { "epoch": 0.019465732311515715, "grad_norm": 2.40625, "learning_rate": 9.732147618793664e-07, "loss": 0.7347469329833984, "num_tokens": 1782133340.0, "step": 14600 }, { "epoch": 0.01949239769824382, "grad_norm": 2.65625, "learning_rate": 9.745480241053811e-07, "loss": 0.7478550910949707, "num_tokens": 1784659186.0, "step": 14620 }, { "epoch": 0.01951906308497192, "grad_norm": 2.765625, "learning_rate": 9.758812863313956e-07, "loss": 0.7378473281860352, "num_tokens": 1787253600.0, "step": 14640 }, { "epoch": 0.019545728471700025, "grad_norm": 2.859375, "learning_rate": 9.772145485574103e-07, "loss": 0.7411347389221191, "num_tokens": 1789752955.0, "step": 14660 }, { "epoch": 0.019572393858428128, "grad_norm": 3.140625, "learning_rate": 9.78547810783425e-07, "loss": 0.7471534252166748, "num_tokens": 1792195936.0, "step": 14680 }, { "epoch": 0.019599059245156234, "grad_norm": 2.84375, "learning_rate": 9.798810730094395e-07, "loss": 0.7416052341461181, "num_tokens": 1794719022.0, "step": 14700 }, { "epoch": 0.019625724631884337, "grad_norm": 3.078125, "learning_rate": 9.812143352354542e-07, "loss": 0.7485010623931885, "num_tokens": 1797136047.0, "step": 14720 }, { "epoch": 0.01965239001861244, "grad_norm": 2.453125, "learning_rate": 9.82547597461469e-07, "loss": 0.733439588546753, "num_tokens": 1799686881.0, "step": 14740 }, { "epoch": 0.019679055405340543, "grad_norm": 2.40625, "learning_rate": 9.838808596874834e-07, "loss": 0.73398756980896, "num_tokens": 1802100168.0, "step": 14760 }, { "epoch": 0.019705720792068646, "grad_norm": 2.71875, "learning_rate": 9.852141219134981e-07, "loss": 0.7410391330718994, "num_tokens": 1804423980.0, "step": 14780 }, { "epoch": 0.019732386178796753, "grad_norm": 2.734375, "learning_rate": 9.865473841395126e-07, "loss": 0.7326507568359375, "num_tokens": 1806771354.0, "step": 14800 }, { "epoch": 0.019759051565524856, "grad_norm": 2.640625, "learning_rate": 9.878806463655273e-07, "loss": 0.7452518463134765, "num_tokens": 1809191121.0, "step": 14820 }, { "epoch": 0.01978571695225296, "grad_norm": 2.265625, "learning_rate": 9.892139085915418e-07, "loss": 0.7191637992858887, "num_tokens": 1811608385.0, "step": 14840 }, { "epoch": 0.019812382338981062, "grad_norm": 3.125, "learning_rate": 9.905471708175565e-07, "loss": 0.7532097816467285, "num_tokens": 1814282347.0, "step": 14860 }, { "epoch": 0.019839047725709165, "grad_norm": 2.828125, "learning_rate": 9.91880433043571e-07, "loss": 0.7618691444396972, "num_tokens": 1817085226.0, "step": 14880 }, { "epoch": 0.019865713112437268, "grad_norm": 2.609375, "learning_rate": 9.932136952695857e-07, "loss": 0.7427017211914062, "num_tokens": 1819567369.0, "step": 14900 }, { "epoch": 0.019892378499165374, "grad_norm": 2.609375, "learning_rate": 9.945469574956002e-07, "loss": 0.7273087978363038, "num_tokens": 1821991201.0, "step": 14920 }, { "epoch": 0.019919043885893477, "grad_norm": 2.546875, "learning_rate": 9.958802197216149e-07, "loss": 0.7460312366485595, "num_tokens": 1824418816.0, "step": 14940 }, { "epoch": 0.01994570927262158, "grad_norm": 2.453125, "learning_rate": 9.972134819476296e-07, "loss": 0.7169840812683106, "num_tokens": 1826750114.0, "step": 14960 }, { "epoch": 0.019972374659349684, "grad_norm": 2.625, "learning_rate": 9.985467441736443e-07, "loss": 0.7195287704467773, "num_tokens": 1829247338.0, "step": 14980 }, { "epoch": 0.019999040046077787, "grad_norm": 2.46875, "learning_rate": 9.998800063996588e-07, "loss": 0.7292281150817871, "num_tokens": 1831746139.0, "step": 15000 }, { "epoch": 0.020025705432805893, "grad_norm": 2.578125, "learning_rate": 1.0012132686256735e-06, "loss": 0.6936606407165528, "num_tokens": 1834174821.0, "step": 15020 }, { "epoch": 0.020052370819533996, "grad_norm": 3.203125, "learning_rate": 1.002546530851688e-06, "loss": 0.7719440460205078, "num_tokens": 1836724676.0, "step": 15040 }, { "epoch": 0.0200790362062621, "grad_norm": 2.5625, "learning_rate": 1.0038797930777027e-06, "loss": 0.7404590129852295, "num_tokens": 1838998981.0, "step": 15060 }, { "epoch": 0.020105701592990202, "grad_norm": 2.984375, "learning_rate": 1.0052130553037172e-06, "loss": 0.7531965732574463, "num_tokens": 1841634893.0, "step": 15080 }, { "epoch": 0.020132366979718305, "grad_norm": 2.578125, "learning_rate": 1.0065463175297319e-06, "loss": 0.7368062973022461, "num_tokens": 1844188107.0, "step": 15100 }, { "epoch": 0.020159032366446412, "grad_norm": 2.453125, "learning_rate": 1.0078795797557464e-06, "loss": 0.7254631042480468, "num_tokens": 1846602913.0, "step": 15120 }, { "epoch": 0.020185697753174515, "grad_norm": 2.78125, "learning_rate": 1.009212841981761e-06, "loss": 0.7583575248718262, "num_tokens": 1849212968.0, "step": 15140 }, { "epoch": 0.020212363139902618, "grad_norm": 2.71875, "learning_rate": 1.0105461042077755e-06, "loss": 0.7344903945922852, "num_tokens": 1851791743.0, "step": 15160 }, { "epoch": 0.02023902852663072, "grad_norm": 3.015625, "learning_rate": 1.0118793664337903e-06, "loss": 0.7438870429992676, "num_tokens": 1854216211.0, "step": 15180 }, { "epoch": 0.020265693913358824, "grad_norm": 3.03125, "learning_rate": 1.013212628659805e-06, "loss": 0.7337470054626465, "num_tokens": 1856728098.0, "step": 15200 }, { "epoch": 0.02029235930008693, "grad_norm": 2.421875, "learning_rate": 1.0145458908858194e-06, "loss": 0.7278183460235595, "num_tokens": 1859202403.0, "step": 15220 }, { "epoch": 0.020319024686815033, "grad_norm": 3.109375, "learning_rate": 1.0158791531118341e-06, "loss": 0.7488512992858887, "num_tokens": 1861837510.0, "step": 15240 }, { "epoch": 0.020345690073543136, "grad_norm": 2.53125, "learning_rate": 1.0172124153378488e-06, "loss": 0.714885139465332, "num_tokens": 1864135251.0, "step": 15260 }, { "epoch": 0.02037235546027124, "grad_norm": 2.90625, "learning_rate": 1.0185456775638633e-06, "loss": 0.7209298133850097, "num_tokens": 1866691589.0, "step": 15280 }, { "epoch": 0.020399020846999343, "grad_norm": 2.5, "learning_rate": 1.019878939789878e-06, "loss": 0.7247463226318359, "num_tokens": 1869113165.0, "step": 15300 }, { "epoch": 0.02042568623372745, "grad_norm": 3.078125, "learning_rate": 1.0212122020158925e-06, "loss": 0.727360725402832, "num_tokens": 1871522652.0, "step": 15320 }, { "epoch": 0.020452351620455552, "grad_norm": 2.75, "learning_rate": 1.0225454642419072e-06, "loss": 0.7468537807464599, "num_tokens": 1873858649.0, "step": 15340 }, { "epoch": 0.020479017007183655, "grad_norm": 2.59375, "learning_rate": 1.0238787264679217e-06, "loss": 0.724464750289917, "num_tokens": 1876086983.0, "step": 15360 }, { "epoch": 0.020505682393911758, "grad_norm": 2.546875, "learning_rate": 1.0252119886939364e-06, "loss": 0.733491325378418, "num_tokens": 1878626112.0, "step": 15380 }, { "epoch": 0.02053234778063986, "grad_norm": 2.375, "learning_rate": 1.026545250919951e-06, "loss": 0.7224382400512696, "num_tokens": 1881170811.0, "step": 15400 }, { "epoch": 0.020559013167367968, "grad_norm": 2.40625, "learning_rate": 1.0278785131459656e-06, "loss": 0.7338397979736329, "num_tokens": 1883611220.0, "step": 15420 }, { "epoch": 0.02058567855409607, "grad_norm": 2.625, "learning_rate": 1.02921177537198e-06, "loss": 0.7250012397766114, "num_tokens": 1885850859.0, "step": 15440 }, { "epoch": 0.020612343940824174, "grad_norm": 3.140625, "learning_rate": 1.0305450375979948e-06, "loss": 0.7307353973388672, "num_tokens": 1888116042.0, "step": 15460 }, { "epoch": 0.020639009327552277, "grad_norm": 2.421875, "learning_rate": 1.0318782998240095e-06, "loss": 0.7330545425415039, "num_tokens": 1890794433.0, "step": 15480 }, { "epoch": 0.02066567471428038, "grad_norm": 3.15625, "learning_rate": 1.033211562050024e-06, "loss": 0.7519386291503907, "num_tokens": 1893349339.0, "step": 15500 }, { "epoch": 0.020692340101008486, "grad_norm": 2.4375, "learning_rate": 1.0345448242760387e-06, "loss": 0.7108519554138184, "num_tokens": 1895787796.0, "step": 15520 }, { "epoch": 0.02071900548773659, "grad_norm": 2.625, "learning_rate": 1.0358780865020534e-06, "loss": 0.7193076133728027, "num_tokens": 1898187619.0, "step": 15540 }, { "epoch": 0.020745670874464692, "grad_norm": 2.421875, "learning_rate": 1.0372113487280679e-06, "loss": 0.7231109619140625, "num_tokens": 1900861786.0, "step": 15560 }, { "epoch": 0.020772336261192795, "grad_norm": 2.78125, "learning_rate": 1.0385446109540826e-06, "loss": 0.7371293544769287, "num_tokens": 1903261040.0, "step": 15580 }, { "epoch": 0.0207990016479209, "grad_norm": 2.6875, "learning_rate": 1.039877873180097e-06, "loss": 0.745411729812622, "num_tokens": 1905850546.0, "step": 15600 }, { "epoch": 0.020825667034649005, "grad_norm": 2.5, "learning_rate": 1.0412111354061118e-06, "loss": 0.7009058952331543, "num_tokens": 1908289674.0, "step": 15620 }, { "epoch": 0.020852332421377108, "grad_norm": 2.453125, "learning_rate": 1.0425443976321263e-06, "loss": 0.7388247489929199, "num_tokens": 1910868452.0, "step": 15640 }, { "epoch": 0.02087899780810521, "grad_norm": 2.078125, "learning_rate": 1.043877659858141e-06, "loss": 0.7165581226348877, "num_tokens": 1913463915.0, "step": 15660 }, { "epoch": 0.020905663194833314, "grad_norm": 2.515625, "learning_rate": 1.0452109220841555e-06, "loss": 0.7286765098571777, "num_tokens": 1915804844.0, "step": 15680 }, { "epoch": 0.020932328581561417, "grad_norm": 2.640625, "learning_rate": 1.0465441843101702e-06, "loss": 0.7329906463623047, "num_tokens": 1918273325.0, "step": 15700 }, { "epoch": 0.020958993968289524, "grad_norm": 2.78125, "learning_rate": 1.0478774465361847e-06, "loss": 0.7172746181488037, "num_tokens": 1920778629.0, "step": 15720 }, { "epoch": 0.020985659355017627, "grad_norm": 2.390625, "learning_rate": 1.0492107087621994e-06, "loss": 0.7232593059539795, "num_tokens": 1923186722.0, "step": 15740 }, { "epoch": 0.02101232474174573, "grad_norm": 2.625, "learning_rate": 1.050543970988214e-06, "loss": 0.7319232940673828, "num_tokens": 1925285569.0, "step": 15760 }, { "epoch": 0.021038990128473833, "grad_norm": 2.421875, "learning_rate": 1.0518772332142288e-06, "loss": 0.7353487014770508, "num_tokens": 1927831630.0, "step": 15780 }, { "epoch": 0.021065655515201936, "grad_norm": 3.234375, "learning_rate": 1.0532104954402432e-06, "loss": 0.7233832359313965, "num_tokens": 1930294735.0, "step": 15800 }, { "epoch": 0.021092320901930042, "grad_norm": 2.125, "learning_rate": 1.054543757666258e-06, "loss": 0.7321640491485596, "num_tokens": 1932734074.0, "step": 15820 }, { "epoch": 0.021118986288658145, "grad_norm": 2.734375, "learning_rate": 1.0558770198922726e-06, "loss": 0.733675193786621, "num_tokens": 1935233917.0, "step": 15840 }, { "epoch": 0.02114565167538625, "grad_norm": 3.03125, "learning_rate": 1.0572102821182871e-06, "loss": 0.7142396926879883, "num_tokens": 1937714164.0, "step": 15860 }, { "epoch": 0.02117231706211435, "grad_norm": 2.359375, "learning_rate": 1.0585435443443018e-06, "loss": 0.734861183166504, "num_tokens": 1939945577.0, "step": 15880 }, { "epoch": 0.021198982448842454, "grad_norm": 2.65625, "learning_rate": 1.0598768065703163e-06, "loss": 0.7322462558746338, "num_tokens": 1942579206.0, "step": 15900 }, { "epoch": 0.02122564783557056, "grad_norm": 2.328125, "learning_rate": 1.061210068796331e-06, "loss": 0.7221770286560059, "num_tokens": 1945038615.0, "step": 15920 }, { "epoch": 0.021252313222298664, "grad_norm": 2.359375, "learning_rate": 1.0625433310223455e-06, "loss": 0.7374361991882324, "num_tokens": 1947313481.0, "step": 15940 }, { "epoch": 0.021278978609026767, "grad_norm": 2.234375, "learning_rate": 1.0638765932483602e-06, "loss": 0.7130237579345703, "num_tokens": 1949980081.0, "step": 15960 }, { "epoch": 0.02130564399575487, "grad_norm": 2.375, "learning_rate": 1.0652098554743747e-06, "loss": 0.6955914497375488, "num_tokens": 1952336664.0, "step": 15980 }, { "epoch": 0.021332309382482973, "grad_norm": 2.390625, "learning_rate": 1.0665431177003894e-06, "loss": 0.7387112617492676, "num_tokens": 1954835919.0, "step": 16000 }, { "epoch": 0.02135897476921108, "grad_norm": 2.5, "learning_rate": 1.067876379926404e-06, "loss": 0.7186610221862793, "num_tokens": 1957247339.0, "step": 16020 }, { "epoch": 0.021385640155939183, "grad_norm": 3.21875, "learning_rate": 1.0692096421524186e-06, "loss": 0.7157442092895507, "num_tokens": 1959638561.0, "step": 16040 }, { "epoch": 0.021412305542667286, "grad_norm": 2.609375, "learning_rate": 1.0705429043784333e-06, "loss": 0.7122097015380859, "num_tokens": 1961814165.0, "step": 16060 }, { "epoch": 0.02143897092939539, "grad_norm": 3.0625, "learning_rate": 1.071876166604448e-06, "loss": 0.715614128112793, "num_tokens": 1964355272.0, "step": 16080 }, { "epoch": 0.02146563631612349, "grad_norm": 2.421875, "learning_rate": 1.0732094288304625e-06, "loss": 0.711142110824585, "num_tokens": 1966908618.0, "step": 16100 }, { "epoch": 0.021492301702851595, "grad_norm": 2.59375, "learning_rate": 1.0745426910564772e-06, "loss": 0.7527265071868896, "num_tokens": 1969313315.0, "step": 16120 }, { "epoch": 0.0215189670895797, "grad_norm": 2.921875, "learning_rate": 1.0758759532824917e-06, "loss": 0.7025959014892578, "num_tokens": 1971817494.0, "step": 16140 }, { "epoch": 0.021545632476307804, "grad_norm": 2.65625, "learning_rate": 1.0772092155085064e-06, "loss": 0.7053061962127686, "num_tokens": 1974352141.0, "step": 16160 }, { "epoch": 0.021572297863035907, "grad_norm": 2.375, "learning_rate": 1.0785424777345209e-06, "loss": 0.727138614654541, "num_tokens": 1976744766.0, "step": 16180 }, { "epoch": 0.02159896324976401, "grad_norm": 2.484375, "learning_rate": 1.0798757399605356e-06, "loss": 0.6933864116668701, "num_tokens": 1979223892.0, "step": 16200 }, { "epoch": 0.021625628636492113, "grad_norm": 2.5625, "learning_rate": 1.08120900218655e-06, "loss": 0.722324275970459, "num_tokens": 1981579615.0, "step": 16220 }, { "epoch": 0.02165229402322022, "grad_norm": 2.28125, "learning_rate": 1.0825422644125648e-06, "loss": 0.7101415634155274, "num_tokens": 1983932981.0, "step": 16240 }, { "epoch": 0.021678959409948323, "grad_norm": 2.546875, "learning_rate": 1.0838755266385793e-06, "loss": 0.7340346813201905, "num_tokens": 1986422045.0, "step": 16260 }, { "epoch": 0.021705624796676426, "grad_norm": 2.890625, "learning_rate": 1.085208788864594e-06, "loss": 0.7194217681884766, "num_tokens": 1989092430.0, "step": 16280 }, { "epoch": 0.02173229018340453, "grad_norm": 2.546875, "learning_rate": 1.0865420510906087e-06, "loss": 0.6966375827789306, "num_tokens": 1991428237.0, "step": 16300 }, { "epoch": 0.021758955570132632, "grad_norm": 2.734375, "learning_rate": 1.0878753133166232e-06, "loss": 0.7074188709259033, "num_tokens": 1993989060.0, "step": 16320 }, { "epoch": 0.02178562095686074, "grad_norm": 2.25, "learning_rate": 1.0892085755426379e-06, "loss": 0.7117795944213867, "num_tokens": 1996537245.0, "step": 16340 }, { "epoch": 0.02181228634358884, "grad_norm": 2.84375, "learning_rate": 1.0905418377686526e-06, "loss": 0.7240973472595215, "num_tokens": 1998775283.0, "step": 16360 }, { "epoch": 0.021838951730316945, "grad_norm": 2.40625, "learning_rate": 1.091875099994667e-06, "loss": 0.7304726600646972, "num_tokens": 2001158777.0, "step": 16380 }, { "epoch": 0.021865617117045048, "grad_norm": 2.515625, "learning_rate": 1.0932083622206817e-06, "loss": 0.7145492553710937, "num_tokens": 2003439156.0, "step": 16400 }, { "epoch": 0.02189228250377315, "grad_norm": 2.75, "learning_rate": 1.0945416244466962e-06, "loss": 0.7270881652832031, "num_tokens": 2005912457.0, "step": 16420 }, { "epoch": 0.021918947890501257, "grad_norm": 2.484375, "learning_rate": 1.095874886672711e-06, "loss": 0.7021062850952149, "num_tokens": 2008388283.0, "step": 16440 }, { "epoch": 0.02194561327722936, "grad_norm": 2.75, "learning_rate": 1.0972081488987254e-06, "loss": 0.7036767959594726, "num_tokens": 2010674736.0, "step": 16460 }, { "epoch": 0.021972278663957463, "grad_norm": 2.765625, "learning_rate": 1.0985414111247401e-06, "loss": 0.7263028621673584, "num_tokens": 2013226893.0, "step": 16480 }, { "epoch": 0.021998944050685566, "grad_norm": 2.421875, "learning_rate": 1.0998746733507546e-06, "loss": 0.712401294708252, "num_tokens": 2015646865.0, "step": 16500 }, { "epoch": 0.02202560943741367, "grad_norm": 2.59375, "learning_rate": 1.1012079355767693e-06, "loss": 0.7196298599243164, "num_tokens": 2018066006.0, "step": 16520 }, { "epoch": 0.022052274824141776, "grad_norm": 2.34375, "learning_rate": 1.1025411978027838e-06, "loss": 0.7005767822265625, "num_tokens": 2020441338.0, "step": 16540 }, { "epoch": 0.02207894021086988, "grad_norm": 2.890625, "learning_rate": 1.1038744600287985e-06, "loss": 0.721629524230957, "num_tokens": 2022910027.0, "step": 16560 }, { "epoch": 0.022105605597597982, "grad_norm": 2.21875, "learning_rate": 1.1052077222548132e-06, "loss": 0.6968272686004638, "num_tokens": 2025384522.0, "step": 16580 }, { "epoch": 0.022132270984326085, "grad_norm": 2.546875, "learning_rate": 1.1065409844808277e-06, "loss": 0.7219696521759034, "num_tokens": 2027986814.0, "step": 16600 }, { "epoch": 0.022158936371054188, "grad_norm": 2.625, "learning_rate": 1.1078742467068424e-06, "loss": 0.7318018913269043, "num_tokens": 2030669500.0, "step": 16620 }, { "epoch": 0.022185601757782294, "grad_norm": 2.71875, "learning_rate": 1.1092075089328571e-06, "loss": 0.7309488296508789, "num_tokens": 2033254646.0, "step": 16640 }, { "epoch": 0.022212267144510398, "grad_norm": 2.140625, "learning_rate": 1.1105407711588716e-06, "loss": 0.7170385360717774, "num_tokens": 2035854655.0, "step": 16660 }, { "epoch": 0.0222389325312385, "grad_norm": 2.296875, "learning_rate": 1.1118740333848863e-06, "loss": 0.7180422782897949, "num_tokens": 2038251240.0, "step": 16680 }, { "epoch": 0.022265597917966604, "grad_norm": 2.46875, "learning_rate": 1.1132072956109008e-06, "loss": 0.6953291893005371, "num_tokens": 2040579150.0, "step": 16700 }, { "epoch": 0.022292263304694707, "grad_norm": 2.703125, "learning_rate": 1.1145405578369155e-06, "loss": 0.7173888206481933, "num_tokens": 2043016961.0, "step": 16720 }, { "epoch": 0.022318928691422813, "grad_norm": 2.484375, "learning_rate": 1.11587382006293e-06, "loss": 0.7109296798706055, "num_tokens": 2045421169.0, "step": 16740 }, { "epoch": 0.022345594078150916, "grad_norm": 2.65625, "learning_rate": 1.1172070822889447e-06, "loss": 0.6890897750854492, "num_tokens": 2047890130.0, "step": 16760 }, { "epoch": 0.02237225946487902, "grad_norm": 2.5, "learning_rate": 1.1185403445149592e-06, "loss": 0.7220746994018554, "num_tokens": 2050335841.0, "step": 16780 }, { "epoch": 0.022398924851607122, "grad_norm": 2.640625, "learning_rate": 1.1198736067409739e-06, "loss": 0.7244638442993164, "num_tokens": 2053015520.0, "step": 16800 }, { "epoch": 0.022425590238335225, "grad_norm": 2.4375, "learning_rate": 1.1212068689669884e-06, "loss": 0.6938535690307617, "num_tokens": 2055399196.0, "step": 16820 }, { "epoch": 0.022452255625063332, "grad_norm": 3.0625, "learning_rate": 1.122540131193003e-06, "loss": 0.731321144104004, "num_tokens": 2057819727.0, "step": 16840 }, { "epoch": 0.022478921011791435, "grad_norm": 2.34375, "learning_rate": 1.1238733934190178e-06, "loss": 0.6994834899902344, "num_tokens": 2060291578.0, "step": 16860 }, { "epoch": 0.022505586398519538, "grad_norm": 2.71875, "learning_rate": 1.1252066556450325e-06, "loss": 0.7097598552703858, "num_tokens": 2062540313.0, "step": 16880 }, { "epoch": 0.02253225178524764, "grad_norm": 2.828125, "learning_rate": 1.126539917871047e-06, "loss": 0.712641429901123, "num_tokens": 2065093466.0, "step": 16900 }, { "epoch": 0.022558917171975744, "grad_norm": 2.703125, "learning_rate": 1.1278731800970617e-06, "loss": 0.7198478698730468, "num_tokens": 2067530942.0, "step": 16920 }, { "epoch": 0.02258558255870385, "grad_norm": 2.59375, "learning_rate": 1.1292064423230761e-06, "loss": 0.7130277633666993, "num_tokens": 2070209805.0, "step": 16940 }, { "epoch": 0.022612247945431953, "grad_norm": 2.0625, "learning_rate": 1.1305397045490909e-06, "loss": 0.7038619995117188, "num_tokens": 2072583964.0, "step": 16960 }, { "epoch": 0.022638913332160056, "grad_norm": 2.078125, "learning_rate": 1.1318729667751053e-06, "loss": 0.7376633644104004, "num_tokens": 2075126061.0, "step": 16980 }, { "epoch": 0.02266557871888816, "grad_norm": 2.453125, "learning_rate": 1.13320622900112e-06, "loss": 0.7096234321594238, "num_tokens": 2077635729.0, "step": 17000 }, { "epoch": 0.022692244105616263, "grad_norm": 2.5625, "learning_rate": 1.1345394912271345e-06, "loss": 0.7323219299316406, "num_tokens": 2080164618.0, "step": 17020 }, { "epoch": 0.02271890949234437, "grad_norm": 2.84375, "learning_rate": 1.1358727534531492e-06, "loss": 0.6878514289855957, "num_tokens": 2082531392.0, "step": 17040 }, { "epoch": 0.022745574879072472, "grad_norm": 2.84375, "learning_rate": 1.1372060156791637e-06, "loss": 0.6832796573638916, "num_tokens": 2084657193.0, "step": 17060 }, { "epoch": 0.022772240265800575, "grad_norm": 2.375, "learning_rate": 1.1385392779051784e-06, "loss": 0.7165525913238525, "num_tokens": 2087103900.0, "step": 17080 }, { "epoch": 0.022798905652528678, "grad_norm": 2.3125, "learning_rate": 1.1398725401311931e-06, "loss": 0.7028615951538086, "num_tokens": 2089375392.0, "step": 17100 }, { "epoch": 0.02282557103925678, "grad_norm": 2.296875, "learning_rate": 1.1412058023572076e-06, "loss": 0.7143473625183105, "num_tokens": 2091808269.0, "step": 17120 }, { "epoch": 0.022852236425984888, "grad_norm": 2.640625, "learning_rate": 1.1425390645832223e-06, "loss": 0.6916439056396484, "num_tokens": 2094217004.0, "step": 17140 }, { "epoch": 0.02287890181271299, "grad_norm": 2.828125, "learning_rate": 1.143872326809237e-06, "loss": 0.7028675556182862, "num_tokens": 2096704626.0, "step": 17160 }, { "epoch": 0.022905567199441094, "grad_norm": 2.375, "learning_rate": 1.1452055890352515e-06, "loss": 0.6936882972717285, "num_tokens": 2099046356.0, "step": 17180 }, { "epoch": 0.022932232586169197, "grad_norm": 2.203125, "learning_rate": 1.1465388512612662e-06, "loss": 0.7130374908447266, "num_tokens": 2101634407.0, "step": 17200 }, { "epoch": 0.0229588979728973, "grad_norm": 2.546875, "learning_rate": 1.147872113487281e-06, "loss": 0.7207943439483643, "num_tokens": 2104002812.0, "step": 17220 }, { "epoch": 0.022985563359625406, "grad_norm": 2.21875, "learning_rate": 1.1492053757132954e-06, "loss": 0.6929831504821777, "num_tokens": 2106399627.0, "step": 17240 }, { "epoch": 0.02301222874635351, "grad_norm": 2.734375, "learning_rate": 1.15053863793931e-06, "loss": 0.7063786506652832, "num_tokens": 2108696270.0, "step": 17260 }, { "epoch": 0.023038894133081612, "grad_norm": 2.375, "learning_rate": 1.1518719001653246e-06, "loss": 0.6842693328857422, "num_tokens": 2110849426.0, "step": 17280 }, { "epoch": 0.023065559519809715, "grad_norm": 2.4375, "learning_rate": 1.1532051623913393e-06, "loss": 0.6948348999023437, "num_tokens": 2113412689.0, "step": 17300 }, { "epoch": 0.02309222490653782, "grad_norm": 2.25, "learning_rate": 1.1545384246173538e-06, "loss": 0.7073054313659668, "num_tokens": 2115856114.0, "step": 17320 }, { "epoch": 0.02311889029326592, "grad_norm": 2.890625, "learning_rate": 1.1558716868433685e-06, "loss": 0.702484130859375, "num_tokens": 2118230082.0, "step": 17340 }, { "epoch": 0.023145555679994028, "grad_norm": 2.5625, "learning_rate": 1.157204949069383e-06, "loss": 0.7125682830810547, "num_tokens": 2120696327.0, "step": 17360 }, { "epoch": 0.02317222106672213, "grad_norm": 2.515625, "learning_rate": 1.1585382112953977e-06, "loss": 0.689415454864502, "num_tokens": 2123167717.0, "step": 17380 }, { "epoch": 0.023198886453450234, "grad_norm": 2.9375, "learning_rate": 1.1598714735214122e-06, "loss": 0.7002860069274902, "num_tokens": 2125656378.0, "step": 17400 }, { "epoch": 0.023225551840178337, "grad_norm": 2.375, "learning_rate": 1.1612047357474269e-06, "loss": 0.687868309020996, "num_tokens": 2128011174.0, "step": 17420 }, { "epoch": 0.02325221722690644, "grad_norm": 2.1875, "learning_rate": 1.1625379979734416e-06, "loss": 0.6780417919158935, "num_tokens": 2130443564.0, "step": 17440 }, { "epoch": 0.023278882613634547, "grad_norm": 2.546875, "learning_rate": 1.1638712601994563e-06, "loss": 0.7099736213684082, "num_tokens": 2132965942.0, "step": 17460 }, { "epoch": 0.02330554800036265, "grad_norm": 2.6875, "learning_rate": 1.1652045224254708e-06, "loss": 0.7180842876434326, "num_tokens": 2135374938.0, "step": 17480 }, { "epoch": 0.023332213387090753, "grad_norm": 2.625, "learning_rate": 1.1665377846514855e-06, "loss": 0.70822114944458, "num_tokens": 2137614968.0, "step": 17500 }, { "epoch": 0.023358878773818856, "grad_norm": 2.6875, "learning_rate": 1.1678710468775e-06, "loss": 0.710837459564209, "num_tokens": 2139914593.0, "step": 17520 }, { "epoch": 0.02338554416054696, "grad_norm": 2.453125, "learning_rate": 1.1692043091035147e-06, "loss": 0.7101484298706054, "num_tokens": 2142405783.0, "step": 17540 }, { "epoch": 0.023412209547275065, "grad_norm": 2.46875, "learning_rate": 1.1705375713295291e-06, "loss": 0.7043177604675293, "num_tokens": 2145014272.0, "step": 17560 }, { "epoch": 0.02343887493400317, "grad_norm": 2.46875, "learning_rate": 1.1718708335555438e-06, "loss": 0.6893977165222168, "num_tokens": 2147217961.0, "step": 17580 }, { "epoch": 0.02346554032073127, "grad_norm": 2.265625, "learning_rate": 1.1732040957815583e-06, "loss": 0.7066365718841553, "num_tokens": 2149768466.0, "step": 17600 }, { "epoch": 0.023492205707459374, "grad_norm": 2.515625, "learning_rate": 1.174537358007573e-06, "loss": 0.6930530071258545, "num_tokens": 2152320976.0, "step": 17620 }, { "epoch": 0.023518871094187477, "grad_norm": 2.109375, "learning_rate": 1.1758706202335875e-06, "loss": 0.6864599227905274, "num_tokens": 2154580362.0, "step": 17640 }, { "epoch": 0.023545536480915584, "grad_norm": 2.6875, "learning_rate": 1.1772038824596022e-06, "loss": 0.7111719131469727, "num_tokens": 2157151099.0, "step": 17660 }, { "epoch": 0.023572201867643687, "grad_norm": 2.0625, "learning_rate": 1.178537144685617e-06, "loss": 0.7096261978149414, "num_tokens": 2159558182.0, "step": 17680 }, { "epoch": 0.02359886725437179, "grad_norm": 2.125, "learning_rate": 1.1798704069116314e-06, "loss": 0.7057888984680176, "num_tokens": 2161826859.0, "step": 17700 }, { "epoch": 0.023625532641099893, "grad_norm": 2.375, "learning_rate": 1.1812036691376461e-06, "loss": 0.6794167041778565, "num_tokens": 2164417639.0, "step": 17720 }, { "epoch": 0.023652198027827996, "grad_norm": 2.296875, "learning_rate": 1.1825369313636608e-06, "loss": 0.6980722427368165, "num_tokens": 2166739755.0, "step": 17740 }, { "epoch": 0.023678863414556103, "grad_norm": 2.09375, "learning_rate": 1.1838701935896753e-06, "loss": 0.7018581390380859, "num_tokens": 2169291083.0, "step": 17760 }, { "epoch": 0.023705528801284206, "grad_norm": 2.953125, "learning_rate": 1.18520345581569e-06, "loss": 0.6913335800170899, "num_tokens": 2171644067.0, "step": 17780 }, { "epoch": 0.02373219418801231, "grad_norm": 2.59375, "learning_rate": 1.1865367180417045e-06, "loss": 0.7099779129028321, "num_tokens": 2174014782.0, "step": 17800 }, { "epoch": 0.023758859574740412, "grad_norm": 2.625, "learning_rate": 1.1878699802677192e-06, "loss": 0.6957932472229004, "num_tokens": 2176325597.0, "step": 17820 }, { "epoch": 0.023785524961468515, "grad_norm": 2.484375, "learning_rate": 1.1892032424937337e-06, "loss": 0.6939113140106201, "num_tokens": 2178764931.0, "step": 17840 }, { "epoch": 0.02381219034819662, "grad_norm": 2.703125, "learning_rate": 1.1905365047197484e-06, "loss": 0.7029125213623046, "num_tokens": 2181128106.0, "step": 17860 }, { "epoch": 0.023838855734924724, "grad_norm": 2.203125, "learning_rate": 1.1918697669457629e-06, "loss": 0.7040555000305175, "num_tokens": 2183375371.0, "step": 17880 }, { "epoch": 0.023865521121652827, "grad_norm": 2.390625, "learning_rate": 1.1932030291717776e-06, "loss": 0.6667572975158691, "num_tokens": 2185834835.0, "step": 17900 }, { "epoch": 0.02389218650838093, "grad_norm": 2.75, "learning_rate": 1.194536291397792e-06, "loss": 0.6794042110443115, "num_tokens": 2188176602.0, "step": 17920 }, { "epoch": 0.023918851895109033, "grad_norm": 2.859375, "learning_rate": 1.1958695536238068e-06, "loss": 0.7026186943054199, "num_tokens": 2190678590.0, "step": 17940 }, { "epoch": 0.02394551728183714, "grad_norm": 2.328125, "learning_rate": 1.1972028158498215e-06, "loss": 0.7045265197753906, "num_tokens": 2193308935.0, "step": 17960 }, { "epoch": 0.023972182668565243, "grad_norm": 2.0, "learning_rate": 1.1985360780758362e-06, "loss": 0.6841240882873535, "num_tokens": 2195880127.0, "step": 17980 }, { "epoch": 0.023998848055293346, "grad_norm": 2.453125, "learning_rate": 1.1998693403018507e-06, "loss": 0.6842628479003906, "num_tokens": 2198391092.0, "step": 18000 }, { "epoch": 0.02402551344202145, "grad_norm": 2.078125, "learning_rate": 1.2012026025278654e-06, "loss": 0.6910080909729004, "num_tokens": 2200506843.0, "step": 18020 }, { "epoch": 0.024052178828749552, "grad_norm": 2.78125, "learning_rate": 1.2025358647538799e-06, "loss": 0.6943975448608398, "num_tokens": 2203015854.0, "step": 18040 }, { "epoch": 0.02407884421547766, "grad_norm": 2.40625, "learning_rate": 1.2038691269798946e-06, "loss": 0.6689146041870118, "num_tokens": 2205353261.0, "step": 18060 }, { "epoch": 0.02410550960220576, "grad_norm": 2.328125, "learning_rate": 1.205202389205909e-06, "loss": 0.6964692592620849, "num_tokens": 2208053075.0, "step": 18080 }, { "epoch": 0.024132174988933865, "grad_norm": 2.984375, "learning_rate": 1.2065356514319238e-06, "loss": 0.6919586181640625, "num_tokens": 2210547327.0, "step": 18100 }, { "epoch": 0.024158840375661968, "grad_norm": 2.21875, "learning_rate": 1.2078689136579382e-06, "loss": 0.7187903881072998, "num_tokens": 2212894191.0, "step": 18120 }, { "epoch": 0.02418550576239007, "grad_norm": 2.390625, "learning_rate": 1.209202175883953e-06, "loss": 0.6938411712646484, "num_tokens": 2215315085.0, "step": 18140 }, { "epoch": 0.024212171149118177, "grad_norm": 2.5, "learning_rate": 1.2105354381099674e-06, "loss": 0.7058948993682861, "num_tokens": 2218053551.0, "step": 18160 }, { "epoch": 0.02423883653584628, "grad_norm": 2.34375, "learning_rate": 1.2118687003359821e-06, "loss": 0.6861472129821777, "num_tokens": 2220527866.0, "step": 18180 }, { "epoch": 0.024265501922574383, "grad_norm": 2.625, "learning_rate": 1.2132019625619968e-06, "loss": 0.682747745513916, "num_tokens": 2223012595.0, "step": 18200 }, { "epoch": 0.024292167309302486, "grad_norm": 2.5, "learning_rate": 1.2145352247880113e-06, "loss": 0.6768270969390869, "num_tokens": 2225411810.0, "step": 18220 }, { "epoch": 0.02431883269603059, "grad_norm": 2.0, "learning_rate": 1.215868487014026e-06, "loss": 0.6866076469421387, "num_tokens": 2227876467.0, "step": 18240 }, { "epoch": 0.024345498082758696, "grad_norm": 2.109375, "learning_rate": 1.2172017492400407e-06, "loss": 0.6871153831481933, "num_tokens": 2230171877.0, "step": 18260 }, { "epoch": 0.0243721634694868, "grad_norm": 2.625, "learning_rate": 1.2185350114660552e-06, "loss": 0.6829017639160156, "num_tokens": 2232825568.0, "step": 18280 }, { "epoch": 0.024398828856214902, "grad_norm": 2.296875, "learning_rate": 1.21986827369207e-06, "loss": 0.7014649391174317, "num_tokens": 2235611231.0, "step": 18300 }, { "epoch": 0.024425494242943005, "grad_norm": 2.046875, "learning_rate": 1.2212015359180844e-06, "loss": 0.6779420852661133, "num_tokens": 2238125432.0, "step": 18320 }, { "epoch": 0.024452159629671108, "grad_norm": 2.296875, "learning_rate": 1.2225347981440991e-06, "loss": 0.6783919334411621, "num_tokens": 2240522841.0, "step": 18340 }, { "epoch": 0.024478825016399215, "grad_norm": 2.375, "learning_rate": 1.2238680603701136e-06, "loss": 0.7025489330291748, "num_tokens": 2242974222.0, "step": 18360 }, { "epoch": 0.024505490403127318, "grad_norm": 2.375, "learning_rate": 1.2252013225961283e-06, "loss": 0.7001354217529296, "num_tokens": 2245297999.0, "step": 18380 }, { "epoch": 0.02453215578985542, "grad_norm": 2.15625, "learning_rate": 1.2265345848221428e-06, "loss": 0.6823984146118164, "num_tokens": 2247641190.0, "step": 18400 }, { "epoch": 0.024558821176583524, "grad_norm": 2.09375, "learning_rate": 1.2278678470481575e-06, "loss": 0.684727144241333, "num_tokens": 2250192722.0, "step": 18420 }, { "epoch": 0.024585486563311627, "grad_norm": 2.140625, "learning_rate": 1.229201109274172e-06, "loss": 0.670126485824585, "num_tokens": 2252556902.0, "step": 18440 }, { "epoch": 0.02461215195003973, "grad_norm": 2.375, "learning_rate": 1.2305343715001867e-06, "loss": 0.6904696941375732, "num_tokens": 2254873593.0, "step": 18460 }, { "epoch": 0.024638817336767836, "grad_norm": 1.953125, "learning_rate": 1.2318676337262014e-06, "loss": 0.6871781349182129, "num_tokens": 2257532565.0, "step": 18480 }, { "epoch": 0.02466548272349594, "grad_norm": 2.453125, "learning_rate": 1.2332008959522159e-06, "loss": 0.6997519493103027, "num_tokens": 2260020250.0, "step": 18500 }, { "epoch": 0.024692148110224042, "grad_norm": 2.0, "learning_rate": 1.2345341581782306e-06, "loss": 0.673027229309082, "num_tokens": 2262588972.0, "step": 18520 }, { "epoch": 0.024718813496952145, "grad_norm": 2.484375, "learning_rate": 1.2358674204042453e-06, "loss": 0.6802132606506348, "num_tokens": 2264935571.0, "step": 18540 }, { "epoch": 0.02474547888368025, "grad_norm": 2.140625, "learning_rate": 1.23720068263026e-06, "loss": 0.7072573184967041, "num_tokens": 2267228901.0, "step": 18560 }, { "epoch": 0.024772144270408355, "grad_norm": 2.703125, "learning_rate": 1.2385339448562745e-06, "loss": 0.6822986602783203, "num_tokens": 2269853415.0, "step": 18580 }, { "epoch": 0.024798809657136458, "grad_norm": 2.453125, "learning_rate": 1.2398672070822892e-06, "loss": 0.6764789581298828, "num_tokens": 2272094143.0, "step": 18600 }, { "epoch": 0.02482547504386456, "grad_norm": 2.109375, "learning_rate": 1.2412004693083037e-06, "loss": 0.6768601417541504, "num_tokens": 2274568259.0, "step": 18620 }, { "epoch": 0.024852140430592664, "grad_norm": 2.234375, "learning_rate": 1.2425337315343184e-06, "loss": 0.6904479026794433, "num_tokens": 2277007134.0, "step": 18640 }, { "epoch": 0.024878805817320767, "grad_norm": 2.609375, "learning_rate": 1.2438669937603329e-06, "loss": 0.664896821975708, "num_tokens": 2279389608.0, "step": 18660 }, { "epoch": 0.024905471204048873, "grad_norm": 2.4375, "learning_rate": 1.2452002559863476e-06, "loss": 0.6691671371459961, "num_tokens": 2281831744.0, "step": 18680 }, { "epoch": 0.024932136590776977, "grad_norm": 2.15625, "learning_rate": 1.246533518212362e-06, "loss": 0.6833416938781738, "num_tokens": 2284334052.0, "step": 18700 }, { "epoch": 0.02495880197750508, "grad_norm": 2.53125, "learning_rate": 1.2478667804383767e-06, "loss": 0.6970582962036133, "num_tokens": 2287081670.0, "step": 18720 }, { "epoch": 0.024985467364233183, "grad_norm": 2.1875, "learning_rate": 1.2492000426643912e-06, "loss": 0.6807233810424804, "num_tokens": 2289610602.0, "step": 18740 }, { "epoch": 0.025012132750961286, "grad_norm": 2.53125, "learning_rate": 1.250533304890406e-06, "loss": 0.6900970458984375, "num_tokens": 2292036759.0, "step": 18760 }, { "epoch": 0.025038798137689392, "grad_norm": 2.109375, "learning_rate": 1.2518665671164206e-06, "loss": 0.6897945404052734, "num_tokens": 2294494045.0, "step": 18780 }, { "epoch": 0.025065463524417495, "grad_norm": 2.109375, "learning_rate": 1.2531998293424351e-06, "loss": 0.6832863807678222, "num_tokens": 2296840894.0, "step": 18800 }, { "epoch": 0.025092128911145598, "grad_norm": 2.59375, "learning_rate": 1.2545330915684498e-06, "loss": 0.6793441772460938, "num_tokens": 2299254700.0, "step": 18820 }, { "epoch": 0.0251187942978737, "grad_norm": 2.34375, "learning_rate": 1.2558663537944643e-06, "loss": 0.6531376361846923, "num_tokens": 2301628762.0, "step": 18840 }, { "epoch": 0.025145459684601804, "grad_norm": 2.453125, "learning_rate": 1.257199616020479e-06, "loss": 0.6664066791534424, "num_tokens": 2304174757.0, "step": 18860 }, { "epoch": 0.02517212507132991, "grad_norm": 2.8125, "learning_rate": 1.2585328782464937e-06, "loss": 0.6765095710754394, "num_tokens": 2306553301.0, "step": 18880 }, { "epoch": 0.025198790458058014, "grad_norm": 2.109375, "learning_rate": 1.2598661404725082e-06, "loss": 0.6596681594848632, "num_tokens": 2308916733.0, "step": 18900 }, { "epoch": 0.025225455844786117, "grad_norm": 2.296875, "learning_rate": 1.2611994026985227e-06, "loss": 0.6700824737548828, "num_tokens": 2311208814.0, "step": 18920 }, { "epoch": 0.02525212123151422, "grad_norm": 2.234375, "learning_rate": 1.2625326649245376e-06, "loss": 0.6662961006164551, "num_tokens": 2313598080.0, "step": 18940 }, { "epoch": 0.025278786618242323, "grad_norm": 2.09375, "learning_rate": 1.2638659271505521e-06, "loss": 0.6544534683227539, "num_tokens": 2315661899.0, "step": 18960 }, { "epoch": 0.02530545200497043, "grad_norm": 2.25, "learning_rate": 1.2651991893765666e-06, "loss": 0.6736226558685303, "num_tokens": 2318225991.0, "step": 18980 }, { "epoch": 0.025332117391698532, "grad_norm": 2.21875, "learning_rate": 1.2665324516025813e-06, "loss": 0.7013875007629394, "num_tokens": 2320721997.0, "step": 19000 }, { "epoch": 0.025358782778426636, "grad_norm": 2.359375, "learning_rate": 1.267865713828596e-06, "loss": 0.6993733406066894, "num_tokens": 2323275693.0, "step": 19020 }, { "epoch": 0.02538544816515474, "grad_norm": 2.234375, "learning_rate": 1.2691989760546105e-06, "loss": 0.6943709373474121, "num_tokens": 2325884979.0, "step": 19040 }, { "epoch": 0.02541211355188284, "grad_norm": 2.203125, "learning_rate": 1.2705322382806252e-06, "loss": 0.6839425563812256, "num_tokens": 2328450875.0, "step": 19060 }, { "epoch": 0.025438778938610948, "grad_norm": 2.375, "learning_rate": 1.2718655005066397e-06, "loss": 0.6604910373687745, "num_tokens": 2330858847.0, "step": 19080 }, { "epoch": 0.02546544432533905, "grad_norm": 2.203125, "learning_rate": 1.2731987627326544e-06, "loss": 0.6672877311706543, "num_tokens": 2333130443.0, "step": 19100 }, { "epoch": 0.025492109712067154, "grad_norm": 2.296875, "learning_rate": 1.274532024958669e-06, "loss": 0.6635828018188477, "num_tokens": 2335710486.0, "step": 19120 }, { "epoch": 0.025518775098795257, "grad_norm": 2.578125, "learning_rate": 1.2758652871846836e-06, "loss": 0.6737747192382812, "num_tokens": 2338034910.0, "step": 19140 }, { "epoch": 0.02554544048552336, "grad_norm": 2.46875, "learning_rate": 1.277198549410698e-06, "loss": 0.6812642097473145, "num_tokens": 2340826178.0, "step": 19160 }, { "epoch": 0.025572105872251467, "grad_norm": 2.21875, "learning_rate": 1.278531811636713e-06, "loss": 0.6905008316040039, "num_tokens": 2343320859.0, "step": 19180 }, { "epoch": 0.02559877125897957, "grad_norm": 2.171875, "learning_rate": 1.2798650738627275e-06, "loss": 0.6676937103271484, "num_tokens": 2345898492.0, "step": 19200 }, { "epoch": 0.025625436645707673, "grad_norm": 2.296875, "learning_rate": 1.281198336088742e-06, "loss": 0.6828871726989746, "num_tokens": 2348488400.0, "step": 19220 }, { "epoch": 0.025652102032435776, "grad_norm": 2.75, "learning_rate": 1.2825315983147564e-06, "loss": 0.6754508972167969, "num_tokens": 2350919573.0, "step": 19240 }, { "epoch": 0.02567876741916388, "grad_norm": 2.390625, "learning_rate": 1.2838648605407714e-06, "loss": 0.6671476364135742, "num_tokens": 2353465431.0, "step": 19260 }, { "epoch": 0.025705432805891985, "grad_norm": 2.21875, "learning_rate": 1.2851981227667859e-06, "loss": 0.6764956474304199, "num_tokens": 2355742231.0, "step": 19280 }, { "epoch": 0.02573209819262009, "grad_norm": 2.265625, "learning_rate": 1.2865313849928006e-06, "loss": 0.6782982349395752, "num_tokens": 2358044910.0, "step": 19300 }, { "epoch": 0.02575876357934819, "grad_norm": 2.375, "learning_rate": 1.287864647218815e-06, "loss": 0.7010870456695557, "num_tokens": 2360406317.0, "step": 19320 }, { "epoch": 0.025785428966076294, "grad_norm": 2.5, "learning_rate": 1.2891979094448297e-06, "loss": 0.6756677627563477, "num_tokens": 2362765253.0, "step": 19340 }, { "epoch": 0.025812094352804398, "grad_norm": 2.234375, "learning_rate": 1.2905311716708444e-06, "loss": 0.6850276947021484, "num_tokens": 2365143262.0, "step": 19360 }, { "epoch": 0.025838759739532504, "grad_norm": 2.1875, "learning_rate": 1.291864433896859e-06, "loss": 0.656473445892334, "num_tokens": 2367530674.0, "step": 19380 }, { "epoch": 0.025865425126260607, "grad_norm": 2.953125, "learning_rate": 1.2931976961228734e-06, "loss": 0.6667253494262695, "num_tokens": 2369901519.0, "step": 19400 }, { "epoch": 0.02589209051298871, "grad_norm": 2.234375, "learning_rate": 1.2945309583488883e-06, "loss": 0.6688387393951416, "num_tokens": 2372532849.0, "step": 19420 }, { "epoch": 0.025918755899716813, "grad_norm": 2.375, "learning_rate": 1.2958642205749028e-06, "loss": 0.6747601509094239, "num_tokens": 2374808286.0, "step": 19440 }, { "epoch": 0.025945421286444916, "grad_norm": 2.359375, "learning_rate": 1.2971974828009173e-06, "loss": 0.6622570991516114, "num_tokens": 2377395611.0, "step": 19460 }, { "epoch": 0.025972086673173023, "grad_norm": 2.1875, "learning_rate": 1.2985307450269318e-06, "loss": 0.6666168212890625, "num_tokens": 2379780006.0, "step": 19480 }, { "epoch": 0.025998752059901126, "grad_norm": 2.59375, "learning_rate": 1.2998640072529467e-06, "loss": 0.670466423034668, "num_tokens": 2382320906.0, "step": 19500 }, { "epoch": 0.02602541744662923, "grad_norm": 2.265625, "learning_rate": 1.3011972694789612e-06, "loss": 0.665500259399414, "num_tokens": 2384770222.0, "step": 19520 }, { "epoch": 0.026052082833357332, "grad_norm": 2.171875, "learning_rate": 1.3025305317049757e-06, "loss": 0.6697690486907959, "num_tokens": 2387112484.0, "step": 19540 }, { "epoch": 0.026078748220085435, "grad_norm": 2.6875, "learning_rate": 1.3038637939309904e-06, "loss": 0.6834795951843262, "num_tokens": 2389635093.0, "step": 19560 }, { "epoch": 0.02610541360681354, "grad_norm": 2.515625, "learning_rate": 1.305197056157005e-06, "loss": 0.6741299152374267, "num_tokens": 2392177342.0, "step": 19580 }, { "epoch": 0.026132078993541644, "grad_norm": 2.328125, "learning_rate": 1.3065303183830196e-06, "loss": 0.6638293266296387, "num_tokens": 2394550909.0, "step": 19600 }, { "epoch": 0.026158744380269747, "grad_norm": 2.234375, "learning_rate": 1.3078635806090343e-06, "loss": 0.6730448722839355, "num_tokens": 2397272390.0, "step": 19620 }, { "epoch": 0.02618540976699785, "grad_norm": 2.125, "learning_rate": 1.3091968428350488e-06, "loss": 0.6883655548095703, "num_tokens": 2399799843.0, "step": 19640 }, { "epoch": 0.026212075153725953, "grad_norm": 2.0625, "learning_rate": 1.3105301050610637e-06, "loss": 0.6792394638061523, "num_tokens": 2402309685.0, "step": 19660 }, { "epoch": 0.026238740540454056, "grad_norm": 2.296875, "learning_rate": 1.3118633672870782e-06, "loss": 0.6543590545654296, "num_tokens": 2404728096.0, "step": 19680 }, { "epoch": 0.026265405927182163, "grad_norm": 2.0, "learning_rate": 1.3131966295130927e-06, "loss": 0.6743145942687988, "num_tokens": 2407241144.0, "step": 19700 }, { "epoch": 0.026292071313910266, "grad_norm": 2.640625, "learning_rate": 1.3145298917391072e-06, "loss": 0.7073892116546631, "num_tokens": 2409491991.0, "step": 19720 }, { "epoch": 0.02631873670063837, "grad_norm": 2.609375, "learning_rate": 1.315863153965122e-06, "loss": 0.7028937339782715, "num_tokens": 2411921041.0, "step": 19740 }, { "epoch": 0.026345402087366472, "grad_norm": 2.34375, "learning_rate": 1.3171964161911366e-06, "loss": 0.6707496166229248, "num_tokens": 2414309986.0, "step": 19760 }, { "epoch": 0.026372067474094575, "grad_norm": 2.46875, "learning_rate": 1.318529678417151e-06, "loss": 0.6540729999542236, "num_tokens": 2416819771.0, "step": 19780 }, { "epoch": 0.02639873286082268, "grad_norm": 1.9609375, "learning_rate": 1.3198629406431658e-06, "loss": 0.6823947429656982, "num_tokens": 2419279244.0, "step": 19800 }, { "epoch": 0.026425398247550785, "grad_norm": 2.453125, "learning_rate": 1.3211962028691805e-06, "loss": 0.6841766357421875, "num_tokens": 2421908668.0, "step": 19820 }, { "epoch": 0.026452063634278888, "grad_norm": 2.5, "learning_rate": 1.322529465095195e-06, "loss": 0.6766789436340332, "num_tokens": 2424497459.0, "step": 19840 }, { "epoch": 0.02647872902100699, "grad_norm": 2.328125, "learning_rate": 1.3238627273212097e-06, "loss": 0.6979313850402832, "num_tokens": 2426901832.0, "step": 19860 }, { "epoch": 0.026505394407735094, "grad_norm": 2.734375, "learning_rate": 1.3251959895472244e-06, "loss": 0.6722838401794433, "num_tokens": 2429330807.0, "step": 19880 }, { "epoch": 0.0265320597944632, "grad_norm": 2.359375, "learning_rate": 1.3265292517732388e-06, "loss": 0.6727786064147949, "num_tokens": 2431673706.0, "step": 19900 }, { "epoch": 0.026558725181191303, "grad_norm": 2.078125, "learning_rate": 1.3278625139992535e-06, "loss": 0.660490894317627, "num_tokens": 2434135683.0, "step": 19920 }, { "epoch": 0.026585390567919406, "grad_norm": 2.265625, "learning_rate": 1.329195776225268e-06, "loss": 0.6592499256134033, "num_tokens": 2436559363.0, "step": 19940 }, { "epoch": 0.02661205595464751, "grad_norm": 2.25, "learning_rate": 1.3305290384512827e-06, "loss": 0.6955799102783203, "num_tokens": 2439048877.0, "step": 19960 }, { "epoch": 0.026638721341375612, "grad_norm": 2.078125, "learning_rate": 1.3318623006772974e-06, "loss": 0.6386431694030762, "num_tokens": 2441523168.0, "step": 19980 }, { "epoch": 0.02666538672810372, "grad_norm": 1.9921875, "learning_rate": 1.333195562903312e-06, "loss": 0.66282057762146, "num_tokens": 2443794163.0, "step": 20000 }, { "epoch": 0.026692052114831822, "grad_norm": 2.015625, "learning_rate": 1.3345288251293264e-06, "loss": 0.6740588188171387, "num_tokens": 2446343484.0, "step": 20020 }, { "epoch": 0.026718717501559925, "grad_norm": 1.8984375, "learning_rate": 1.3358620873553413e-06, "loss": 0.6750660896301269, "num_tokens": 2448799699.0, "step": 20040 }, { "epoch": 0.026745382888288028, "grad_norm": 2.390625, "learning_rate": 1.3371953495813558e-06, "loss": 0.6807625770568848, "num_tokens": 2451516846.0, "step": 20060 }, { "epoch": 0.02677204827501613, "grad_norm": 2.1875, "learning_rate": 1.3385286118073703e-06, "loss": 0.6701568603515625, "num_tokens": 2453904670.0, "step": 20080 }, { "epoch": 0.026798713661744238, "grad_norm": 2.484375, "learning_rate": 1.339861874033385e-06, "loss": 0.6812762260437012, "num_tokens": 2456352500.0, "step": 20100 }, { "epoch": 0.02682537904847234, "grad_norm": 2.078125, "learning_rate": 1.3411951362593997e-06, "loss": 0.6584670066833496, "num_tokens": 2458797637.0, "step": 20120 }, { "epoch": 0.026852044435200444, "grad_norm": 2.203125, "learning_rate": 1.3425283984854142e-06, "loss": 0.6584927082061768, "num_tokens": 2461035459.0, "step": 20140 }, { "epoch": 0.026878709821928547, "grad_norm": 2.421875, "learning_rate": 1.343861660711429e-06, "loss": 0.6727166652679444, "num_tokens": 2463444973.0, "step": 20160 }, { "epoch": 0.02690537520865665, "grad_norm": 2.6875, "learning_rate": 1.3451949229374434e-06, "loss": 0.6500885009765625, "num_tokens": 2465918902.0, "step": 20180 }, { "epoch": 0.026932040595384756, "grad_norm": 2.625, "learning_rate": 1.346528185163458e-06, "loss": 0.6554742813110351, "num_tokens": 2468318865.0, "step": 20200 }, { "epoch": 0.02695870598211286, "grad_norm": 2.53125, "learning_rate": 1.3478614473894728e-06, "loss": 0.6783402442932129, "num_tokens": 2470762609.0, "step": 20220 }, { "epoch": 0.026985371368840962, "grad_norm": 2.328125, "learning_rate": 1.3491947096154873e-06, "loss": 0.6689219951629639, "num_tokens": 2472988370.0, "step": 20240 }, { "epoch": 0.027012036755569065, "grad_norm": 2.453125, "learning_rate": 1.3505279718415018e-06, "loss": 0.6654265403747559, "num_tokens": 2475695097.0, "step": 20260 }, { "epoch": 0.02703870214229717, "grad_norm": 2.484375, "learning_rate": 1.3518612340675167e-06, "loss": 0.6648274898529053, "num_tokens": 2477989551.0, "step": 20280 }, { "epoch": 0.027065367529025275, "grad_norm": 2.84375, "learning_rate": 1.3531944962935312e-06, "loss": 0.663672399520874, "num_tokens": 2480329362.0, "step": 20300 }, { "epoch": 0.027092032915753378, "grad_norm": 2.03125, "learning_rate": 1.3545277585195457e-06, "loss": 0.6475309371948242, "num_tokens": 2482952299.0, "step": 20320 }, { "epoch": 0.02711869830248148, "grad_norm": 2.515625, "learning_rate": 1.3558610207455602e-06, "loss": 0.6697210311889649, "num_tokens": 2485539882.0, "step": 20340 }, { "epoch": 0.027145363689209584, "grad_norm": 2.625, "learning_rate": 1.357194282971575e-06, "loss": 0.6588225364685059, "num_tokens": 2488125463.0, "step": 20360 }, { "epoch": 0.027172029075937687, "grad_norm": 2.125, "learning_rate": 1.3585275451975896e-06, "loss": 0.6599672794342041, "num_tokens": 2490666726.0, "step": 20380 }, { "epoch": 0.027198694462665794, "grad_norm": 2.078125, "learning_rate": 1.359860807423604e-06, "loss": 0.6539901733398438, "num_tokens": 2493079132.0, "step": 20400 }, { "epoch": 0.027225359849393897, "grad_norm": 2.046875, "learning_rate": 1.3611940696496188e-06, "loss": 0.6615249633789062, "num_tokens": 2495669752.0, "step": 20420 }, { "epoch": 0.027252025236122, "grad_norm": 3.234375, "learning_rate": 1.3625273318756335e-06, "loss": 0.6556968688964844, "num_tokens": 2498216567.0, "step": 20440 }, { "epoch": 0.027278690622850103, "grad_norm": 1.984375, "learning_rate": 1.3638605941016482e-06, "loss": 0.6620303153991699, "num_tokens": 2500620598.0, "step": 20460 }, { "epoch": 0.027305356009578206, "grad_norm": 1.875, "learning_rate": 1.3651938563276626e-06, "loss": 0.660067367553711, "num_tokens": 2503007950.0, "step": 20480 }, { "epoch": 0.027332021396306312, "grad_norm": 2.265625, "learning_rate": 1.3665271185536771e-06, "loss": 0.6484202861785888, "num_tokens": 2505352457.0, "step": 20500 }, { "epoch": 0.027358686783034415, "grad_norm": 2.140625, "learning_rate": 1.367860380779692e-06, "loss": 0.6547923564910889, "num_tokens": 2507836359.0, "step": 20520 }, { "epoch": 0.027385352169762518, "grad_norm": 1.921875, "learning_rate": 1.3691936430057065e-06, "loss": 0.6519156455993652, "num_tokens": 2510114362.0, "step": 20540 }, { "epoch": 0.02741201755649062, "grad_norm": 1.984375, "learning_rate": 1.370526905231721e-06, "loss": 0.661497688293457, "num_tokens": 2512566597.0, "step": 20560 }, { "epoch": 0.027438682943218724, "grad_norm": 1.9921875, "learning_rate": 1.3718601674577355e-06, "loss": 0.6460400104522706, "num_tokens": 2514845483.0, "step": 20580 }, { "epoch": 0.02746534832994683, "grad_norm": 2.34375, "learning_rate": 1.3731934296837504e-06, "loss": 0.6666894435882569, "num_tokens": 2517282354.0, "step": 20600 }, { "epoch": 0.027492013716674934, "grad_norm": 2.453125, "learning_rate": 1.374526691909765e-06, "loss": 0.661625337600708, "num_tokens": 2519705648.0, "step": 20620 }, { "epoch": 0.027518679103403037, "grad_norm": 2.1875, "learning_rate": 1.3758599541357794e-06, "loss": 0.6374587059020996, "num_tokens": 2522024322.0, "step": 20640 }, { "epoch": 0.02754534449013114, "grad_norm": 2.421875, "learning_rate": 1.3771932163617941e-06, "loss": 0.6524343490600586, "num_tokens": 2524240042.0, "step": 20660 }, { "epoch": 0.027572009876859243, "grad_norm": 2.453125, "learning_rate": 1.3785264785878088e-06, "loss": 0.6539923191070557, "num_tokens": 2526943361.0, "step": 20680 }, { "epoch": 0.02759867526358735, "grad_norm": 2.40625, "learning_rate": 1.3798597408138233e-06, "loss": 0.659937858581543, "num_tokens": 2529536887.0, "step": 20700 }, { "epoch": 0.027625340650315452, "grad_norm": 1.96875, "learning_rate": 1.381193003039838e-06, "loss": 0.6699861526489258, "num_tokens": 2531987868.0, "step": 20720 }, { "epoch": 0.027652006037043556, "grad_norm": 2.125, "learning_rate": 1.3825262652658525e-06, "loss": 0.6562635421752929, "num_tokens": 2534398784.0, "step": 20740 }, { "epoch": 0.02767867142377166, "grad_norm": 2.203125, "learning_rate": 1.3838595274918674e-06, "loss": 0.6598217010498046, "num_tokens": 2536940612.0, "step": 20760 }, { "epoch": 0.02770533681049976, "grad_norm": 2.390625, "learning_rate": 1.385192789717882e-06, "loss": 0.6405093193054199, "num_tokens": 2539431833.0, "step": 20780 }, { "epoch": 0.027732002197227868, "grad_norm": 2.15625, "learning_rate": 1.3865260519438964e-06, "loss": 0.6704974174499512, "num_tokens": 2541868991.0, "step": 20800 }, { "epoch": 0.02775866758395597, "grad_norm": 1.890625, "learning_rate": 1.3878593141699109e-06, "loss": 0.6594396591186523, "num_tokens": 2544015984.0, "step": 20820 }, { "epoch": 0.027785332970684074, "grad_norm": 2.515625, "learning_rate": 1.3891925763959258e-06, "loss": 0.6392309188842773, "num_tokens": 2546464455.0, "step": 20840 }, { "epoch": 0.027811998357412177, "grad_norm": 2.171875, "learning_rate": 1.3905258386219403e-06, "loss": 0.6559902191162109, "num_tokens": 2548934387.0, "step": 20860 }, { "epoch": 0.02783866374414028, "grad_norm": 2.484375, "learning_rate": 1.3918591008479548e-06, "loss": 0.6511632919311523, "num_tokens": 2551249452.0, "step": 20880 }, { "epoch": 0.027865329130868383, "grad_norm": 2.03125, "learning_rate": 1.3931923630739695e-06, "loss": 0.6521920204162598, "num_tokens": 2553554856.0, "step": 20900 }, { "epoch": 0.02789199451759649, "grad_norm": 2.015625, "learning_rate": 1.3945256252999842e-06, "loss": 0.6512957572937011, "num_tokens": 2555749846.0, "step": 20920 }, { "epoch": 0.027918659904324593, "grad_norm": 2.09375, "learning_rate": 1.3958588875259987e-06, "loss": 0.6632401466369628, "num_tokens": 2558160248.0, "step": 20940 }, { "epoch": 0.027945325291052696, "grad_norm": 2.171875, "learning_rate": 1.3971921497520134e-06, "loss": 0.6768988609313965, "num_tokens": 2560562286.0, "step": 20960 }, { "epoch": 0.0279719906777808, "grad_norm": 2.515625, "learning_rate": 1.3985254119780279e-06, "loss": 0.6593287467956543, "num_tokens": 2563034619.0, "step": 20980 }, { "epoch": 0.027998656064508902, "grad_norm": 2.0, "learning_rate": 1.3998586742040426e-06, "loss": 0.6657226085662842, "num_tokens": 2565492569.0, "step": 21000 }, { "epoch": 0.02802532145123701, "grad_norm": 2.109375, "learning_rate": 1.4011919364300573e-06, "loss": 0.6480897903442383, "num_tokens": 2567931531.0, "step": 21020 }, { "epoch": 0.02805198683796511, "grad_norm": 2.125, "learning_rate": 1.4025251986560717e-06, "loss": 0.6604187965393067, "num_tokens": 2570561526.0, "step": 21040 }, { "epoch": 0.028078652224693215, "grad_norm": 2.140625, "learning_rate": 1.4038584608820862e-06, "loss": 0.6421057701110839, "num_tokens": 2573095301.0, "step": 21060 }, { "epoch": 0.028105317611421318, "grad_norm": 2.34375, "learning_rate": 1.4051917231081012e-06, "loss": 0.6605679512023925, "num_tokens": 2575644237.0, "step": 21080 }, { "epoch": 0.02813198299814942, "grad_norm": 2.34375, "learning_rate": 1.4065249853341156e-06, "loss": 0.6599019527435303, "num_tokens": 2578198431.0, "step": 21100 }, { "epoch": 0.028158648384877527, "grad_norm": 1.921875, "learning_rate": 1.4078582475601301e-06, "loss": 0.6526929378509522, "num_tokens": 2580566339.0, "step": 21120 }, { "epoch": 0.02818531377160563, "grad_norm": 1.8671875, "learning_rate": 1.4091915097861446e-06, "loss": 0.6689793586730957, "num_tokens": 2582960334.0, "step": 21140 }, { "epoch": 0.028211979158333733, "grad_norm": 2.6875, "learning_rate": 1.4105247720121595e-06, "loss": 0.6686238765716552, "num_tokens": 2585466044.0, "step": 21160 }, { "epoch": 0.028238644545061836, "grad_norm": 1.8828125, "learning_rate": 1.411858034238174e-06, "loss": 0.6472736358642578, "num_tokens": 2588004242.0, "step": 21180 }, { "epoch": 0.02826530993178994, "grad_norm": 2.90625, "learning_rate": 1.4131912964641887e-06, "loss": 0.6447295665740966, "num_tokens": 2590529872.0, "step": 21200 }, { "epoch": 0.028291975318518046, "grad_norm": 2.5625, "learning_rate": 1.4145245586902034e-06, "loss": 0.6379655838012696, "num_tokens": 2592804229.0, "step": 21220 }, { "epoch": 0.02831864070524615, "grad_norm": 2.046875, "learning_rate": 1.415857820916218e-06, "loss": 0.6519233703613281, "num_tokens": 2595042217.0, "step": 21240 }, { "epoch": 0.028345306091974252, "grad_norm": 2.171875, "learning_rate": 1.4171910831422326e-06, "loss": 0.6567140579223633, "num_tokens": 2597423191.0, "step": 21260 }, { "epoch": 0.028371971478702355, "grad_norm": 2.25, "learning_rate": 1.4185243453682471e-06, "loss": 0.6414618968963623, "num_tokens": 2600086920.0, "step": 21280 }, { "epoch": 0.028398636865430458, "grad_norm": 2.0625, "learning_rate": 1.4198576075942618e-06, "loss": 0.6359410285949707, "num_tokens": 2602538683.0, "step": 21300 }, { "epoch": 0.028425302252158564, "grad_norm": 1.7109375, "learning_rate": 1.4211908698202765e-06, "loss": 0.6534605503082276, "num_tokens": 2604836590.0, "step": 21320 }, { "epoch": 0.028451967638886667, "grad_norm": 2.1875, "learning_rate": 1.422524132046291e-06, "loss": 0.6447486877441406, "num_tokens": 2607058600.0, "step": 21340 }, { "epoch": 0.02847863302561477, "grad_norm": 2.34375, "learning_rate": 1.4238573942723055e-06, "loss": 0.6407375335693359, "num_tokens": 2609352214.0, "step": 21360 }, { "epoch": 0.028505298412342873, "grad_norm": 2.59375, "learning_rate": 1.4251906564983204e-06, "loss": 0.6595043659210205, "num_tokens": 2611693231.0, "step": 21380 }, { "epoch": 0.028531963799070977, "grad_norm": 2.40625, "learning_rate": 1.426523918724335e-06, "loss": 0.6555684089660645, "num_tokens": 2614221120.0, "step": 21400 }, { "epoch": 0.028558629185799083, "grad_norm": 2.109375, "learning_rate": 1.4278571809503494e-06, "loss": 0.6550335884094238, "num_tokens": 2616447571.0, "step": 21420 }, { "epoch": 0.028585294572527186, "grad_norm": 2.0, "learning_rate": 1.4291904431763639e-06, "loss": 0.6545852184295654, "num_tokens": 2618854100.0, "step": 21440 }, { "epoch": 0.02861195995925529, "grad_norm": 2.484375, "learning_rate": 1.4305237054023788e-06, "loss": 0.651661491394043, "num_tokens": 2621228415.0, "step": 21460 }, { "epoch": 0.028638625345983392, "grad_norm": 2.25, "learning_rate": 1.4318569676283933e-06, "loss": 0.6748315811157226, "num_tokens": 2623813515.0, "step": 21480 }, { "epoch": 0.028665290732711495, "grad_norm": 2.265625, "learning_rate": 1.4331902298544078e-06, "loss": 0.6556596755981445, "num_tokens": 2626410537.0, "step": 21500 }, { "epoch": 0.0286919561194396, "grad_norm": 2.578125, "learning_rate": 1.4345234920804225e-06, "loss": 0.6593049049377442, "num_tokens": 2628832349.0, "step": 21520 }, { "epoch": 0.028718621506167705, "grad_norm": 2.1875, "learning_rate": 1.4358567543064372e-06, "loss": 0.6424490928649902, "num_tokens": 2631369464.0, "step": 21540 }, { "epoch": 0.028745286892895808, "grad_norm": 1.8515625, "learning_rate": 1.4371900165324519e-06, "loss": 0.6513879776000977, "num_tokens": 2633971529.0, "step": 21560 }, { "epoch": 0.02877195227962391, "grad_norm": 2.515625, "learning_rate": 1.4385232787584664e-06, "loss": 0.6359067916870117, "num_tokens": 2636361402.0, "step": 21580 }, { "epoch": 0.028798617666352014, "grad_norm": 2.375, "learning_rate": 1.4398565409844809e-06, "loss": 0.6435974597930908, "num_tokens": 2638779325.0, "step": 21600 }, { "epoch": 0.02882528305308012, "grad_norm": 2.296875, "learning_rate": 1.4411898032104958e-06, "loss": 0.6560244560241699, "num_tokens": 2641079954.0, "step": 21620 }, { "epoch": 0.028851948439808223, "grad_norm": 2.3125, "learning_rate": 1.4425230654365103e-06, "loss": 0.6562707901000977, "num_tokens": 2643640063.0, "step": 21640 }, { "epoch": 0.028878613826536326, "grad_norm": 2.609375, "learning_rate": 1.4438563276625247e-06, "loss": 0.6292196273803711, "num_tokens": 2645990092.0, "step": 21660 }, { "epoch": 0.02890527921326443, "grad_norm": 2.203125, "learning_rate": 1.4451895898885392e-06, "loss": 0.6535050392150878, "num_tokens": 2648466583.0, "step": 21680 }, { "epoch": 0.028931944599992532, "grad_norm": 1.8359375, "learning_rate": 1.4465228521145541e-06, "loss": 0.638399076461792, "num_tokens": 2651090381.0, "step": 21700 }, { "epoch": 0.02895860998672064, "grad_norm": 2.171875, "learning_rate": 1.4478561143405686e-06, "loss": 0.6509194850921631, "num_tokens": 2653598085.0, "step": 21720 }, { "epoch": 0.028985275373448742, "grad_norm": 2.265625, "learning_rate": 1.4491893765665831e-06, "loss": 0.6378519535064697, "num_tokens": 2656182669.0, "step": 21740 }, { "epoch": 0.029011940760176845, "grad_norm": 2.09375, "learning_rate": 1.4505226387925978e-06, "loss": 0.6441785335540772, "num_tokens": 2658683189.0, "step": 21760 }, { "epoch": 0.029038606146904948, "grad_norm": 1.8828125, "learning_rate": 1.4518559010186125e-06, "loss": 0.6448265075683594, "num_tokens": 2661210812.0, "step": 21780 }, { "epoch": 0.02906527153363305, "grad_norm": 2.03125, "learning_rate": 1.453189163244627e-06, "loss": 0.6460099220275879, "num_tokens": 2663594806.0, "step": 21800 }, { "epoch": 0.029091936920361158, "grad_norm": 2.3125, "learning_rate": 1.4545224254706417e-06, "loss": 0.6795196533203125, "num_tokens": 2666168188.0, "step": 21820 }, { "epoch": 0.02911860230708926, "grad_norm": 2.140625, "learning_rate": 1.4558556876966562e-06, "loss": 0.6475842952728271, "num_tokens": 2668346092.0, "step": 21840 }, { "epoch": 0.029145267693817364, "grad_norm": 1.921875, "learning_rate": 1.457188949922671e-06, "loss": 0.6604835987091064, "num_tokens": 2670924458.0, "step": 21860 }, { "epoch": 0.029171933080545467, "grad_norm": 2.296875, "learning_rate": 1.4585222121486856e-06, "loss": 0.6339520931243896, "num_tokens": 2673070589.0, "step": 21880 }, { "epoch": 0.02919859846727357, "grad_norm": 2.03125, "learning_rate": 1.4598554743747e-06, "loss": 0.6412462711334228, "num_tokens": 2675435361.0, "step": 21900 }, { "epoch": 0.029225263854001676, "grad_norm": 2.8125, "learning_rate": 1.4611887366007146e-06, "loss": 0.6497217178344726, "num_tokens": 2677740044.0, "step": 21920 }, { "epoch": 0.02925192924072978, "grad_norm": 2.171875, "learning_rate": 1.4625219988267295e-06, "loss": 0.6669872283935547, "num_tokens": 2680092170.0, "step": 21940 }, { "epoch": 0.029278594627457882, "grad_norm": 2.171875, "learning_rate": 1.463855261052744e-06, "loss": 0.6331659317016601, "num_tokens": 2682535723.0, "step": 21960 }, { "epoch": 0.029305260014185985, "grad_norm": 1.9921875, "learning_rate": 1.4651885232787585e-06, "loss": 0.648689603805542, "num_tokens": 2685076568.0, "step": 21980 }, { "epoch": 0.02933192540091409, "grad_norm": 2.4375, "learning_rate": 1.4665217855047732e-06, "loss": 0.6560869216918945, "num_tokens": 2687312703.0, "step": 22000 }, { "epoch": 0.029358590787642195, "grad_norm": 2.0625, "learning_rate": 1.4678550477307879e-06, "loss": 0.6602269172668457, "num_tokens": 2689717787.0, "step": 22020 }, { "epoch": 0.029385256174370298, "grad_norm": 2.0625, "learning_rate": 1.4691883099568024e-06, "loss": 0.6592827796936035, "num_tokens": 2692100838.0, "step": 22040 }, { "epoch": 0.0294119215610984, "grad_norm": 2.40625, "learning_rate": 1.470521572182817e-06, "loss": 0.654228401184082, "num_tokens": 2694506419.0, "step": 22060 }, { "epoch": 0.029438586947826504, "grad_norm": 2.390625, "learning_rate": 1.4718548344088316e-06, "loss": 0.6503798007965088, "num_tokens": 2696918948.0, "step": 22080 }, { "epoch": 0.029465252334554607, "grad_norm": 2.328125, "learning_rate": 1.4731880966348463e-06, "loss": 0.6422294616699219, "num_tokens": 2699487097.0, "step": 22100 }, { "epoch": 0.02949191772128271, "grad_norm": 2.0, "learning_rate": 1.474521358860861e-06, "loss": 0.6522624015808105, "num_tokens": 2701750620.0, "step": 22120 }, { "epoch": 0.029518583108010817, "grad_norm": 1.765625, "learning_rate": 1.4758546210868755e-06, "loss": 0.6353932380676269, "num_tokens": 2704069683.0, "step": 22140 }, { "epoch": 0.02954524849473892, "grad_norm": 1.859375, "learning_rate": 1.47718788331289e-06, "loss": 0.6588753700256348, "num_tokens": 2706557806.0, "step": 22160 }, { "epoch": 0.029571913881467023, "grad_norm": 2.171875, "learning_rate": 1.4785211455389049e-06, "loss": 0.6449090003967285, "num_tokens": 2708953213.0, "step": 22180 }, { "epoch": 0.029598579268195126, "grad_norm": 1.84375, "learning_rate": 1.4798544077649194e-06, "loss": 0.6493602752685547, "num_tokens": 2711255201.0, "step": 22200 }, { "epoch": 0.02962524465492323, "grad_norm": 2.0625, "learning_rate": 1.4811876699909338e-06, "loss": 0.6544002532958985, "num_tokens": 2713729812.0, "step": 22220 }, { "epoch": 0.029651910041651335, "grad_norm": 2.0625, "learning_rate": 1.4825209322169483e-06, "loss": 0.6468223571777344, "num_tokens": 2716111607.0, "step": 22240 }, { "epoch": 0.02967857542837944, "grad_norm": 2.359375, "learning_rate": 1.4838541944429632e-06, "loss": 0.6341352462768555, "num_tokens": 2718622430.0, "step": 22260 }, { "epoch": 0.02970524081510754, "grad_norm": 1.9140625, "learning_rate": 1.4851874566689777e-06, "loss": 0.6401160717010498, "num_tokens": 2720940398.0, "step": 22280 }, { "epoch": 0.029731906201835644, "grad_norm": 1.7890625, "learning_rate": 1.4865207188949922e-06, "loss": 0.6449059963226318, "num_tokens": 2723500049.0, "step": 22300 }, { "epoch": 0.029758571588563747, "grad_norm": 2.34375, "learning_rate": 1.487853981121007e-06, "loss": 0.6265275955200196, "num_tokens": 2725907626.0, "step": 22320 }, { "epoch": 0.029785236975291854, "grad_norm": 2.59375, "learning_rate": 1.4891872433470216e-06, "loss": 0.6578128337860107, "num_tokens": 2728495600.0, "step": 22340 }, { "epoch": 0.029811902362019957, "grad_norm": 2.109375, "learning_rate": 1.4905205055730363e-06, "loss": 0.6410257339477539, "num_tokens": 2730895362.0, "step": 22360 }, { "epoch": 0.02983856774874806, "grad_norm": 2.46875, "learning_rate": 1.4918537677990508e-06, "loss": 0.6378799438476562, "num_tokens": 2733451211.0, "step": 22380 }, { "epoch": 0.029865233135476163, "grad_norm": 2.125, "learning_rate": 1.4931870300250653e-06, "loss": 0.630402660369873, "num_tokens": 2735985164.0, "step": 22400 }, { "epoch": 0.029891898522204266, "grad_norm": 2.109375, "learning_rate": 1.4945202922510802e-06, "loss": 0.6496837615966797, "num_tokens": 2738455256.0, "step": 22420 }, { "epoch": 0.029918563908932373, "grad_norm": 2.328125, "learning_rate": 1.4958535544770947e-06, "loss": 0.6609603881835937, "num_tokens": 2740841603.0, "step": 22440 }, { "epoch": 0.029945229295660476, "grad_norm": 2.03125, "learning_rate": 1.4971868167031092e-06, "loss": 0.6296627044677734, "num_tokens": 2742962052.0, "step": 22460 }, { "epoch": 0.02997189468238858, "grad_norm": 2.1875, "learning_rate": 1.4985200789291237e-06, "loss": 0.6305673599243165, "num_tokens": 2745213108.0, "step": 22480 }, { "epoch": 0.02999856006911668, "grad_norm": 2.296875, "learning_rate": 1.4998533411551386e-06, "loss": 0.6679672718048095, "num_tokens": 2747645467.0, "step": 22500 }, { "epoch": 0.030025225455844785, "grad_norm": 2.0625, "learning_rate": 1.501186603381153e-06, "loss": 0.6488758087158203, "num_tokens": 2750164348.0, "step": 22520 }, { "epoch": 0.03005189084257289, "grad_norm": 2.3125, "learning_rate": 1.5025198656071676e-06, "loss": 0.6425656318664551, "num_tokens": 2752771200.0, "step": 22540 }, { "epoch": 0.030078556229300994, "grad_norm": 2.5625, "learning_rate": 1.5038531278331825e-06, "loss": 0.6192811012268067, "num_tokens": 2755216140.0, "step": 22560 }, { "epoch": 0.030105221616029097, "grad_norm": 2.125, "learning_rate": 1.505186390059197e-06, "loss": 0.6397000789642334, "num_tokens": 2757803264.0, "step": 22580 }, { "epoch": 0.0301318870027572, "grad_norm": 2.125, "learning_rate": 1.5065196522852115e-06, "loss": 0.6455217361450195, "num_tokens": 2760290913.0, "step": 22600 }, { "epoch": 0.030158552389485303, "grad_norm": 2.328125, "learning_rate": 1.5078529145112262e-06, "loss": 0.6522225379943848, "num_tokens": 2762807455.0, "step": 22620 }, { "epoch": 0.03018521777621341, "grad_norm": 2.171875, "learning_rate": 1.5091861767372409e-06, "loss": 0.6611302375793457, "num_tokens": 2765333354.0, "step": 22640 }, { "epoch": 0.030211883162941513, "grad_norm": 1.953125, "learning_rate": 1.5105194389632556e-06, "loss": 0.6262163162231446, "num_tokens": 2767774439.0, "step": 22660 }, { "epoch": 0.030238548549669616, "grad_norm": 2.1875, "learning_rate": 1.51185270118927e-06, "loss": 0.6473803520202637, "num_tokens": 2770210486.0, "step": 22680 }, { "epoch": 0.03026521393639772, "grad_norm": 2.203125, "learning_rate": 1.5131859634152846e-06, "loss": 0.6200320243835449, "num_tokens": 2772495731.0, "step": 22700 }, { "epoch": 0.030291879323125822, "grad_norm": 2.234375, "learning_rate": 1.5145192256412995e-06, "loss": 0.6151028633117676, "num_tokens": 2774848657.0, "step": 22720 }, { "epoch": 0.03031854470985393, "grad_norm": 2.296875, "learning_rate": 1.515852487867314e-06, "loss": 0.6281490325927734, "num_tokens": 2777351042.0, "step": 22740 }, { "epoch": 0.03034521009658203, "grad_norm": 1.8671875, "learning_rate": 1.5171857500933285e-06, "loss": 0.645830488204956, "num_tokens": 2779856103.0, "step": 22760 }, { "epoch": 0.030371875483310135, "grad_norm": 1.8671875, "learning_rate": 1.518519012319343e-06, "loss": 0.6264664649963378, "num_tokens": 2782285962.0, "step": 22780 }, { "epoch": 0.030398540870038238, "grad_norm": 2.203125, "learning_rate": 1.5198522745453579e-06, "loss": 0.6450338363647461, "num_tokens": 2784677844.0, "step": 22800 }, { "epoch": 0.03042520625676634, "grad_norm": 2.09375, "learning_rate": 1.5211855367713723e-06, "loss": 0.641165828704834, "num_tokens": 2787165592.0, "step": 22820 }, { "epoch": 0.030451871643494447, "grad_norm": 2.125, "learning_rate": 1.5225187989973868e-06, "loss": 0.6359877586364746, "num_tokens": 2789673826.0, "step": 22840 }, { "epoch": 0.03047853703022255, "grad_norm": 2.078125, "learning_rate": 1.5238520612234015e-06, "loss": 0.6440974235534668, "num_tokens": 2792135039.0, "step": 22860 }, { "epoch": 0.030505202416950653, "grad_norm": 2.234375, "learning_rate": 1.5251853234494162e-06, "loss": 0.6460522651672364, "num_tokens": 2794511550.0, "step": 22880 }, { "epoch": 0.030531867803678756, "grad_norm": 2.1875, "learning_rate": 1.5265185856754307e-06, "loss": 0.6302179336547852, "num_tokens": 2796781885.0, "step": 22900 }, { "epoch": 0.03055853319040686, "grad_norm": 2.21875, "learning_rate": 1.5278518479014454e-06, "loss": 0.6480396270751954, "num_tokens": 2799095155.0, "step": 22920 }, { "epoch": 0.030585198577134966, "grad_norm": 2.203125, "learning_rate": 1.52918511012746e-06, "loss": 0.6432117938995361, "num_tokens": 2801406000.0, "step": 22940 }, { "epoch": 0.03061186396386307, "grad_norm": 1.96875, "learning_rate": 1.5305183723534746e-06, "loss": 0.6596826553344727, "num_tokens": 2803648092.0, "step": 22960 }, { "epoch": 0.030638529350591172, "grad_norm": 1.8125, "learning_rate": 1.5318516345794893e-06, "loss": 0.6662649154663086, "num_tokens": 2806243055.0, "step": 22980 }, { "epoch": 0.030665194737319275, "grad_norm": 1.9296875, "learning_rate": 1.5331848968055038e-06, "loss": 0.6584803581237793, "num_tokens": 2808729726.0, "step": 23000 }, { "epoch": 0.030691860124047378, "grad_norm": 2.203125, "learning_rate": 1.5345181590315183e-06, "loss": 0.6267842769622802, "num_tokens": 2811375793.0, "step": 23020 }, { "epoch": 0.030718525510775484, "grad_norm": 2.140625, "learning_rate": 1.5358514212575332e-06, "loss": 0.6584835529327393, "num_tokens": 2813945383.0, "step": 23040 }, { "epoch": 0.030745190897503587, "grad_norm": 2.21875, "learning_rate": 1.5371846834835477e-06, "loss": 0.6461225509643554, "num_tokens": 2816393686.0, "step": 23060 }, { "epoch": 0.03077185628423169, "grad_norm": 2.46875, "learning_rate": 1.5385179457095622e-06, "loss": 0.6355864048004151, "num_tokens": 2818939502.0, "step": 23080 }, { "epoch": 0.030798521670959794, "grad_norm": 2.828125, "learning_rate": 1.539851207935577e-06, "loss": 0.6550135612487793, "num_tokens": 2821487472.0, "step": 23100 }, { "epoch": 0.030825187057687897, "grad_norm": 1.921875, "learning_rate": 1.5411844701615916e-06, "loss": 0.6398042678833008, "num_tokens": 2823886165.0, "step": 23120 }, { "epoch": 0.030851852444416003, "grad_norm": 2.140625, "learning_rate": 1.542517732387606e-06, "loss": 0.6425868034362793, "num_tokens": 2826393245.0, "step": 23140 }, { "epoch": 0.030878517831144106, "grad_norm": 2.078125, "learning_rate": 1.5438509946136208e-06, "loss": 0.6256866931915284, "num_tokens": 2829025149.0, "step": 23160 }, { "epoch": 0.03090518321787221, "grad_norm": 2.4375, "learning_rate": 1.5451842568396353e-06, "loss": 0.6305691242218018, "num_tokens": 2831633627.0, "step": 23180 }, { "epoch": 0.030931848604600312, "grad_norm": 2.078125, "learning_rate": 1.54651751906565e-06, "loss": 0.6328725814819336, "num_tokens": 2833948088.0, "step": 23200 }, { "epoch": 0.030958513991328415, "grad_norm": 2.171875, "learning_rate": 1.5478507812916647e-06, "loss": 0.6366674423217773, "num_tokens": 2836500461.0, "step": 23220 }, { "epoch": 0.030985179378056518, "grad_norm": 2.296875, "learning_rate": 1.5491840435176792e-06, "loss": 0.6447062015533447, "num_tokens": 2838979908.0, "step": 23240 }, { "epoch": 0.031011844764784625, "grad_norm": 2.25, "learning_rate": 1.5505173057436937e-06, "loss": 0.6518318176269531, "num_tokens": 2841283141.0, "step": 23260 }, { "epoch": 0.031038510151512728, "grad_norm": 1.9140625, "learning_rate": 1.5518505679697086e-06, "loss": 0.6347177028656006, "num_tokens": 2843529997.0, "step": 23280 }, { "epoch": 0.03106517553824083, "grad_norm": 2.3125, "learning_rate": 1.553183830195723e-06, "loss": 0.6583044528961182, "num_tokens": 2846199239.0, "step": 23300 }, { "epoch": 0.031091840924968934, "grad_norm": 2.28125, "learning_rate": 1.5545170924217376e-06, "loss": 0.6260956764221192, "num_tokens": 2848406513.0, "step": 23320 }, { "epoch": 0.031118506311697037, "grad_norm": 2.203125, "learning_rate": 1.555850354647752e-06, "loss": 0.6553624153137207, "num_tokens": 2850669229.0, "step": 23340 }, { "epoch": 0.031145171698425143, "grad_norm": 1.984375, "learning_rate": 1.557183616873767e-06, "loss": 0.6239649772644043, "num_tokens": 2853210385.0, "step": 23360 }, { "epoch": 0.031171837085153246, "grad_norm": 1.640625, "learning_rate": 1.5585168790997815e-06, "loss": 0.6283647537231445, "num_tokens": 2855707122.0, "step": 23380 }, { "epoch": 0.03119850247188135, "grad_norm": 2.578125, "learning_rate": 1.559850141325796e-06, "loss": 0.6394136905670166, "num_tokens": 2858188041.0, "step": 23400 }, { "epoch": 0.031225167858609452, "grad_norm": 2.625, "learning_rate": 1.5611834035518106e-06, "loss": 0.6407447338104248, "num_tokens": 2860445866.0, "step": 23420 }, { "epoch": 0.031251833245337556, "grad_norm": 1.96875, "learning_rate": 1.5625166657778253e-06, "loss": 0.6303815364837646, "num_tokens": 2862826621.0, "step": 23440 }, { "epoch": 0.03127849863206566, "grad_norm": 2.3125, "learning_rate": 1.56384992800384e-06, "loss": 0.6519020080566407, "num_tokens": 2865169546.0, "step": 23460 }, { "epoch": 0.03130516401879376, "grad_norm": 2.0625, "learning_rate": 1.5651831902298545e-06, "loss": 0.6578223228454589, "num_tokens": 2867684604.0, "step": 23480 }, { "epoch": 0.03133182940552187, "grad_norm": 2.265625, "learning_rate": 1.566516452455869e-06, "loss": 0.6411592483520507, "num_tokens": 2870160480.0, "step": 23500 }, { "epoch": 0.031358494792249975, "grad_norm": 2.203125, "learning_rate": 1.567849714681884e-06, "loss": 0.627800464630127, "num_tokens": 2872767263.0, "step": 23520 }, { "epoch": 0.03138516017897808, "grad_norm": 1.734375, "learning_rate": 1.5691829769078984e-06, "loss": 0.6281266212463379, "num_tokens": 2875218339.0, "step": 23540 }, { "epoch": 0.03141182556570618, "grad_norm": 1.9296875, "learning_rate": 1.570516239133913e-06, "loss": 0.643648338317871, "num_tokens": 2877717726.0, "step": 23560 }, { "epoch": 0.031438490952434284, "grad_norm": 1.96875, "learning_rate": 1.5718495013599274e-06, "loss": 0.6400154113769532, "num_tokens": 2880193159.0, "step": 23580 }, { "epoch": 0.03146515633916239, "grad_norm": 1.8359375, "learning_rate": 1.5731827635859423e-06, "loss": 0.6318115711212158, "num_tokens": 2882628664.0, "step": 23600 }, { "epoch": 0.03149182172589049, "grad_norm": 2.265625, "learning_rate": 1.5745160258119568e-06, "loss": 0.6481915473937988, "num_tokens": 2884998927.0, "step": 23620 }, { "epoch": 0.03151848711261859, "grad_norm": 1.9375, "learning_rate": 1.5758492880379713e-06, "loss": 0.6484064102172852, "num_tokens": 2887468284.0, "step": 23640 }, { "epoch": 0.031545152499346696, "grad_norm": 2.0625, "learning_rate": 1.577182550263986e-06, "loss": 0.6501659393310547, "num_tokens": 2889754816.0, "step": 23660 }, { "epoch": 0.0315718178860748, "grad_norm": 1.828125, "learning_rate": 1.5785158124900007e-06, "loss": 0.6509631156921387, "num_tokens": 2891935017.0, "step": 23680 }, { "epoch": 0.03159848327280291, "grad_norm": 2.0625, "learning_rate": 1.5798490747160152e-06, "loss": 0.6354250907897949, "num_tokens": 2894477919.0, "step": 23700 }, { "epoch": 0.03162514865953101, "grad_norm": 1.9609375, "learning_rate": 1.5811823369420299e-06, "loss": 0.6593732357025146, "num_tokens": 2896668205.0, "step": 23720 }, { "epoch": 0.031651814046259115, "grad_norm": 2.28125, "learning_rate": 1.5825155991680444e-06, "loss": 0.6139673233032227, "num_tokens": 2899246909.0, "step": 23740 }, { "epoch": 0.03167847943298722, "grad_norm": 2.21875, "learning_rate": 1.583848861394059e-06, "loss": 0.6259615898132325, "num_tokens": 2901669128.0, "step": 23760 }, { "epoch": 0.03170514481971532, "grad_norm": 1.9765625, "learning_rate": 1.5851821236200738e-06, "loss": 0.6527829170227051, "num_tokens": 2903997690.0, "step": 23780 }, { "epoch": 0.031731810206443424, "grad_norm": 2.234375, "learning_rate": 1.5865153858460883e-06, "loss": 0.6137964725494385, "num_tokens": 2906587369.0, "step": 23800 }, { "epoch": 0.03175847559317153, "grad_norm": 2.015625, "learning_rate": 1.5878486480721028e-06, "loss": 0.6270277023315429, "num_tokens": 2909093241.0, "step": 23820 }, { "epoch": 0.03178514097989963, "grad_norm": 2.328125, "learning_rate": 1.5891819102981177e-06, "loss": 0.6266649246215821, "num_tokens": 2911415281.0, "step": 23840 }, { "epoch": 0.03181180636662773, "grad_norm": 1.7734375, "learning_rate": 1.5905151725241322e-06, "loss": 0.6358146667480469, "num_tokens": 2913991137.0, "step": 23860 }, { "epoch": 0.031838471753355836, "grad_norm": 2.015625, "learning_rate": 1.5918484347501467e-06, "loss": 0.6198408126831054, "num_tokens": 2916311421.0, "step": 23880 }, { "epoch": 0.03186513714008394, "grad_norm": 2.125, "learning_rate": 1.5931816969761616e-06, "loss": 0.6303413391113282, "num_tokens": 2918590838.0, "step": 23900 }, { "epoch": 0.03189180252681205, "grad_norm": 1.921875, "learning_rate": 1.594514959202176e-06, "loss": 0.64332275390625, "num_tokens": 2920910182.0, "step": 23920 }, { "epoch": 0.03191846791354015, "grad_norm": 1.6875, "learning_rate": 1.5958482214281906e-06, "loss": 0.6367537498474121, "num_tokens": 2923372868.0, "step": 23940 }, { "epoch": 0.031945133300268255, "grad_norm": 2.09375, "learning_rate": 1.5971814836542053e-06, "loss": 0.6234843254089355, "num_tokens": 2925834435.0, "step": 23960 }, { "epoch": 0.03197179868699636, "grad_norm": 1.90625, "learning_rate": 1.59851474588022e-06, "loss": 0.6447709083557129, "num_tokens": 2928328916.0, "step": 23980 }, { "epoch": 0.03199846407372446, "grad_norm": 2.1875, "learning_rate": 1.5998480081062344e-06, "loss": 0.6330533027648926, "num_tokens": 2930763010.0, "step": 24000 }, { "epoch": 0.032025129460452564, "grad_norm": 1.9921875, "learning_rate": 1.6011812703322491e-06, "loss": 0.6390158653259277, "num_tokens": 2933330186.0, "step": 24020 }, { "epoch": 0.03205179484718067, "grad_norm": 2.078125, "learning_rate": 1.6025145325582636e-06, "loss": 0.6411489009857178, "num_tokens": 2935743321.0, "step": 24040 }, { "epoch": 0.03207846023390877, "grad_norm": 1.890625, "learning_rate": 1.6038477947842783e-06, "loss": 0.6227865219116211, "num_tokens": 2938415604.0, "step": 24060 }, { "epoch": 0.032105125620636873, "grad_norm": 2.1875, "learning_rate": 1.605181057010293e-06, "loss": 0.6260591506958008, "num_tokens": 2940967269.0, "step": 24080 }, { "epoch": 0.032131791007364977, "grad_norm": 2.375, "learning_rate": 1.6065143192363075e-06, "loss": 0.6231477737426758, "num_tokens": 2943303871.0, "step": 24100 }, { "epoch": 0.032158456394093086, "grad_norm": 2.203125, "learning_rate": 1.607847581462322e-06, "loss": 0.6540850639343262, "num_tokens": 2945562015.0, "step": 24120 }, { "epoch": 0.03218512178082119, "grad_norm": 1.859375, "learning_rate": 1.609180843688337e-06, "loss": 0.6217320442199707, "num_tokens": 2948063998.0, "step": 24140 }, { "epoch": 0.03221178716754929, "grad_norm": 1.8046875, "learning_rate": 1.6105141059143514e-06, "loss": 0.6417155265808105, "num_tokens": 2950489315.0, "step": 24160 }, { "epoch": 0.032238452554277396, "grad_norm": 3.125, "learning_rate": 1.611847368140366e-06, "loss": 0.6226126670837402, "num_tokens": 2953027208.0, "step": 24180 }, { "epoch": 0.0322651179410055, "grad_norm": 2.25, "learning_rate": 1.6131806303663804e-06, "loss": 0.6263279914855957, "num_tokens": 2955585816.0, "step": 24200 }, { "epoch": 0.0322917833277336, "grad_norm": 2.453125, "learning_rate": 1.6145138925923953e-06, "loss": 0.6250102996826172, "num_tokens": 2957818654.0, "step": 24220 }, { "epoch": 0.032318448714461705, "grad_norm": 2.171875, "learning_rate": 1.6158471548184098e-06, "loss": 0.6314012050628662, "num_tokens": 2960120130.0, "step": 24240 }, { "epoch": 0.03234511410118981, "grad_norm": 2.09375, "learning_rate": 1.6171804170444245e-06, "loss": 0.6157777786254883, "num_tokens": 2962343053.0, "step": 24260 }, { "epoch": 0.03237177948791791, "grad_norm": 2.265625, "learning_rate": 1.618513679270439e-06, "loss": 0.6379231929779052, "num_tokens": 2964613052.0, "step": 24280 }, { "epoch": 0.032398444874646014, "grad_norm": 2.21875, "learning_rate": 1.6198469414964537e-06, "loss": 0.6358602523803711, "num_tokens": 2967092881.0, "step": 24300 }, { "epoch": 0.032425110261374124, "grad_norm": 2.53125, "learning_rate": 1.6211802037224684e-06, "loss": 0.630261754989624, "num_tokens": 2969536995.0, "step": 24320 }, { "epoch": 0.03245177564810223, "grad_norm": 1.9609375, "learning_rate": 1.6225134659484829e-06, "loss": 0.6309950351715088, "num_tokens": 2971773277.0, "step": 24340 }, { "epoch": 0.03247844103483033, "grad_norm": 1.96875, "learning_rate": 1.6238467281744974e-06, "loss": 0.6284933090209961, "num_tokens": 2974090247.0, "step": 24360 }, { "epoch": 0.03250510642155843, "grad_norm": 1.9921875, "learning_rate": 1.6251799904005123e-06, "loss": 0.6288747787475586, "num_tokens": 2976657405.0, "step": 24380 }, { "epoch": 0.032531771808286536, "grad_norm": 1.7265625, "learning_rate": 1.6265132526265268e-06, "loss": 0.6323487281799316, "num_tokens": 2979367541.0, "step": 24400 }, { "epoch": 0.03255843719501464, "grad_norm": 2.1875, "learning_rate": 1.6278465148525413e-06, "loss": 0.6487548351287842, "num_tokens": 2981948048.0, "step": 24420 }, { "epoch": 0.03258510258174274, "grad_norm": 1.9140625, "learning_rate": 1.6291797770785558e-06, "loss": 0.6313107490539551, "num_tokens": 2984257445.0, "step": 24440 }, { "epoch": 0.032611767968470845, "grad_norm": 2.015625, "learning_rate": 1.6305130393045707e-06, "loss": 0.6300215721130371, "num_tokens": 2986557411.0, "step": 24460 }, { "epoch": 0.03263843335519895, "grad_norm": 1.828125, "learning_rate": 1.6318463015305852e-06, "loss": 0.6355443000793457, "num_tokens": 2988982382.0, "step": 24480 }, { "epoch": 0.03266509874192705, "grad_norm": 2.15625, "learning_rate": 1.6331795637565997e-06, "loss": 0.6237887382507324, "num_tokens": 2991492168.0, "step": 24500 }, { "epoch": 0.03269176412865516, "grad_norm": 2.265625, "learning_rate": 1.6345128259826144e-06, "loss": 0.6363686561584473, "num_tokens": 2993906442.0, "step": 24520 }, { "epoch": 0.032718429515383264, "grad_norm": 2.09375, "learning_rate": 1.635846088208629e-06, "loss": 0.6115591049194335, "num_tokens": 2996157382.0, "step": 24540 }, { "epoch": 0.03274509490211137, "grad_norm": 2.15625, "learning_rate": 1.6371793504346438e-06, "loss": 0.6223283767700195, "num_tokens": 2998520927.0, "step": 24560 }, { "epoch": 0.03277176028883947, "grad_norm": 2.15625, "learning_rate": 1.6385126126606582e-06, "loss": 0.6330824375152588, "num_tokens": 3000874089.0, "step": 24580 }, { "epoch": 0.03279842567556757, "grad_norm": 1.8359375, "learning_rate": 1.6398458748866727e-06, "loss": 0.6356575012207031, "num_tokens": 3003413940.0, "step": 24600 }, { "epoch": 0.032825091062295676, "grad_norm": 1.96875, "learning_rate": 1.6411791371126877e-06, "loss": 0.6173009395599365, "num_tokens": 3006019729.0, "step": 24620 }, { "epoch": 0.03285175644902378, "grad_norm": 1.859375, "learning_rate": 1.6425123993387021e-06, "loss": 0.6358421802520752, "num_tokens": 3008598928.0, "step": 24640 }, { "epoch": 0.03287842183575188, "grad_norm": 2.0625, "learning_rate": 1.6438456615647166e-06, "loss": 0.6429950714111328, "num_tokens": 3010808594.0, "step": 24660 }, { "epoch": 0.032905087222479985, "grad_norm": 2.234375, "learning_rate": 1.6451789237907311e-06, "loss": 0.6077485084533691, "num_tokens": 3013282242.0, "step": 24680 }, { "epoch": 0.03293175260920809, "grad_norm": 1.796875, "learning_rate": 1.646512186016746e-06, "loss": 0.6278625965118408, "num_tokens": 3015757191.0, "step": 24700 }, { "epoch": 0.0329584179959362, "grad_norm": 1.90625, "learning_rate": 1.6478454482427605e-06, "loss": 0.6263289451599121, "num_tokens": 3018290251.0, "step": 24720 }, { "epoch": 0.0329850833826643, "grad_norm": 1.6875, "learning_rate": 1.649178710468775e-06, "loss": 0.6417227745056152, "num_tokens": 3020657928.0, "step": 24740 }, { "epoch": 0.033011748769392404, "grad_norm": 2.0625, "learning_rate": 1.6505119726947897e-06, "loss": 0.645597267150879, "num_tokens": 3023198132.0, "step": 24760 }, { "epoch": 0.03303841415612051, "grad_norm": 1.890625, "learning_rate": 1.6518452349208044e-06, "loss": 0.6509032726287842, "num_tokens": 3025799575.0, "step": 24780 }, { "epoch": 0.03306507954284861, "grad_norm": 2.328125, "learning_rate": 1.653178497146819e-06, "loss": 0.6317996501922607, "num_tokens": 3028075501.0, "step": 24800 }, { "epoch": 0.033091744929576714, "grad_norm": 2.40625, "learning_rate": 1.6545117593728336e-06, "loss": 0.6289810180664063, "num_tokens": 3030513898.0, "step": 24820 }, { "epoch": 0.03311841031630482, "grad_norm": 1.859375, "learning_rate": 1.655845021598848e-06, "loss": 0.645758056640625, "num_tokens": 3032788811.0, "step": 24840 }, { "epoch": 0.03314507570303292, "grad_norm": 2.015625, "learning_rate": 1.6571782838248628e-06, "loss": 0.6316349983215332, "num_tokens": 3035240980.0, "step": 24860 }, { "epoch": 0.03317174108976102, "grad_norm": 2.234375, "learning_rate": 1.6585115460508775e-06, "loss": 0.6281387329101562, "num_tokens": 3037671548.0, "step": 24880 }, { "epoch": 0.033198406476489126, "grad_norm": 2.390625, "learning_rate": 1.659844808276892e-06, "loss": 0.6185981750488281, "num_tokens": 3040156259.0, "step": 24900 }, { "epoch": 0.033225071863217236, "grad_norm": 2.0, "learning_rate": 1.6611780705029065e-06, "loss": 0.6233536243438721, "num_tokens": 3042772517.0, "step": 24920 }, { "epoch": 0.03325173724994534, "grad_norm": 2.265625, "learning_rate": 1.6625113327289214e-06, "loss": 0.6487018585205078, "num_tokens": 3045301632.0, "step": 24940 }, { "epoch": 0.03327840263667344, "grad_norm": 2.28125, "learning_rate": 1.6638445949549359e-06, "loss": 0.6297139167785645, "num_tokens": 3047726025.0, "step": 24960 }, { "epoch": 0.033305068023401545, "grad_norm": 1.8984375, "learning_rate": 1.6651778571809504e-06, "loss": 0.6288226127624512, "num_tokens": 3050218883.0, "step": 24980 }, { "epoch": 0.03333173341012965, "grad_norm": 2.203125, "learning_rate": 1.666511119406965e-06, "loss": 0.6308753967285157, "num_tokens": 3052729463.0, "step": 25000 }, { "epoch": 0.03335839879685775, "grad_norm": 2.234375, "learning_rate": 1.6678443816329798e-06, "loss": 0.6438465595245362, "num_tokens": 3055336128.0, "step": 25020 }, { "epoch": 0.033385064183585854, "grad_norm": 2.25, "learning_rate": 1.6691776438589943e-06, "loss": 0.6219906806945801, "num_tokens": 3057950802.0, "step": 25040 }, { "epoch": 0.03341172957031396, "grad_norm": 2.53125, "learning_rate": 1.670510906085009e-06, "loss": 0.6242225646972657, "num_tokens": 3060384382.0, "step": 25060 }, { "epoch": 0.03343839495704206, "grad_norm": 2.46875, "learning_rate": 1.6718441683110235e-06, "loss": 0.6430449485778809, "num_tokens": 3062860362.0, "step": 25080 }, { "epoch": 0.03346506034377016, "grad_norm": 1.9296875, "learning_rate": 1.6731774305370382e-06, "loss": 0.6212871551513672, "num_tokens": 3065218568.0, "step": 25100 }, { "epoch": 0.033491725730498266, "grad_norm": 2.53125, "learning_rate": 1.6745106927630529e-06, "loss": 0.6253182411193847, "num_tokens": 3067553361.0, "step": 25120 }, { "epoch": 0.033518391117226376, "grad_norm": 1.9609375, "learning_rate": 1.6758439549890673e-06, "loss": 0.6063651084899903, "num_tokens": 3069894377.0, "step": 25140 }, { "epoch": 0.03354505650395448, "grad_norm": 1.90625, "learning_rate": 1.6771772172150818e-06, "loss": 0.634208345413208, "num_tokens": 3072338799.0, "step": 25160 }, { "epoch": 0.03357172189068258, "grad_norm": 2.140625, "learning_rate": 1.6785104794410968e-06, "loss": 0.622739315032959, "num_tokens": 3074653310.0, "step": 25180 }, { "epoch": 0.033598387277410685, "grad_norm": 2.234375, "learning_rate": 1.6798437416671112e-06, "loss": 0.607841157913208, "num_tokens": 3077216482.0, "step": 25200 }, { "epoch": 0.03362505266413879, "grad_norm": 2.171875, "learning_rate": 1.6811770038931257e-06, "loss": 0.6276363372802735, "num_tokens": 3079769230.0, "step": 25220 }, { "epoch": 0.03365171805086689, "grad_norm": 2.390625, "learning_rate": 1.6825102661191402e-06, "loss": 0.6175715446472168, "num_tokens": 3082293223.0, "step": 25240 }, { "epoch": 0.033678383437594994, "grad_norm": 2.296875, "learning_rate": 1.6838435283451551e-06, "loss": 0.6267451763153076, "num_tokens": 3084623660.0, "step": 25260 }, { "epoch": 0.0337050488243231, "grad_norm": 1.8671875, "learning_rate": 1.6851767905711696e-06, "loss": 0.6102179527282715, "num_tokens": 3087047562.0, "step": 25280 }, { "epoch": 0.0337317142110512, "grad_norm": 1.9765625, "learning_rate": 1.6865100527971841e-06, "loss": 0.6220061302185058, "num_tokens": 3089514987.0, "step": 25300 }, { "epoch": 0.0337583795977793, "grad_norm": 2.296875, "learning_rate": 1.687843315023199e-06, "loss": 0.6574182510375977, "num_tokens": 3091865128.0, "step": 25320 }, { "epoch": 0.03378504498450741, "grad_norm": 1.96875, "learning_rate": 1.6891765772492135e-06, "loss": 0.6357899665832519, "num_tokens": 3094542167.0, "step": 25340 }, { "epoch": 0.033811710371235516, "grad_norm": 2.0, "learning_rate": 1.6905098394752282e-06, "loss": 0.605553674697876, "num_tokens": 3096864697.0, "step": 25360 }, { "epoch": 0.03383837575796362, "grad_norm": 1.9375, "learning_rate": 1.6918431017012427e-06, "loss": 0.631989860534668, "num_tokens": 3099150949.0, "step": 25380 }, { "epoch": 0.03386504114469172, "grad_norm": 2.21875, "learning_rate": 1.6931763639272574e-06, "loss": 0.6105825901031494, "num_tokens": 3101780931.0, "step": 25400 }, { "epoch": 0.033891706531419825, "grad_norm": 2.375, "learning_rate": 1.6945096261532721e-06, "loss": 0.6307942390441894, "num_tokens": 3104283521.0, "step": 25420 }, { "epoch": 0.03391837191814793, "grad_norm": 1.7109375, "learning_rate": 1.6958428883792866e-06, "loss": 0.6287823677062988, "num_tokens": 3106820180.0, "step": 25440 }, { "epoch": 0.03394503730487603, "grad_norm": 2.40625, "learning_rate": 1.697176150605301e-06, "loss": 0.633525037765503, "num_tokens": 3109498733.0, "step": 25460 }, { "epoch": 0.033971702691604135, "grad_norm": 1.9140625, "learning_rate": 1.698509412831316e-06, "loss": 0.6168752670288086, "num_tokens": 3111547542.0, "step": 25480 }, { "epoch": 0.03399836807833224, "grad_norm": 2.28125, "learning_rate": 1.6998426750573305e-06, "loss": 0.6184468269348145, "num_tokens": 3113863489.0, "step": 25500 }, { "epoch": 0.03402503346506034, "grad_norm": 2.46875, "learning_rate": 1.701175937283345e-06, "loss": 0.6081828117370606, "num_tokens": 3116517201.0, "step": 25520 }, { "epoch": 0.03405169885178845, "grad_norm": 2.265625, "learning_rate": 1.7025091995093595e-06, "loss": 0.6041893005371094, "num_tokens": 3118875269.0, "step": 25540 }, { "epoch": 0.034078364238516554, "grad_norm": 2.28125, "learning_rate": 1.7038424617353744e-06, "loss": 0.6310075759887696, "num_tokens": 3121464134.0, "step": 25560 }, { "epoch": 0.03410502962524466, "grad_norm": 2.265625, "learning_rate": 1.7051757239613889e-06, "loss": 0.6199881553649902, "num_tokens": 3124061081.0, "step": 25580 }, { "epoch": 0.03413169501197276, "grad_norm": 2.21875, "learning_rate": 1.7065089861874034e-06, "loss": 0.6204976081848145, "num_tokens": 3126364391.0, "step": 25600 }, { "epoch": 0.03415836039870086, "grad_norm": 2.15625, "learning_rate": 1.707842248413418e-06, "loss": 0.6159585952758789, "num_tokens": 3128738973.0, "step": 25620 }, { "epoch": 0.034185025785428966, "grad_norm": 2.125, "learning_rate": 1.7091755106394328e-06, "loss": 0.6202960014343262, "num_tokens": 3131440142.0, "step": 25640 }, { "epoch": 0.03421169117215707, "grad_norm": 2.21875, "learning_rate": 1.7105087728654473e-06, "loss": 0.6259657859802246, "num_tokens": 3133801107.0, "step": 25660 }, { "epoch": 0.03423835655888517, "grad_norm": 1.8046875, "learning_rate": 1.711842035091462e-06, "loss": 0.6283342838287354, "num_tokens": 3136114265.0, "step": 25680 }, { "epoch": 0.034265021945613275, "grad_norm": 2.25, "learning_rate": 1.7131752973174765e-06, "loss": 0.636676549911499, "num_tokens": 3138684931.0, "step": 25700 }, { "epoch": 0.03429168733234138, "grad_norm": 1.8984375, "learning_rate": 1.7145085595434914e-06, "loss": 0.6233251094818115, "num_tokens": 3140967677.0, "step": 25720 }, { "epoch": 0.03431835271906949, "grad_norm": 1.9375, "learning_rate": 1.7158418217695059e-06, "loss": 0.6133048057556152, "num_tokens": 3143549924.0, "step": 25740 }, { "epoch": 0.03434501810579759, "grad_norm": 2.046875, "learning_rate": 1.7171750839955203e-06, "loss": 0.6453736305236817, "num_tokens": 3145993556.0, "step": 25760 }, { "epoch": 0.034371683492525694, "grad_norm": 2.15625, "learning_rate": 1.7185083462215348e-06, "loss": 0.6007702827453614, "num_tokens": 3148433262.0, "step": 25780 }, { "epoch": 0.0343983488792538, "grad_norm": 1.890625, "learning_rate": 1.7198416084475497e-06, "loss": 0.616670799255371, "num_tokens": 3150616173.0, "step": 25800 }, { "epoch": 0.0344250142659819, "grad_norm": 1.7421875, "learning_rate": 1.7211748706735642e-06, "loss": 0.627319049835205, "num_tokens": 3153154247.0, "step": 25820 }, { "epoch": 0.03445167965271, "grad_norm": 2.3125, "learning_rate": 1.7225081328995787e-06, "loss": 0.6184469699859619, "num_tokens": 3155625161.0, "step": 25840 }, { "epoch": 0.034478345039438106, "grad_norm": 2.0, "learning_rate": 1.7238413951255934e-06, "loss": 0.619990348815918, "num_tokens": 3158137463.0, "step": 25860 }, { "epoch": 0.03450501042616621, "grad_norm": 2.140625, "learning_rate": 1.7251746573516081e-06, "loss": 0.6258575439453125, "num_tokens": 3160510949.0, "step": 25880 }, { "epoch": 0.03453167581289431, "grad_norm": 2.015625, "learning_rate": 1.7265079195776226e-06, "loss": 0.6218071937561035, "num_tokens": 3163116103.0, "step": 25900 }, { "epoch": 0.034558341199622415, "grad_norm": 2.03125, "learning_rate": 1.7278411818036373e-06, "loss": 0.6269781112670898, "num_tokens": 3165485360.0, "step": 25920 }, { "epoch": 0.034585006586350525, "grad_norm": 1.875, "learning_rate": 1.7291744440296518e-06, "loss": 0.6306398391723633, "num_tokens": 3167866186.0, "step": 25940 }, { "epoch": 0.03461167197307863, "grad_norm": 2.0, "learning_rate": 1.7305077062556665e-06, "loss": 0.61153244972229, "num_tokens": 3170299850.0, "step": 25960 }, { "epoch": 0.03463833735980673, "grad_norm": 2.359375, "learning_rate": 1.7318409684816812e-06, "loss": 0.6155884742736817, "num_tokens": 3172868388.0, "step": 25980 }, { "epoch": 0.034665002746534834, "grad_norm": 1.7421875, "learning_rate": 1.7331742307076957e-06, "loss": 0.6216565132141113, "num_tokens": 3175450115.0, "step": 26000 }, { "epoch": 0.03469166813326294, "grad_norm": 2.015625, "learning_rate": 1.7345074929337102e-06, "loss": 0.6271504402160645, "num_tokens": 3178050846.0, "step": 26020 }, { "epoch": 0.03471833351999104, "grad_norm": 1.921875, "learning_rate": 1.735840755159725e-06, "loss": 0.6105159759521485, "num_tokens": 3180742127.0, "step": 26040 }, { "epoch": 0.03474499890671914, "grad_norm": 2.03125, "learning_rate": 1.7371740173857396e-06, "loss": 0.6123240470886231, "num_tokens": 3183254174.0, "step": 26060 }, { "epoch": 0.034771664293447246, "grad_norm": 2.078125, "learning_rate": 1.738507279611754e-06, "loss": 0.6216839790344239, "num_tokens": 3185585447.0, "step": 26080 }, { "epoch": 0.03479832968017535, "grad_norm": 1.9453125, "learning_rate": 1.7398405418377688e-06, "loss": 0.6297017097473144, "num_tokens": 3187933626.0, "step": 26100 }, { "epoch": 0.03482499506690345, "grad_norm": 2.0625, "learning_rate": 1.7411738040637835e-06, "loss": 0.625577163696289, "num_tokens": 3190279603.0, "step": 26120 }, { "epoch": 0.03485166045363156, "grad_norm": 2.0, "learning_rate": 1.742507066289798e-06, "loss": 0.6187499046325684, "num_tokens": 3192829061.0, "step": 26140 }, { "epoch": 0.034878325840359665, "grad_norm": 2.046875, "learning_rate": 1.7438403285158127e-06, "loss": 0.6073351860046386, "num_tokens": 3195219451.0, "step": 26160 }, { "epoch": 0.03490499122708777, "grad_norm": 2.03125, "learning_rate": 1.7451735907418272e-06, "loss": 0.5955542087554931, "num_tokens": 3197589000.0, "step": 26180 }, { "epoch": 0.03493165661381587, "grad_norm": 2.03125, "learning_rate": 1.7465068529678419e-06, "loss": 0.61219482421875, "num_tokens": 3200203573.0, "step": 26200 }, { "epoch": 0.034958322000543975, "grad_norm": 1.65625, "learning_rate": 1.7478401151938566e-06, "loss": 0.6210914611816406, "num_tokens": 3202830953.0, "step": 26220 }, { "epoch": 0.03498498738727208, "grad_norm": 1.765625, "learning_rate": 1.749173377419871e-06, "loss": 0.6030632019042969, "num_tokens": 3205095231.0, "step": 26240 }, { "epoch": 0.03501165277400018, "grad_norm": 2.421875, "learning_rate": 1.7505066396458856e-06, "loss": 0.6101356506347656, "num_tokens": 3207537822.0, "step": 26260 }, { "epoch": 0.035038318160728284, "grad_norm": 2.15625, "learning_rate": 1.7518399018719005e-06, "loss": 0.6135547161102295, "num_tokens": 3210045484.0, "step": 26280 }, { "epoch": 0.03506498354745639, "grad_norm": 2.171875, "learning_rate": 1.753173164097915e-06, "loss": 0.5904407978057862, "num_tokens": 3212350256.0, "step": 26300 }, { "epoch": 0.03509164893418449, "grad_norm": 2.03125, "learning_rate": 1.7545064263239294e-06, "loss": 0.6252163887023926, "num_tokens": 3214826914.0, "step": 26320 }, { "epoch": 0.03511831432091259, "grad_norm": 2.0625, "learning_rate": 1.755839688549944e-06, "loss": 0.6200079917907715, "num_tokens": 3216925002.0, "step": 26340 }, { "epoch": 0.0351449797076407, "grad_norm": 2.4375, "learning_rate": 1.7571729507759588e-06, "loss": 0.6187105178833008, "num_tokens": 3219350292.0, "step": 26360 }, { "epoch": 0.035171645094368806, "grad_norm": 2.234375, "learning_rate": 1.7585062130019733e-06, "loss": 0.6165566444396973, "num_tokens": 3221699678.0, "step": 26380 }, { "epoch": 0.03519831048109691, "grad_norm": 2.40625, "learning_rate": 1.7598394752279878e-06, "loss": 0.6263745307922364, "num_tokens": 3224100553.0, "step": 26400 }, { "epoch": 0.03522497586782501, "grad_norm": 1.9375, "learning_rate": 1.7611727374540025e-06, "loss": 0.6139388084411621, "num_tokens": 3226532827.0, "step": 26420 }, { "epoch": 0.035251641254553115, "grad_norm": 2.546875, "learning_rate": 1.7625059996800172e-06, "loss": 0.6078828334808349, "num_tokens": 3228960781.0, "step": 26440 }, { "epoch": 0.03527830664128122, "grad_norm": 1.921875, "learning_rate": 1.763839261906032e-06, "loss": 0.6024033546447753, "num_tokens": 3231324441.0, "step": 26460 }, { "epoch": 0.03530497202800932, "grad_norm": 2.328125, "learning_rate": 1.7651725241320464e-06, "loss": 0.6181482315063477, "num_tokens": 3233812722.0, "step": 26480 }, { "epoch": 0.035331637414737424, "grad_norm": 2.03125, "learning_rate": 1.766505786358061e-06, "loss": 0.6304929733276368, "num_tokens": 3236073413.0, "step": 26500 }, { "epoch": 0.03535830280146553, "grad_norm": 1.96875, "learning_rate": 1.7678390485840758e-06, "loss": 0.6187681198120117, "num_tokens": 3238412745.0, "step": 26520 }, { "epoch": 0.03538496818819363, "grad_norm": 2.03125, "learning_rate": 1.7691723108100903e-06, "loss": 0.6052851676940918, "num_tokens": 3240974533.0, "step": 26540 }, { "epoch": 0.03541163357492174, "grad_norm": 2.0625, "learning_rate": 1.7705055730361048e-06, "loss": 0.6264597415924072, "num_tokens": 3243560785.0, "step": 26560 }, { "epoch": 0.03543829896164984, "grad_norm": 2.25, "learning_rate": 1.7718388352621193e-06, "loss": 0.624454402923584, "num_tokens": 3246186466.0, "step": 26580 }, { "epoch": 0.035464964348377946, "grad_norm": 1.8828125, "learning_rate": 1.7731720974881342e-06, "loss": 0.6164828777313233, "num_tokens": 3248724441.0, "step": 26600 }, { "epoch": 0.03549162973510605, "grad_norm": 1.921875, "learning_rate": 1.7745053597141487e-06, "loss": 0.611149263381958, "num_tokens": 3251269592.0, "step": 26620 }, { "epoch": 0.03551829512183415, "grad_norm": 2.140625, "learning_rate": 1.7758386219401632e-06, "loss": 0.617389440536499, "num_tokens": 3254012368.0, "step": 26640 }, { "epoch": 0.035544960508562255, "grad_norm": 2.078125, "learning_rate": 1.777171884166178e-06, "loss": 0.5948190689086914, "num_tokens": 3256356043.0, "step": 26660 }, { "epoch": 0.03557162589529036, "grad_norm": 1.9765625, "learning_rate": 1.7785051463921926e-06, "loss": 0.6038343429565429, "num_tokens": 3258786177.0, "step": 26680 }, { "epoch": 0.03559829128201846, "grad_norm": 1.8359375, "learning_rate": 1.779838408618207e-06, "loss": 0.6174479484558105, "num_tokens": 3261328484.0, "step": 26700 }, { "epoch": 0.035624956668746564, "grad_norm": 1.90625, "learning_rate": 1.7811716708442218e-06, "loss": 0.5989080905914307, "num_tokens": 3263796185.0, "step": 26720 }, { "epoch": 0.03565162205547467, "grad_norm": 2.015625, "learning_rate": 1.7825049330702365e-06, "loss": 0.618648624420166, "num_tokens": 3266405047.0, "step": 26740 }, { "epoch": 0.03567828744220278, "grad_norm": 1.8125, "learning_rate": 1.783838195296251e-06, "loss": 0.6096789836883545, "num_tokens": 3268858751.0, "step": 26760 }, { "epoch": 0.03570495282893088, "grad_norm": 2.15625, "learning_rate": 1.7851714575222657e-06, "loss": 0.6151275157928466, "num_tokens": 3271106315.0, "step": 26780 }, { "epoch": 0.03573161821565898, "grad_norm": 1.8515625, "learning_rate": 1.7865047197482802e-06, "loss": 0.6250404834747314, "num_tokens": 3273473074.0, "step": 26800 }, { "epoch": 0.035758283602387086, "grad_norm": 2.4375, "learning_rate": 1.787837981974295e-06, "loss": 0.6239864349365234, "num_tokens": 3275653944.0, "step": 26820 }, { "epoch": 0.03578494898911519, "grad_norm": 2.09375, "learning_rate": 1.7891712442003096e-06, "loss": 0.6250487327575683, "num_tokens": 3277936599.0, "step": 26840 }, { "epoch": 0.03581161437584329, "grad_norm": 2.15625, "learning_rate": 1.790504506426324e-06, "loss": 0.621727466583252, "num_tokens": 3280545635.0, "step": 26860 }, { "epoch": 0.035838279762571396, "grad_norm": 2.1875, "learning_rate": 1.7918377686523385e-06, "loss": 0.6151091575622558, "num_tokens": 3282920023.0, "step": 26880 }, { "epoch": 0.0358649451492995, "grad_norm": 2.34375, "learning_rate": 1.7931710308783535e-06, "loss": 0.6117008209228516, "num_tokens": 3285184619.0, "step": 26900 }, { "epoch": 0.0358916105360276, "grad_norm": 2.296875, "learning_rate": 1.794504293104368e-06, "loss": 0.6169661521911621, "num_tokens": 3287576759.0, "step": 26920 }, { "epoch": 0.035918275922755705, "grad_norm": 2.390625, "learning_rate": 1.7958375553303824e-06, "loss": 0.6417620658874512, "num_tokens": 3290090551.0, "step": 26940 }, { "epoch": 0.035944941309483815, "grad_norm": 1.8515625, "learning_rate": 1.7971708175563971e-06, "loss": 0.6129684925079346, "num_tokens": 3292616965.0, "step": 26960 }, { "epoch": 0.03597160669621192, "grad_norm": 1.8828125, "learning_rate": 1.7985040797824118e-06, "loss": 0.6242838859558105, "num_tokens": 3295130552.0, "step": 26980 }, { "epoch": 0.03599827208294002, "grad_norm": 1.984375, "learning_rate": 1.7998373420084263e-06, "loss": 0.6107208728790283, "num_tokens": 3297525582.0, "step": 27000 }, { "epoch": 0.036024937469668124, "grad_norm": 1.6328125, "learning_rate": 1.801170604234441e-06, "loss": 0.6250434875488281, "num_tokens": 3299911508.0, "step": 27020 }, { "epoch": 0.03605160285639623, "grad_norm": 1.8984375, "learning_rate": 1.8025038664604555e-06, "loss": 0.6185595035552979, "num_tokens": 3302315491.0, "step": 27040 }, { "epoch": 0.03607826824312433, "grad_norm": 1.7734375, "learning_rate": 1.8038371286864702e-06, "loss": 0.6144409656524659, "num_tokens": 3304583221.0, "step": 27060 }, { "epoch": 0.03610493362985243, "grad_norm": 1.7734375, "learning_rate": 1.805170390912485e-06, "loss": 0.611473560333252, "num_tokens": 3306942413.0, "step": 27080 }, { "epoch": 0.036131599016580536, "grad_norm": 1.8828125, "learning_rate": 1.8065036531384994e-06, "loss": 0.6090626239776611, "num_tokens": 3309286979.0, "step": 27100 }, { "epoch": 0.03615826440330864, "grad_norm": 1.984375, "learning_rate": 1.807836915364514e-06, "loss": 0.619596004486084, "num_tokens": 3311848467.0, "step": 27120 }, { "epoch": 0.03618492979003674, "grad_norm": 2.171875, "learning_rate": 1.8091701775905288e-06, "loss": 0.6152605056762696, "num_tokens": 3314477166.0, "step": 27140 }, { "epoch": 0.03621159517676485, "grad_norm": 1.8046875, "learning_rate": 1.8105034398165433e-06, "loss": 0.6048743247985839, "num_tokens": 3316762236.0, "step": 27160 }, { "epoch": 0.036238260563492955, "grad_norm": 2.21875, "learning_rate": 1.8118367020425578e-06, "loss": 0.6164613246917725, "num_tokens": 3319380047.0, "step": 27180 }, { "epoch": 0.03626492595022106, "grad_norm": 2.328125, "learning_rate": 1.8131699642685723e-06, "loss": 0.6087224960327149, "num_tokens": 3321938874.0, "step": 27200 }, { "epoch": 0.03629159133694916, "grad_norm": 1.6484375, "learning_rate": 1.8145032264945872e-06, "loss": 0.596917724609375, "num_tokens": 3324524101.0, "step": 27220 }, { "epoch": 0.036318256723677264, "grad_norm": 2.0625, "learning_rate": 1.8158364887206017e-06, "loss": 0.6298905372619629, "num_tokens": 3327017169.0, "step": 27240 }, { "epoch": 0.03634492211040537, "grad_norm": 2.34375, "learning_rate": 1.8171697509466164e-06, "loss": 0.6154132843017578, "num_tokens": 3329301757.0, "step": 27260 }, { "epoch": 0.03637158749713347, "grad_norm": 2.40625, "learning_rate": 1.8185030131726309e-06, "loss": 0.6152504920959473, "num_tokens": 3331733176.0, "step": 27280 }, { "epoch": 0.03639825288386157, "grad_norm": 1.8125, "learning_rate": 1.8198362753986456e-06, "loss": 0.5896107196807862, "num_tokens": 3334162637.0, "step": 27300 }, { "epoch": 0.036424918270589676, "grad_norm": 2.25, "learning_rate": 1.8211695376246603e-06, "loss": 0.6012941360473633, "num_tokens": 3336428998.0, "step": 27320 }, { "epoch": 0.03645158365731778, "grad_norm": 1.640625, "learning_rate": 1.8225027998506748e-06, "loss": 0.6154028415679932, "num_tokens": 3338932544.0, "step": 27340 }, { "epoch": 0.03647824904404589, "grad_norm": 2.578125, "learning_rate": 1.8238360620766893e-06, "loss": 0.6078596591949463, "num_tokens": 3341367322.0, "step": 27360 }, { "epoch": 0.03650491443077399, "grad_norm": 1.9453125, "learning_rate": 1.8251693243027042e-06, "loss": 0.6114880084991455, "num_tokens": 3343824597.0, "step": 27380 }, { "epoch": 0.036531579817502095, "grad_norm": 2.0, "learning_rate": 1.8265025865287187e-06, "loss": 0.6030231475830078, "num_tokens": 3346323193.0, "step": 27400 }, { "epoch": 0.0365582452042302, "grad_norm": 2.125, "learning_rate": 1.8278358487547332e-06, "loss": 0.6099690437316895, "num_tokens": 3348691778.0, "step": 27420 }, { "epoch": 0.0365849105909583, "grad_norm": 2.453125, "learning_rate": 1.8291691109807476e-06, "loss": 0.6187773227691651, "num_tokens": 3351019041.0, "step": 27440 }, { "epoch": 0.036611575977686404, "grad_norm": 2.53125, "learning_rate": 1.8305023732067626e-06, "loss": 0.6162085056304931, "num_tokens": 3353491218.0, "step": 27460 }, { "epoch": 0.03663824136441451, "grad_norm": 1.8828125, "learning_rate": 1.831835635432777e-06, "loss": 0.594570255279541, "num_tokens": 3355957061.0, "step": 27480 }, { "epoch": 0.03666490675114261, "grad_norm": 2.078125, "learning_rate": 1.8331688976587915e-06, "loss": 0.6083543300628662, "num_tokens": 3358370135.0, "step": 27500 }, { "epoch": 0.036691572137870714, "grad_norm": 1.96875, "learning_rate": 1.8345021598848062e-06, "loss": 0.6152499198913575, "num_tokens": 3360859209.0, "step": 27520 }, { "epoch": 0.03671823752459882, "grad_norm": 1.8515625, "learning_rate": 1.835835422110821e-06, "loss": 0.6261263370513916, "num_tokens": 3363280137.0, "step": 27540 }, { "epoch": 0.03674490291132692, "grad_norm": 2.28125, "learning_rate": 1.8371686843368356e-06, "loss": 0.624005937576294, "num_tokens": 3365652961.0, "step": 27560 }, { "epoch": 0.03677156829805503, "grad_norm": 2.546875, "learning_rate": 1.8385019465628501e-06, "loss": 0.6175934314727783, "num_tokens": 3368214719.0, "step": 27580 }, { "epoch": 0.03679823368478313, "grad_norm": 1.59375, "learning_rate": 1.8398352087888646e-06, "loss": 0.5999637603759765, "num_tokens": 3370807492.0, "step": 27600 }, { "epoch": 0.036824899071511236, "grad_norm": 2.21875, "learning_rate": 1.8411684710148795e-06, "loss": 0.6249133110046386, "num_tokens": 3373060155.0, "step": 27620 }, { "epoch": 0.03685156445823934, "grad_norm": 1.9453125, "learning_rate": 1.842501733240894e-06, "loss": 0.5988304138183593, "num_tokens": 3375547930.0, "step": 27640 }, { "epoch": 0.03687822984496744, "grad_norm": 1.8046875, "learning_rate": 1.8438349954669085e-06, "loss": 0.6170344352722168, "num_tokens": 3378055157.0, "step": 27660 }, { "epoch": 0.036904895231695545, "grad_norm": 1.8046875, "learning_rate": 1.845168257692923e-06, "loss": 0.6076711654663086, "num_tokens": 3380545823.0, "step": 27680 }, { "epoch": 0.03693156061842365, "grad_norm": 2.234375, "learning_rate": 1.846501519918938e-06, "loss": 0.6209087371826172, "num_tokens": 3383053966.0, "step": 27700 }, { "epoch": 0.03695822600515175, "grad_norm": 1.984375, "learning_rate": 1.8478347821449524e-06, "loss": 0.6130331993103028, "num_tokens": 3385817584.0, "step": 27720 }, { "epoch": 0.036984891391879854, "grad_norm": 2.0625, "learning_rate": 1.849168044370967e-06, "loss": 0.6342805862426758, "num_tokens": 3388071483.0, "step": 27740 }, { "epoch": 0.03701155677860796, "grad_norm": 2.390625, "learning_rate": 1.8505013065969816e-06, "loss": 0.6123454093933105, "num_tokens": 3390716843.0, "step": 27760 }, { "epoch": 0.03703822216533607, "grad_norm": 2.28125, "learning_rate": 1.8518345688229963e-06, "loss": 0.623469066619873, "num_tokens": 3393285123.0, "step": 27780 }, { "epoch": 0.03706488755206417, "grad_norm": 1.8515625, "learning_rate": 1.8531678310490108e-06, "loss": 0.6035736083984375, "num_tokens": 3395544320.0, "step": 27800 }, { "epoch": 0.03709155293879227, "grad_norm": 2.453125, "learning_rate": 1.8545010932750255e-06, "loss": 0.5866366386413574, "num_tokens": 3398118307.0, "step": 27820 }, { "epoch": 0.037118218325520376, "grad_norm": 2.0625, "learning_rate": 1.85583435550104e-06, "loss": 0.6214004516601562, "num_tokens": 3400504853.0, "step": 27840 }, { "epoch": 0.03714488371224848, "grad_norm": 2.21875, "learning_rate": 1.8571676177270547e-06, "loss": 0.59541015625, "num_tokens": 3403166252.0, "step": 27860 }, { "epoch": 0.03717154909897658, "grad_norm": 1.7890625, "learning_rate": 1.8585008799530694e-06, "loss": 0.59789137840271, "num_tokens": 3405690312.0, "step": 27880 }, { "epoch": 0.037198214485704685, "grad_norm": 2.15625, "learning_rate": 1.8598341421790839e-06, "loss": 0.6117040634155273, "num_tokens": 3408305561.0, "step": 27900 }, { "epoch": 0.03722487987243279, "grad_norm": 2.5, "learning_rate": 1.8611674044050984e-06, "loss": 0.6129666328430176, "num_tokens": 3410806998.0, "step": 27920 }, { "epoch": 0.03725154525916089, "grad_norm": 1.734375, "learning_rate": 1.8625006666311133e-06, "loss": 0.6055480480194092, "num_tokens": 3413334418.0, "step": 27940 }, { "epoch": 0.037278210645888994, "grad_norm": 2.328125, "learning_rate": 1.8638339288571278e-06, "loss": 0.6080033302307128, "num_tokens": 3415935793.0, "step": 27960 }, { "epoch": 0.037304876032617104, "grad_norm": 2.109375, "learning_rate": 1.8651671910831423e-06, "loss": 0.5973377227783203, "num_tokens": 3418521625.0, "step": 27980 }, { "epoch": 0.03733154141934521, "grad_norm": 2.296875, "learning_rate": 1.8665004533091572e-06, "loss": 0.6292350769042969, "num_tokens": 3420908774.0, "step": 28000 }, { "epoch": 0.03735820680607331, "grad_norm": 1.71875, "learning_rate": 1.8678337155351717e-06, "loss": 0.6090744018554688, "num_tokens": 3423423264.0, "step": 28020 }, { "epoch": 0.03738487219280141, "grad_norm": 1.8671875, "learning_rate": 1.8691669777611862e-06, "loss": 0.6129346370697022, "num_tokens": 3425682436.0, "step": 28040 }, { "epoch": 0.037411537579529516, "grad_norm": 1.9453125, "learning_rate": 1.8705002399872009e-06, "loss": 0.583068561553955, "num_tokens": 3428121455.0, "step": 28060 }, { "epoch": 0.03743820296625762, "grad_norm": 1.6640625, "learning_rate": 1.8718335022132156e-06, "loss": 0.5862391948699951, "num_tokens": 3430539127.0, "step": 28080 }, { "epoch": 0.03746486835298572, "grad_norm": 2.140625, "learning_rate": 1.87316676443923e-06, "loss": 0.6062004089355468, "num_tokens": 3433123723.0, "step": 28100 }, { "epoch": 0.037491533739713825, "grad_norm": 2.328125, "learning_rate": 1.8745000266652447e-06, "loss": 0.6034299850463867, "num_tokens": 3435595903.0, "step": 28120 }, { "epoch": 0.03751819912644193, "grad_norm": 2.390625, "learning_rate": 1.8758332888912592e-06, "loss": 0.6104693412780762, "num_tokens": 3438091343.0, "step": 28140 }, { "epoch": 0.03754486451317003, "grad_norm": 1.8671875, "learning_rate": 1.877166551117274e-06, "loss": 0.5952731132507324, "num_tokens": 3440475520.0, "step": 28160 }, { "epoch": 0.03757152989989814, "grad_norm": 1.7265625, "learning_rate": 1.8784998133432886e-06, "loss": 0.6085066795349121, "num_tokens": 3443027605.0, "step": 28180 }, { "epoch": 0.037598195286626244, "grad_norm": 1.828125, "learning_rate": 1.8798330755693031e-06, "loss": 0.6141615867614746, "num_tokens": 3445218884.0, "step": 28200 }, { "epoch": 0.03762486067335435, "grad_norm": 2.28125, "learning_rate": 1.8811663377953176e-06, "loss": 0.5981005668640137, "num_tokens": 3447541377.0, "step": 28220 }, { "epoch": 0.03765152606008245, "grad_norm": 2.484375, "learning_rate": 1.8824996000213325e-06, "loss": 0.618048620223999, "num_tokens": 3450232391.0, "step": 28240 }, { "epoch": 0.037678191446810554, "grad_norm": 2.078125, "learning_rate": 1.883832862247347e-06, "loss": 0.6049121379852295, "num_tokens": 3452659586.0, "step": 28260 }, { "epoch": 0.03770485683353866, "grad_norm": 1.6875, "learning_rate": 1.8851661244733615e-06, "loss": 0.6016382217407227, "num_tokens": 3455159832.0, "step": 28280 }, { "epoch": 0.03773152222026676, "grad_norm": 1.7734375, "learning_rate": 1.886499386699376e-06, "loss": 0.625649356842041, "num_tokens": 3457460595.0, "step": 28300 }, { "epoch": 0.03775818760699486, "grad_norm": 1.90625, "learning_rate": 1.887832648925391e-06, "loss": 0.6207383155822754, "num_tokens": 3460006586.0, "step": 28320 }, { "epoch": 0.037784852993722966, "grad_norm": 2.375, "learning_rate": 1.8891659111514054e-06, "loss": 0.6041228294372558, "num_tokens": 3462628299.0, "step": 28340 }, { "epoch": 0.03781151838045107, "grad_norm": 2.390625, "learning_rate": 1.89049917337742e-06, "loss": 0.6151886940002441, "num_tokens": 3465028943.0, "step": 28360 }, { "epoch": 0.03783818376717918, "grad_norm": 2.15625, "learning_rate": 1.8918324356034346e-06, "loss": 0.6016088485717773, "num_tokens": 3467519293.0, "step": 28380 }, { "epoch": 0.03786484915390728, "grad_norm": 1.8046875, "learning_rate": 1.8931656978294493e-06, "loss": 0.587982177734375, "num_tokens": 3469937052.0, "step": 28400 }, { "epoch": 0.037891514540635385, "grad_norm": 2.03125, "learning_rate": 1.894498960055464e-06, "loss": 0.617464256286621, "num_tokens": 3472587628.0, "step": 28420 }, { "epoch": 0.03791817992736349, "grad_norm": 2.046875, "learning_rate": 1.8958322222814785e-06, "loss": 0.5835497856140137, "num_tokens": 3475156652.0, "step": 28440 }, { "epoch": 0.03794484531409159, "grad_norm": 2.09375, "learning_rate": 1.897165484507493e-06, "loss": 0.6219272613525391, "num_tokens": 3477648591.0, "step": 28460 }, { "epoch": 0.037971510700819694, "grad_norm": 1.90625, "learning_rate": 1.8984987467335079e-06, "loss": 0.5921152591705322, "num_tokens": 3480068921.0, "step": 28480 }, { "epoch": 0.0379981760875478, "grad_norm": 1.703125, "learning_rate": 1.8998320089595224e-06, "loss": 0.6017546653747559, "num_tokens": 3482501051.0, "step": 28500 }, { "epoch": 0.0380248414742759, "grad_norm": 2.46875, "learning_rate": 1.9011652711855369e-06, "loss": 0.5971287727355957, "num_tokens": 3485169138.0, "step": 28520 }, { "epoch": 0.038051506861004, "grad_norm": 1.6640625, "learning_rate": 1.9024985334115514e-06, "loss": 0.6036543846130371, "num_tokens": 3487510147.0, "step": 28540 }, { "epoch": 0.038078172247732106, "grad_norm": 2.53125, "learning_rate": 1.9038317956375663e-06, "loss": 0.6068888664245605, "num_tokens": 3489650885.0, "step": 28560 }, { "epoch": 0.038104837634460216, "grad_norm": 2.328125, "learning_rate": 1.9051650578635808e-06, "loss": 0.620691967010498, "num_tokens": 3492263325.0, "step": 28580 }, { "epoch": 0.03813150302118832, "grad_norm": 2.046875, "learning_rate": 1.9064983200895953e-06, "loss": 0.5917590618133545, "num_tokens": 3494583662.0, "step": 28600 }, { "epoch": 0.03815816840791642, "grad_norm": 2.15625, "learning_rate": 1.9078315823156097e-06, "loss": 0.6000723838806152, "num_tokens": 3496861241.0, "step": 28620 }, { "epoch": 0.038184833794644525, "grad_norm": 2.109375, "learning_rate": 1.909164844541625e-06, "loss": 0.6218320846557617, "num_tokens": 3499410936.0, "step": 28640 }, { "epoch": 0.03821149918137263, "grad_norm": 2.078125, "learning_rate": 1.910498106767639e-06, "loss": 0.6004826545715332, "num_tokens": 3502118163.0, "step": 28660 }, { "epoch": 0.03823816456810073, "grad_norm": 2.0625, "learning_rate": 1.911831368993654e-06, "loss": 0.613385009765625, "num_tokens": 3504539773.0, "step": 28680 }, { "epoch": 0.038264829954828834, "grad_norm": 2.09375, "learning_rate": 1.913164631219668e-06, "loss": 0.5880148887634278, "num_tokens": 3507009086.0, "step": 28700 }, { "epoch": 0.03829149534155694, "grad_norm": 2.234375, "learning_rate": 1.9144978934456833e-06, "loss": 0.6127802848815918, "num_tokens": 3509609884.0, "step": 28720 }, { "epoch": 0.03831816072828504, "grad_norm": 2.734375, "learning_rate": 1.9158311556716975e-06, "loss": 0.6109260559082031, "num_tokens": 3511955477.0, "step": 28740 }, { "epoch": 0.03834482611501314, "grad_norm": 2.0625, "learning_rate": 1.9171644178977122e-06, "loss": 0.5910392761230469, "num_tokens": 3514238646.0, "step": 28760 }, { "epoch": 0.038371491501741246, "grad_norm": 2.078125, "learning_rate": 1.918497680123727e-06, "loss": 0.6068500518798828, "num_tokens": 3516625518.0, "step": 28780 }, { "epoch": 0.038398156888469356, "grad_norm": 1.9453125, "learning_rate": 1.9198309423497416e-06, "loss": 0.599970531463623, "num_tokens": 3519029619.0, "step": 28800 }, { "epoch": 0.03842482227519746, "grad_norm": 1.8046875, "learning_rate": 1.9211642045757563e-06, "loss": 0.6309585094451904, "num_tokens": 3521513283.0, "step": 28820 }, { "epoch": 0.03845148766192556, "grad_norm": 2.125, "learning_rate": 1.9224974668017706e-06, "loss": 0.5892726898193359, "num_tokens": 3523999337.0, "step": 28840 }, { "epoch": 0.038478153048653665, "grad_norm": 2.046875, "learning_rate": 1.9238307290277853e-06, "loss": 0.581696605682373, "num_tokens": 3526560974.0, "step": 28860 }, { "epoch": 0.03850481843538177, "grad_norm": 2.125, "learning_rate": 1.9251639912538e-06, "loss": 0.5952348709106445, "num_tokens": 3528891171.0, "step": 28880 }, { "epoch": 0.03853148382210987, "grad_norm": 2.453125, "learning_rate": 1.9264972534798147e-06, "loss": 0.5929898262023926, "num_tokens": 3531393095.0, "step": 28900 }, { "epoch": 0.038558149208837975, "grad_norm": 2.015625, "learning_rate": 1.927830515705829e-06, "loss": 0.6165133476257324, "num_tokens": 3533791812.0, "step": 28920 }, { "epoch": 0.03858481459556608, "grad_norm": 1.7109375, "learning_rate": 1.9291637779318437e-06, "loss": 0.5939981460571289, "num_tokens": 3536200501.0, "step": 28940 }, { "epoch": 0.03861147998229418, "grad_norm": 1.8125, "learning_rate": 1.9304970401578584e-06, "loss": 0.6252096652984619, "num_tokens": 3538613189.0, "step": 28960 }, { "epoch": 0.038638145369022284, "grad_norm": 1.78125, "learning_rate": 1.931830302383873e-06, "loss": 0.6170331954956054, "num_tokens": 3541087144.0, "step": 28980 }, { "epoch": 0.038664810755750394, "grad_norm": 1.6953125, "learning_rate": 1.9331635646098874e-06, "loss": 0.601634931564331, "num_tokens": 3543414393.0, "step": 29000 }, { "epoch": 0.0386914761424785, "grad_norm": 2.0625, "learning_rate": 1.934496826835902e-06, "loss": 0.590277099609375, "num_tokens": 3545718898.0, "step": 29020 }, { "epoch": 0.0387181415292066, "grad_norm": 2.109375, "learning_rate": 1.9358300890619168e-06, "loss": 0.5967676639556885, "num_tokens": 3548170507.0, "step": 29040 }, { "epoch": 0.0387448069159347, "grad_norm": 2.125, "learning_rate": 1.9371633512879315e-06, "loss": 0.6056265354156494, "num_tokens": 3550680838.0, "step": 29060 }, { "epoch": 0.038771472302662806, "grad_norm": 2.078125, "learning_rate": 1.938496613513946e-06, "loss": 0.6163152694702149, "num_tokens": 3553205041.0, "step": 29080 }, { "epoch": 0.03879813768939091, "grad_norm": 1.96875, "learning_rate": 1.9398298757399605e-06, "loss": 0.6162360191345215, "num_tokens": 3555503098.0, "step": 29100 }, { "epoch": 0.03882480307611901, "grad_norm": 2.078125, "learning_rate": 1.9411631379659756e-06, "loss": 0.6004575729370117, "num_tokens": 3557856044.0, "step": 29120 }, { "epoch": 0.038851468462847115, "grad_norm": 2.34375, "learning_rate": 1.94249640019199e-06, "loss": 0.607326889038086, "num_tokens": 3560357801.0, "step": 29140 }, { "epoch": 0.03887813384957522, "grad_norm": 1.8515625, "learning_rate": 1.9438296624180046e-06, "loss": 0.5997065544128418, "num_tokens": 3562903127.0, "step": 29160 }, { "epoch": 0.03890479923630332, "grad_norm": 2.109375, "learning_rate": 1.945162924644019e-06, "loss": 0.6219728469848633, "num_tokens": 3565447415.0, "step": 29180 }, { "epoch": 0.03893146462303143, "grad_norm": 2.125, "learning_rate": 1.946496186870034e-06, "loss": 0.5962278366088867, "num_tokens": 3568037454.0, "step": 29200 }, { "epoch": 0.038958130009759534, "grad_norm": 2.046875, "learning_rate": 1.9478294490960482e-06, "loss": 0.6080905437469483, "num_tokens": 3570474895.0, "step": 29220 }, { "epoch": 0.03898479539648764, "grad_norm": 1.8359375, "learning_rate": 1.949162711322063e-06, "loss": 0.6140369415283203, "num_tokens": 3572921279.0, "step": 29240 }, { "epoch": 0.03901146078321574, "grad_norm": 2.28125, "learning_rate": 1.9504959735480777e-06, "loss": 0.6027388572692871, "num_tokens": 3575486352.0, "step": 29260 }, { "epoch": 0.03903812616994384, "grad_norm": 2.3125, "learning_rate": 1.9518292357740924e-06, "loss": 0.59066801071167, "num_tokens": 3578010321.0, "step": 29280 }, { "epoch": 0.039064791556671946, "grad_norm": 2.28125, "learning_rate": 1.9531624980001066e-06, "loss": 0.597724723815918, "num_tokens": 3580450969.0, "step": 29300 }, { "epoch": 0.03909145694340005, "grad_norm": 1.78125, "learning_rate": 1.9544957602261213e-06, "loss": 0.5957501411437989, "num_tokens": 3582910797.0, "step": 29320 }, { "epoch": 0.03911812233012815, "grad_norm": 1.90625, "learning_rate": 1.955829022452136e-06, "loss": 0.6011806488037109, "num_tokens": 3585286537.0, "step": 29340 }, { "epoch": 0.039144787716856255, "grad_norm": 2.265625, "learning_rate": 1.9571622846781507e-06, "loss": 0.6056885719299316, "num_tokens": 3587796437.0, "step": 29360 }, { "epoch": 0.03917145310358436, "grad_norm": 2.09375, "learning_rate": 1.9584955469041654e-06, "loss": 0.6101085662841796, "num_tokens": 3590487372.0, "step": 29380 }, { "epoch": 0.03919811849031247, "grad_norm": 2.203125, "learning_rate": 1.9598288091301797e-06, "loss": 0.6006514549255371, "num_tokens": 3593014379.0, "step": 29400 }, { "epoch": 0.03922478387704057, "grad_norm": 1.8828125, "learning_rate": 1.961162071356195e-06, "loss": 0.6131966590881348, "num_tokens": 3595579894.0, "step": 29420 }, { "epoch": 0.039251449263768674, "grad_norm": 1.859375, "learning_rate": 1.962495333582209e-06, "loss": 0.5672598838806152, "num_tokens": 3598032973.0, "step": 29440 }, { "epoch": 0.03927811465049678, "grad_norm": 2.15625, "learning_rate": 1.963828595808224e-06, "loss": 0.6050101280212402, "num_tokens": 3600494885.0, "step": 29460 }, { "epoch": 0.03930478003722488, "grad_norm": 2.34375, "learning_rate": 1.965161858034238e-06, "loss": 0.6054593086242676, "num_tokens": 3603011116.0, "step": 29480 }, { "epoch": 0.03933144542395298, "grad_norm": 2.03125, "learning_rate": 1.9664951202602532e-06, "loss": 0.5957494735717773, "num_tokens": 3605325588.0, "step": 29500 }, { "epoch": 0.039358110810681086, "grad_norm": 1.9609375, "learning_rate": 1.9678283824862675e-06, "loss": 0.6057615756988526, "num_tokens": 3607762226.0, "step": 29520 }, { "epoch": 0.03938477619740919, "grad_norm": 2.015625, "learning_rate": 1.969161644712282e-06, "loss": 0.5853971004486084, "num_tokens": 3610241048.0, "step": 29540 }, { "epoch": 0.03941144158413729, "grad_norm": 1.78125, "learning_rate": 1.970494906938297e-06, "loss": 0.6064301013946534, "num_tokens": 3612621737.0, "step": 29560 }, { "epoch": 0.039438106970865396, "grad_norm": 1.9765625, "learning_rate": 1.9718281691643116e-06, "loss": 0.6061646461486816, "num_tokens": 3615259482.0, "step": 29580 }, { "epoch": 0.039464772357593506, "grad_norm": 1.78125, "learning_rate": 1.973161431390326e-06, "loss": 0.6164580345153808, "num_tokens": 3617594265.0, "step": 29600 }, { "epoch": 0.03949143774432161, "grad_norm": 2.046875, "learning_rate": 1.9744946936163406e-06, "loss": 0.5985123634338378, "num_tokens": 3620040051.0, "step": 29620 }, { "epoch": 0.03951810313104971, "grad_norm": 1.953125, "learning_rate": 1.9758279558423553e-06, "loss": 0.6038503646850586, "num_tokens": 3622253622.0, "step": 29640 }, { "epoch": 0.039544768517777815, "grad_norm": 2.296875, "learning_rate": 1.97716121806837e-06, "loss": 0.6124662399291992, "num_tokens": 3624627684.0, "step": 29660 }, { "epoch": 0.03957143390450592, "grad_norm": 1.8828125, "learning_rate": 1.9784944802943847e-06, "loss": 0.5985119819641114, "num_tokens": 3627011978.0, "step": 29680 }, { "epoch": 0.03959809929123402, "grad_norm": 1.7734375, "learning_rate": 1.979827742520399e-06, "loss": 0.598232364654541, "num_tokens": 3629354405.0, "step": 29700 }, { "epoch": 0.039624764677962124, "grad_norm": 2.171875, "learning_rate": 1.9811610047464137e-06, "loss": 0.5791296005249024, "num_tokens": 3631772184.0, "step": 29720 }, { "epoch": 0.03965143006469023, "grad_norm": 1.796875, "learning_rate": 1.9824942669724284e-06, "loss": 0.607183837890625, "num_tokens": 3634122160.0, "step": 29740 }, { "epoch": 0.03967809545141833, "grad_norm": 1.9609375, "learning_rate": 1.983827529198443e-06, "loss": 0.6043066501617431, "num_tokens": 3636634038.0, "step": 29760 }, { "epoch": 0.03970476083814643, "grad_norm": 2.046875, "learning_rate": 1.9851607914244573e-06, "loss": 0.6038048744201661, "num_tokens": 3638862382.0, "step": 29780 }, { "epoch": 0.039731426224874536, "grad_norm": 1.5234375, "learning_rate": 1.986494053650472e-06, "loss": 0.6054328918457031, "num_tokens": 3641537298.0, "step": 29800 }, { "epoch": 0.039758091611602646, "grad_norm": 2.015625, "learning_rate": 1.9878273158764868e-06, "loss": 0.6017300605773925, "num_tokens": 3644162528.0, "step": 29820 }, { "epoch": 0.03978475699833075, "grad_norm": 2.109375, "learning_rate": 1.9891605781025015e-06, "loss": 0.591191577911377, "num_tokens": 3646722665.0, "step": 29840 }, { "epoch": 0.03981142238505885, "grad_norm": 1.9375, "learning_rate": 1.990493840328516e-06, "loss": 0.5904626846313477, "num_tokens": 3649257916.0, "step": 29860 }, { "epoch": 0.039838087771786955, "grad_norm": 1.8125, "learning_rate": 1.9918271025545304e-06, "loss": 0.6224621295928955, "num_tokens": 3651738694.0, "step": 29880 }, { "epoch": 0.03986475315851506, "grad_norm": 1.90625, "learning_rate": 1.993160364780545e-06, "loss": 0.6131059646606445, "num_tokens": 3654091464.0, "step": 29900 }, { "epoch": 0.03989141854524316, "grad_norm": 2.21875, "learning_rate": 1.99449362700656e-06, "loss": 0.6072184562683105, "num_tokens": 3656614769.0, "step": 29920 }, { "epoch": 0.039918083931971264, "grad_norm": 2.046875, "learning_rate": 1.9958268892325745e-06, "loss": 0.5673712253570556, "num_tokens": 3659134390.0, "step": 29940 }, { "epoch": 0.03994474931869937, "grad_norm": 2.1875, "learning_rate": 1.997160151458589e-06, "loss": 0.5926235675811767, "num_tokens": 3661803347.0, "step": 29960 }, { "epoch": 0.03997141470542747, "grad_norm": 2.125, "learning_rate": 1.998493413684604e-06, "loss": 0.5940802574157715, "num_tokens": 3664181550.0, "step": 29980 }, { "epoch": 0.03999808009215557, "grad_norm": 1.9375, "learning_rate": 1.9998266759106182e-06, "loss": 0.6119240760803223, "num_tokens": 3666705518.0, "step": 30000 }, { "epoch": 0.04002474547888368, "grad_norm": 2.09375, "learning_rate": 2.001159938136633e-06, "loss": 0.6178216934204102, "num_tokens": 3669230406.0, "step": 30020 }, { "epoch": 0.040051410865611786, "grad_norm": 1.8203125, "learning_rate": 2.002493200362647e-06, "loss": 0.5887371063232422, "num_tokens": 3671822524.0, "step": 30040 }, { "epoch": 0.04007807625233989, "grad_norm": 2.234375, "learning_rate": 2.0038264625886623e-06, "loss": 0.592092227935791, "num_tokens": 3674183123.0, "step": 30060 }, { "epoch": 0.04010474163906799, "grad_norm": 2.53125, "learning_rate": 2.0051597248146766e-06, "loss": 0.598025131225586, "num_tokens": 3676604020.0, "step": 30080 }, { "epoch": 0.040131407025796095, "grad_norm": 2.203125, "learning_rate": 2.0064929870406913e-06, "loss": 0.5762229442596436, "num_tokens": 3678979862.0, "step": 30100 }, { "epoch": 0.0401580724125242, "grad_norm": 2.09375, "learning_rate": 2.007826249266706e-06, "loss": 0.5943465232849121, "num_tokens": 3681643550.0, "step": 30120 }, { "epoch": 0.0401847377992523, "grad_norm": 2.515625, "learning_rate": 2.0091595114927207e-06, "loss": 0.5795404434204101, "num_tokens": 3683874360.0, "step": 30140 }, { "epoch": 0.040211403185980404, "grad_norm": 2.09375, "learning_rate": 2.010492773718735e-06, "loss": 0.607211685180664, "num_tokens": 3686394241.0, "step": 30160 }, { "epoch": 0.04023806857270851, "grad_norm": 1.9765625, "learning_rate": 2.0118260359447497e-06, "loss": 0.5958292007446289, "num_tokens": 3688853242.0, "step": 30180 }, { "epoch": 0.04026473395943661, "grad_norm": 2.046875, "learning_rate": 2.0131592981707644e-06, "loss": 0.5687320709228516, "num_tokens": 3691310160.0, "step": 30200 }, { "epoch": 0.04029139934616472, "grad_norm": 1.78125, "learning_rate": 2.014492560396779e-06, "loss": 0.5915982246398925, "num_tokens": 3693740345.0, "step": 30220 }, { "epoch": 0.040318064732892823, "grad_norm": 1.9140625, "learning_rate": 2.015825822622794e-06, "loss": 0.5877120971679688, "num_tokens": 3696128299.0, "step": 30240 }, { "epoch": 0.04034473011962093, "grad_norm": 1.84375, "learning_rate": 2.017159084848808e-06, "loss": 0.586133623123169, "num_tokens": 3698802505.0, "step": 30260 }, { "epoch": 0.04037139550634903, "grad_norm": 1.828125, "learning_rate": 2.0184923470748228e-06, "loss": 0.5889925956726074, "num_tokens": 3701191955.0, "step": 30280 }, { "epoch": 0.04039806089307713, "grad_norm": 1.6171875, "learning_rate": 2.0198256093008375e-06, "loss": 0.6108860492706298, "num_tokens": 3703532401.0, "step": 30300 }, { "epoch": 0.040424726279805236, "grad_norm": 1.609375, "learning_rate": 2.021158871526852e-06, "loss": 0.5981254577636719, "num_tokens": 3705988416.0, "step": 30320 }, { "epoch": 0.04045139166653334, "grad_norm": 1.5859375, "learning_rate": 2.0224921337528665e-06, "loss": 0.5860631465911865, "num_tokens": 3708281185.0, "step": 30340 }, { "epoch": 0.04047805705326144, "grad_norm": 2.15625, "learning_rate": 2.023825395978881e-06, "loss": 0.5841417789459229, "num_tokens": 3710877021.0, "step": 30360 }, { "epoch": 0.040504722439989545, "grad_norm": 1.734375, "learning_rate": 2.025158658204896e-06, "loss": 0.5843054771423339, "num_tokens": 3713161983.0, "step": 30380 }, { "epoch": 0.04053138782671765, "grad_norm": 1.7265625, "learning_rate": 2.0264919204309106e-06, "loss": 0.6072335720062256, "num_tokens": 3715558959.0, "step": 30400 }, { "epoch": 0.04055805321344576, "grad_norm": 2.03125, "learning_rate": 2.0278251826569253e-06, "loss": 0.6070553302764893, "num_tokens": 3717861358.0, "step": 30420 }, { "epoch": 0.04058471860017386, "grad_norm": 2.390625, "learning_rate": 2.0291584448829395e-06, "loss": 0.6054606437683105, "num_tokens": 3720258518.0, "step": 30440 }, { "epoch": 0.040611383986901964, "grad_norm": 2.140625, "learning_rate": 2.0304917071089542e-06, "loss": 0.5924922943115234, "num_tokens": 3722775989.0, "step": 30460 }, { "epoch": 0.04063804937363007, "grad_norm": 2.015625, "learning_rate": 2.031824969334969e-06, "loss": 0.5847885131835937, "num_tokens": 3725314865.0, "step": 30480 }, { "epoch": 0.04066471476035817, "grad_norm": 1.6796875, "learning_rate": 2.0331582315609836e-06, "loss": 0.5963840961456299, "num_tokens": 3727744182.0, "step": 30500 }, { "epoch": 0.04069138014708627, "grad_norm": 1.8203125, "learning_rate": 2.034491493786998e-06, "loss": 0.6036111831665039, "num_tokens": 3730161825.0, "step": 30520 }, { "epoch": 0.040718045533814376, "grad_norm": 2.078125, "learning_rate": 2.035824756013013e-06, "loss": 0.6133113384246827, "num_tokens": 3732654947.0, "step": 30540 }, { "epoch": 0.04074471092054248, "grad_norm": 1.8515625, "learning_rate": 2.0371580182390273e-06, "loss": 0.6126099586486816, "num_tokens": 3735237457.0, "step": 30560 }, { "epoch": 0.04077137630727058, "grad_norm": 1.984375, "learning_rate": 2.038491280465042e-06, "loss": 0.5995914459228515, "num_tokens": 3737447048.0, "step": 30580 }, { "epoch": 0.040798041693998685, "grad_norm": 1.9140625, "learning_rate": 2.0398245426910563e-06, "loss": 0.5878136634826661, "num_tokens": 3739806839.0, "step": 30600 }, { "epoch": 0.040824707080726795, "grad_norm": 2.03125, "learning_rate": 2.0411578049170714e-06, "loss": 0.6023162841796875, "num_tokens": 3742270006.0, "step": 30620 }, { "epoch": 0.0408513724674549, "grad_norm": 1.84375, "learning_rate": 2.0424910671430857e-06, "loss": 0.5960041046142578, "num_tokens": 3744828703.0, "step": 30640 }, { "epoch": 0.040878037854183, "grad_norm": 2.078125, "learning_rate": 2.0438243293691004e-06, "loss": 0.5999528884887695, "num_tokens": 3747303820.0, "step": 30660 }, { "epoch": 0.040904703240911104, "grad_norm": 2.0625, "learning_rate": 2.045157591595115e-06, "loss": 0.611652946472168, "num_tokens": 3749991876.0, "step": 30680 }, { "epoch": 0.04093136862763921, "grad_norm": 1.9140625, "learning_rate": 2.04649085382113e-06, "loss": 0.5825932025909424, "num_tokens": 3752427677.0, "step": 30700 }, { "epoch": 0.04095803401436731, "grad_norm": 2.0625, "learning_rate": 2.0478241160471445e-06, "loss": 0.6046145915985107, "num_tokens": 3754988014.0, "step": 30720 }, { "epoch": 0.04098469940109541, "grad_norm": 1.8125, "learning_rate": 2.0491573782731588e-06, "loss": 0.6004762649536133, "num_tokens": 3757693786.0, "step": 30740 }, { "epoch": 0.041011364787823516, "grad_norm": 1.90625, "learning_rate": 2.0504906404991735e-06, "loss": 0.5981609344482421, "num_tokens": 3760011166.0, "step": 30760 }, { "epoch": 0.04103803017455162, "grad_norm": 2.140625, "learning_rate": 2.051823902725188e-06, "loss": 0.602381420135498, "num_tokens": 3762469265.0, "step": 30780 }, { "epoch": 0.04106469556127972, "grad_norm": 1.7578125, "learning_rate": 2.053157164951203e-06, "loss": 0.5855010509490967, "num_tokens": 3764989867.0, "step": 30800 }, { "epoch": 0.04109136094800783, "grad_norm": 2.3125, "learning_rate": 2.054490427177217e-06, "loss": 0.5999053955078125, "num_tokens": 3767688625.0, "step": 30820 }, { "epoch": 0.041118026334735935, "grad_norm": 2.046875, "learning_rate": 2.0558236894032323e-06, "loss": 0.5903294563293457, "num_tokens": 3769991019.0, "step": 30840 }, { "epoch": 0.04114469172146404, "grad_norm": 2.0625, "learning_rate": 2.0571569516292466e-06, "loss": 0.6027491569519043, "num_tokens": 3772301728.0, "step": 30860 }, { "epoch": 0.04117135710819214, "grad_norm": 1.796875, "learning_rate": 2.0584902138552613e-06, "loss": 0.6025005340576172, "num_tokens": 3774593688.0, "step": 30880 }, { "epoch": 0.041198022494920244, "grad_norm": 2.125, "learning_rate": 2.0598234760812756e-06, "loss": 0.5947816371917725, "num_tokens": 3776987167.0, "step": 30900 }, { "epoch": 0.04122468788164835, "grad_norm": 2.0625, "learning_rate": 2.0611567383072907e-06, "loss": 0.5891386985778808, "num_tokens": 3779443351.0, "step": 30920 }, { "epoch": 0.04125135326837645, "grad_norm": 1.9921875, "learning_rate": 2.062490000533305e-06, "loss": 0.5957648754119873, "num_tokens": 3781945076.0, "step": 30940 }, { "epoch": 0.041278018655104554, "grad_norm": 1.953125, "learning_rate": 2.0638232627593197e-06, "loss": 0.5917267799377441, "num_tokens": 3784268965.0, "step": 30960 }, { "epoch": 0.04130468404183266, "grad_norm": 2.390625, "learning_rate": 2.0651565249853344e-06, "loss": 0.608423376083374, "num_tokens": 3786693046.0, "step": 30980 }, { "epoch": 0.04133134942856076, "grad_norm": 2.09375, "learning_rate": 2.066489787211349e-06, "loss": 0.6011165618896485, "num_tokens": 3789253385.0, "step": 31000 }, { "epoch": 0.04135801481528886, "grad_norm": 2.1875, "learning_rate": 2.0678230494373638e-06, "loss": 0.6016400814056396, "num_tokens": 3791652533.0, "step": 31020 }, { "epoch": 0.04138468020201697, "grad_norm": 2.1875, "learning_rate": 2.069156311663378e-06, "loss": 0.5825358390808105, "num_tokens": 3794157060.0, "step": 31040 }, { "epoch": 0.041411345588745076, "grad_norm": 1.859375, "learning_rate": 2.0704895738893927e-06, "loss": 0.5874851226806641, "num_tokens": 3796514838.0, "step": 31060 }, { "epoch": 0.04143801097547318, "grad_norm": 1.7421875, "learning_rate": 2.0718228361154074e-06, "loss": 0.5907467842102051, "num_tokens": 3799142795.0, "step": 31080 }, { "epoch": 0.04146467636220128, "grad_norm": 1.765625, "learning_rate": 2.073156098341422e-06, "loss": 0.5837721824645996, "num_tokens": 3801380453.0, "step": 31100 }, { "epoch": 0.041491341748929385, "grad_norm": 2.296875, "learning_rate": 2.0744893605674364e-06, "loss": 0.6005255699157714, "num_tokens": 3803925269.0, "step": 31120 }, { "epoch": 0.04151800713565749, "grad_norm": 1.953125, "learning_rate": 2.075822622793451e-06, "loss": 0.5911087989807129, "num_tokens": 3806270077.0, "step": 31140 }, { "epoch": 0.04154467252238559, "grad_norm": 2.34375, "learning_rate": 2.077155885019466e-06, "loss": 0.5932591915130615, "num_tokens": 3808839969.0, "step": 31160 }, { "epoch": 0.041571337909113694, "grad_norm": 1.8125, "learning_rate": 2.0784891472454805e-06, "loss": 0.5837250709533691, "num_tokens": 3811211557.0, "step": 31180 }, { "epoch": 0.0415980032958418, "grad_norm": 2.171875, "learning_rate": 2.079822409471495e-06, "loss": 0.577457046508789, "num_tokens": 3813537689.0, "step": 31200 }, { "epoch": 0.0416246686825699, "grad_norm": 1.4765625, "learning_rate": 2.0811556716975095e-06, "loss": 0.5756291389465332, "num_tokens": 3815945194.0, "step": 31220 }, { "epoch": 0.04165133406929801, "grad_norm": 2.171875, "learning_rate": 2.082488933923524e-06, "loss": 0.5798913478851319, "num_tokens": 3818250769.0, "step": 31240 }, { "epoch": 0.04167799945602611, "grad_norm": 2.34375, "learning_rate": 2.083822196149539e-06, "loss": 0.6049214363098144, "num_tokens": 3820841744.0, "step": 31260 }, { "epoch": 0.041704664842754216, "grad_norm": 2.25, "learning_rate": 2.0851554583755536e-06, "loss": 0.5885008811950684, "num_tokens": 3823278876.0, "step": 31280 }, { "epoch": 0.04173133022948232, "grad_norm": 1.9375, "learning_rate": 2.086488720601568e-06, "loss": 0.5849906444549561, "num_tokens": 3825747336.0, "step": 31300 }, { "epoch": 0.04175799561621042, "grad_norm": 1.84375, "learning_rate": 2.087821982827583e-06, "loss": 0.5759910583496094, "num_tokens": 3827994806.0, "step": 31320 }, { "epoch": 0.041784661002938525, "grad_norm": 2.25, "learning_rate": 2.0891552450535973e-06, "loss": 0.5986478805541993, "num_tokens": 3830410867.0, "step": 31340 }, { "epoch": 0.04181132638966663, "grad_norm": 1.8984375, "learning_rate": 2.090488507279612e-06, "loss": 0.5961959838867188, "num_tokens": 3832782274.0, "step": 31360 }, { "epoch": 0.04183799177639473, "grad_norm": 1.6640625, "learning_rate": 2.0918217695056263e-06, "loss": 0.5772043228149414, "num_tokens": 3834994568.0, "step": 31380 }, { "epoch": 0.041864657163122834, "grad_norm": 1.9921875, "learning_rate": 2.0931550317316414e-06, "loss": 0.5880722999572754, "num_tokens": 3837479058.0, "step": 31400 }, { "epoch": 0.04189132254985094, "grad_norm": 2.46875, "learning_rate": 2.0944882939576557e-06, "loss": 0.6053415298461914, "num_tokens": 3840115397.0, "step": 31420 }, { "epoch": 0.04191798793657905, "grad_norm": 2.15625, "learning_rate": 2.0958215561836704e-06, "loss": 0.5623603820800781, "num_tokens": 3842507169.0, "step": 31440 }, { "epoch": 0.04194465332330715, "grad_norm": 1.6328125, "learning_rate": 2.097154818409685e-06, "loss": 0.5961043357849121, "num_tokens": 3845050376.0, "step": 31460 }, { "epoch": 0.04197131871003525, "grad_norm": 1.9921875, "learning_rate": 2.0984880806356998e-06, "loss": 0.5967862129211425, "num_tokens": 3847590366.0, "step": 31480 }, { "epoch": 0.041997984096763356, "grad_norm": 2.1875, "learning_rate": 2.099821342861714e-06, "loss": 0.5930452346801758, "num_tokens": 3849998419.0, "step": 31500 }, { "epoch": 0.04202464948349146, "grad_norm": 2.0, "learning_rate": 2.1011546050877288e-06, "loss": 0.590850830078125, "num_tokens": 3852540783.0, "step": 31520 }, { "epoch": 0.04205131487021956, "grad_norm": 1.7265625, "learning_rate": 2.1024878673137435e-06, "loss": 0.5936983108520508, "num_tokens": 3854794528.0, "step": 31540 }, { "epoch": 0.042077980256947665, "grad_norm": 2.078125, "learning_rate": 2.103821129539758e-06, "loss": 0.5923136711120606, "num_tokens": 3857177364.0, "step": 31560 }, { "epoch": 0.04210464564367577, "grad_norm": 2.046875, "learning_rate": 2.105154391765773e-06, "loss": 0.5896283626556397, "num_tokens": 3859722318.0, "step": 31580 }, { "epoch": 0.04213131103040387, "grad_norm": 2.3125, "learning_rate": 2.106487653991787e-06, "loss": 0.5963266849517822, "num_tokens": 3862231319.0, "step": 31600 }, { "epoch": 0.042157976417131975, "grad_norm": 1.75, "learning_rate": 2.107820916217802e-06, "loss": 0.5825855255126953, "num_tokens": 3864771428.0, "step": 31620 }, { "epoch": 0.042184641803860085, "grad_norm": 2.484375, "learning_rate": 2.1091541784438165e-06, "loss": 0.5893041133880615, "num_tokens": 3867270615.0, "step": 31640 }, { "epoch": 0.04221130719058819, "grad_norm": 2.234375, "learning_rate": 2.1104874406698312e-06, "loss": 0.6148755550384521, "num_tokens": 3869569437.0, "step": 31660 }, { "epoch": 0.04223797257731629, "grad_norm": 2.171875, "learning_rate": 2.1118207028958455e-06, "loss": 0.5729104518890381, "num_tokens": 3872035022.0, "step": 31680 }, { "epoch": 0.042264637964044394, "grad_norm": 1.625, "learning_rate": 2.1131539651218602e-06, "loss": 0.5842064380645752, "num_tokens": 3874328244.0, "step": 31700 }, { "epoch": 0.0422913033507725, "grad_norm": 2.0625, "learning_rate": 2.114487227347875e-06, "loss": 0.6075069427490234, "num_tokens": 3876669854.0, "step": 31720 }, { "epoch": 0.0423179687375006, "grad_norm": 2.1875, "learning_rate": 2.1158204895738896e-06, "loss": 0.5996311187744141, "num_tokens": 3879160249.0, "step": 31740 }, { "epoch": 0.0423446341242287, "grad_norm": 2.09375, "learning_rate": 2.1171537517999043e-06, "loss": 0.5951138973236084, "num_tokens": 3881570613.0, "step": 31760 }, { "epoch": 0.042371299510956806, "grad_norm": 2.296875, "learning_rate": 2.1184870140259186e-06, "loss": 0.5661855697631836, "num_tokens": 3883898097.0, "step": 31780 }, { "epoch": 0.04239796489768491, "grad_norm": 1.75, "learning_rate": 2.1198202762519333e-06, "loss": 0.5640949249267578, "num_tokens": 3886257773.0, "step": 31800 }, { "epoch": 0.04242463028441301, "grad_norm": 1.8984375, "learning_rate": 2.121153538477948e-06, "loss": 0.5829188346862793, "num_tokens": 3888663637.0, "step": 31820 }, { "epoch": 0.04245129567114112, "grad_norm": 1.9765625, "learning_rate": 2.1224868007039627e-06, "loss": 0.5914799213409424, "num_tokens": 3890979344.0, "step": 31840 }, { "epoch": 0.042477961057869225, "grad_norm": 1.6796875, "learning_rate": 2.123820062929977e-06, "loss": 0.5809403419494629, "num_tokens": 3893376470.0, "step": 31860 }, { "epoch": 0.04250462644459733, "grad_norm": 2.109375, "learning_rate": 2.125153325155992e-06, "loss": 0.5951649665832519, "num_tokens": 3895975443.0, "step": 31880 }, { "epoch": 0.04253129183132543, "grad_norm": 2.03125, "learning_rate": 2.1264865873820064e-06, "loss": 0.5806430816650391, "num_tokens": 3898351509.0, "step": 31900 }, { "epoch": 0.042557957218053534, "grad_norm": 1.9921875, "learning_rate": 2.127819849608021e-06, "loss": 0.6020665168762207, "num_tokens": 3900825567.0, "step": 31920 }, { "epoch": 0.04258462260478164, "grad_norm": 1.90625, "learning_rate": 2.1291531118340354e-06, "loss": 0.602116060256958, "num_tokens": 3903397056.0, "step": 31940 }, { "epoch": 0.04261128799150974, "grad_norm": 2.046875, "learning_rate": 2.1304863740600505e-06, "loss": 0.5717715263366699, "num_tokens": 3905863287.0, "step": 31960 }, { "epoch": 0.04263795337823784, "grad_norm": 2.125, "learning_rate": 2.1318196362860648e-06, "loss": 0.5729499340057373, "num_tokens": 3908631463.0, "step": 31980 }, { "epoch": 0.042664618764965946, "grad_norm": 2.03125, "learning_rate": 2.1331528985120795e-06, "loss": 0.5520978927612304, "num_tokens": 3910995220.0, "step": 32000 }, { "epoch": 0.04269128415169405, "grad_norm": 2.15625, "learning_rate": 2.134486160738094e-06, "loss": 0.5927410125732422, "num_tokens": 3913454871.0, "step": 32020 }, { "epoch": 0.04271794953842216, "grad_norm": 1.921875, "learning_rate": 2.135819422964109e-06, "loss": 0.5765074253082275, "num_tokens": 3916123105.0, "step": 32040 }, { "epoch": 0.04274461492515026, "grad_norm": 1.734375, "learning_rate": 2.137152685190123e-06, "loss": 0.6029845237731933, "num_tokens": 3918743393.0, "step": 32060 }, { "epoch": 0.042771280311878365, "grad_norm": 1.6171875, "learning_rate": 2.138485947416138e-06, "loss": 0.597195053100586, "num_tokens": 3921548211.0, "step": 32080 }, { "epoch": 0.04279794569860647, "grad_norm": 1.9140625, "learning_rate": 2.1398192096421526e-06, "loss": 0.5877774238586426, "num_tokens": 3924043151.0, "step": 32100 }, { "epoch": 0.04282461108533457, "grad_norm": 1.8046875, "learning_rate": 2.1411524718681673e-06, "loss": 0.5593714237213134, "num_tokens": 3926379661.0, "step": 32120 }, { "epoch": 0.042851276472062674, "grad_norm": 2.625, "learning_rate": 2.142485734094182e-06, "loss": 0.5869334220886231, "num_tokens": 3928692083.0, "step": 32140 }, { "epoch": 0.04287794185879078, "grad_norm": 1.953125, "learning_rate": 2.1438189963201962e-06, "loss": 0.5993717670440674, "num_tokens": 3931277190.0, "step": 32160 }, { "epoch": 0.04290460724551888, "grad_norm": 1.8984375, "learning_rate": 2.1451522585462114e-06, "loss": 0.5810956954956055, "num_tokens": 3933737959.0, "step": 32180 }, { "epoch": 0.04293127263224698, "grad_norm": 1.703125, "learning_rate": 2.1464855207722256e-06, "loss": 0.6040315151214599, "num_tokens": 3936271973.0, "step": 32200 }, { "epoch": 0.042957938018975086, "grad_norm": 2.09375, "learning_rate": 2.1478187829982403e-06, "loss": 0.5809077262878418, "num_tokens": 3938837809.0, "step": 32220 }, { "epoch": 0.04298460340570319, "grad_norm": 2.34375, "learning_rate": 2.1491520452242546e-06, "loss": 0.5965569019317627, "num_tokens": 3941207996.0, "step": 32240 }, { "epoch": 0.0430112687924313, "grad_norm": 2.125, "learning_rate": 2.1504853074502697e-06, "loss": 0.5738657474517822, "num_tokens": 3943666594.0, "step": 32260 }, { "epoch": 0.0430379341791594, "grad_norm": 2.15625, "learning_rate": 2.151818569676284e-06, "loss": 0.5903338432312012, "num_tokens": 3945837534.0, "step": 32280 }, { "epoch": 0.043064599565887506, "grad_norm": 2.0625, "learning_rate": 2.1531518319022987e-06, "loss": 0.5750972270965576, "num_tokens": 3948169825.0, "step": 32300 }, { "epoch": 0.04309126495261561, "grad_norm": 1.84375, "learning_rate": 2.1544850941283134e-06, "loss": 0.5762073993682861, "num_tokens": 3950548495.0, "step": 32320 }, { "epoch": 0.04311793033934371, "grad_norm": 2.109375, "learning_rate": 2.155818356354328e-06, "loss": 0.5847964763641358, "num_tokens": 3952711618.0, "step": 32340 }, { "epoch": 0.043144595726071815, "grad_norm": 2.09375, "learning_rate": 2.1571516185803424e-06, "loss": 0.6002538681030274, "num_tokens": 3955071792.0, "step": 32360 }, { "epoch": 0.04317126111279992, "grad_norm": 2.109375, "learning_rate": 2.158484880806357e-06, "loss": 0.59998459815979, "num_tokens": 3957340039.0, "step": 32380 }, { "epoch": 0.04319792649952802, "grad_norm": 2.0625, "learning_rate": 2.159818143032372e-06, "loss": 0.5902047634124756, "num_tokens": 3959729491.0, "step": 32400 }, { "epoch": 0.043224591886256124, "grad_norm": 2.171875, "learning_rate": 2.1611514052583865e-06, "loss": 0.5885507583618164, "num_tokens": 3962191182.0, "step": 32420 }, { "epoch": 0.04325125727298423, "grad_norm": 2.25, "learning_rate": 2.1624846674844012e-06, "loss": 0.6069028377532959, "num_tokens": 3964569995.0, "step": 32440 }, { "epoch": 0.04327792265971234, "grad_norm": 2.234375, "learning_rate": 2.1638179297104155e-06, "loss": 0.5571132659912109, "num_tokens": 3966867176.0, "step": 32460 }, { "epoch": 0.04330458804644044, "grad_norm": 1.8828125, "learning_rate": 2.16515119193643e-06, "loss": 0.5842974185943604, "num_tokens": 3969414693.0, "step": 32480 }, { "epoch": 0.04333125343316854, "grad_norm": 2.09375, "learning_rate": 2.166484454162445e-06, "loss": 0.590128755569458, "num_tokens": 3971818451.0, "step": 32500 }, { "epoch": 0.043357918819896646, "grad_norm": 2.171875, "learning_rate": 2.1678177163884596e-06, "loss": 0.5919103622436523, "num_tokens": 3974243555.0, "step": 32520 }, { "epoch": 0.04338458420662475, "grad_norm": 1.7890625, "learning_rate": 2.169150978614474e-06, "loss": 0.5852648735046386, "num_tokens": 3976662451.0, "step": 32540 }, { "epoch": 0.04341124959335285, "grad_norm": 2.1875, "learning_rate": 2.1704842408404886e-06, "loss": 0.5782969474792481, "num_tokens": 3978966810.0, "step": 32560 }, { "epoch": 0.043437914980080955, "grad_norm": 2.15625, "learning_rate": 2.1718175030665033e-06, "loss": 0.5736003875732422, "num_tokens": 3981511375.0, "step": 32580 }, { "epoch": 0.04346458036680906, "grad_norm": 1.6953125, "learning_rate": 2.173150765292518e-06, "loss": 0.5931709766387939, "num_tokens": 3984090516.0, "step": 32600 }, { "epoch": 0.04349124575353716, "grad_norm": 1.7421875, "learning_rate": 2.1744840275185327e-06, "loss": 0.5696157455444336, "num_tokens": 3986796406.0, "step": 32620 }, { "epoch": 0.043517911140265264, "grad_norm": 2.140625, "learning_rate": 2.175817289744547e-06, "loss": 0.5979225158691406, "num_tokens": 3989310649.0, "step": 32640 }, { "epoch": 0.043544576526993374, "grad_norm": 1.6875, "learning_rate": 2.1771505519705617e-06, "loss": 0.5703463077545166, "num_tokens": 3991763025.0, "step": 32660 }, { "epoch": 0.04357124191372148, "grad_norm": 2.171875, "learning_rate": 2.1784838141965764e-06, "loss": 0.579607343673706, "num_tokens": 3994227547.0, "step": 32680 }, { "epoch": 0.04359790730044958, "grad_norm": 2.234375, "learning_rate": 2.179817076422591e-06, "loss": 0.5774590015411377, "num_tokens": 3996720243.0, "step": 32700 }, { "epoch": 0.04362457268717768, "grad_norm": 1.6796875, "learning_rate": 2.1811503386486053e-06, "loss": 0.5860306262969971, "num_tokens": 3999332830.0, "step": 32720 }, { "epoch": 0.043651238073905786, "grad_norm": 2.125, "learning_rate": 2.1824836008746205e-06, "loss": 0.5858887672424317, "num_tokens": 4001771046.0, "step": 32740 }, { "epoch": 0.04367790346063389, "grad_norm": 2.46875, "learning_rate": 2.1838168631006347e-06, "loss": 0.5954487800598145, "num_tokens": 4004256422.0, "step": 32760 }, { "epoch": 0.04370456884736199, "grad_norm": 2.015625, "learning_rate": 2.1851501253266494e-06, "loss": 0.5895700454711914, "num_tokens": 4006549892.0, "step": 32780 }, { "epoch": 0.043731234234090095, "grad_norm": 1.8515625, "learning_rate": 2.1864833875526637e-06, "loss": 0.5698602676391602, "num_tokens": 4008748007.0, "step": 32800 }, { "epoch": 0.0437578996208182, "grad_norm": 2.03125, "learning_rate": 2.187816649778679e-06, "loss": 0.5959589004516601, "num_tokens": 4011414855.0, "step": 32820 }, { "epoch": 0.0437845650075463, "grad_norm": 2.015625, "learning_rate": 2.189149912004693e-06, "loss": 0.5953973293304443, "num_tokens": 4013478770.0, "step": 32840 }, { "epoch": 0.04381123039427441, "grad_norm": 1.7265625, "learning_rate": 2.190483174230708e-06, "loss": 0.601181936264038, "num_tokens": 4015657572.0, "step": 32860 }, { "epoch": 0.043837895781002514, "grad_norm": 1.7421875, "learning_rate": 2.1918164364567225e-06, "loss": 0.5843685626983642, "num_tokens": 4018183965.0, "step": 32880 }, { "epoch": 0.04386456116773062, "grad_norm": 1.984375, "learning_rate": 2.1931496986827372e-06, "loss": 0.5725042343139648, "num_tokens": 4020432258.0, "step": 32900 }, { "epoch": 0.04389122655445872, "grad_norm": 2.125, "learning_rate": 2.194482960908752e-06, "loss": 0.585968017578125, "num_tokens": 4022918853.0, "step": 32920 }, { "epoch": 0.043917891941186823, "grad_norm": 1.7734375, "learning_rate": 2.1958162231347662e-06, "loss": 0.592253303527832, "num_tokens": 4025484887.0, "step": 32940 }, { "epoch": 0.043944557327914927, "grad_norm": 1.5859375, "learning_rate": 2.197149485360781e-06, "loss": 0.5895500659942627, "num_tokens": 4027980846.0, "step": 32960 }, { "epoch": 0.04397122271464303, "grad_norm": 2.21875, "learning_rate": 2.1984827475867956e-06, "loss": 0.594182014465332, "num_tokens": 4030576159.0, "step": 32980 }, { "epoch": 0.04399788810137113, "grad_norm": 2.078125, "learning_rate": 2.1998160098128103e-06, "loss": 0.5937049865722657, "num_tokens": 4033035440.0, "step": 33000 }, { "epoch": 0.044024553488099236, "grad_norm": 2.203125, "learning_rate": 2.2011492720388246e-06, "loss": 0.5845088958740234, "num_tokens": 4035407691.0, "step": 33020 }, { "epoch": 0.04405121887482734, "grad_norm": 2.359375, "learning_rate": 2.2024825342648393e-06, "loss": 0.5925871849060058, "num_tokens": 4037801002.0, "step": 33040 }, { "epoch": 0.04407788426155545, "grad_norm": 2.234375, "learning_rate": 2.203815796490854e-06, "loss": 0.5896501541137695, "num_tokens": 4040009422.0, "step": 33060 }, { "epoch": 0.04410454964828355, "grad_norm": 1.828125, "learning_rate": 2.2051490587168687e-06, "loss": 0.5686233520507813, "num_tokens": 4042491289.0, "step": 33080 }, { "epoch": 0.044131215035011655, "grad_norm": 2.09375, "learning_rate": 2.206482320942883e-06, "loss": 0.5747260570526123, "num_tokens": 4044953917.0, "step": 33100 }, { "epoch": 0.04415788042173976, "grad_norm": 2.25, "learning_rate": 2.2078155831688977e-06, "loss": 0.5992817878723145, "num_tokens": 4047382264.0, "step": 33120 }, { "epoch": 0.04418454580846786, "grad_norm": 2.046875, "learning_rate": 2.2091488453949124e-06, "loss": 0.5979694843292236, "num_tokens": 4049629440.0, "step": 33140 }, { "epoch": 0.044211211195195964, "grad_norm": 1.953125, "learning_rate": 2.210482107620927e-06, "loss": 0.6016552925109864, "num_tokens": 4052289964.0, "step": 33160 }, { "epoch": 0.04423787658192407, "grad_norm": 2.4375, "learning_rate": 2.2118153698469418e-06, "loss": 0.5955633163452149, "num_tokens": 4054738268.0, "step": 33180 }, { "epoch": 0.04426454196865217, "grad_norm": 2.140625, "learning_rate": 2.213148632072956e-06, "loss": 0.5887774467468262, "num_tokens": 4057205295.0, "step": 33200 }, { "epoch": 0.04429120735538027, "grad_norm": 2.453125, "learning_rate": 2.214481894298971e-06, "loss": 0.5832075119018555, "num_tokens": 4059607566.0, "step": 33220 }, { "epoch": 0.044317872742108376, "grad_norm": 2.21875, "learning_rate": 2.2158151565249855e-06, "loss": 0.594365406036377, "num_tokens": 4062069771.0, "step": 33240 }, { "epoch": 0.044344538128836486, "grad_norm": 2.25, "learning_rate": 2.217148418751e-06, "loss": 0.6107869148254395, "num_tokens": 4064436941.0, "step": 33260 }, { "epoch": 0.04437120351556459, "grad_norm": 2.21875, "learning_rate": 2.2184816809770144e-06, "loss": 0.5896612167358398, "num_tokens": 4067065815.0, "step": 33280 }, { "epoch": 0.04439786890229269, "grad_norm": 2.4375, "learning_rate": 2.2198149432030296e-06, "loss": 0.5826992988586426, "num_tokens": 4069532714.0, "step": 33300 }, { "epoch": 0.044424534289020795, "grad_norm": 2.109375, "learning_rate": 2.221148205429044e-06, "loss": 0.5785590171813965, "num_tokens": 4071763698.0, "step": 33320 }, { "epoch": 0.0444511996757489, "grad_norm": 2.1875, "learning_rate": 2.2224814676550585e-06, "loss": 0.5841691017150878, "num_tokens": 4074183825.0, "step": 33340 }, { "epoch": 0.044477865062477, "grad_norm": 2.484375, "learning_rate": 2.2238147298810733e-06, "loss": 0.6011745452880859, "num_tokens": 4076614070.0, "step": 33360 }, { "epoch": 0.044504530449205104, "grad_norm": 1.6875, "learning_rate": 2.225147992107088e-06, "loss": 0.5792616844177246, "num_tokens": 4078942304.0, "step": 33380 }, { "epoch": 0.04453119583593321, "grad_norm": 1.96875, "learning_rate": 2.2264812543331022e-06, "loss": 0.5873154163360595, "num_tokens": 4081400737.0, "step": 33400 }, { "epoch": 0.04455786122266131, "grad_norm": 2.578125, "learning_rate": 2.227814516559117e-06, "loss": 0.5788686752319336, "num_tokens": 4083793188.0, "step": 33420 }, { "epoch": 0.04458452660938941, "grad_norm": 1.7265625, "learning_rate": 2.2291477787851316e-06, "loss": 0.589005422592163, "num_tokens": 4086321826.0, "step": 33440 }, { "epoch": 0.044611191996117516, "grad_norm": 1.4140625, "learning_rate": 2.2304810410111463e-06, "loss": 0.5708154678344727, "num_tokens": 4089039001.0, "step": 33460 }, { "epoch": 0.044637857382845626, "grad_norm": 2.265625, "learning_rate": 2.231814303237161e-06, "loss": 0.5733915328979492, "num_tokens": 4091499222.0, "step": 33480 }, { "epoch": 0.04466452276957373, "grad_norm": 2.203125, "learning_rate": 2.2331475654631753e-06, "loss": 0.5946397304534912, "num_tokens": 4093890304.0, "step": 33500 }, { "epoch": 0.04469118815630183, "grad_norm": 2.359375, "learning_rate": 2.23448082768919e-06, "loss": 0.5828713417053223, "num_tokens": 4096475725.0, "step": 33520 }, { "epoch": 0.044717853543029935, "grad_norm": 2.5, "learning_rate": 2.2358140899152047e-06, "loss": 0.5760603904724121, "num_tokens": 4098840528.0, "step": 33540 }, { "epoch": 0.04474451892975804, "grad_norm": 1.9375, "learning_rate": 2.2371473521412194e-06, "loss": 0.5790250778198243, "num_tokens": 4101148702.0, "step": 33560 }, { "epoch": 0.04477118431648614, "grad_norm": 2.421875, "learning_rate": 2.2384806143672337e-06, "loss": 0.6010103225708008, "num_tokens": 4103697435.0, "step": 33580 }, { "epoch": 0.044797849703214244, "grad_norm": 2.078125, "learning_rate": 2.239813876593249e-06, "loss": 0.5672161102294921, "num_tokens": 4105997082.0, "step": 33600 }, { "epoch": 0.04482451508994235, "grad_norm": 1.8828125, "learning_rate": 2.241147138819263e-06, "loss": 0.5905350685119629, "num_tokens": 4108305270.0, "step": 33620 }, { "epoch": 0.04485118047667045, "grad_norm": 2.046875, "learning_rate": 2.242480401045278e-06, "loss": 0.561850643157959, "num_tokens": 4110938325.0, "step": 33640 }, { "epoch": 0.044877845863398554, "grad_norm": 2.0, "learning_rate": 2.2438136632712925e-06, "loss": 0.559334659576416, "num_tokens": 4113470593.0, "step": 33660 }, { "epoch": 0.044904511250126664, "grad_norm": 2.34375, "learning_rate": 2.245146925497307e-06, "loss": 0.5743498802185059, "num_tokens": 4115857159.0, "step": 33680 }, { "epoch": 0.04493117663685477, "grad_norm": 2.171875, "learning_rate": 2.2464801877233215e-06, "loss": 0.5842267990112304, "num_tokens": 4118386966.0, "step": 33700 }, { "epoch": 0.04495784202358287, "grad_norm": 1.828125, "learning_rate": 2.247813449949336e-06, "loss": 0.5724524021148681, "num_tokens": 4120819059.0, "step": 33720 }, { "epoch": 0.04498450741031097, "grad_norm": 2.359375, "learning_rate": 2.249146712175351e-06, "loss": 0.5936177253723145, "num_tokens": 4123159911.0, "step": 33740 }, { "epoch": 0.045011172797039076, "grad_norm": 2.234375, "learning_rate": 2.2504799744013656e-06, "loss": 0.5977114677429199, "num_tokens": 4125581970.0, "step": 33760 }, { "epoch": 0.04503783818376718, "grad_norm": 2.09375, "learning_rate": 2.2518132366273803e-06, "loss": 0.5994404792785645, "num_tokens": 4127918384.0, "step": 33780 }, { "epoch": 0.04506450357049528, "grad_norm": 1.53125, "learning_rate": 2.2531464988533946e-06, "loss": 0.5913943290710449, "num_tokens": 4130340505.0, "step": 33800 }, { "epoch": 0.045091168957223385, "grad_norm": 2.21875, "learning_rate": 2.2544797610794093e-06, "loss": 0.5773977279663086, "num_tokens": 4133001387.0, "step": 33820 }, { "epoch": 0.04511783434395149, "grad_norm": 1.765625, "learning_rate": 2.255813023305424e-06, "loss": 0.5862225532531739, "num_tokens": 4135395954.0, "step": 33840 }, { "epoch": 0.04514449973067959, "grad_norm": 2.3125, "learning_rate": 2.2571462855314387e-06, "loss": 0.5878985404968262, "num_tokens": 4137912260.0, "step": 33860 }, { "epoch": 0.0451711651174077, "grad_norm": 2.546875, "learning_rate": 2.258479547757453e-06, "loss": 0.5804977416992188, "num_tokens": 4140195501.0, "step": 33880 }, { "epoch": 0.045197830504135804, "grad_norm": 2.328125, "learning_rate": 2.2598128099834677e-06, "loss": 0.5714584350585937, "num_tokens": 4142797424.0, "step": 33900 }, { "epoch": 0.04522449589086391, "grad_norm": 2.1875, "learning_rate": 2.2611460722094824e-06, "loss": 0.5735685348510742, "num_tokens": 4145150788.0, "step": 33920 }, { "epoch": 0.04525116127759201, "grad_norm": 2.59375, "learning_rate": 2.262479334435497e-06, "loss": 0.5915765285491943, "num_tokens": 4147850827.0, "step": 33940 }, { "epoch": 0.04527782666432011, "grad_norm": 1.921875, "learning_rate": 2.2638125966615113e-06, "loss": 0.5866440773010254, "num_tokens": 4150394820.0, "step": 33960 }, { "epoch": 0.045304492051048216, "grad_norm": 1.9375, "learning_rate": 2.265145858887526e-06, "loss": 0.5951231479644775, "num_tokens": 4152732148.0, "step": 33980 }, { "epoch": 0.04533115743777632, "grad_norm": 1.8046875, "learning_rate": 2.2664791211135407e-06, "loss": 0.5894374847412109, "num_tokens": 4155194451.0, "step": 34000 }, { "epoch": 0.04535782282450442, "grad_norm": 2.09375, "learning_rate": 2.2678123833395554e-06, "loss": 0.5928818702697753, "num_tokens": 4157476188.0, "step": 34020 }, { "epoch": 0.045384488211232525, "grad_norm": 2.28125, "learning_rate": 2.26914564556557e-06, "loss": 0.6130811691284179, "num_tokens": 4159993435.0, "step": 34040 }, { "epoch": 0.04541115359796063, "grad_norm": 2.125, "learning_rate": 2.2704789077915844e-06, "loss": 0.5937159061431885, "num_tokens": 4162444951.0, "step": 34060 }, { "epoch": 0.04543781898468874, "grad_norm": 2.125, "learning_rate": 2.2718121700175995e-06, "loss": 0.5880386352539062, "num_tokens": 4165013286.0, "step": 34080 }, { "epoch": 0.04546448437141684, "grad_norm": 1.8828125, "learning_rate": 2.273145432243614e-06, "loss": 0.5978429794311524, "num_tokens": 4167233028.0, "step": 34100 }, { "epoch": 0.045491149758144944, "grad_norm": 1.8828125, "learning_rate": 2.2744786944696285e-06, "loss": 0.5882061958312989, "num_tokens": 4169678898.0, "step": 34120 }, { "epoch": 0.04551781514487305, "grad_norm": 1.921875, "learning_rate": 2.275811956695643e-06, "loss": 0.5667818069458008, "num_tokens": 4172101027.0, "step": 34140 }, { "epoch": 0.04554448053160115, "grad_norm": 1.9921875, "learning_rate": 2.277145218921658e-06, "loss": 0.5896302223205566, "num_tokens": 4174564072.0, "step": 34160 }, { "epoch": 0.04557114591832925, "grad_norm": 1.75, "learning_rate": 2.278478481147672e-06, "loss": 0.5914902687072754, "num_tokens": 4176929843.0, "step": 34180 }, { "epoch": 0.045597811305057356, "grad_norm": 1.8515625, "learning_rate": 2.279811743373687e-06, "loss": 0.5889092445373535, "num_tokens": 4179399437.0, "step": 34200 }, { "epoch": 0.04562447669178546, "grad_norm": 1.8125, "learning_rate": 2.2811450055997016e-06, "loss": 0.5836125373840332, "num_tokens": 4181828216.0, "step": 34220 }, { "epoch": 0.04565114207851356, "grad_norm": 1.859375, "learning_rate": 2.2824782678257163e-06, "loss": 0.5955025672912597, "num_tokens": 4184169589.0, "step": 34240 }, { "epoch": 0.045677807465241665, "grad_norm": 1.8828125, "learning_rate": 2.2838115300517306e-06, "loss": 0.5776950359344483, "num_tokens": 4186598232.0, "step": 34260 }, { "epoch": 0.045704472851969775, "grad_norm": 2.140625, "learning_rate": 2.2851447922777453e-06, "loss": 0.5726178646087646, "num_tokens": 4189103628.0, "step": 34280 }, { "epoch": 0.04573113823869788, "grad_norm": 2.0625, "learning_rate": 2.28647805450376e-06, "loss": 0.5878458023071289, "num_tokens": 4191557954.0, "step": 34300 }, { "epoch": 0.04575780362542598, "grad_norm": 2.09375, "learning_rate": 2.2878113167297747e-06, "loss": 0.5845067977905274, "num_tokens": 4193805863.0, "step": 34320 }, { "epoch": 0.045784469012154085, "grad_norm": 1.9375, "learning_rate": 2.2891445789557894e-06, "loss": 0.582273292541504, "num_tokens": 4196148208.0, "step": 34340 }, { "epoch": 0.04581113439888219, "grad_norm": 2.109375, "learning_rate": 2.2904778411818037e-06, "loss": 0.5757678985595703, "num_tokens": 4198369341.0, "step": 34360 }, { "epoch": 0.04583779978561029, "grad_norm": 1.640625, "learning_rate": 2.2918111034078184e-06, "loss": 0.5862479209899902, "num_tokens": 4200832342.0, "step": 34380 }, { "epoch": 0.045864465172338394, "grad_norm": 2.03125, "learning_rate": 2.293144365633833e-06, "loss": 0.5852499008178711, "num_tokens": 4203382043.0, "step": 34400 }, { "epoch": 0.0458911305590665, "grad_norm": 2.046875, "learning_rate": 2.2944776278598478e-06, "loss": 0.5649433135986328, "num_tokens": 4205796558.0, "step": 34420 }, { "epoch": 0.0459177959457946, "grad_norm": 2.125, "learning_rate": 2.295810890085862e-06, "loss": 0.5831337928771972, "num_tokens": 4208327563.0, "step": 34440 }, { "epoch": 0.0459444613325227, "grad_norm": 1.84375, "learning_rate": 2.2971441523118768e-06, "loss": 0.5859396934509278, "num_tokens": 4210784789.0, "step": 34460 }, { "epoch": 0.04597112671925081, "grad_norm": 2.328125, "learning_rate": 2.2984774145378915e-06, "loss": 0.5964279174804688, "num_tokens": 4213065894.0, "step": 34480 }, { "epoch": 0.045997792105978916, "grad_norm": 2.3125, "learning_rate": 2.299810676763906e-06, "loss": 0.5986865997314453, "num_tokens": 4215373492.0, "step": 34500 }, { "epoch": 0.04602445749270702, "grad_norm": 2.453125, "learning_rate": 2.301143938989921e-06, "loss": 0.609312915802002, "num_tokens": 4217716296.0, "step": 34520 }, { "epoch": 0.04605112287943512, "grad_norm": 1.8828125, "learning_rate": 2.302477201215935e-06, "loss": 0.5835745811462403, "num_tokens": 4219906423.0, "step": 34540 }, { "epoch": 0.046077788266163225, "grad_norm": 2.0625, "learning_rate": 2.30381046344195e-06, "loss": 0.5758328437805176, "num_tokens": 4222305615.0, "step": 34560 }, { "epoch": 0.04610445365289133, "grad_norm": 2.359375, "learning_rate": 2.3051437256679645e-06, "loss": 0.56881103515625, "num_tokens": 4224692293.0, "step": 34580 }, { "epoch": 0.04613111903961943, "grad_norm": 1.7109375, "learning_rate": 2.3064769878939792e-06, "loss": 0.5942038536071778, "num_tokens": 4227029371.0, "step": 34600 }, { "epoch": 0.046157784426347534, "grad_norm": 2.34375, "learning_rate": 2.3078102501199935e-06, "loss": 0.5749639511108399, "num_tokens": 4229318639.0, "step": 34620 }, { "epoch": 0.04618444981307564, "grad_norm": 2.1875, "learning_rate": 2.3091435123460086e-06, "loss": 0.5951471328735352, "num_tokens": 4231730566.0, "step": 34640 }, { "epoch": 0.04621111519980374, "grad_norm": 1.7890625, "learning_rate": 2.310476774572023e-06, "loss": 0.5747695446014405, "num_tokens": 4234155300.0, "step": 34660 }, { "epoch": 0.04623778058653184, "grad_norm": 2.921875, "learning_rate": 2.3118100367980376e-06, "loss": 0.5761466503143311, "num_tokens": 4236840738.0, "step": 34680 }, { "epoch": 0.04626444597325995, "grad_norm": 1.734375, "learning_rate": 2.313143299024052e-06, "loss": 0.5840266227722168, "num_tokens": 4239363812.0, "step": 34700 }, { "epoch": 0.046291111359988056, "grad_norm": 2.015625, "learning_rate": 2.314476561250067e-06, "loss": 0.5658749580383301, "num_tokens": 4241620048.0, "step": 34720 }, { "epoch": 0.04631777674671616, "grad_norm": 2.546875, "learning_rate": 2.3158098234760813e-06, "loss": 0.6052217960357666, "num_tokens": 4244045044.0, "step": 34740 }, { "epoch": 0.04634444213344426, "grad_norm": 2.234375, "learning_rate": 2.317143085702096e-06, "loss": 0.5610939025878906, "num_tokens": 4246335521.0, "step": 34760 }, { "epoch": 0.046371107520172365, "grad_norm": 1.8125, "learning_rate": 2.3184763479281107e-06, "loss": 0.5723869800567627, "num_tokens": 4248767829.0, "step": 34780 }, { "epoch": 0.04639777290690047, "grad_norm": 2.796875, "learning_rate": 2.3198096101541254e-06, "loss": 0.5796066284179687, "num_tokens": 4251417762.0, "step": 34800 }, { "epoch": 0.04642443829362857, "grad_norm": 2.328125, "learning_rate": 2.32114287238014e-06, "loss": 0.5757519721984863, "num_tokens": 4253827336.0, "step": 34820 }, { "epoch": 0.046451103680356674, "grad_norm": 2.390625, "learning_rate": 2.3224761346061544e-06, "loss": 0.5617054462432861, "num_tokens": 4256210155.0, "step": 34840 }, { "epoch": 0.04647776906708478, "grad_norm": 2.171875, "learning_rate": 2.323809396832169e-06, "loss": 0.5897148132324219, "num_tokens": 4258770119.0, "step": 34860 }, { "epoch": 0.04650443445381288, "grad_norm": 2.015625, "learning_rate": 2.325142659058184e-06, "loss": 0.5836329460144043, "num_tokens": 4261251726.0, "step": 34880 }, { "epoch": 0.04653109984054099, "grad_norm": 1.9921875, "learning_rate": 2.3264759212841985e-06, "loss": 0.5760283946990967, "num_tokens": 4263671935.0, "step": 34900 }, { "epoch": 0.04655776522726909, "grad_norm": 1.7265625, "learning_rate": 2.3278091835102128e-06, "loss": 0.5699273586273194, "num_tokens": 4266056023.0, "step": 34920 }, { "epoch": 0.046584430613997196, "grad_norm": 1.859375, "learning_rate": 2.329142445736228e-06, "loss": 0.5889734268188477, "num_tokens": 4268751614.0, "step": 34940 }, { "epoch": 0.0466110960007253, "grad_norm": 1.8046875, "learning_rate": 2.330475707962242e-06, "loss": 0.5727960109710694, "num_tokens": 4270999222.0, "step": 34960 }, { "epoch": 0.0466377613874534, "grad_norm": 2.25, "learning_rate": 2.331808970188257e-06, "loss": 0.585659122467041, "num_tokens": 4273570886.0, "step": 34980 }, { "epoch": 0.046664426774181506, "grad_norm": 2.6875, "learning_rate": 2.333142232414271e-06, "loss": 0.6129819869995117, "num_tokens": 4275960785.0, "step": 35000 }, { "epoch": 0.04669109216090961, "grad_norm": 1.640625, "learning_rate": 2.3344754946402863e-06, "loss": 0.5975214958190918, "num_tokens": 4278615769.0, "step": 35020 }, { "epoch": 0.04671775754763771, "grad_norm": 2.015625, "learning_rate": 2.3358087568663006e-06, "loss": 0.5906186103820801, "num_tokens": 4281093668.0, "step": 35040 }, { "epoch": 0.046744422934365815, "grad_norm": 1.5546875, "learning_rate": 2.3371420190923153e-06, "loss": 0.5769091606140136, "num_tokens": 4283494077.0, "step": 35060 }, { "epoch": 0.04677108832109392, "grad_norm": 2.265625, "learning_rate": 2.33847528131833e-06, "loss": 0.5654668807983398, "num_tokens": 4286048827.0, "step": 35080 }, { "epoch": 0.04679775370782203, "grad_norm": 1.9609375, "learning_rate": 2.3398085435443447e-06, "loss": 0.5966020584106445, "num_tokens": 4288394427.0, "step": 35100 }, { "epoch": 0.04682441909455013, "grad_norm": 2.046875, "learning_rate": 2.3411418057703594e-06, "loss": 0.5941848754882812, "num_tokens": 4290838391.0, "step": 35120 }, { "epoch": 0.046851084481278234, "grad_norm": 1.9140625, "learning_rate": 2.3424750679963736e-06, "loss": 0.5827459335327149, "num_tokens": 4293265041.0, "step": 35140 }, { "epoch": 0.04687774986800634, "grad_norm": 2.046875, "learning_rate": 2.3438083302223883e-06, "loss": 0.5804231643676758, "num_tokens": 4295796089.0, "step": 35160 }, { "epoch": 0.04690441525473444, "grad_norm": 1.8125, "learning_rate": 2.345141592448403e-06, "loss": 0.5685562133789063, "num_tokens": 4298176357.0, "step": 35180 }, { "epoch": 0.04693108064146254, "grad_norm": 2.125, "learning_rate": 2.3464748546744177e-06, "loss": 0.6007543087005616, "num_tokens": 4300720253.0, "step": 35200 }, { "epoch": 0.046957746028190646, "grad_norm": 2.046875, "learning_rate": 2.347808116900432e-06, "loss": 0.5820492744445801, "num_tokens": 4303404200.0, "step": 35220 }, { "epoch": 0.04698441141491875, "grad_norm": 1.8359375, "learning_rate": 2.3491413791264467e-06, "loss": 0.5850357055664063, "num_tokens": 4305822640.0, "step": 35240 }, { "epoch": 0.04701107680164685, "grad_norm": 1.7109375, "learning_rate": 2.3504746413524614e-06, "loss": 0.5913040161132812, "num_tokens": 4308385414.0, "step": 35260 }, { "epoch": 0.047037742188374955, "grad_norm": 1.7578125, "learning_rate": 2.351807903578476e-06, "loss": 0.575281810760498, "num_tokens": 4310639701.0, "step": 35280 }, { "epoch": 0.047064407575103065, "grad_norm": 2.203125, "learning_rate": 2.3531411658044904e-06, "loss": 0.5715903759002685, "num_tokens": 4313015032.0, "step": 35300 }, { "epoch": 0.04709107296183117, "grad_norm": 2.125, "learning_rate": 2.354474428030505e-06, "loss": 0.5636407852172851, "num_tokens": 4315613393.0, "step": 35320 }, { "epoch": 0.04711773834855927, "grad_norm": 1.78125, "learning_rate": 2.35580769025652e-06, "loss": 0.5834566116333008, "num_tokens": 4318047305.0, "step": 35340 }, { "epoch": 0.047144403735287374, "grad_norm": 1.765625, "learning_rate": 2.3571409524825345e-06, "loss": 0.5687760353088379, "num_tokens": 4320647318.0, "step": 35360 }, { "epoch": 0.04717106912201548, "grad_norm": 1.96875, "learning_rate": 2.358474214708549e-06, "loss": 0.5860897064208984, "num_tokens": 4323224001.0, "step": 35380 }, { "epoch": 0.04719773450874358, "grad_norm": 1.96875, "learning_rate": 2.3598074769345635e-06, "loss": 0.5742205619812012, "num_tokens": 4325485217.0, "step": 35400 }, { "epoch": 0.04722439989547168, "grad_norm": 1.4609375, "learning_rate": 2.361140739160578e-06, "loss": 0.5631166458129883, "num_tokens": 4327946603.0, "step": 35420 }, { "epoch": 0.047251065282199786, "grad_norm": 1.71875, "learning_rate": 2.362474001386593e-06, "loss": 0.5820592880249024, "num_tokens": 4330503569.0, "step": 35440 }, { "epoch": 0.04727773066892789, "grad_norm": 1.7265625, "learning_rate": 2.3638072636126076e-06, "loss": 0.5608628273010254, "num_tokens": 4332865083.0, "step": 35460 }, { "epoch": 0.04730439605565599, "grad_norm": 1.984375, "learning_rate": 2.365140525838622e-06, "loss": 0.5815488338470459, "num_tokens": 4335324417.0, "step": 35480 }, { "epoch": 0.0473310614423841, "grad_norm": 1.625, "learning_rate": 2.366473788064637e-06, "loss": 0.574526596069336, "num_tokens": 4337947522.0, "step": 35500 }, { "epoch": 0.047357726829112205, "grad_norm": 2.0, "learning_rate": 2.3678070502906513e-06, "loss": 0.5659475326538086, "num_tokens": 4340506580.0, "step": 35520 }, { "epoch": 0.04738439221584031, "grad_norm": 2.09375, "learning_rate": 2.369140312516666e-06, "loss": 0.5809325218200684, "num_tokens": 4342876290.0, "step": 35540 }, { "epoch": 0.04741105760256841, "grad_norm": 1.8359375, "learning_rate": 2.3704735747426807e-06, "loss": 0.5887007236480712, "num_tokens": 4345275505.0, "step": 35560 }, { "epoch": 0.047437722989296514, "grad_norm": 1.9921875, "learning_rate": 2.3718068369686954e-06, "loss": 0.57115797996521, "num_tokens": 4347677434.0, "step": 35580 }, { "epoch": 0.04746438837602462, "grad_norm": 1.8203125, "learning_rate": 2.3731400991947097e-06, "loss": 0.5819626808166504, "num_tokens": 4349967599.0, "step": 35600 }, { "epoch": 0.04749105376275272, "grad_norm": 1.8984375, "learning_rate": 2.3744733614207244e-06, "loss": 0.5961565971374512, "num_tokens": 4352598025.0, "step": 35620 }, { "epoch": 0.047517719149480823, "grad_norm": 1.9609375, "learning_rate": 2.375806623646739e-06, "loss": 0.6072005271911621, "num_tokens": 4354977574.0, "step": 35640 }, { "epoch": 0.047544384536208927, "grad_norm": 2.03125, "learning_rate": 2.3771398858727538e-06, "loss": 0.5777068614959717, "num_tokens": 4357190355.0, "step": 35660 }, { "epoch": 0.04757104992293703, "grad_norm": 1.765625, "learning_rate": 2.3784731480987685e-06, "loss": 0.5766937732696533, "num_tokens": 4359682869.0, "step": 35680 }, { "epoch": 0.04759771530966514, "grad_norm": 2.46875, "learning_rate": 2.3798064103247827e-06, "loss": 0.5790962219238281, "num_tokens": 4362026818.0, "step": 35700 }, { "epoch": 0.04762438069639324, "grad_norm": 2.328125, "learning_rate": 2.3811396725507974e-06, "loss": 0.5947413921356202, "num_tokens": 4364457079.0, "step": 35720 }, { "epoch": 0.047651046083121346, "grad_norm": 2.265625, "learning_rate": 2.382472934776812e-06, "loss": 0.5697731971740723, "num_tokens": 4366777909.0, "step": 35740 }, { "epoch": 0.04767771146984945, "grad_norm": 1.5546875, "learning_rate": 2.383806197002827e-06, "loss": 0.5861158847808838, "num_tokens": 4369237175.0, "step": 35760 }, { "epoch": 0.04770437685657755, "grad_norm": 2.28125, "learning_rate": 2.385139459228841e-06, "loss": 0.5824409008026123, "num_tokens": 4371670251.0, "step": 35780 }, { "epoch": 0.047731042243305655, "grad_norm": 2.21875, "learning_rate": 2.386472721454856e-06, "loss": 0.5849963188171386, "num_tokens": 4374184055.0, "step": 35800 }, { "epoch": 0.04775770763003376, "grad_norm": 1.46875, "learning_rate": 2.3878059836808705e-06, "loss": 0.5561548233032226, "num_tokens": 4376502059.0, "step": 35820 }, { "epoch": 0.04778437301676186, "grad_norm": 2.28125, "learning_rate": 2.3891392459068852e-06, "loss": 0.5789816856384278, "num_tokens": 4378910375.0, "step": 35840 }, { "epoch": 0.047811038403489964, "grad_norm": 1.9296875, "learning_rate": 2.3904725081328995e-06, "loss": 0.5885865211486816, "num_tokens": 4381405470.0, "step": 35860 }, { "epoch": 0.04783770379021807, "grad_norm": 2.140625, "learning_rate": 2.391805770358914e-06, "loss": 0.5616921424865723, "num_tokens": 4383710376.0, "step": 35880 }, { "epoch": 0.04786436917694617, "grad_norm": 2.703125, "learning_rate": 2.393139032584929e-06, "loss": 0.5897364616394043, "num_tokens": 4385998917.0, "step": 35900 }, { "epoch": 0.04789103456367428, "grad_norm": 2.046875, "learning_rate": 2.3944722948109436e-06, "loss": 0.5823076725006103, "num_tokens": 4388406684.0, "step": 35920 }, { "epoch": 0.04791769995040238, "grad_norm": 2.5, "learning_rate": 2.3958055570369583e-06, "loss": 0.5734066963195801, "num_tokens": 4390966118.0, "step": 35940 }, { "epoch": 0.047944365337130486, "grad_norm": 2.046875, "learning_rate": 2.3971388192629726e-06, "loss": 0.5893355369567871, "num_tokens": 4393382776.0, "step": 35960 }, { "epoch": 0.04797103072385859, "grad_norm": 2.421875, "learning_rate": 2.3984720814889877e-06, "loss": 0.5739480495452881, "num_tokens": 4395800962.0, "step": 35980 }, { "epoch": 0.04799769611058669, "grad_norm": 2.015625, "learning_rate": 2.399805343715002e-06, "loss": 0.5774190902709961, "num_tokens": 4398122346.0, "step": 36000 }, { "epoch": 0.048024361497314795, "grad_norm": 1.84375, "learning_rate": 2.4011386059410167e-06, "loss": 0.5914273262023926, "num_tokens": 4400732456.0, "step": 36020 }, { "epoch": 0.0480510268840429, "grad_norm": 1.875, "learning_rate": 2.402471868167031e-06, "loss": 0.5790183067321777, "num_tokens": 4403149932.0, "step": 36040 }, { "epoch": 0.048077692270771, "grad_norm": 2.09375, "learning_rate": 2.403805130393046e-06, "loss": 0.5701999664306641, "num_tokens": 4405453182.0, "step": 36060 }, { "epoch": 0.048104357657499104, "grad_norm": 1.765625, "learning_rate": 2.4051383926190604e-06, "loss": 0.5884335041046143, "num_tokens": 4408005159.0, "step": 36080 }, { "epoch": 0.04813102304422721, "grad_norm": 1.8046875, "learning_rate": 2.406471654845075e-06, "loss": 0.5873204231262207, "num_tokens": 4410296196.0, "step": 36100 }, { "epoch": 0.04815768843095532, "grad_norm": 1.8828125, "learning_rate": 2.4078049170710898e-06, "loss": 0.5618722438812256, "num_tokens": 4412720216.0, "step": 36120 }, { "epoch": 0.04818435381768342, "grad_norm": 2.0625, "learning_rate": 2.4091381792971045e-06, "loss": 0.5740681648254394, "num_tokens": 4415202294.0, "step": 36140 }, { "epoch": 0.04821101920441152, "grad_norm": 1.6796875, "learning_rate": 2.4104714415231188e-06, "loss": 0.5848861694335937, "num_tokens": 4417819352.0, "step": 36160 }, { "epoch": 0.048237684591139626, "grad_norm": 2.0625, "learning_rate": 2.4118047037491335e-06, "loss": 0.5851637840270996, "num_tokens": 4420358592.0, "step": 36180 }, { "epoch": 0.04826434997786773, "grad_norm": 1.6875, "learning_rate": 2.413137965975148e-06, "loss": 0.5844018459320068, "num_tokens": 4422668762.0, "step": 36200 }, { "epoch": 0.04829101536459583, "grad_norm": 2.09375, "learning_rate": 2.414471228201163e-06, "loss": 0.5867934226989746, "num_tokens": 4425171045.0, "step": 36220 }, { "epoch": 0.048317680751323935, "grad_norm": 1.859375, "learning_rate": 2.4158044904271776e-06, "loss": 0.5858034133911133, "num_tokens": 4427605229.0, "step": 36240 }, { "epoch": 0.04834434613805204, "grad_norm": 2.296875, "learning_rate": 2.417137752653192e-06, "loss": 0.5793878555297851, "num_tokens": 4429965273.0, "step": 36260 }, { "epoch": 0.04837101152478014, "grad_norm": 1.96875, "learning_rate": 2.418471014879207e-06, "loss": 0.5822982788085938, "num_tokens": 4432317319.0, "step": 36280 }, { "epoch": 0.048397676911508244, "grad_norm": 2.515625, "learning_rate": 2.4198042771052212e-06, "loss": 0.575800085067749, "num_tokens": 4434686124.0, "step": 36300 }, { "epoch": 0.048424342298236354, "grad_norm": 2.078125, "learning_rate": 2.421137539331236e-06, "loss": 0.586005687713623, "num_tokens": 4437236500.0, "step": 36320 }, { "epoch": 0.04845100768496446, "grad_norm": 2.0, "learning_rate": 2.4224708015572502e-06, "loss": 0.5844363212585449, "num_tokens": 4439580643.0, "step": 36340 }, { "epoch": 0.04847767307169256, "grad_norm": 1.7265625, "learning_rate": 2.4238040637832653e-06, "loss": 0.5896830081939697, "num_tokens": 4442238598.0, "step": 36360 }, { "epoch": 0.048504338458420664, "grad_norm": 2.125, "learning_rate": 2.4251373260092796e-06, "loss": 0.5923621177673339, "num_tokens": 4444602717.0, "step": 36380 }, { "epoch": 0.04853100384514877, "grad_norm": 2.0, "learning_rate": 2.4264705882352943e-06, "loss": 0.5714093208312988, "num_tokens": 4447131461.0, "step": 36400 }, { "epoch": 0.04855766923187687, "grad_norm": 1.7734375, "learning_rate": 2.427803850461309e-06, "loss": 0.564515209197998, "num_tokens": 4449458031.0, "step": 36420 }, { "epoch": 0.04858433461860497, "grad_norm": 2.0625, "learning_rate": 2.4291371126873237e-06, "loss": 0.5842608451843262, "num_tokens": 4451890900.0, "step": 36440 }, { "epoch": 0.048611000005333076, "grad_norm": 2.203125, "learning_rate": 2.430470374913338e-06, "loss": 0.5512424945831299, "num_tokens": 4454473110.0, "step": 36460 }, { "epoch": 0.04863766539206118, "grad_norm": 2.21875, "learning_rate": 2.4318036371393527e-06, "loss": 0.5665528297424316, "num_tokens": 4457130275.0, "step": 36480 }, { "epoch": 0.04866433077878928, "grad_norm": 1.6015625, "learning_rate": 2.4331368993653674e-06, "loss": 0.5771777153015136, "num_tokens": 4459683494.0, "step": 36500 }, { "epoch": 0.04869099616551739, "grad_norm": 2.265625, "learning_rate": 2.434470161591382e-06, "loss": 0.5753138065338135, "num_tokens": 4461994279.0, "step": 36520 }, { "epoch": 0.048717661552245495, "grad_norm": 2.015625, "learning_rate": 2.435803423817397e-06, "loss": 0.5928221702575683, "num_tokens": 4464409288.0, "step": 36540 }, { "epoch": 0.0487443269389736, "grad_norm": 1.6328125, "learning_rate": 2.437136686043411e-06, "loss": 0.5796599388122559, "num_tokens": 4466732940.0, "step": 36560 }, { "epoch": 0.0487709923257017, "grad_norm": 2.375, "learning_rate": 2.438469948269426e-06, "loss": 0.5929506301879883, "num_tokens": 4469180510.0, "step": 36580 }, { "epoch": 0.048797657712429804, "grad_norm": 2.328125, "learning_rate": 2.4398032104954405e-06, "loss": 0.5788221836090088, "num_tokens": 4471536445.0, "step": 36600 }, { "epoch": 0.04882432309915791, "grad_norm": 1.9140625, "learning_rate": 2.441136472721455e-06, "loss": 0.5844032287597656, "num_tokens": 4474264517.0, "step": 36620 }, { "epoch": 0.04885098848588601, "grad_norm": 1.9140625, "learning_rate": 2.4424697349474695e-06, "loss": 0.5674295425415039, "num_tokens": 4476612668.0, "step": 36640 }, { "epoch": 0.04887765387261411, "grad_norm": 2.25, "learning_rate": 2.443802997173484e-06, "loss": 0.5782337188720703, "num_tokens": 4479085671.0, "step": 36660 }, { "epoch": 0.048904319259342216, "grad_norm": 2.0625, "learning_rate": 2.445136259399499e-06, "loss": 0.5807512283325196, "num_tokens": 4481331523.0, "step": 36680 }, { "epoch": 0.04893098464607032, "grad_norm": 2.28125, "learning_rate": 2.4464695216255136e-06, "loss": 0.5772196769714355, "num_tokens": 4483709890.0, "step": 36700 }, { "epoch": 0.04895765003279843, "grad_norm": 1.828125, "learning_rate": 2.4478027838515283e-06, "loss": 0.5840914249420166, "num_tokens": 4486341381.0, "step": 36720 }, { "epoch": 0.04898431541952653, "grad_norm": 2.03125, "learning_rate": 2.4491360460775426e-06, "loss": 0.5623960494995117, "num_tokens": 4488909206.0, "step": 36740 }, { "epoch": 0.049010980806254635, "grad_norm": 1.6796875, "learning_rate": 2.4504693083035573e-06, "loss": 0.5817697525024415, "num_tokens": 4491292264.0, "step": 36760 }, { "epoch": 0.04903764619298274, "grad_norm": 1.890625, "learning_rate": 2.451802570529572e-06, "loss": 0.5834967613220214, "num_tokens": 4493743829.0, "step": 36780 }, { "epoch": 0.04906431157971084, "grad_norm": 1.9296875, "learning_rate": 2.4531358327555867e-06, "loss": 0.6011902332305908, "num_tokens": 4496151228.0, "step": 36800 }, { "epoch": 0.049090976966438944, "grad_norm": 1.8046875, "learning_rate": 2.454469094981601e-06, "loss": 0.5830549716949462, "num_tokens": 4498794006.0, "step": 36820 }, { "epoch": 0.04911764235316705, "grad_norm": 1.8828125, "learning_rate": 2.455802357207616e-06, "loss": 0.5731137275695801, "num_tokens": 4501282494.0, "step": 36840 }, { "epoch": 0.04914430773989515, "grad_norm": 1.90625, "learning_rate": 2.4571356194336303e-06, "loss": 0.5767498970031738, "num_tokens": 4503720788.0, "step": 36860 }, { "epoch": 0.04917097312662325, "grad_norm": 1.65625, "learning_rate": 2.458468881659645e-06, "loss": 0.5782618999481202, "num_tokens": 4506201091.0, "step": 36880 }, { "epoch": 0.049197638513351356, "grad_norm": 1.9140625, "learning_rate": 2.4598021438856593e-06, "loss": 0.5782634735107421, "num_tokens": 4508417474.0, "step": 36900 }, { "epoch": 0.04922430390007946, "grad_norm": 2.140625, "learning_rate": 2.4611354061116745e-06, "loss": 0.5906059265136718, "num_tokens": 4510855802.0, "step": 36920 }, { "epoch": 0.04925096928680757, "grad_norm": 2.09375, "learning_rate": 2.4624686683376887e-06, "loss": 0.57467041015625, "num_tokens": 4513178129.0, "step": 36940 }, { "epoch": 0.04927763467353567, "grad_norm": 1.8046875, "learning_rate": 2.4638019305637034e-06, "loss": 0.5607968807220459, "num_tokens": 4515519134.0, "step": 36960 }, { "epoch": 0.049304300060263775, "grad_norm": 1.7421875, "learning_rate": 2.465135192789718e-06, "loss": 0.5647533893585205, "num_tokens": 4518093569.0, "step": 36980 }, { "epoch": 0.04933096544699188, "grad_norm": 2.03125, "learning_rate": 2.466468455015733e-06, "loss": 0.5716274738311767, "num_tokens": 4520629555.0, "step": 37000 }, { "epoch": 0.04935763083371998, "grad_norm": 2.03125, "learning_rate": 2.4678017172417475e-06, "loss": 0.573189640045166, "num_tokens": 4522800337.0, "step": 37020 }, { "epoch": 0.049384296220448085, "grad_norm": 2.140625, "learning_rate": 2.469134979467762e-06, "loss": 0.5786411762237549, "num_tokens": 4525046106.0, "step": 37040 }, { "epoch": 0.04941096160717619, "grad_norm": 1.84375, "learning_rate": 2.4704682416937765e-06, "loss": 0.5605227470397949, "num_tokens": 4527488239.0, "step": 37060 }, { "epoch": 0.04943762699390429, "grad_norm": 2.09375, "learning_rate": 2.4718015039197912e-06, "loss": 0.5745234489440918, "num_tokens": 4529890058.0, "step": 37080 }, { "epoch": 0.049464292380632394, "grad_norm": 1.96875, "learning_rate": 2.473134766145806e-06, "loss": 0.5807751178741455, "num_tokens": 4532339077.0, "step": 37100 }, { "epoch": 0.0494909577673605, "grad_norm": 2.625, "learning_rate": 2.47446802837182e-06, "loss": 0.5926365852355957, "num_tokens": 4534795165.0, "step": 37120 }, { "epoch": 0.04951762315408861, "grad_norm": 2.0625, "learning_rate": 2.475801290597835e-06, "loss": 0.5742351055145264, "num_tokens": 4537090004.0, "step": 37140 }, { "epoch": 0.04954428854081671, "grad_norm": 2.140625, "learning_rate": 2.4771345528238496e-06, "loss": 0.5797761917114258, "num_tokens": 4539453865.0, "step": 37160 }, { "epoch": 0.04957095392754481, "grad_norm": 2.359375, "learning_rate": 2.4784678150498643e-06, "loss": 0.571104907989502, "num_tokens": 4541951530.0, "step": 37180 }, { "epoch": 0.049597619314272916, "grad_norm": 1.8203125, "learning_rate": 2.4798010772758786e-06, "loss": 0.5799038887023926, "num_tokens": 4544368280.0, "step": 37200 }, { "epoch": 0.04962428470100102, "grad_norm": 1.765625, "learning_rate": 2.4811343395018933e-06, "loss": 0.5801840782165527, "num_tokens": 4546660745.0, "step": 37220 }, { "epoch": 0.04965095008772912, "grad_norm": 2.0625, "learning_rate": 2.482467601727908e-06, "loss": 0.5606669902801513, "num_tokens": 4549300565.0, "step": 37240 }, { "epoch": 0.049677615474457225, "grad_norm": 1.9765625, "learning_rate": 2.4838008639539227e-06, "loss": 0.5585212230682373, "num_tokens": 4551686312.0, "step": 37260 }, { "epoch": 0.04970428086118533, "grad_norm": 2.09375, "learning_rate": 2.4851341261799374e-06, "loss": 0.5771899700164795, "num_tokens": 4554090097.0, "step": 37280 }, { "epoch": 0.04973094624791343, "grad_norm": 2.390625, "learning_rate": 2.4864673884059517e-06, "loss": 0.580379867553711, "num_tokens": 4556475082.0, "step": 37300 }, { "epoch": 0.049757611634641534, "grad_norm": 1.90625, "learning_rate": 2.4878006506319664e-06, "loss": 0.5724353790283203, "num_tokens": 4558880556.0, "step": 37320 }, { "epoch": 0.049784277021369644, "grad_norm": 1.7109375, "learning_rate": 2.489133912857981e-06, "loss": 0.5922900199890136, "num_tokens": 4561414319.0, "step": 37340 }, { "epoch": 0.04981094240809775, "grad_norm": 2.25, "learning_rate": 2.4904671750839958e-06, "loss": 0.6024827003479004, "num_tokens": 4563805324.0, "step": 37360 }, { "epoch": 0.04983760779482585, "grad_norm": 1.7890625, "learning_rate": 2.49180043731001e-06, "loss": 0.5710187911987304, "num_tokens": 4566364953.0, "step": 37380 }, { "epoch": 0.04986427318155395, "grad_norm": 1.9296875, "learning_rate": 2.493133699536025e-06, "loss": 0.5620617389678955, "num_tokens": 4568782966.0, "step": 37400 }, { "epoch": 0.049890938568282056, "grad_norm": 2.03125, "learning_rate": 2.4944669617620394e-06, "loss": 0.5752213478088379, "num_tokens": 4571077469.0, "step": 37420 }, { "epoch": 0.04991760395501016, "grad_norm": 1.8984375, "learning_rate": 2.495800223988054e-06, "loss": 0.5690714836120605, "num_tokens": 4573445834.0, "step": 37440 }, { "epoch": 0.04994426934173826, "grad_norm": 2.3125, "learning_rate": 2.497133486214069e-06, "loss": 0.5794489860534668, "num_tokens": 4575903997.0, "step": 37460 }, { "epoch": 0.049970934728466365, "grad_norm": 2.25, "learning_rate": 2.4984667484400836e-06, "loss": 0.5830409526824951, "num_tokens": 4578509288.0, "step": 37480 }, { "epoch": 0.04999760011519447, "grad_norm": 2.015625, "learning_rate": 2.499800010666098e-06, "loss": 0.571955680847168, "num_tokens": 4581059302.0, "step": 37500 }, { "epoch": 0.05002426550192257, "grad_norm": 1.8515625, "learning_rate": 2.501133272892113e-06, "loss": 0.566652488708496, "num_tokens": 4583410764.0, "step": 37520 }, { "epoch": 0.05005093088865068, "grad_norm": 1.7265625, "learning_rate": 2.5024665351181272e-06, "loss": 0.5730606079101562, "num_tokens": 4585764140.0, "step": 37540 }, { "epoch": 0.050077596275378784, "grad_norm": 1.8828125, "learning_rate": 2.503799797344142e-06, "loss": 0.5639748573303223, "num_tokens": 4588359925.0, "step": 37560 }, { "epoch": 0.05010426166210689, "grad_norm": 1.8984375, "learning_rate": 2.5051330595701566e-06, "loss": 0.5701787948608399, "num_tokens": 4591043155.0, "step": 37580 }, { "epoch": 0.05013092704883499, "grad_norm": 1.90625, "learning_rate": 2.506466321796171e-06, "loss": 0.5749680995941162, "num_tokens": 4593331738.0, "step": 37600 }, { "epoch": 0.05015759243556309, "grad_norm": 1.9765625, "learning_rate": 2.5077995840221856e-06, "loss": 0.5842374324798584, "num_tokens": 4595705996.0, "step": 37620 }, { "epoch": 0.050184257822291196, "grad_norm": 1.9140625, "learning_rate": 2.5091328462482e-06, "loss": 0.5657750129699707, "num_tokens": 4598109414.0, "step": 37640 }, { "epoch": 0.0502109232090193, "grad_norm": 2.15625, "learning_rate": 2.510466108474215e-06, "loss": 0.573691987991333, "num_tokens": 4600534024.0, "step": 37660 }, { "epoch": 0.0502375885957474, "grad_norm": 2.15625, "learning_rate": 2.5117993707002297e-06, "loss": 0.5796340942382813, "num_tokens": 4603106967.0, "step": 37680 }, { "epoch": 0.050264253982475506, "grad_norm": 2.25, "learning_rate": 2.513132632926244e-06, "loss": 0.5694514274597168, "num_tokens": 4605538948.0, "step": 37700 }, { "epoch": 0.05029091936920361, "grad_norm": 1.640625, "learning_rate": 2.5144658951522587e-06, "loss": 0.5663919448852539, "num_tokens": 4607903588.0, "step": 37720 }, { "epoch": 0.05031758475593172, "grad_norm": 2.0625, "learning_rate": 2.515799157378274e-06, "loss": 0.5781597137451172, "num_tokens": 4610387735.0, "step": 37740 }, { "epoch": 0.05034425014265982, "grad_norm": 2.015625, "learning_rate": 2.517132419604288e-06, "loss": 0.5907382965087891, "num_tokens": 4612911035.0, "step": 37760 }, { "epoch": 0.050370915529387925, "grad_norm": 2.21875, "learning_rate": 2.518465681830303e-06, "loss": 0.5760993480682373, "num_tokens": 4615341515.0, "step": 37780 }, { "epoch": 0.05039758091611603, "grad_norm": 2.046875, "learning_rate": 2.519798944056317e-06, "loss": 0.5639971733093262, "num_tokens": 4617761667.0, "step": 37800 }, { "epoch": 0.05042424630284413, "grad_norm": 2.34375, "learning_rate": 2.5211322062823318e-06, "loss": 0.5714266777038575, "num_tokens": 4620282540.0, "step": 37820 }, { "epoch": 0.050450911689572234, "grad_norm": 1.9453125, "learning_rate": 2.5224654685083465e-06, "loss": 0.566836929321289, "num_tokens": 4622895819.0, "step": 37840 }, { "epoch": 0.05047757707630034, "grad_norm": 1.7890625, "learning_rate": 2.5237987307343608e-06, "loss": 0.5709798336029053, "num_tokens": 4625237421.0, "step": 37860 }, { "epoch": 0.05050424246302844, "grad_norm": 2.234375, "learning_rate": 2.525131992960376e-06, "loss": 0.590483570098877, "num_tokens": 4627559368.0, "step": 37880 }, { "epoch": 0.05053090784975654, "grad_norm": 2.203125, "learning_rate": 2.5264652551863906e-06, "loss": 0.5702613830566406, "num_tokens": 4630042663.0, "step": 37900 }, { "epoch": 0.050557573236484646, "grad_norm": 2.484375, "learning_rate": 2.527798517412405e-06, "loss": 0.5748115539550781, "num_tokens": 4632494436.0, "step": 37920 }, { "epoch": 0.050584238623212756, "grad_norm": 2.265625, "learning_rate": 2.5291317796384196e-06, "loss": 0.5604733467102051, "num_tokens": 4634837406.0, "step": 37940 }, { "epoch": 0.05061090400994086, "grad_norm": 2.53125, "learning_rate": 2.530465041864434e-06, "loss": 0.564818286895752, "num_tokens": 4637244353.0, "step": 37960 }, { "epoch": 0.05063756939666896, "grad_norm": 1.9921875, "learning_rate": 2.5317983040904485e-06, "loss": 0.5709281921386719, "num_tokens": 4639625456.0, "step": 37980 }, { "epoch": 0.050664234783397065, "grad_norm": 1.8984375, "learning_rate": 2.5331315663164637e-06, "loss": 0.5884544372558593, "num_tokens": 4641859519.0, "step": 38000 }, { "epoch": 0.05069090017012517, "grad_norm": 2.015625, "learning_rate": 2.534464828542478e-06, "loss": 0.548668384552002, "num_tokens": 4644289826.0, "step": 38020 }, { "epoch": 0.05071756555685327, "grad_norm": 1.78125, "learning_rate": 2.5357980907684927e-06, "loss": 0.5771371364593506, "num_tokens": 4646986417.0, "step": 38040 }, { "epoch": 0.050744230943581374, "grad_norm": 1.640625, "learning_rate": 2.5371313529945074e-06, "loss": 0.5758931636810303, "num_tokens": 4649273850.0, "step": 38060 }, { "epoch": 0.05077089633030948, "grad_norm": 1.703125, "learning_rate": 2.5384646152205216e-06, "loss": 0.5778156280517578, "num_tokens": 4651708881.0, "step": 38080 }, { "epoch": 0.05079756171703758, "grad_norm": 1.953125, "learning_rate": 2.5397978774465363e-06, "loss": 0.5649430274963378, "num_tokens": 4654111436.0, "step": 38100 }, { "epoch": 0.05082422710376568, "grad_norm": 2.1875, "learning_rate": 2.5411311396725506e-06, "loss": 0.5738075256347657, "num_tokens": 4656666031.0, "step": 38120 }, { "epoch": 0.050850892490493786, "grad_norm": 1.4609375, "learning_rate": 2.5424644018985657e-06, "loss": 0.57691330909729, "num_tokens": 4659217930.0, "step": 38140 }, { "epoch": 0.050877557877221896, "grad_norm": 2.40625, "learning_rate": 2.5437976641245804e-06, "loss": 0.5690332889556885, "num_tokens": 4661802810.0, "step": 38160 }, { "epoch": 0.05090422326395, "grad_norm": 1.9921875, "learning_rate": 2.5451309263505947e-06, "loss": 0.5561785221099853, "num_tokens": 4664128897.0, "step": 38180 }, { "epoch": 0.0509308886506781, "grad_norm": 1.8984375, "learning_rate": 2.5464641885766094e-06, "loss": 0.5715235710144043, "num_tokens": 4666427849.0, "step": 38200 }, { "epoch": 0.050957554037406205, "grad_norm": 2.015625, "learning_rate": 2.547797450802624e-06, "loss": 0.5687835693359375, "num_tokens": 4668618909.0, "step": 38220 }, { "epoch": 0.05098421942413431, "grad_norm": 1.53125, "learning_rate": 2.5491307130286384e-06, "loss": 0.5526377201080322, "num_tokens": 4670909528.0, "step": 38240 }, { "epoch": 0.05101088481086241, "grad_norm": 2.171875, "learning_rate": 2.5504639752546535e-06, "loss": 0.5623409271240234, "num_tokens": 4673306698.0, "step": 38260 }, { "epoch": 0.051037550197590514, "grad_norm": 1.765625, "learning_rate": 2.551797237480668e-06, "loss": 0.5657556056976318, "num_tokens": 4675701562.0, "step": 38280 }, { "epoch": 0.05106421558431862, "grad_norm": 1.828125, "learning_rate": 2.5531304997066825e-06, "loss": 0.5896822929382324, "num_tokens": 4678310788.0, "step": 38300 }, { "epoch": 0.05109088097104672, "grad_norm": 2.046875, "learning_rate": 2.554463761932697e-06, "loss": 0.5644119262695313, "num_tokens": 4680710988.0, "step": 38320 }, { "epoch": 0.051117546357774823, "grad_norm": 2.234375, "learning_rate": 2.5557970241587115e-06, "loss": 0.5772613525390625, "num_tokens": 4683033923.0, "step": 38340 }, { "epoch": 0.05114421174450293, "grad_norm": 2.234375, "learning_rate": 2.557130286384726e-06, "loss": 0.5498970508575439, "num_tokens": 4685315556.0, "step": 38360 }, { "epoch": 0.051170877131231036, "grad_norm": 2.734375, "learning_rate": 2.5584635486107413e-06, "loss": 0.569824504852295, "num_tokens": 4687641794.0, "step": 38380 }, { "epoch": 0.05119754251795914, "grad_norm": 1.8984375, "learning_rate": 2.5597968108367556e-06, "loss": 0.5687330722808838, "num_tokens": 4690048601.0, "step": 38400 }, { "epoch": 0.05122420790468724, "grad_norm": 1.9453125, "learning_rate": 2.5611300730627703e-06, "loss": 0.5695808887481689, "num_tokens": 4692468525.0, "step": 38420 }, { "epoch": 0.051250873291415346, "grad_norm": 1.7421875, "learning_rate": 2.5624633352887846e-06, "loss": 0.5754270553588867, "num_tokens": 4694841674.0, "step": 38440 }, { "epoch": 0.05127753867814345, "grad_norm": 2.265625, "learning_rate": 2.5637965975147993e-06, "loss": 0.5541526794433593, "num_tokens": 4697310482.0, "step": 38460 }, { "epoch": 0.05130420406487155, "grad_norm": 1.84375, "learning_rate": 2.5651298597408144e-06, "loss": 0.5665322303771972, "num_tokens": 4699897910.0, "step": 38480 }, { "epoch": 0.051330869451599655, "grad_norm": 2.1875, "learning_rate": 2.5664631219668282e-06, "loss": 0.5769216537475585, "num_tokens": 4702329619.0, "step": 38500 }, { "epoch": 0.05135753483832776, "grad_norm": 1.984375, "learning_rate": 2.5677963841928434e-06, "loss": 0.5788371086120605, "num_tokens": 4704638786.0, "step": 38520 }, { "epoch": 0.05138420022505586, "grad_norm": 2.171875, "learning_rate": 2.569129646418858e-06, "loss": 0.596444034576416, "num_tokens": 4707299591.0, "step": 38540 }, { "epoch": 0.05141086561178397, "grad_norm": 1.875, "learning_rate": 2.5704629086448724e-06, "loss": 0.5722668170928955, "num_tokens": 4710046879.0, "step": 38560 }, { "epoch": 0.051437530998512074, "grad_norm": 2.140625, "learning_rate": 2.571796170870887e-06, "loss": 0.5916857719421387, "num_tokens": 4712463776.0, "step": 38580 }, { "epoch": 0.05146419638524018, "grad_norm": 1.9296875, "learning_rate": 2.5731294330969013e-06, "loss": 0.5586331367492676, "num_tokens": 4714886264.0, "step": 38600 }, { "epoch": 0.05149086177196828, "grad_norm": 1.9609375, "learning_rate": 2.5744626953229165e-06, "loss": 0.5728902339935302, "num_tokens": 4717455710.0, "step": 38620 }, { "epoch": 0.05151752715869638, "grad_norm": 2.203125, "learning_rate": 2.575795957548931e-06, "loss": 0.5961880207061767, "num_tokens": 4719731493.0, "step": 38640 }, { "epoch": 0.051544192545424486, "grad_norm": 2.03125, "learning_rate": 2.5771292197749454e-06, "loss": 0.564938735961914, "num_tokens": 4722177950.0, "step": 38660 }, { "epoch": 0.05157085793215259, "grad_norm": 1.6015625, "learning_rate": 2.57846248200096e-06, "loss": 0.5575816631317139, "num_tokens": 4724484818.0, "step": 38680 }, { "epoch": 0.05159752331888069, "grad_norm": 1.828125, "learning_rate": 2.579795744226975e-06, "loss": 0.5654013633728028, "num_tokens": 4726951696.0, "step": 38700 }, { "epoch": 0.051624188705608795, "grad_norm": 1.7734375, "learning_rate": 2.581129006452989e-06, "loss": 0.5640960216522217, "num_tokens": 4729272417.0, "step": 38720 }, { "epoch": 0.0516508540923369, "grad_norm": 2.28125, "learning_rate": 2.5824622686790042e-06, "loss": 0.5932245254516602, "num_tokens": 4731633690.0, "step": 38740 }, { "epoch": 0.05167751947906501, "grad_norm": 1.7734375, "learning_rate": 2.5837955309050185e-06, "loss": 0.5885885238647461, "num_tokens": 4733988337.0, "step": 38760 }, { "epoch": 0.05170418486579311, "grad_norm": 2.453125, "learning_rate": 2.5851287931310332e-06, "loss": 0.5593679428100586, "num_tokens": 4736393115.0, "step": 38780 }, { "epoch": 0.051730850252521214, "grad_norm": 1.796875, "learning_rate": 2.586462055357048e-06, "loss": 0.538523817062378, "num_tokens": 4738839817.0, "step": 38800 }, { "epoch": 0.05175751563924932, "grad_norm": 1.8671875, "learning_rate": 2.587795317583062e-06, "loss": 0.5830682754516602, "num_tokens": 4741058817.0, "step": 38820 }, { "epoch": 0.05178418102597742, "grad_norm": 1.8671875, "learning_rate": 2.589128579809077e-06, "loss": 0.564363670349121, "num_tokens": 4743419958.0, "step": 38840 }, { "epoch": 0.05181084641270552, "grad_norm": 2.125, "learning_rate": 2.590461842035092e-06, "loss": 0.5667238235473633, "num_tokens": 4745707465.0, "step": 38860 }, { "epoch": 0.051837511799433626, "grad_norm": 1.7734375, "learning_rate": 2.5917951042611063e-06, "loss": 0.5740908622741699, "num_tokens": 4748267682.0, "step": 38880 }, { "epoch": 0.05186417718616173, "grad_norm": 2.171875, "learning_rate": 2.593128366487121e-06, "loss": 0.558940839767456, "num_tokens": 4750643647.0, "step": 38900 }, { "epoch": 0.05189084257288983, "grad_norm": 2.15625, "learning_rate": 2.5944616287131357e-06, "loss": 0.5687429428100585, "num_tokens": 4753475279.0, "step": 38920 }, { "epoch": 0.051917507959617935, "grad_norm": 2.015625, "learning_rate": 2.59579489093915e-06, "loss": 0.5680813789367676, "num_tokens": 4755744804.0, "step": 38940 }, { "epoch": 0.051944173346346045, "grad_norm": 2.109375, "learning_rate": 2.5971281531651647e-06, "loss": 0.567289400100708, "num_tokens": 4758365725.0, "step": 38960 }, { "epoch": 0.05197083873307415, "grad_norm": 2.03125, "learning_rate": 2.598461415391179e-06, "loss": 0.5624903202056885, "num_tokens": 4760836255.0, "step": 38980 }, { "epoch": 0.05199750411980225, "grad_norm": 2.140625, "learning_rate": 2.599794677617194e-06, "loss": 0.5594189167022705, "num_tokens": 4763176043.0, "step": 39000 }, { "epoch": 0.052024169506530354, "grad_norm": 1.90625, "learning_rate": 2.601127939843209e-06, "loss": 0.5781681060791015, "num_tokens": 4765435504.0, "step": 39020 }, { "epoch": 0.05205083489325846, "grad_norm": 2.078125, "learning_rate": 2.602461202069223e-06, "loss": 0.5759788513183594, "num_tokens": 4767864504.0, "step": 39040 }, { "epoch": 0.05207750027998656, "grad_norm": 1.9921875, "learning_rate": 2.6037944642952378e-06, "loss": 0.5559906959533691, "num_tokens": 4770383485.0, "step": 39060 }, { "epoch": 0.052104165666714664, "grad_norm": 2.03125, "learning_rate": 2.6051277265212525e-06, "loss": 0.5642509937286377, "num_tokens": 4772757679.0, "step": 39080 }, { "epoch": 0.05213083105344277, "grad_norm": 2.15625, "learning_rate": 2.6064609887472668e-06, "loss": 0.5725903987884522, "num_tokens": 4775066084.0, "step": 39100 }, { "epoch": 0.05215749644017087, "grad_norm": 2.25, "learning_rate": 2.607794250973282e-06, "loss": 0.5687328338623047, "num_tokens": 4777394870.0, "step": 39120 }, { "epoch": 0.05218416182689897, "grad_norm": 1.7890625, "learning_rate": 2.609127513199296e-06, "loss": 0.5729855537414551, "num_tokens": 4780126064.0, "step": 39140 }, { "epoch": 0.05221082721362708, "grad_norm": 2.046875, "learning_rate": 2.610460775425311e-06, "loss": 0.5770554542541504, "num_tokens": 4782661445.0, "step": 39160 }, { "epoch": 0.052237492600355186, "grad_norm": 2.15625, "learning_rate": 2.6117940376513256e-06, "loss": 0.571690845489502, "num_tokens": 4785098048.0, "step": 39180 }, { "epoch": 0.05226415798708329, "grad_norm": 2.09375, "learning_rate": 2.61312729987734e-06, "loss": 0.5570829391479493, "num_tokens": 4787542024.0, "step": 39200 }, { "epoch": 0.05229082337381139, "grad_norm": 2.109375, "learning_rate": 2.614460562103355e-06, "loss": 0.5710809230804443, "num_tokens": 4790065278.0, "step": 39220 }, { "epoch": 0.052317488760539495, "grad_norm": 1.7421875, "learning_rate": 2.6157938243293697e-06, "loss": 0.5635059356689454, "num_tokens": 4792612171.0, "step": 39240 }, { "epoch": 0.0523441541472676, "grad_norm": 2.234375, "learning_rate": 2.617127086555384e-06, "loss": 0.5642606258392334, "num_tokens": 4795115032.0, "step": 39260 }, { "epoch": 0.0523708195339957, "grad_norm": 2.015625, "learning_rate": 2.6184603487813986e-06, "loss": 0.5632792472839355, "num_tokens": 4797705585.0, "step": 39280 }, { "epoch": 0.052397484920723804, "grad_norm": 1.5859375, "learning_rate": 2.619793611007413e-06, "loss": 0.576460313796997, "num_tokens": 4800254361.0, "step": 39300 }, { "epoch": 0.05242415030745191, "grad_norm": 1.6640625, "learning_rate": 2.6211268732334276e-06, "loss": 0.5845154285430908, "num_tokens": 4802546044.0, "step": 39320 }, { "epoch": 0.05245081569418001, "grad_norm": 2.078125, "learning_rate": 2.6224601354594427e-06, "loss": 0.5578715801239014, "num_tokens": 4804921550.0, "step": 39340 }, { "epoch": 0.05247748108090811, "grad_norm": 1.6328125, "learning_rate": 2.623793397685457e-06, "loss": 0.5618347644805908, "num_tokens": 4807294563.0, "step": 39360 }, { "epoch": 0.05250414646763622, "grad_norm": 1.65625, "learning_rate": 2.6251266599114717e-06, "loss": 0.5769556999206543, "num_tokens": 4809638282.0, "step": 39380 }, { "epoch": 0.052530811854364326, "grad_norm": 1.8828125, "learning_rate": 2.6264599221374864e-06, "loss": 0.5754478454589844, "num_tokens": 4811965443.0, "step": 39400 }, { "epoch": 0.05255747724109243, "grad_norm": 2.0625, "learning_rate": 2.6277931843635007e-06, "loss": 0.5707569122314453, "num_tokens": 4814173125.0, "step": 39420 }, { "epoch": 0.05258414262782053, "grad_norm": 1.765625, "learning_rate": 2.6291264465895154e-06, "loss": 0.5719766616821289, "num_tokens": 4816729527.0, "step": 39440 }, { "epoch": 0.052610808014548635, "grad_norm": 2.4375, "learning_rate": 2.6304597088155297e-06, "loss": 0.5723834037780762, "num_tokens": 4819150044.0, "step": 39460 }, { "epoch": 0.05263747340127674, "grad_norm": 2.1875, "learning_rate": 2.631792971041545e-06, "loss": 0.5767436027526855, "num_tokens": 4821509090.0, "step": 39480 }, { "epoch": 0.05266413878800484, "grad_norm": 1.359375, "learning_rate": 2.6331262332675595e-06, "loss": 0.5833814144134521, "num_tokens": 4824245204.0, "step": 39500 }, { "epoch": 0.052690804174732944, "grad_norm": 1.9921875, "learning_rate": 2.634459495493574e-06, "loss": 0.5712910652160644, "num_tokens": 4826675158.0, "step": 39520 }, { "epoch": 0.05271746956146105, "grad_norm": 1.8828125, "learning_rate": 2.6357927577195885e-06, "loss": 0.5418304443359375, "num_tokens": 4829034716.0, "step": 39540 }, { "epoch": 0.05274413494818915, "grad_norm": 1.890625, "learning_rate": 2.637126019945603e-06, "loss": 0.5870815277099609, "num_tokens": 4831409509.0, "step": 39560 }, { "epoch": 0.05277080033491726, "grad_norm": 3.046875, "learning_rate": 2.6384592821716175e-06, "loss": 0.5674934387207031, "num_tokens": 4833855618.0, "step": 39580 }, { "epoch": 0.05279746572164536, "grad_norm": 1.375, "learning_rate": 2.6397925443976326e-06, "loss": 0.5710509300231934, "num_tokens": 4836382527.0, "step": 39600 }, { "epoch": 0.052824131108373466, "grad_norm": 1.953125, "learning_rate": 2.641125806623647e-06, "loss": 0.5832141876220703, "num_tokens": 4838817150.0, "step": 39620 }, { "epoch": 0.05285079649510157, "grad_norm": 2.140625, "learning_rate": 2.6424590688496616e-06, "loss": 0.5703206062316895, "num_tokens": 4841102518.0, "step": 39640 }, { "epoch": 0.05287746188182967, "grad_norm": 1.828125, "learning_rate": 2.6437923310756763e-06, "loss": 0.555994987487793, "num_tokens": 4843477363.0, "step": 39660 }, { "epoch": 0.052904127268557775, "grad_norm": 1.8671875, "learning_rate": 2.6451255933016906e-06, "loss": 0.5837357044219971, "num_tokens": 4845963276.0, "step": 39680 }, { "epoch": 0.05293079265528588, "grad_norm": 2.109375, "learning_rate": 2.6464588555277053e-06, "loss": 0.563063669204712, "num_tokens": 4848559200.0, "step": 39700 }, { "epoch": 0.05295745804201398, "grad_norm": 1.90625, "learning_rate": 2.6477921177537204e-06, "loss": 0.5453168869018554, "num_tokens": 4851011214.0, "step": 39720 }, { "epoch": 0.052984123428742085, "grad_norm": 1.65625, "learning_rate": 2.6491253799797347e-06, "loss": 0.5663634777069092, "num_tokens": 4853364356.0, "step": 39740 }, { "epoch": 0.05301078881547019, "grad_norm": 2.265625, "learning_rate": 2.6504586422057494e-06, "loss": 0.579893970489502, "num_tokens": 4855841077.0, "step": 39760 }, { "epoch": 0.0530374542021983, "grad_norm": 2.359375, "learning_rate": 2.6517919044317636e-06, "loss": 0.5706693172454834, "num_tokens": 4858313512.0, "step": 39780 }, { "epoch": 0.0530641195889264, "grad_norm": 2.25, "learning_rate": 2.6531251666577783e-06, "loss": 0.5599720478057861, "num_tokens": 4860679189.0, "step": 39800 }, { "epoch": 0.053090784975654504, "grad_norm": 2.28125, "learning_rate": 2.654458428883793e-06, "loss": 0.5648465156555176, "num_tokens": 4863072212.0, "step": 39820 }, { "epoch": 0.05311745036238261, "grad_norm": 2.390625, "learning_rate": 2.6557916911098073e-06, "loss": 0.5882476806640625, "num_tokens": 4865602713.0, "step": 39840 }, { "epoch": 0.05314411574911071, "grad_norm": 2.359375, "learning_rate": 2.6571249533358224e-06, "loss": 0.5829530239105225, "num_tokens": 4867993767.0, "step": 39860 }, { "epoch": 0.05317078113583881, "grad_norm": 1.921875, "learning_rate": 2.658458215561837e-06, "loss": 0.5704826354980469, "num_tokens": 4870393559.0, "step": 39880 }, { "epoch": 0.053197446522566916, "grad_norm": 2.046875, "learning_rate": 2.6597914777878514e-06, "loss": 0.5651288986206054, "num_tokens": 4872706517.0, "step": 39900 }, { "epoch": 0.05322411190929502, "grad_norm": 2.171875, "learning_rate": 2.661124740013866e-06, "loss": 0.5710834980010986, "num_tokens": 4875331424.0, "step": 39920 }, { "epoch": 0.05325077729602312, "grad_norm": 2.203125, "learning_rate": 2.6624580022398804e-06, "loss": 0.569947624206543, "num_tokens": 4877737688.0, "step": 39940 }, { "epoch": 0.053277442682751225, "grad_norm": 1.984375, "learning_rate": 2.663791264465895e-06, "loss": 0.5565849304199219, "num_tokens": 4880266327.0, "step": 39960 }, { "epoch": 0.053304108069479335, "grad_norm": 1.9296875, "learning_rate": 2.6651245266919102e-06, "loss": 0.5668535232543945, "num_tokens": 4882789719.0, "step": 39980 }, { "epoch": 0.05333077345620744, "grad_norm": 1.8984375, "learning_rate": 2.6664577889179245e-06, "loss": 0.5816582679748535, "num_tokens": 4885299602.0, "step": 40000 }, { "epoch": 0.05335743884293554, "grad_norm": 2.25, "learning_rate": 2.667791051143939e-06, "loss": 0.5619098663330078, "num_tokens": 4887668369.0, "step": 40020 }, { "epoch": 0.053384104229663644, "grad_norm": 2.390625, "learning_rate": 2.669124313369954e-06, "loss": 0.5731304168701172, "num_tokens": 4890044030.0, "step": 40040 }, { "epoch": 0.05341076961639175, "grad_norm": 1.859375, "learning_rate": 2.670457575595968e-06, "loss": 0.5761724472045898, "num_tokens": 4892389262.0, "step": 40060 }, { "epoch": 0.05343743500311985, "grad_norm": 2.21875, "learning_rate": 2.6717908378219833e-06, "loss": 0.5661208152770996, "num_tokens": 4894801856.0, "step": 40080 }, { "epoch": 0.05346410038984795, "grad_norm": 1.6015625, "learning_rate": 2.6731241000479976e-06, "loss": 0.5548598289489746, "num_tokens": 4897195977.0, "step": 40100 }, { "epoch": 0.053490765776576056, "grad_norm": 1.953125, "learning_rate": 2.6744573622740123e-06, "loss": 0.5523515701293945, "num_tokens": 4899663438.0, "step": 40120 }, { "epoch": 0.05351743116330416, "grad_norm": 2.1875, "learning_rate": 2.675790624500027e-06, "loss": 0.5757133483886718, "num_tokens": 4901966260.0, "step": 40140 }, { "epoch": 0.05354409655003226, "grad_norm": 1.84375, "learning_rate": 2.6771238867260413e-06, "loss": 0.5623663902282715, "num_tokens": 4904549796.0, "step": 40160 }, { "epoch": 0.05357076193676037, "grad_norm": 1.8671875, "learning_rate": 2.678457148952056e-06, "loss": 0.573157262802124, "num_tokens": 4906897283.0, "step": 40180 }, { "epoch": 0.053597427323488475, "grad_norm": 1.96875, "learning_rate": 2.679790411178071e-06, "loss": 0.5727049350738526, "num_tokens": 4909535103.0, "step": 40200 }, { "epoch": 0.05362409271021658, "grad_norm": 2.109375, "learning_rate": 2.6811236734040854e-06, "loss": 0.5506998062133789, "num_tokens": 4912016795.0, "step": 40220 }, { "epoch": 0.05365075809694468, "grad_norm": 1.75, "learning_rate": 2.6824569356301e-06, "loss": 0.5660148620605469, "num_tokens": 4914221971.0, "step": 40240 }, { "epoch": 0.053677423483672784, "grad_norm": 2.359375, "learning_rate": 2.6837901978561148e-06, "loss": 0.5559757232666016, "num_tokens": 4916509154.0, "step": 40260 }, { "epoch": 0.05370408887040089, "grad_norm": 2.015625, "learning_rate": 2.685123460082129e-06, "loss": 0.5957226753234863, "num_tokens": 4918745603.0, "step": 40280 }, { "epoch": 0.05373075425712899, "grad_norm": 2.0, "learning_rate": 2.6864567223081438e-06, "loss": 0.5739258766174317, "num_tokens": 4921309585.0, "step": 40300 }, { "epoch": 0.05375741964385709, "grad_norm": 2.09375, "learning_rate": 2.687789984534158e-06, "loss": 0.5725159645080566, "num_tokens": 4923878227.0, "step": 40320 }, { "epoch": 0.053784085030585196, "grad_norm": 2.0, "learning_rate": 2.689123246760173e-06, "loss": 0.5883553504943848, "num_tokens": 4926146758.0, "step": 40340 }, { "epoch": 0.0538107504173133, "grad_norm": 1.9296875, "learning_rate": 2.690456508986188e-06, "loss": 0.5548114776611328, "num_tokens": 4928605090.0, "step": 40360 }, { "epoch": 0.05383741580404141, "grad_norm": 1.984375, "learning_rate": 2.691789771212202e-06, "loss": 0.562455701828003, "num_tokens": 4931024404.0, "step": 40380 }, { "epoch": 0.05386408119076951, "grad_norm": 1.921875, "learning_rate": 2.693123033438217e-06, "loss": 0.5754639148712158, "num_tokens": 4933707126.0, "step": 40400 }, { "epoch": 0.053890746577497615, "grad_norm": 2.03125, "learning_rate": 2.6944562956642315e-06, "loss": 0.5438894748687744, "num_tokens": 4936050447.0, "step": 40420 }, { "epoch": 0.05391741196422572, "grad_norm": 1.7734375, "learning_rate": 2.695789557890246e-06, "loss": 0.565340518951416, "num_tokens": 4938481181.0, "step": 40440 }, { "epoch": 0.05394407735095382, "grad_norm": 2.109375, "learning_rate": 2.697122820116261e-06, "loss": 0.5602404117584229, "num_tokens": 4941027925.0, "step": 40460 }, { "epoch": 0.053970742737681925, "grad_norm": 1.9453125, "learning_rate": 2.6984560823422752e-06, "loss": 0.5580632209777832, "num_tokens": 4943467274.0, "step": 40480 }, { "epoch": 0.05399740812441003, "grad_norm": 1.765625, "learning_rate": 2.69978934456829e-06, "loss": 0.5568287372589111, "num_tokens": 4945972417.0, "step": 40500 }, { "epoch": 0.05402407351113813, "grad_norm": 1.890625, "learning_rate": 2.7011226067943046e-06, "loss": 0.5837249279022216, "num_tokens": 4948168401.0, "step": 40520 }, { "epoch": 0.054050738897866234, "grad_norm": 1.7265625, "learning_rate": 2.702455869020319e-06, "loss": 0.5569504737854004, "num_tokens": 4950485653.0, "step": 40540 }, { "epoch": 0.05407740428459434, "grad_norm": 1.9140625, "learning_rate": 2.7037891312463336e-06, "loss": 0.5643914222717286, "num_tokens": 4953082944.0, "step": 40560 }, { "epoch": 0.05410406967132244, "grad_norm": 2.015625, "learning_rate": 2.7051223934723487e-06, "loss": 0.5709804058074951, "num_tokens": 4955383417.0, "step": 40580 }, { "epoch": 0.05413073505805055, "grad_norm": 2.515625, "learning_rate": 2.706455655698363e-06, "loss": 0.5701814651489258, "num_tokens": 4958095523.0, "step": 40600 }, { "epoch": 0.05415740044477865, "grad_norm": 1.734375, "learning_rate": 2.7077889179243777e-06, "loss": 0.5548561573028564, "num_tokens": 4960539397.0, "step": 40620 }, { "epoch": 0.054184065831506756, "grad_norm": 2.359375, "learning_rate": 2.709122180150392e-06, "loss": 0.5495084285736084, "num_tokens": 4962863721.0, "step": 40640 }, { "epoch": 0.05421073121823486, "grad_norm": 2.171875, "learning_rate": 2.7104554423764067e-06, "loss": 0.5765475273132324, "num_tokens": 4965566287.0, "step": 40660 }, { "epoch": 0.05423739660496296, "grad_norm": 2.390625, "learning_rate": 2.7117887046024214e-06, "loss": 0.5607692718505859, "num_tokens": 4968255614.0, "step": 40680 }, { "epoch": 0.054264061991691065, "grad_norm": 1.96875, "learning_rate": 2.7131219668284357e-06, "loss": 0.5672411918640137, "num_tokens": 4970650585.0, "step": 40700 }, { "epoch": 0.05429072737841917, "grad_norm": 1.953125, "learning_rate": 2.714455229054451e-06, "loss": 0.5646695613861084, "num_tokens": 4973217822.0, "step": 40720 }, { "epoch": 0.05431739276514727, "grad_norm": 2.0625, "learning_rate": 2.7157884912804655e-06, "loss": 0.5701622009277344, "num_tokens": 4975413203.0, "step": 40740 }, { "epoch": 0.054344058151875374, "grad_norm": 2.15625, "learning_rate": 2.7171217535064798e-06, "loss": 0.5590911865234375, "num_tokens": 4977866538.0, "step": 40760 }, { "epoch": 0.05437072353860348, "grad_norm": 1.7578125, "learning_rate": 2.7184550157324945e-06, "loss": 0.5687292098999024, "num_tokens": 4980320669.0, "step": 40780 }, { "epoch": 0.05439738892533159, "grad_norm": 2.03125, "learning_rate": 2.7197882779585088e-06, "loss": 0.5872002601623535, "num_tokens": 4982835692.0, "step": 40800 }, { "epoch": 0.05442405431205969, "grad_norm": 2.0, "learning_rate": 2.721121540184524e-06, "loss": 0.575001335144043, "num_tokens": 4985114807.0, "step": 40820 }, { "epoch": 0.05445071969878779, "grad_norm": 2.328125, "learning_rate": 2.7224548024105386e-06, "loss": 0.5624849796295166, "num_tokens": 4987567789.0, "step": 40840 }, { "epoch": 0.054477385085515896, "grad_norm": 1.7890625, "learning_rate": 2.723788064636553e-06, "loss": 0.5705245971679688, "num_tokens": 4990045786.0, "step": 40860 }, { "epoch": 0.054504050472244, "grad_norm": 1.9375, "learning_rate": 2.7251213268625676e-06, "loss": 0.5646733283996582, "num_tokens": 4992445702.0, "step": 40880 }, { "epoch": 0.0545307158589721, "grad_norm": 2.21875, "learning_rate": 2.7264545890885823e-06, "loss": 0.559202003479004, "num_tokens": 4994986602.0, "step": 40900 }, { "epoch": 0.054557381245700205, "grad_norm": 2.21875, "learning_rate": 2.7277878513145965e-06, "loss": 0.5658269882202148, "num_tokens": 4997229099.0, "step": 40920 }, { "epoch": 0.05458404663242831, "grad_norm": 2.1875, "learning_rate": 2.7291211135406117e-06, "loss": 0.5725070476531983, "num_tokens": 4999745718.0, "step": 40940 }, { "epoch": 0.05461071201915641, "grad_norm": 2.171875, "learning_rate": 2.730454375766626e-06, "loss": 0.5690284729003906, "num_tokens": 5002310098.0, "step": 40960 }, { "epoch": 0.054637377405884514, "grad_norm": 2.359375, "learning_rate": 2.7317876379926406e-06, "loss": 0.5561964035034179, "num_tokens": 5004558832.0, "step": 40980 }, { "epoch": 0.054664042792612624, "grad_norm": 2.0625, "learning_rate": 2.7331209002186553e-06, "loss": 0.5606954574584961, "num_tokens": 5007143521.0, "step": 41000 }, { "epoch": 0.05469070817934073, "grad_norm": 1.7578125, "learning_rate": 2.7344541624446696e-06, "loss": 0.571602725982666, "num_tokens": 5009457858.0, "step": 41020 }, { "epoch": 0.05471737356606883, "grad_norm": 1.8828125, "learning_rate": 2.7357874246706843e-06, "loss": 0.5675442218780518, "num_tokens": 5011828506.0, "step": 41040 }, { "epoch": 0.05474403895279693, "grad_norm": 1.9453125, "learning_rate": 2.7371206868966995e-06, "loss": 0.5609289169311523, "num_tokens": 5014181154.0, "step": 41060 }, { "epoch": 0.054770704339525036, "grad_norm": 1.7578125, "learning_rate": 2.7384539491227137e-06, "loss": 0.554793119430542, "num_tokens": 5016488689.0, "step": 41080 }, { "epoch": 0.05479736972625314, "grad_norm": 1.90625, "learning_rate": 2.7397872113487284e-06, "loss": 0.5576037406921387, "num_tokens": 5018697000.0, "step": 41100 }, { "epoch": 0.05482403511298124, "grad_norm": 1.890625, "learning_rate": 2.7411204735747427e-06, "loss": 0.5751092433929443, "num_tokens": 5021033463.0, "step": 41120 }, { "epoch": 0.054850700499709346, "grad_norm": 2.25, "learning_rate": 2.7424537358007574e-06, "loss": 0.5604073524475097, "num_tokens": 5023370215.0, "step": 41140 }, { "epoch": 0.05487736588643745, "grad_norm": 2.5625, "learning_rate": 2.743786998026772e-06, "loss": 0.5703061103820801, "num_tokens": 5025904917.0, "step": 41160 }, { "epoch": 0.05490403127316555, "grad_norm": 1.890625, "learning_rate": 2.7451202602527864e-06, "loss": 0.5621533393859863, "num_tokens": 5028294736.0, "step": 41180 }, { "epoch": 0.05493069665989366, "grad_norm": 1.9921875, "learning_rate": 2.7464535224788015e-06, "loss": 0.5639982700347901, "num_tokens": 5030832567.0, "step": 41200 }, { "epoch": 0.054957362046621765, "grad_norm": 1.8125, "learning_rate": 2.7477867847048162e-06, "loss": 0.5688241481781006, "num_tokens": 5033531640.0, "step": 41220 }, { "epoch": 0.05498402743334987, "grad_norm": 1.96875, "learning_rate": 2.7491200469308305e-06, "loss": 0.5693353176116943, "num_tokens": 5036194197.0, "step": 41240 }, { "epoch": 0.05501069282007797, "grad_norm": 2.046875, "learning_rate": 2.750453309156845e-06, "loss": 0.5748385429382324, "num_tokens": 5038474080.0, "step": 41260 }, { "epoch": 0.055037358206806074, "grad_norm": 2.0625, "learning_rate": 2.7517865713828595e-06, "loss": 0.5424162864685058, "num_tokens": 5040846134.0, "step": 41280 }, { "epoch": 0.05506402359353418, "grad_norm": 1.796875, "learning_rate": 2.753119833608874e-06, "loss": 0.5703257560729981, "num_tokens": 5043329489.0, "step": 41300 }, { "epoch": 0.05509068898026228, "grad_norm": 2.515625, "learning_rate": 2.7544530958348893e-06, "loss": 0.5511187553405762, "num_tokens": 5045865483.0, "step": 41320 }, { "epoch": 0.05511735436699038, "grad_norm": 1.9140625, "learning_rate": 2.7557863580609036e-06, "loss": 0.5613036155700684, "num_tokens": 5048253118.0, "step": 41340 }, { "epoch": 0.055144019753718486, "grad_norm": 2.046875, "learning_rate": 2.7571196202869183e-06, "loss": 0.5738344192504883, "num_tokens": 5050893082.0, "step": 41360 }, { "epoch": 0.05517068514044659, "grad_norm": 1.9375, "learning_rate": 2.758452882512933e-06, "loss": 0.5773234844207764, "num_tokens": 5053248067.0, "step": 41380 }, { "epoch": 0.0551973505271747, "grad_norm": 2.0, "learning_rate": 2.7597861447389473e-06, "loss": 0.5554906845092773, "num_tokens": 5055713634.0, "step": 41400 }, { "epoch": 0.0552240159139028, "grad_norm": 2.265625, "learning_rate": 2.761119406964962e-06, "loss": 0.5427943229675293, "num_tokens": 5058230112.0, "step": 41420 }, { "epoch": 0.055250681300630905, "grad_norm": 1.9140625, "learning_rate": 2.7624526691909762e-06, "loss": 0.555330467224121, "num_tokens": 5060891548.0, "step": 41440 }, { "epoch": 0.05527734668735901, "grad_norm": 2.109375, "learning_rate": 2.7637859314169914e-06, "loss": 0.5740046024322509, "num_tokens": 5063445175.0, "step": 41460 }, { "epoch": 0.05530401207408711, "grad_norm": 2.421875, "learning_rate": 2.765119193643006e-06, "loss": 0.5771974563598633, "num_tokens": 5065813405.0, "step": 41480 }, { "epoch": 0.055330677460815214, "grad_norm": 2.109375, "learning_rate": 2.7664524558690203e-06, "loss": 0.5850881576538086, "num_tokens": 5068230781.0, "step": 41500 }, { "epoch": 0.05535734284754332, "grad_norm": 1.984375, "learning_rate": 2.767785718095035e-06, "loss": 0.5724714279174805, "num_tokens": 5070879266.0, "step": 41520 }, { "epoch": 0.05538400823427142, "grad_norm": 1.953125, "learning_rate": 2.76911898032105e-06, "loss": 0.5530391693115234, "num_tokens": 5073066934.0, "step": 41540 }, { "epoch": 0.05541067362099952, "grad_norm": 2.0625, "learning_rate": 2.7704522425470645e-06, "loss": 0.5655012130737305, "num_tokens": 5075428521.0, "step": 41560 }, { "epoch": 0.055437339007727626, "grad_norm": 1.96875, "learning_rate": 2.771785504773079e-06, "loss": 0.5647048950195312, "num_tokens": 5077797929.0, "step": 41580 }, { "epoch": 0.055464004394455736, "grad_norm": 2.40625, "learning_rate": 2.773118766999094e-06, "loss": 0.5449264526367188, "num_tokens": 5079961634.0, "step": 41600 }, { "epoch": 0.05549066978118384, "grad_norm": 1.6015625, "learning_rate": 2.774452029225108e-06, "loss": 0.5550439834594727, "num_tokens": 5082179904.0, "step": 41620 }, { "epoch": 0.05551733516791194, "grad_norm": 1.9765625, "learning_rate": 2.775785291451123e-06, "loss": 0.5725230216979981, "num_tokens": 5084640727.0, "step": 41640 }, { "epoch": 0.055544000554640045, "grad_norm": 2.296875, "learning_rate": 2.777118553677137e-06, "loss": 0.574660062789917, "num_tokens": 5087244105.0, "step": 41660 }, { "epoch": 0.05557066594136815, "grad_norm": 1.9140625, "learning_rate": 2.7784518159031522e-06, "loss": 0.5741231918334961, "num_tokens": 5089711707.0, "step": 41680 }, { "epoch": 0.05559733132809625, "grad_norm": 1.9765625, "learning_rate": 2.779785078129167e-06, "loss": 0.5752320766448975, "num_tokens": 5092230835.0, "step": 41700 }, { "epoch": 0.055623996714824354, "grad_norm": 2.1875, "learning_rate": 2.7811183403551812e-06, "loss": 0.5594711780548096, "num_tokens": 5094858825.0, "step": 41720 }, { "epoch": 0.05565066210155246, "grad_norm": 2.171875, "learning_rate": 2.782451602581196e-06, "loss": 0.5508533954620362, "num_tokens": 5097126964.0, "step": 41740 }, { "epoch": 0.05567732748828056, "grad_norm": 2.0625, "learning_rate": 2.7837848648072106e-06, "loss": 0.5618049621582031, "num_tokens": 5099559764.0, "step": 41760 }, { "epoch": 0.055703992875008664, "grad_norm": 1.953125, "learning_rate": 2.785118127033225e-06, "loss": 0.5706851959228516, "num_tokens": 5102156466.0, "step": 41780 }, { "epoch": 0.05573065826173677, "grad_norm": 2.171875, "learning_rate": 2.78645138925924e-06, "loss": 0.5635170936584473, "num_tokens": 5104560004.0, "step": 41800 }, { "epoch": 0.05575732364846488, "grad_norm": 1.90625, "learning_rate": 2.7877846514852543e-06, "loss": 0.5759524345397949, "num_tokens": 5106962789.0, "step": 41820 }, { "epoch": 0.05578398903519298, "grad_norm": 1.8125, "learning_rate": 2.789117913711269e-06, "loss": 0.5602315902709961, "num_tokens": 5109461682.0, "step": 41840 }, { "epoch": 0.05581065442192108, "grad_norm": 1.75, "learning_rate": 2.7904511759372837e-06, "loss": 0.5308775901794434, "num_tokens": 5111983109.0, "step": 41860 }, { "epoch": 0.055837319808649186, "grad_norm": 1.8671875, "learning_rate": 2.791784438163298e-06, "loss": 0.5494311809539795, "num_tokens": 5114372207.0, "step": 41880 }, { "epoch": 0.05586398519537729, "grad_norm": 1.8515625, "learning_rate": 2.7931177003893127e-06, "loss": 0.5588988780975341, "num_tokens": 5116809734.0, "step": 41900 }, { "epoch": 0.05589065058210539, "grad_norm": 1.953125, "learning_rate": 2.794450962615328e-06, "loss": 0.5613656997680664, "num_tokens": 5119346909.0, "step": 41920 }, { "epoch": 0.055917315968833495, "grad_norm": 1.8125, "learning_rate": 2.795784224841342e-06, "loss": 0.5579114437103272, "num_tokens": 5121976446.0, "step": 41940 }, { "epoch": 0.0559439813555616, "grad_norm": 1.4296875, "learning_rate": 2.7971174870673568e-06, "loss": 0.5527019500732422, "num_tokens": 5124305496.0, "step": 41960 }, { "epoch": 0.0559706467422897, "grad_norm": 1.9921875, "learning_rate": 2.798450749293371e-06, "loss": 0.5513127326965332, "num_tokens": 5126643299.0, "step": 41980 }, { "epoch": 0.055997312129017804, "grad_norm": 2.078125, "learning_rate": 2.7997840115193858e-06, "loss": 0.5602551460266113, "num_tokens": 5129019367.0, "step": 42000 }, { "epoch": 0.056023977515745914, "grad_norm": 1.8125, "learning_rate": 2.8011172737454005e-06, "loss": 0.5557358264923096, "num_tokens": 5131527097.0, "step": 42020 }, { "epoch": 0.05605064290247402, "grad_norm": 1.7890625, "learning_rate": 2.8024505359714147e-06, "loss": 0.5615857124328614, "num_tokens": 5133901727.0, "step": 42040 }, { "epoch": 0.05607730828920212, "grad_norm": 1.9609375, "learning_rate": 2.80378379819743e-06, "loss": 0.5674990653991699, "num_tokens": 5136412579.0, "step": 42060 }, { "epoch": 0.05610397367593022, "grad_norm": 2.390625, "learning_rate": 2.8051170604234446e-06, "loss": 0.5626194953918457, "num_tokens": 5138838514.0, "step": 42080 }, { "epoch": 0.056130639062658326, "grad_norm": 1.921875, "learning_rate": 2.806450322649459e-06, "loss": 0.5678108215332032, "num_tokens": 5141234752.0, "step": 42100 }, { "epoch": 0.05615730444938643, "grad_norm": 2.5, "learning_rate": 2.8077835848754736e-06, "loss": 0.5785044670104981, "num_tokens": 5143660188.0, "step": 42120 }, { "epoch": 0.05618396983611453, "grad_norm": 2.25, "learning_rate": 2.809116847101488e-06, "loss": 0.5507454872131348, "num_tokens": 5146294273.0, "step": 42140 }, { "epoch": 0.056210635222842635, "grad_norm": 1.78125, "learning_rate": 2.8104501093275025e-06, "loss": 0.5660810947418213, "num_tokens": 5148784479.0, "step": 42160 }, { "epoch": 0.05623730060957074, "grad_norm": 2.0625, "learning_rate": 2.8117833715535177e-06, "loss": 0.5720130920410156, "num_tokens": 5151218615.0, "step": 42180 }, { "epoch": 0.05626396599629884, "grad_norm": 2.3125, "learning_rate": 2.813116633779532e-06, "loss": 0.5705379962921142, "num_tokens": 5153646549.0, "step": 42200 }, { "epoch": 0.05629063138302695, "grad_norm": 2.09375, "learning_rate": 2.8144498960055466e-06, "loss": 0.5746924400329589, "num_tokens": 5155877092.0, "step": 42220 }, { "epoch": 0.056317296769755054, "grad_norm": 2.421875, "learning_rate": 2.8157831582315613e-06, "loss": 0.5502326965332032, "num_tokens": 5158639320.0, "step": 42240 }, { "epoch": 0.05634396215648316, "grad_norm": 2.515625, "learning_rate": 2.8171164204575756e-06, "loss": 0.554074478149414, "num_tokens": 5160800504.0, "step": 42260 }, { "epoch": 0.05637062754321126, "grad_norm": 2.140625, "learning_rate": 2.8184496826835907e-06, "loss": 0.5464458465576172, "num_tokens": 5163466426.0, "step": 42280 }, { "epoch": 0.05639729292993936, "grad_norm": 2.125, "learning_rate": 2.8197829449096046e-06, "loss": 0.5542789459228515, "num_tokens": 5165823380.0, "step": 42300 }, { "epoch": 0.056423958316667466, "grad_norm": 2.125, "learning_rate": 2.8211162071356197e-06, "loss": 0.5588768482208252, "num_tokens": 5168203159.0, "step": 42320 }, { "epoch": 0.05645062370339557, "grad_norm": 1.828125, "learning_rate": 2.8224494693616344e-06, "loss": 0.5393937110900879, "num_tokens": 5170458019.0, "step": 42340 }, { "epoch": 0.05647728909012367, "grad_norm": 2.25, "learning_rate": 2.8237827315876487e-06, "loss": 0.5705527782440185, "num_tokens": 5172954275.0, "step": 42360 }, { "epoch": 0.056503954476851775, "grad_norm": 1.78125, "learning_rate": 2.8251159938136634e-06, "loss": 0.5466403961181641, "num_tokens": 5175452732.0, "step": 42380 }, { "epoch": 0.05653061986357988, "grad_norm": 2.109375, "learning_rate": 2.8264492560396785e-06, "loss": 0.5420970439910888, "num_tokens": 5177725371.0, "step": 42400 }, { "epoch": 0.05655728525030799, "grad_norm": 1.7578125, "learning_rate": 2.827782518265693e-06, "loss": 0.5501201629638672, "num_tokens": 5180225472.0, "step": 42420 }, { "epoch": 0.05658395063703609, "grad_norm": 2.359375, "learning_rate": 2.8291157804917075e-06, "loss": 0.5476831436157227, "num_tokens": 5182830487.0, "step": 42440 }, { "epoch": 0.056610616023764194, "grad_norm": 2.53125, "learning_rate": 2.8304490427177218e-06, "loss": 0.580449104309082, "num_tokens": 5185159275.0, "step": 42460 }, { "epoch": 0.0566372814104923, "grad_norm": 1.890625, "learning_rate": 2.8317823049437365e-06, "loss": 0.5788988590240478, "num_tokens": 5187280566.0, "step": 42480 }, { "epoch": 0.0566639467972204, "grad_norm": 1.78125, "learning_rate": 2.833115567169751e-06, "loss": 0.5515979766845703, "num_tokens": 5189316645.0, "step": 42500 }, { "epoch": 0.056690612183948504, "grad_norm": 1.90625, "learning_rate": 2.8344488293957655e-06, "loss": 0.5647371292114258, "num_tokens": 5191753127.0, "step": 42520 }, { "epoch": 0.05671727757067661, "grad_norm": 2.390625, "learning_rate": 2.8357820916217806e-06, "loss": 0.5497525215148926, "num_tokens": 5194116631.0, "step": 42540 }, { "epoch": 0.05674394295740471, "grad_norm": 1.96875, "learning_rate": 2.8371153538477953e-06, "loss": 0.546711540222168, "num_tokens": 5196614618.0, "step": 42560 }, { "epoch": 0.05677060834413281, "grad_norm": 1.859375, "learning_rate": 2.8384486160738096e-06, "loss": 0.5728527069091797, "num_tokens": 5199144495.0, "step": 42580 }, { "epoch": 0.056797273730860916, "grad_norm": 1.8828125, "learning_rate": 2.8397818782998243e-06, "loss": 0.5628312587738037, "num_tokens": 5201692417.0, "step": 42600 }, { "epoch": 0.056823939117589026, "grad_norm": 1.6875, "learning_rate": 2.8411151405258385e-06, "loss": 0.5654199123382568, "num_tokens": 5204199404.0, "step": 42620 }, { "epoch": 0.05685060450431713, "grad_norm": 2.21875, "learning_rate": 2.8424484027518533e-06, "loss": 0.537633752822876, "num_tokens": 5206582613.0, "step": 42640 }, { "epoch": 0.05687726989104523, "grad_norm": 1.9296875, "learning_rate": 2.8437816649778684e-06, "loss": 0.5602578163146973, "num_tokens": 5209122146.0, "step": 42660 }, { "epoch": 0.056903935277773335, "grad_norm": 2.15625, "learning_rate": 2.8451149272038827e-06, "loss": 0.5901079177856445, "num_tokens": 5211587686.0, "step": 42680 }, { "epoch": 0.05693060066450144, "grad_norm": 2.21875, "learning_rate": 2.8464481894298974e-06, "loss": 0.5670871734619141, "num_tokens": 5214025857.0, "step": 42700 }, { "epoch": 0.05695726605122954, "grad_norm": 2.4375, "learning_rate": 2.847781451655912e-06, "loss": 0.5693605422973633, "num_tokens": 5216537246.0, "step": 42720 }, { "epoch": 0.056983931437957644, "grad_norm": 1.8671875, "learning_rate": 2.8491147138819263e-06, "loss": 0.5489970684051514, "num_tokens": 5219033956.0, "step": 42740 }, { "epoch": 0.05701059682468575, "grad_norm": 2.0625, "learning_rate": 2.850447976107941e-06, "loss": 0.566986083984375, "num_tokens": 5221249253.0, "step": 42760 }, { "epoch": 0.05703726221141385, "grad_norm": 2.765625, "learning_rate": 2.8517812383339553e-06, "loss": 0.550727128982544, "num_tokens": 5223784730.0, "step": 42780 }, { "epoch": 0.05706392759814195, "grad_norm": 1.75, "learning_rate": 2.8531145005599704e-06, "loss": 0.543402624130249, "num_tokens": 5226182153.0, "step": 42800 }, { "epoch": 0.05709059298487006, "grad_norm": 2.15625, "learning_rate": 2.854447762785985e-06, "loss": 0.5549656391143799, "num_tokens": 5228840001.0, "step": 42820 }, { "epoch": 0.057117258371598166, "grad_norm": 2.1875, "learning_rate": 2.8557810250119994e-06, "loss": 0.5614211082458496, "num_tokens": 5231270281.0, "step": 42840 }, { "epoch": 0.05714392375832627, "grad_norm": 1.875, "learning_rate": 2.857114287238014e-06, "loss": 0.5510891914367676, "num_tokens": 5233668695.0, "step": 42860 }, { "epoch": 0.05717058914505437, "grad_norm": 2.015625, "learning_rate": 2.858447549464029e-06, "loss": 0.5499856472015381, "num_tokens": 5236046935.0, "step": 42880 }, { "epoch": 0.057197254531782475, "grad_norm": 1.7265625, "learning_rate": 2.859780811690043e-06, "loss": 0.5445239067077636, "num_tokens": 5238634622.0, "step": 42900 }, { "epoch": 0.05722391991851058, "grad_norm": 1.9375, "learning_rate": 2.8611140739160582e-06, "loss": 0.5638851165771485, "num_tokens": 5240928612.0, "step": 42920 }, { "epoch": 0.05725058530523868, "grad_norm": 2.125, "learning_rate": 2.862447336142073e-06, "loss": 0.573390531539917, "num_tokens": 5243438320.0, "step": 42940 }, { "epoch": 0.057277250691966784, "grad_norm": 2.21875, "learning_rate": 2.863780598368087e-06, "loss": 0.5602263450622559, "num_tokens": 5245653296.0, "step": 42960 }, { "epoch": 0.05730391607869489, "grad_norm": 1.984375, "learning_rate": 2.865113860594102e-06, "loss": 0.5564072608947754, "num_tokens": 5248031069.0, "step": 42980 }, { "epoch": 0.05733058146542299, "grad_norm": 1.8125, "learning_rate": 2.866447122820116e-06, "loss": 0.5480839729309082, "num_tokens": 5250540486.0, "step": 43000 }, { "epoch": 0.05735724685215109, "grad_norm": 1.8203125, "learning_rate": 2.8677803850461313e-06, "loss": 0.5549724578857422, "num_tokens": 5253135983.0, "step": 43020 }, { "epoch": 0.0573839122388792, "grad_norm": 2.0625, "learning_rate": 2.869113647272146e-06, "loss": 0.5693498611450195, "num_tokens": 5255726761.0, "step": 43040 }, { "epoch": 0.057410577625607306, "grad_norm": 2.109375, "learning_rate": 2.8704469094981603e-06, "loss": 0.5651771545410156, "num_tokens": 5258120337.0, "step": 43060 }, { "epoch": 0.05743724301233541, "grad_norm": 2.28125, "learning_rate": 2.871780171724175e-06, "loss": 0.5566565513610839, "num_tokens": 5260558461.0, "step": 43080 }, { "epoch": 0.05746390839906351, "grad_norm": 2.546875, "learning_rate": 2.8731134339501897e-06, "loss": 0.5649967193603516, "num_tokens": 5263063391.0, "step": 43100 }, { "epoch": 0.057490573785791615, "grad_norm": 1.84375, "learning_rate": 2.874446696176204e-06, "loss": 0.5601821899414062, "num_tokens": 5265595110.0, "step": 43120 }, { "epoch": 0.05751723917251972, "grad_norm": 2.125, "learning_rate": 2.875779958402219e-06, "loss": 0.561586570739746, "num_tokens": 5267998051.0, "step": 43140 }, { "epoch": 0.05754390455924782, "grad_norm": 2.015625, "learning_rate": 2.8771132206282334e-06, "loss": 0.5510809898376465, "num_tokens": 5270553223.0, "step": 43160 }, { "epoch": 0.057570569945975925, "grad_norm": 2.265625, "learning_rate": 2.878446482854248e-06, "loss": 0.5668904304504394, "num_tokens": 5273090008.0, "step": 43180 }, { "epoch": 0.05759723533270403, "grad_norm": 2.046875, "learning_rate": 2.8797797450802628e-06, "loss": 0.5454262733459473, "num_tokens": 5275546570.0, "step": 43200 }, { "epoch": 0.05762390071943213, "grad_norm": 1.75, "learning_rate": 2.881113007306277e-06, "loss": 0.5744504451751709, "num_tokens": 5277856362.0, "step": 43220 }, { "epoch": 0.05765056610616024, "grad_norm": 1.7265625, "learning_rate": 2.8824462695322918e-06, "loss": 0.5642081260681152, "num_tokens": 5280294980.0, "step": 43240 }, { "epoch": 0.057677231492888344, "grad_norm": 1.78125, "learning_rate": 2.883779531758307e-06, "loss": 0.5634406089782715, "num_tokens": 5282779603.0, "step": 43260 }, { "epoch": 0.05770389687961645, "grad_norm": 2.28125, "learning_rate": 2.885112793984321e-06, "loss": 0.5441810607910156, "num_tokens": 5285125841.0, "step": 43280 }, { "epoch": 0.05773056226634455, "grad_norm": 2.171875, "learning_rate": 2.886446056210336e-06, "loss": 0.5569170951843262, "num_tokens": 5287377957.0, "step": 43300 }, { "epoch": 0.05775722765307265, "grad_norm": 1.8671875, "learning_rate": 2.88777931843635e-06, "loss": 0.5573057174682617, "num_tokens": 5289779711.0, "step": 43320 }, { "epoch": 0.057783893039800756, "grad_norm": 1.8671875, "learning_rate": 2.889112580662365e-06, "loss": 0.5570324897766114, "num_tokens": 5292124616.0, "step": 43340 }, { "epoch": 0.05781055842652886, "grad_norm": 1.7890625, "learning_rate": 2.8904458428883795e-06, "loss": 0.5678677558898926, "num_tokens": 5294589461.0, "step": 43360 }, { "epoch": 0.05783722381325696, "grad_norm": 1.875, "learning_rate": 2.891779105114394e-06, "loss": 0.5549739837646485, "num_tokens": 5296956999.0, "step": 43380 }, { "epoch": 0.057863889199985065, "grad_norm": 2.234375, "learning_rate": 2.893112367340409e-06, "loss": 0.5917101383209229, "num_tokens": 5299378574.0, "step": 43400 }, { "epoch": 0.05789055458671317, "grad_norm": 2.5625, "learning_rate": 2.8944456295664236e-06, "loss": 0.5611878871917725, "num_tokens": 5301706719.0, "step": 43420 }, { "epoch": 0.05791721997344128, "grad_norm": 2.078125, "learning_rate": 2.895778891792438e-06, "loss": 0.5704119682312012, "num_tokens": 5304033350.0, "step": 43440 }, { "epoch": 0.05794388536016938, "grad_norm": 1.6796875, "learning_rate": 2.8971121540184526e-06, "loss": 0.5459627151489258, "num_tokens": 5306494975.0, "step": 43460 }, { "epoch": 0.057970550746897484, "grad_norm": 1.625, "learning_rate": 2.898445416244467e-06, "loss": 0.5618823528289795, "num_tokens": 5308942782.0, "step": 43480 }, { "epoch": 0.05799721613362559, "grad_norm": 1.8359375, "learning_rate": 2.8997786784704816e-06, "loss": 0.5761143684387207, "num_tokens": 5311380113.0, "step": 43500 }, { "epoch": 0.05802388152035369, "grad_norm": 2.0625, "learning_rate": 2.9011119406964967e-06, "loss": 0.5546536445617676, "num_tokens": 5313971744.0, "step": 43520 }, { "epoch": 0.05805054690708179, "grad_norm": 1.4140625, "learning_rate": 2.902445202922511e-06, "loss": 0.5514304161071777, "num_tokens": 5316478956.0, "step": 43540 }, { "epoch": 0.058077212293809896, "grad_norm": 2.40625, "learning_rate": 2.9037784651485257e-06, "loss": 0.5653586387634277, "num_tokens": 5318946165.0, "step": 43560 }, { "epoch": 0.058103877680538, "grad_norm": 2.40625, "learning_rate": 2.9051117273745404e-06, "loss": 0.5635555744171142, "num_tokens": 5321359381.0, "step": 43580 }, { "epoch": 0.0581305430672661, "grad_norm": 1.4609375, "learning_rate": 2.9064449896005547e-06, "loss": 0.5579904556274414, "num_tokens": 5323892414.0, "step": 43600 }, { "epoch": 0.058157208453994205, "grad_norm": 2.09375, "learning_rate": 2.9077782518265694e-06, "loss": 0.5437138557434082, "num_tokens": 5326450819.0, "step": 43620 }, { "epoch": 0.058183873840722315, "grad_norm": 2.015625, "learning_rate": 2.9091115140525837e-06, "loss": 0.5554631233215332, "num_tokens": 5329060913.0, "step": 43640 }, { "epoch": 0.05821053922745042, "grad_norm": 2.3125, "learning_rate": 2.910444776278599e-06, "loss": 0.552609920501709, "num_tokens": 5331624125.0, "step": 43660 }, { "epoch": 0.05823720461417852, "grad_norm": 1.875, "learning_rate": 2.9117780385046135e-06, "loss": 0.5439935207366944, "num_tokens": 5334201804.0, "step": 43680 }, { "epoch": 0.058263870000906624, "grad_norm": 1.6015625, "learning_rate": 2.9131113007306278e-06, "loss": 0.5510547637939454, "num_tokens": 5336684948.0, "step": 43700 }, { "epoch": 0.05829053538763473, "grad_norm": 2.203125, "learning_rate": 2.9144445629566425e-06, "loss": 0.5725919723510742, "num_tokens": 5339305701.0, "step": 43720 }, { "epoch": 0.05831720077436283, "grad_norm": 1.7890625, "learning_rate": 2.9157778251826576e-06, "loss": 0.5652130126953125, "num_tokens": 5341624519.0, "step": 43740 }, { "epoch": 0.05834386616109093, "grad_norm": 1.921875, "learning_rate": 2.9171110874086715e-06, "loss": 0.5436337471008301, "num_tokens": 5344174897.0, "step": 43760 }, { "epoch": 0.058370531547819036, "grad_norm": 2.109375, "learning_rate": 2.9184443496346866e-06, "loss": 0.5644166946411133, "num_tokens": 5346443303.0, "step": 43780 }, { "epoch": 0.05839719693454714, "grad_norm": 2.0625, "learning_rate": 2.919777611860701e-06, "loss": 0.5825698852539063, "num_tokens": 5348755372.0, "step": 43800 }, { "epoch": 0.05842386232127524, "grad_norm": 1.8828125, "learning_rate": 2.9211108740867156e-06, "loss": 0.5425711631774902, "num_tokens": 5351321297.0, "step": 43820 }, { "epoch": 0.05845052770800335, "grad_norm": 2.1875, "learning_rate": 2.9224441363127303e-06, "loss": 0.5418630599975586, "num_tokens": 5353835016.0, "step": 43840 }, { "epoch": 0.058477193094731456, "grad_norm": 1.7734375, "learning_rate": 2.9237773985387445e-06, "loss": 0.5532734394073486, "num_tokens": 5356198229.0, "step": 43860 }, { "epoch": 0.05850385848145956, "grad_norm": 2.0625, "learning_rate": 2.9251106607647597e-06, "loss": 0.5610249519348145, "num_tokens": 5358567406.0, "step": 43880 }, { "epoch": 0.05853052386818766, "grad_norm": 2.046875, "learning_rate": 2.9264439229907744e-06, "loss": 0.5692193031311035, "num_tokens": 5361002187.0, "step": 43900 }, { "epoch": 0.058557189254915765, "grad_norm": 1.8671875, "learning_rate": 2.9277771852167886e-06, "loss": 0.5685036659240723, "num_tokens": 5363578117.0, "step": 43920 }, { "epoch": 0.05858385464164387, "grad_norm": 2.125, "learning_rate": 2.9291104474428033e-06, "loss": 0.5738728523254395, "num_tokens": 5365832579.0, "step": 43940 }, { "epoch": 0.05861052002837197, "grad_norm": 1.796875, "learning_rate": 2.9304437096688176e-06, "loss": 0.5559286594390869, "num_tokens": 5368517356.0, "step": 43960 }, { "epoch": 0.058637185415100074, "grad_norm": 2.03125, "learning_rate": 2.9317769718948323e-06, "loss": 0.5551742553710938, "num_tokens": 5371231133.0, "step": 43980 }, { "epoch": 0.05866385080182818, "grad_norm": 1.6953125, "learning_rate": 2.9331102341208474e-06, "loss": 0.5565482616424561, "num_tokens": 5373736114.0, "step": 44000 }, { "epoch": 0.05869051618855628, "grad_norm": 1.9921875, "learning_rate": 2.9344434963468617e-06, "loss": 0.565239953994751, "num_tokens": 5376183385.0, "step": 44020 }, { "epoch": 0.05871718157528439, "grad_norm": 2.25, "learning_rate": 2.9357767585728764e-06, "loss": 0.5330796718597413, "num_tokens": 5378659398.0, "step": 44040 }, { "epoch": 0.05874384696201249, "grad_norm": 1.8125, "learning_rate": 2.937110020798891e-06, "loss": 0.5603416442871094, "num_tokens": 5381152242.0, "step": 44060 }, { "epoch": 0.058770512348740596, "grad_norm": 2.53125, "learning_rate": 2.9384432830249054e-06, "loss": 0.5651694774627686, "num_tokens": 5383439418.0, "step": 44080 }, { "epoch": 0.0587971777354687, "grad_norm": 2.0625, "learning_rate": 2.93977654525092e-06, "loss": 0.5596924304962159, "num_tokens": 5385833335.0, "step": 44100 }, { "epoch": 0.0588238431221968, "grad_norm": 1.96875, "learning_rate": 2.9411098074769344e-06, "loss": 0.5317980766296386, "num_tokens": 5388098604.0, "step": 44120 }, { "epoch": 0.058850508508924905, "grad_norm": 1.90625, "learning_rate": 2.9424430697029495e-06, "loss": 0.5660120964050293, "num_tokens": 5390666595.0, "step": 44140 }, { "epoch": 0.05887717389565301, "grad_norm": 2.203125, "learning_rate": 2.9437763319289642e-06, "loss": 0.5505851745605469, "num_tokens": 5392997081.0, "step": 44160 }, { "epoch": 0.05890383928238111, "grad_norm": 2.0, "learning_rate": 2.9451095941549785e-06, "loss": 0.5450937747955322, "num_tokens": 5395528017.0, "step": 44180 }, { "epoch": 0.058930504669109214, "grad_norm": 2.078125, "learning_rate": 2.946442856380993e-06, "loss": 0.5645681381225586, "num_tokens": 5397831792.0, "step": 44200 }, { "epoch": 0.05895717005583732, "grad_norm": 2.109375, "learning_rate": 2.947776118607008e-06, "loss": 0.542004919052124, "num_tokens": 5400188261.0, "step": 44220 }, { "epoch": 0.05898383544256542, "grad_norm": 2.109375, "learning_rate": 2.949109380833022e-06, "loss": 0.5445000648498535, "num_tokens": 5402888684.0, "step": 44240 }, { "epoch": 0.05901050082929353, "grad_norm": 1.796875, "learning_rate": 2.9504426430590373e-06, "loss": 0.5445682048797608, "num_tokens": 5405344224.0, "step": 44260 }, { "epoch": 0.05903716621602163, "grad_norm": 2.15625, "learning_rate": 2.951775905285052e-06, "loss": 0.5698367118835449, "num_tokens": 5407811063.0, "step": 44280 }, { "epoch": 0.059063831602749736, "grad_norm": 2.109375, "learning_rate": 2.9531091675110663e-06, "loss": 0.5553272247314454, "num_tokens": 5410337058.0, "step": 44300 }, { "epoch": 0.05909049698947784, "grad_norm": 1.96875, "learning_rate": 2.954442429737081e-06, "loss": 0.5610277652740479, "num_tokens": 5412630573.0, "step": 44320 }, { "epoch": 0.05911716237620594, "grad_norm": 1.8359375, "learning_rate": 2.9557756919630953e-06, "loss": 0.5787065029144287, "num_tokens": 5414827786.0, "step": 44340 }, { "epoch": 0.059143827762934045, "grad_norm": 2.015625, "learning_rate": 2.95710895418911e-06, "loss": 0.5426331520080566, "num_tokens": 5417350115.0, "step": 44360 }, { "epoch": 0.05917049314966215, "grad_norm": 2.234375, "learning_rate": 2.958442216415125e-06, "loss": 0.5659657955169678, "num_tokens": 5419625864.0, "step": 44380 }, { "epoch": 0.05919715853639025, "grad_norm": 1.953125, "learning_rate": 2.9597754786411394e-06, "loss": 0.5452654838562012, "num_tokens": 5421963386.0, "step": 44400 }, { "epoch": 0.059223823923118354, "grad_norm": 1.9921875, "learning_rate": 2.961108740867154e-06, "loss": 0.5528129577636719, "num_tokens": 5424298654.0, "step": 44420 }, { "epoch": 0.05925048930984646, "grad_norm": 1.8046875, "learning_rate": 2.9624420030931688e-06, "loss": 0.5875215530395508, "num_tokens": 5426663624.0, "step": 44440 }, { "epoch": 0.05927715469657457, "grad_norm": 2.125, "learning_rate": 2.963775265319183e-06, "loss": 0.5374615669250489, "num_tokens": 5429231714.0, "step": 44460 }, { "epoch": 0.05930382008330267, "grad_norm": 2.03125, "learning_rate": 2.965108527545198e-06, "loss": 0.5608007907867432, "num_tokens": 5431917962.0, "step": 44480 }, { "epoch": 0.059330485470030773, "grad_norm": 1.6640625, "learning_rate": 2.966441789771212e-06, "loss": 0.5750746726989746, "num_tokens": 5434367566.0, "step": 44500 }, { "epoch": 0.05935715085675888, "grad_norm": 2.0625, "learning_rate": 2.967775051997227e-06, "loss": 0.5731200218200684, "num_tokens": 5436898555.0, "step": 44520 }, { "epoch": 0.05938381624348698, "grad_norm": 1.796875, "learning_rate": 2.969108314223242e-06, "loss": 0.5626747131347656, "num_tokens": 5439314371.0, "step": 44540 }, { "epoch": 0.05941048163021508, "grad_norm": 1.7890625, "learning_rate": 2.970441576449256e-06, "loss": 0.551784896850586, "num_tokens": 5441697285.0, "step": 44560 }, { "epoch": 0.059437147016943186, "grad_norm": 1.5859375, "learning_rate": 2.971774838675271e-06, "loss": 0.5550895690917969, "num_tokens": 5444129033.0, "step": 44580 }, { "epoch": 0.05946381240367129, "grad_norm": 2.078125, "learning_rate": 2.973108100901286e-06, "loss": 0.5517182350158691, "num_tokens": 5446546697.0, "step": 44600 }, { "epoch": 0.05949047779039939, "grad_norm": 2.125, "learning_rate": 2.9744413631273002e-06, "loss": 0.5573521137237549, "num_tokens": 5448944538.0, "step": 44620 }, { "epoch": 0.059517143177127495, "grad_norm": 1.7421875, "learning_rate": 2.975774625353315e-06, "loss": 0.5596677780151367, "num_tokens": 5451328883.0, "step": 44640 }, { "epoch": 0.059543808563855605, "grad_norm": 2.109375, "learning_rate": 2.977107887579329e-06, "loss": 0.5578299522399902, "num_tokens": 5453656481.0, "step": 44660 }, { "epoch": 0.05957047395058371, "grad_norm": 2.25, "learning_rate": 2.978441149805344e-06, "loss": 0.569923210144043, "num_tokens": 5455865844.0, "step": 44680 }, { "epoch": 0.05959713933731181, "grad_norm": 1.9609375, "learning_rate": 2.9797744120313586e-06, "loss": 0.5631815910339355, "num_tokens": 5458133325.0, "step": 44700 }, { "epoch": 0.059623804724039914, "grad_norm": 1.8203125, "learning_rate": 2.981107674257373e-06, "loss": 0.561203145980835, "num_tokens": 5460672699.0, "step": 44720 }, { "epoch": 0.05965047011076802, "grad_norm": 2.125, "learning_rate": 2.982440936483388e-06, "loss": 0.5614548683166504, "num_tokens": 5463086607.0, "step": 44740 }, { "epoch": 0.05967713549749612, "grad_norm": 1.9375, "learning_rate": 2.9837741987094027e-06, "loss": 0.5593116760253907, "num_tokens": 5465459275.0, "step": 44760 }, { "epoch": 0.05970380088422422, "grad_norm": 2.1875, "learning_rate": 2.985107460935417e-06, "loss": 0.5487263679504395, "num_tokens": 5467790416.0, "step": 44780 }, { "epoch": 0.059730466270952326, "grad_norm": 1.859375, "learning_rate": 2.9864407231614317e-06, "loss": 0.544179630279541, "num_tokens": 5470131718.0, "step": 44800 }, { "epoch": 0.05975713165768043, "grad_norm": 2.3125, "learning_rate": 2.987773985387446e-06, "loss": 0.5460361480712891, "num_tokens": 5472313777.0, "step": 44820 }, { "epoch": 0.05978379704440853, "grad_norm": 2.21875, "learning_rate": 2.9891072476134607e-06, "loss": 0.5328914642333984, "num_tokens": 5474722632.0, "step": 44840 }, { "epoch": 0.05981046243113664, "grad_norm": 2.0625, "learning_rate": 2.990440509839476e-06, "loss": 0.551598072052002, "num_tokens": 5477091982.0, "step": 44860 }, { "epoch": 0.059837127817864745, "grad_norm": 2.171875, "learning_rate": 2.99177377206549e-06, "loss": 0.5544760704040528, "num_tokens": 5479548312.0, "step": 44880 }, { "epoch": 0.05986379320459285, "grad_norm": 2.265625, "learning_rate": 2.9931070342915048e-06, "loss": 0.5537401676177979, "num_tokens": 5481879429.0, "step": 44900 }, { "epoch": 0.05989045859132095, "grad_norm": 2.3125, "learning_rate": 2.9944402965175195e-06, "loss": 0.5679311752319336, "num_tokens": 5484325048.0, "step": 44920 }, { "epoch": 0.059917123978049054, "grad_norm": 1.8125, "learning_rate": 2.9957735587435338e-06, "loss": 0.5542384624481201, "num_tokens": 5486890039.0, "step": 44940 }, { "epoch": 0.05994378936477716, "grad_norm": 2.4375, "learning_rate": 2.9971068209695485e-06, "loss": 0.5458348274230957, "num_tokens": 5489278974.0, "step": 44960 }, { "epoch": 0.05997045475150526, "grad_norm": 1.8828125, "learning_rate": 2.9984400831955627e-06, "loss": 0.5332141876220703, "num_tokens": 5491558451.0, "step": 44980 }, { "epoch": 0.05999712013823336, "grad_norm": 2.015625, "learning_rate": 2.999773345421578e-06, "loss": 0.5553738594055175, "num_tokens": 5493920569.0, "step": 45000 }, { "epoch": 0.060023785524961466, "grad_norm": 1.7734375, "learning_rate": 3.0011066076475926e-06, "loss": 0.5480708122253418, "num_tokens": 5496312222.0, "step": 45020 }, { "epoch": 0.06005045091168957, "grad_norm": 2.5, "learning_rate": 3.002439869873607e-06, "loss": 0.5600409507751465, "num_tokens": 5498448129.0, "step": 45040 }, { "epoch": 0.06007711629841768, "grad_norm": 1.7109375, "learning_rate": 3.0037731320996215e-06, "loss": 0.5457588195800781, "num_tokens": 5500898611.0, "step": 45060 }, { "epoch": 0.06010378168514578, "grad_norm": 2.28125, "learning_rate": 3.0051063943256362e-06, "loss": 0.5461817264556885, "num_tokens": 5503175294.0, "step": 45080 }, { "epoch": 0.060130447071873885, "grad_norm": 2.0625, "learning_rate": 3.0064396565516505e-06, "loss": 0.5745823860168457, "num_tokens": 5505503292.0, "step": 45100 }, { "epoch": 0.06015711245860199, "grad_norm": 1.7734375, "learning_rate": 3.0077729187776657e-06, "loss": 0.5567599296569824, "num_tokens": 5507901194.0, "step": 45120 }, { "epoch": 0.06018377784533009, "grad_norm": 1.8671875, "learning_rate": 3.00910618100368e-06, "loss": 0.5528716087341309, "num_tokens": 5510351993.0, "step": 45140 }, { "epoch": 0.060210443232058194, "grad_norm": 1.875, "learning_rate": 3.0104394432296946e-06, "loss": 0.5723042488098145, "num_tokens": 5513054368.0, "step": 45160 }, { "epoch": 0.0602371086187863, "grad_norm": 2.0, "learning_rate": 3.0117727054557093e-06, "loss": 0.5436952590942383, "num_tokens": 5515562738.0, "step": 45180 }, { "epoch": 0.0602637740055144, "grad_norm": 2.21875, "learning_rate": 3.0131059676817236e-06, "loss": 0.5665431499481202, "num_tokens": 5517918463.0, "step": 45200 }, { "epoch": 0.060290439392242504, "grad_norm": 2.03125, "learning_rate": 3.0144392299077383e-06, "loss": 0.5642599582672119, "num_tokens": 5520354075.0, "step": 45220 }, { "epoch": 0.06031710477897061, "grad_norm": 2.28125, "learning_rate": 3.0157724921337534e-06, "loss": 0.5628135681152344, "num_tokens": 5522668565.0, "step": 45240 }, { "epoch": 0.06034377016569871, "grad_norm": 2.171875, "learning_rate": 3.0171057543597677e-06, "loss": 0.5355167388916016, "num_tokens": 5525022589.0, "step": 45260 }, { "epoch": 0.06037043555242682, "grad_norm": 1.9453125, "learning_rate": 3.0184390165857824e-06, "loss": 0.5620964050292969, "num_tokens": 5527459868.0, "step": 45280 }, { "epoch": 0.06039710093915492, "grad_norm": 1.875, "learning_rate": 3.0197722788117967e-06, "loss": 0.5605289936065674, "num_tokens": 5529742568.0, "step": 45300 }, { "epoch": 0.060423766325883026, "grad_norm": 2.296875, "learning_rate": 3.0211055410378114e-06, "loss": 0.5382152557373047, "num_tokens": 5531990919.0, "step": 45320 }, { "epoch": 0.06045043171261113, "grad_norm": 1.8984375, "learning_rate": 3.0224388032638265e-06, "loss": 0.5535208225250244, "num_tokens": 5534442846.0, "step": 45340 }, { "epoch": 0.06047709709933923, "grad_norm": 2.03125, "learning_rate": 3.023772065489841e-06, "loss": 0.5512959957122803, "num_tokens": 5536961624.0, "step": 45360 }, { "epoch": 0.060503762486067335, "grad_norm": 2.609375, "learning_rate": 3.0251053277158555e-06, "loss": 0.5574007034301758, "num_tokens": 5539432479.0, "step": 45380 }, { "epoch": 0.06053042787279544, "grad_norm": 2.296875, "learning_rate": 3.02643858994187e-06, "loss": 0.5491018295288086, "num_tokens": 5541995665.0, "step": 45400 }, { "epoch": 0.06055709325952354, "grad_norm": 1.765625, "learning_rate": 3.0277718521678845e-06, "loss": 0.5537927627563477, "num_tokens": 5544438376.0, "step": 45420 }, { "epoch": 0.060583758646251644, "grad_norm": 2.3125, "learning_rate": 3.029105114393899e-06, "loss": 0.5585022926330566, "num_tokens": 5546811198.0, "step": 45440 }, { "epoch": 0.06061042403297975, "grad_norm": 2.375, "learning_rate": 3.0304383766199135e-06, "loss": 0.5389616966247559, "num_tokens": 5549142977.0, "step": 45460 }, { "epoch": 0.06063708941970786, "grad_norm": 2.015625, "learning_rate": 3.0317716388459286e-06, "loss": 0.5349095344543457, "num_tokens": 5551789271.0, "step": 45480 }, { "epoch": 0.06066375480643596, "grad_norm": 1.8125, "learning_rate": 3.0331049010719433e-06, "loss": 0.5734536170959472, "num_tokens": 5554186971.0, "step": 45500 }, { "epoch": 0.06069042019316406, "grad_norm": 1.9921875, "learning_rate": 3.0344381632979576e-06, "loss": 0.5459677696228027, "num_tokens": 5556520166.0, "step": 45520 }, { "epoch": 0.060717085579892166, "grad_norm": 2.109375, "learning_rate": 3.0357714255239723e-06, "loss": 0.5501240730285645, "num_tokens": 5558940055.0, "step": 45540 }, { "epoch": 0.06074375096662027, "grad_norm": 1.9140625, "learning_rate": 3.037104687749987e-06, "loss": 0.5534132957458496, "num_tokens": 5561142394.0, "step": 45560 }, { "epoch": 0.06077041635334837, "grad_norm": 1.671875, "learning_rate": 3.0384379499760012e-06, "loss": 0.5496338844299317, "num_tokens": 5563458820.0, "step": 45580 }, { "epoch": 0.060797081740076475, "grad_norm": 2.0, "learning_rate": 3.0397712122020164e-06, "loss": 0.5547810554504394, "num_tokens": 5565710639.0, "step": 45600 }, { "epoch": 0.06082374712680458, "grad_norm": 1.734375, "learning_rate": 3.041104474428031e-06, "loss": 0.5682735443115234, "num_tokens": 5568118352.0, "step": 45620 }, { "epoch": 0.06085041251353268, "grad_norm": 1.6796875, "learning_rate": 3.0424377366540453e-06, "loss": 0.5523097038269043, "num_tokens": 5570511459.0, "step": 45640 }, { "epoch": 0.060877077900260784, "grad_norm": 2.140625, "learning_rate": 3.04377099888006e-06, "loss": 0.5549149513244629, "num_tokens": 5572887920.0, "step": 45660 }, { "epoch": 0.060903743286988894, "grad_norm": 1.8203125, "learning_rate": 3.0451042611060743e-06, "loss": 0.5496911525726318, "num_tokens": 5575346646.0, "step": 45680 }, { "epoch": 0.060930408673717, "grad_norm": 1.765625, "learning_rate": 3.046437523332089e-06, "loss": 0.5399347305297851, "num_tokens": 5577728777.0, "step": 45700 }, { "epoch": 0.0609570740604451, "grad_norm": 1.7890625, "learning_rate": 3.047770785558104e-06, "loss": 0.5426033020019532, "num_tokens": 5580124793.0, "step": 45720 }, { "epoch": 0.0609837394471732, "grad_norm": 1.7578125, "learning_rate": 3.0491040477841184e-06, "loss": 0.5561683654785157, "num_tokens": 5582490593.0, "step": 45740 }, { "epoch": 0.061010404833901306, "grad_norm": 2.390625, "learning_rate": 3.050437310010133e-06, "loss": 0.5583030223846436, "num_tokens": 5584933327.0, "step": 45760 }, { "epoch": 0.06103707022062941, "grad_norm": 2.140625, "learning_rate": 3.051770572236148e-06, "loss": 0.5791233062744141, "num_tokens": 5587545014.0, "step": 45780 }, { "epoch": 0.06106373560735751, "grad_norm": 2.3125, "learning_rate": 3.053103834462162e-06, "loss": 0.5535370826721191, "num_tokens": 5589869078.0, "step": 45800 }, { "epoch": 0.061090400994085615, "grad_norm": 2.328125, "learning_rate": 3.054437096688177e-06, "loss": 0.5582178115844727, "num_tokens": 5592230052.0, "step": 45820 }, { "epoch": 0.06111706638081372, "grad_norm": 1.6328125, "learning_rate": 3.055770358914191e-06, "loss": 0.5350105285644531, "num_tokens": 5594663594.0, "step": 45840 }, { "epoch": 0.06114373176754182, "grad_norm": 1.90625, "learning_rate": 3.0571036211402062e-06, "loss": 0.5478509902954102, "num_tokens": 5597068688.0, "step": 45860 }, { "epoch": 0.06117039715426993, "grad_norm": 2.3125, "learning_rate": 3.058436883366221e-06, "loss": 0.5463482856750488, "num_tokens": 5599544278.0, "step": 45880 }, { "epoch": 0.061197062540998035, "grad_norm": 2.296875, "learning_rate": 3.059770145592235e-06, "loss": 0.5465571403503418, "num_tokens": 5602083845.0, "step": 45900 }, { "epoch": 0.06122372792772614, "grad_norm": 2.34375, "learning_rate": 3.06110340781825e-06, "loss": 0.5451560020446777, "num_tokens": 5604405847.0, "step": 45920 }, { "epoch": 0.06125039331445424, "grad_norm": 1.78125, "learning_rate": 3.062436670044265e-06, "loss": 0.5578379154205322, "num_tokens": 5606932850.0, "step": 45940 }, { "epoch": 0.061277058701182344, "grad_norm": 1.8984375, "learning_rate": 3.063769932270279e-06, "loss": 0.5455524444580078, "num_tokens": 5609165997.0, "step": 45960 }, { "epoch": 0.06130372408791045, "grad_norm": 1.953125, "learning_rate": 3.065103194496294e-06, "loss": 0.5342559337615966, "num_tokens": 5611501691.0, "step": 45980 }, { "epoch": 0.06133038947463855, "grad_norm": 2.1875, "learning_rate": 3.0664364567223083e-06, "loss": 0.573914909362793, "num_tokens": 5614105112.0, "step": 46000 }, { "epoch": 0.06135705486136665, "grad_norm": 1.765625, "learning_rate": 3.067769718948323e-06, "loss": 0.5512654304504394, "num_tokens": 5616656678.0, "step": 46020 }, { "epoch": 0.061383720248094756, "grad_norm": 2.078125, "learning_rate": 3.0691029811743377e-06, "loss": 0.5371935844421387, "num_tokens": 5619191419.0, "step": 46040 }, { "epoch": 0.06141038563482286, "grad_norm": 1.9921875, "learning_rate": 3.070436243400352e-06, "loss": 0.547773027420044, "num_tokens": 5621587819.0, "step": 46060 }, { "epoch": 0.06143705102155097, "grad_norm": 2.078125, "learning_rate": 3.071769505626367e-06, "loss": 0.5434441566467285, "num_tokens": 5623805379.0, "step": 46080 }, { "epoch": 0.06146371640827907, "grad_norm": 1.7109375, "learning_rate": 3.073102767852382e-06, "loss": 0.5667917728424072, "num_tokens": 5626392242.0, "step": 46100 }, { "epoch": 0.061490381795007175, "grad_norm": 2.09375, "learning_rate": 3.074436030078396e-06, "loss": 0.5418491363525391, "num_tokens": 5628679868.0, "step": 46120 }, { "epoch": 0.06151704718173528, "grad_norm": 2.140625, "learning_rate": 3.0757692923044108e-06, "loss": 0.5570039749145508, "num_tokens": 5631076812.0, "step": 46140 }, { "epoch": 0.06154371256846338, "grad_norm": 1.9453125, "learning_rate": 3.077102554530425e-06, "loss": 0.5654218673706055, "num_tokens": 5633621685.0, "step": 46160 }, { "epoch": 0.061570377955191484, "grad_norm": 1.6015625, "learning_rate": 3.0784358167564397e-06, "loss": 0.5579648971557617, "num_tokens": 5636085239.0, "step": 46180 }, { "epoch": 0.06159704334191959, "grad_norm": 1.78125, "learning_rate": 3.079769078982455e-06, "loss": 0.5495635509490967, "num_tokens": 5638608300.0, "step": 46200 }, { "epoch": 0.06162370872864769, "grad_norm": 1.9609375, "learning_rate": 3.081102341208469e-06, "loss": 0.5550924301147461, "num_tokens": 5640965682.0, "step": 46220 }, { "epoch": 0.06165037411537579, "grad_norm": 2.078125, "learning_rate": 3.082435603434484e-06, "loss": 0.5542848587036133, "num_tokens": 5643231712.0, "step": 46240 }, { "epoch": 0.061677039502103896, "grad_norm": 1.9453125, "learning_rate": 3.0837688656604986e-06, "loss": 0.5340061187744141, "num_tokens": 5645716012.0, "step": 46260 }, { "epoch": 0.061703704888832006, "grad_norm": 2.015625, "learning_rate": 3.085102127886513e-06, "loss": 0.5514540672302246, "num_tokens": 5648226751.0, "step": 46280 }, { "epoch": 0.06173037027556011, "grad_norm": 2.078125, "learning_rate": 3.0864353901125275e-06, "loss": 0.5682505607604981, "num_tokens": 5650640887.0, "step": 46300 }, { "epoch": 0.06175703566228821, "grad_norm": 1.625, "learning_rate": 3.087768652338542e-06, "loss": 0.5656480312347412, "num_tokens": 5653102842.0, "step": 46320 }, { "epoch": 0.061783701049016315, "grad_norm": 1.9453125, "learning_rate": 3.089101914564557e-06, "loss": 0.5520481109619141, "num_tokens": 5655552592.0, "step": 46340 }, { "epoch": 0.06181036643574442, "grad_norm": 2.078125, "learning_rate": 3.0904351767905716e-06, "loss": 0.5390124320983887, "num_tokens": 5658003654.0, "step": 46360 }, { "epoch": 0.06183703182247252, "grad_norm": 1.6796875, "learning_rate": 3.091768439016586e-06, "loss": 0.5403827667236328, "num_tokens": 5660432789.0, "step": 46380 }, { "epoch": 0.061863697209200624, "grad_norm": 2.171875, "learning_rate": 3.0931017012426006e-06, "loss": 0.5512421607971192, "num_tokens": 5662811442.0, "step": 46400 }, { "epoch": 0.06189036259592873, "grad_norm": 1.96875, "learning_rate": 3.0944349634686153e-06, "loss": 0.5434258937835693, "num_tokens": 5665322552.0, "step": 46420 }, { "epoch": 0.06191702798265683, "grad_norm": 2.0, "learning_rate": 3.0957682256946296e-06, "loss": 0.535888957977295, "num_tokens": 5667582010.0, "step": 46440 }, { "epoch": 0.06194369336938493, "grad_norm": 1.90625, "learning_rate": 3.0971014879206447e-06, "loss": 0.5495459556579589, "num_tokens": 5670026314.0, "step": 46460 }, { "epoch": 0.061970358756113036, "grad_norm": 2.25, "learning_rate": 3.098434750146659e-06, "loss": 0.5561540603637696, "num_tokens": 5672513251.0, "step": 46480 }, { "epoch": 0.061997024142841146, "grad_norm": 1.78125, "learning_rate": 3.0997680123726737e-06, "loss": 0.5618414878845215, "num_tokens": 5674888787.0, "step": 46500 }, { "epoch": 0.06202368952956925, "grad_norm": 1.9921875, "learning_rate": 3.1011012745986884e-06, "loss": 0.5433036327362061, "num_tokens": 5677351558.0, "step": 46520 }, { "epoch": 0.06205035491629735, "grad_norm": 2.21875, "learning_rate": 3.1024345368247027e-06, "loss": 0.5709066390991211, "num_tokens": 5679797259.0, "step": 46540 }, { "epoch": 0.062077020303025456, "grad_norm": 2.015625, "learning_rate": 3.1037677990507174e-06, "loss": 0.5671977996826172, "num_tokens": 5682140478.0, "step": 46560 }, { "epoch": 0.06210368568975356, "grad_norm": 2.15625, "learning_rate": 3.1051010612767325e-06, "loss": 0.5700483798980713, "num_tokens": 5684562142.0, "step": 46580 }, { "epoch": 0.06213035107648166, "grad_norm": 1.859375, "learning_rate": 3.1064343235027468e-06, "loss": 0.546644926071167, "num_tokens": 5687076098.0, "step": 46600 }, { "epoch": 0.062157016463209765, "grad_norm": 2.140625, "learning_rate": 3.1077675857287615e-06, "loss": 0.5551890373229981, "num_tokens": 5689631741.0, "step": 46620 }, { "epoch": 0.06218368184993787, "grad_norm": 2.03125, "learning_rate": 3.1091008479547758e-06, "loss": 0.5475635528564453, "num_tokens": 5691947293.0, "step": 46640 }, { "epoch": 0.06221034723666597, "grad_norm": 2.25, "learning_rate": 3.1104341101807905e-06, "loss": 0.555121898651123, "num_tokens": 5694273822.0, "step": 46660 }, { "epoch": 0.062237012623394074, "grad_norm": 1.921875, "learning_rate": 3.111767372406805e-06, "loss": 0.5636539459228516, "num_tokens": 5696898859.0, "step": 46680 }, { "epoch": 0.062263678010122184, "grad_norm": 2.03125, "learning_rate": 3.1131006346328194e-06, "loss": 0.5562507629394531, "num_tokens": 5699341804.0, "step": 46700 }, { "epoch": 0.06229034339685029, "grad_norm": 2.171875, "learning_rate": 3.1144338968588346e-06, "loss": 0.5531933307647705, "num_tokens": 5701772329.0, "step": 46720 }, { "epoch": 0.06231700878357839, "grad_norm": 2.125, "learning_rate": 3.1157671590848493e-06, "loss": 0.5500737190246582, "num_tokens": 5704247826.0, "step": 46740 }, { "epoch": 0.06234367417030649, "grad_norm": 1.8359375, "learning_rate": 3.1171004213108636e-06, "loss": 0.5525367736816407, "num_tokens": 5706800750.0, "step": 46760 }, { "epoch": 0.062370339557034596, "grad_norm": 1.84375, "learning_rate": 3.1184336835368783e-06, "loss": 0.534196138381958, "num_tokens": 5709023311.0, "step": 46780 }, { "epoch": 0.0623970049437627, "grad_norm": 1.6796875, "learning_rate": 3.1197669457628925e-06, "loss": 0.5450634956359863, "num_tokens": 5711471490.0, "step": 46800 }, { "epoch": 0.0624236703304908, "grad_norm": 1.84375, "learning_rate": 3.1211002079889077e-06, "loss": 0.5683589935302734, "num_tokens": 5713895614.0, "step": 46820 }, { "epoch": 0.062450335717218905, "grad_norm": 2.453125, "learning_rate": 3.1224334702149224e-06, "loss": 0.5388218879699707, "num_tokens": 5716356424.0, "step": 46840 }, { "epoch": 0.06247700110394701, "grad_norm": 1.9609375, "learning_rate": 3.1237667324409366e-06, "loss": 0.555736780166626, "num_tokens": 5718804225.0, "step": 46860 }, { "epoch": 0.06250366649067511, "grad_norm": 2.03125, "learning_rate": 3.1250999946669513e-06, "loss": 0.5456063747406006, "num_tokens": 5721019639.0, "step": 46880 }, { "epoch": 0.06253033187740321, "grad_norm": 2.1875, "learning_rate": 3.126433256892966e-06, "loss": 0.542991304397583, "num_tokens": 5723454349.0, "step": 46900 }, { "epoch": 0.06255699726413132, "grad_norm": 2.015625, "learning_rate": 3.1277665191189803e-06, "loss": 0.5475061893463135, "num_tokens": 5725613499.0, "step": 46920 }, { "epoch": 0.06258366265085942, "grad_norm": 2.140625, "learning_rate": 3.1290997813449954e-06, "loss": 0.5477382183074951, "num_tokens": 5727930056.0, "step": 46940 }, { "epoch": 0.06261032803758752, "grad_norm": 2.296875, "learning_rate": 3.13043304357101e-06, "loss": 0.5538121223449707, "num_tokens": 5730414197.0, "step": 46960 }, { "epoch": 0.06263699342431563, "grad_norm": 1.953125, "learning_rate": 3.1317663057970244e-06, "loss": 0.5382927894592285, "num_tokens": 5732773771.0, "step": 46980 }, { "epoch": 0.06266365881104374, "grad_norm": 2.421875, "learning_rate": 3.133099568023039e-06, "loss": 0.5368349075317382, "num_tokens": 5735048047.0, "step": 47000 }, { "epoch": 0.06269032419777185, "grad_norm": 2.109375, "learning_rate": 3.1344328302490534e-06, "loss": 0.5449503898620606, "num_tokens": 5737584747.0, "step": 47020 }, { "epoch": 0.06271698958449995, "grad_norm": 1.9765625, "learning_rate": 3.135766092475068e-06, "loss": 0.5247677803039551, "num_tokens": 5740165757.0, "step": 47040 }, { "epoch": 0.06274365497122805, "grad_norm": 2.15625, "learning_rate": 3.1370993547010832e-06, "loss": 0.5442595481872559, "num_tokens": 5742681398.0, "step": 47060 }, { "epoch": 0.06277032035795616, "grad_norm": 2.09375, "learning_rate": 3.1384326169270975e-06, "loss": 0.5466197967529297, "num_tokens": 5745189127.0, "step": 47080 }, { "epoch": 0.06279698574468426, "grad_norm": 1.578125, "learning_rate": 3.139765879153112e-06, "loss": 0.5318994522094727, "num_tokens": 5747540950.0, "step": 47100 }, { "epoch": 0.06282365113141236, "grad_norm": 1.84375, "learning_rate": 3.141099141379127e-06, "loss": 0.5650910377502442, "num_tokens": 5749900327.0, "step": 47120 }, { "epoch": 0.06285031651814046, "grad_norm": 1.8203125, "learning_rate": 3.142432403605141e-06, "loss": 0.5455311298370361, "num_tokens": 5752372270.0, "step": 47140 }, { "epoch": 0.06287698190486857, "grad_norm": 1.9453125, "learning_rate": 3.143765665831156e-06, "loss": 0.5499954223632812, "num_tokens": 5754920422.0, "step": 47160 }, { "epoch": 0.06290364729159667, "grad_norm": 2.09375, "learning_rate": 3.14509892805717e-06, "loss": 0.5332972526550293, "num_tokens": 5757255978.0, "step": 47180 }, { "epoch": 0.06293031267832477, "grad_norm": 1.8203125, "learning_rate": 3.1464321902831853e-06, "loss": 0.5481165885925293, "num_tokens": 5759586621.0, "step": 47200 }, { "epoch": 0.06295697806505288, "grad_norm": 2.15625, "learning_rate": 3.1477654525092e-06, "loss": 0.5471556186676025, "num_tokens": 5761927282.0, "step": 47220 }, { "epoch": 0.06298364345178098, "grad_norm": 2.125, "learning_rate": 3.1490987147352143e-06, "loss": 0.5408550262451172, "num_tokens": 5764377558.0, "step": 47240 }, { "epoch": 0.06301030883850908, "grad_norm": 2.03125, "learning_rate": 3.150431976961229e-06, "loss": 0.5579921245574951, "num_tokens": 5766992254.0, "step": 47260 }, { "epoch": 0.06303697422523719, "grad_norm": 2.078125, "learning_rate": 3.1517652391872437e-06, "loss": 0.5500329971313477, "num_tokens": 5769424072.0, "step": 47280 }, { "epoch": 0.06306363961196529, "grad_norm": 1.96875, "learning_rate": 3.153098501413258e-06, "loss": 0.5553366661071777, "num_tokens": 5772000566.0, "step": 47300 }, { "epoch": 0.06309030499869339, "grad_norm": 2.234375, "learning_rate": 3.154431763639273e-06, "loss": 0.5404666900634766, "num_tokens": 5774511329.0, "step": 47320 }, { "epoch": 0.0631169703854215, "grad_norm": 1.71875, "learning_rate": 3.1557650258652874e-06, "loss": 0.543001937866211, "num_tokens": 5777036355.0, "step": 47340 }, { "epoch": 0.0631436357721496, "grad_norm": 2.15625, "learning_rate": 3.157098288091302e-06, "loss": 0.5553371429443359, "num_tokens": 5779574676.0, "step": 47360 }, { "epoch": 0.0631703011588777, "grad_norm": 1.890625, "learning_rate": 3.1584315503173168e-06, "loss": 0.5618331909179688, "num_tokens": 5781957704.0, "step": 47380 }, { "epoch": 0.06319696654560582, "grad_norm": 2.0, "learning_rate": 3.159764812543331e-06, "loss": 0.5478918552398682, "num_tokens": 5784435128.0, "step": 47400 }, { "epoch": 0.06322363193233392, "grad_norm": 1.9453125, "learning_rate": 3.1610980747693457e-06, "loss": 0.5487288475036621, "num_tokens": 5786877174.0, "step": 47420 }, { "epoch": 0.06325029731906202, "grad_norm": 2.078125, "learning_rate": 3.162431336995361e-06, "loss": 0.5401861667633057, "num_tokens": 5789285491.0, "step": 47440 }, { "epoch": 0.06327696270579013, "grad_norm": 2.234375, "learning_rate": 3.163764599221375e-06, "loss": 0.5435902595520019, "num_tokens": 5791751006.0, "step": 47460 }, { "epoch": 0.06330362809251823, "grad_norm": 2.28125, "learning_rate": 3.16509786144739e-06, "loss": 0.5514764785766602, "num_tokens": 5794383096.0, "step": 47480 }, { "epoch": 0.06333029347924633, "grad_norm": 1.78125, "learning_rate": 3.166431123673404e-06, "loss": 0.5547398567199707, "num_tokens": 5796619213.0, "step": 47500 }, { "epoch": 0.06335695886597444, "grad_norm": 2.140625, "learning_rate": 3.167764385899419e-06, "loss": 0.5552446365356445, "num_tokens": 5799081808.0, "step": 47520 }, { "epoch": 0.06338362425270254, "grad_norm": 2.140625, "learning_rate": 3.169097648125434e-06, "loss": 0.547721529006958, "num_tokens": 5801409885.0, "step": 47540 }, { "epoch": 0.06341028963943064, "grad_norm": 2.390625, "learning_rate": 3.170430910351448e-06, "loss": 0.5321070671081543, "num_tokens": 5803652110.0, "step": 47560 }, { "epoch": 0.06343695502615875, "grad_norm": 2.078125, "learning_rate": 3.171764172577463e-06, "loss": 0.5456575393676758, "num_tokens": 5806004759.0, "step": 47580 }, { "epoch": 0.06346362041288685, "grad_norm": 2.25, "learning_rate": 3.1730974348034776e-06, "loss": 0.5368069648742676, "num_tokens": 5808234040.0, "step": 47600 }, { "epoch": 0.06349028579961495, "grad_norm": 2.296875, "learning_rate": 3.174430697029492e-06, "loss": 0.5487019062042237, "num_tokens": 5810497604.0, "step": 47620 }, { "epoch": 0.06351695118634305, "grad_norm": 2.234375, "learning_rate": 3.1757639592555066e-06, "loss": 0.5381615161895752, "num_tokens": 5812959247.0, "step": 47640 }, { "epoch": 0.06354361657307116, "grad_norm": 2.171875, "learning_rate": 3.177097221481521e-06, "loss": 0.5319629192352295, "num_tokens": 5815498809.0, "step": 47660 }, { "epoch": 0.06357028195979926, "grad_norm": 2.15625, "learning_rate": 3.178430483707536e-06, "loss": 0.525675916671753, "num_tokens": 5818077490.0, "step": 47680 }, { "epoch": 0.06359694734652736, "grad_norm": 1.890625, "learning_rate": 3.1797637459335507e-06, "loss": 0.5494894504547119, "num_tokens": 5820466740.0, "step": 47700 }, { "epoch": 0.06362361273325547, "grad_norm": 1.953125, "learning_rate": 3.181097008159565e-06, "loss": 0.532435417175293, "num_tokens": 5823034249.0, "step": 47720 }, { "epoch": 0.06365027811998357, "grad_norm": 1.6953125, "learning_rate": 3.1824302703855797e-06, "loss": 0.5614911556243897, "num_tokens": 5825500195.0, "step": 47740 }, { "epoch": 0.06367694350671167, "grad_norm": 1.9609375, "learning_rate": 3.1837635326115944e-06, "loss": 0.5436235427856445, "num_tokens": 5828003035.0, "step": 47760 }, { "epoch": 0.06370360889343978, "grad_norm": 2.390625, "learning_rate": 3.1850967948376087e-06, "loss": 0.5564045906066895, "num_tokens": 5830526703.0, "step": 47780 }, { "epoch": 0.06373027428016788, "grad_norm": 1.9609375, "learning_rate": 3.186430057063624e-06, "loss": 0.5622060775756836, "num_tokens": 5832992370.0, "step": 47800 }, { "epoch": 0.063756939666896, "grad_norm": 2.125, "learning_rate": 3.187763319289638e-06, "loss": 0.5566381931304931, "num_tokens": 5835661700.0, "step": 47820 }, { "epoch": 0.0637836050536241, "grad_norm": 1.84375, "learning_rate": 3.1890965815156528e-06, "loss": 0.5590667724609375, "num_tokens": 5838086799.0, "step": 47840 }, { "epoch": 0.0638102704403522, "grad_norm": 1.34375, "learning_rate": 3.1904298437416675e-06, "loss": 0.5300308227539062, "num_tokens": 5840677352.0, "step": 47860 }, { "epoch": 0.0638369358270803, "grad_norm": 2.234375, "learning_rate": 3.1917631059676818e-06, "loss": 0.5471683502197265, "num_tokens": 5843056395.0, "step": 47880 }, { "epoch": 0.06386360121380841, "grad_norm": 1.828125, "learning_rate": 3.1930963681936965e-06, "loss": 0.5362923622131348, "num_tokens": 5845557680.0, "step": 47900 }, { "epoch": 0.06389026660053651, "grad_norm": 2.046875, "learning_rate": 3.1944296304197116e-06, "loss": 0.5459730625152588, "num_tokens": 5847937735.0, "step": 47920 }, { "epoch": 0.06391693198726461, "grad_norm": 1.7734375, "learning_rate": 3.195762892645726e-06, "loss": 0.5239994049072265, "num_tokens": 5850417358.0, "step": 47940 }, { "epoch": 0.06394359737399272, "grad_norm": 2.453125, "learning_rate": 3.1970961548717406e-06, "loss": 0.5410839557647705, "num_tokens": 5852515057.0, "step": 47960 }, { "epoch": 0.06397026276072082, "grad_norm": 2.140625, "learning_rate": 3.198429417097755e-06, "loss": 0.5485435485839844, "num_tokens": 5855204425.0, "step": 47980 }, { "epoch": 0.06399692814744892, "grad_norm": 1.9609375, "learning_rate": 3.1997626793237695e-06, "loss": 0.5450697898864746, "num_tokens": 5857773796.0, "step": 48000 }, { "epoch": 0.06402359353417703, "grad_norm": 1.71875, "learning_rate": 3.2010959415497842e-06, "loss": 0.5409571647644043, "num_tokens": 5860241058.0, "step": 48020 }, { "epoch": 0.06405025892090513, "grad_norm": 2.078125, "learning_rate": 3.2024292037757985e-06, "loss": 0.5461203575134277, "num_tokens": 5862644899.0, "step": 48040 }, { "epoch": 0.06407692430763323, "grad_norm": 2.390625, "learning_rate": 3.2037624660018136e-06, "loss": 0.5479022979736328, "num_tokens": 5865004792.0, "step": 48060 }, { "epoch": 0.06410358969436133, "grad_norm": 2.46875, "learning_rate": 3.2050957282278283e-06, "loss": 0.560971736907959, "num_tokens": 5867318471.0, "step": 48080 }, { "epoch": 0.06413025508108944, "grad_norm": 2.3125, "learning_rate": 3.2064289904538426e-06, "loss": 0.5449859142303467, "num_tokens": 5869706181.0, "step": 48100 }, { "epoch": 0.06415692046781754, "grad_norm": 2.15625, "learning_rate": 3.2077622526798573e-06, "loss": 0.5506255149841308, "num_tokens": 5872183927.0, "step": 48120 }, { "epoch": 0.06418358585454564, "grad_norm": 2.125, "learning_rate": 3.2090955149058716e-06, "loss": 0.5343596935272217, "num_tokens": 5874944126.0, "step": 48140 }, { "epoch": 0.06421025124127375, "grad_norm": 2.0, "learning_rate": 3.2104287771318863e-06, "loss": 0.5464014530181884, "num_tokens": 5877406361.0, "step": 48160 }, { "epoch": 0.06423691662800185, "grad_norm": 2.0, "learning_rate": 3.2117620393579014e-06, "loss": 0.5438418388366699, "num_tokens": 5879800369.0, "step": 48180 }, { "epoch": 0.06426358201472995, "grad_norm": 2.640625, "learning_rate": 3.2130953015839157e-06, "loss": 0.5384643077850342, "num_tokens": 5882233829.0, "step": 48200 }, { "epoch": 0.06429024740145807, "grad_norm": 1.6796875, "learning_rate": 3.2144285638099304e-06, "loss": 0.5463719367980957, "num_tokens": 5884674060.0, "step": 48220 }, { "epoch": 0.06431691278818617, "grad_norm": 1.546875, "learning_rate": 3.215761826035945e-06, "loss": 0.5475447654724122, "num_tokens": 5887116278.0, "step": 48240 }, { "epoch": 0.06434357817491428, "grad_norm": 2.328125, "learning_rate": 3.2170950882619594e-06, "loss": 0.552181339263916, "num_tokens": 5889703112.0, "step": 48260 }, { "epoch": 0.06437024356164238, "grad_norm": 2.53125, "learning_rate": 3.2184283504879745e-06, "loss": 0.5499826431274414, "num_tokens": 5892349663.0, "step": 48280 }, { "epoch": 0.06439690894837048, "grad_norm": 2.0, "learning_rate": 3.2197616127139892e-06, "loss": 0.5330401420593261, "num_tokens": 5894783449.0, "step": 48300 }, { "epoch": 0.06442357433509859, "grad_norm": 2.15625, "learning_rate": 3.2210948749400035e-06, "loss": 0.544071102142334, "num_tokens": 5897052554.0, "step": 48320 }, { "epoch": 0.06445023972182669, "grad_norm": 1.984375, "learning_rate": 3.222428137166018e-06, "loss": 0.533531379699707, "num_tokens": 5899556049.0, "step": 48340 }, { "epoch": 0.06447690510855479, "grad_norm": 2.40625, "learning_rate": 3.2237613993920325e-06, "loss": 0.5445006847381592, "num_tokens": 5902104498.0, "step": 48360 }, { "epoch": 0.0645035704952829, "grad_norm": 1.8359375, "learning_rate": 3.225094661618047e-06, "loss": 0.5524098873138428, "num_tokens": 5904602503.0, "step": 48380 }, { "epoch": 0.064530235882011, "grad_norm": 1.96875, "learning_rate": 3.2264279238440623e-06, "loss": 0.5515236377716064, "num_tokens": 5906956213.0, "step": 48400 }, { "epoch": 0.0645569012687391, "grad_norm": 1.7421875, "learning_rate": 3.2277611860700766e-06, "loss": 0.5395146369934082, "num_tokens": 5909343045.0, "step": 48420 }, { "epoch": 0.0645835666554672, "grad_norm": 2.078125, "learning_rate": 3.2290944482960913e-06, "loss": 0.5466301918029786, "num_tokens": 5911771048.0, "step": 48440 }, { "epoch": 0.0646102320421953, "grad_norm": 2.0, "learning_rate": 3.230427710522106e-06, "loss": 0.5624408721923828, "num_tokens": 5914337030.0, "step": 48460 }, { "epoch": 0.06463689742892341, "grad_norm": 1.8046875, "learning_rate": 3.2317609727481203e-06, "loss": 0.5558467864990234, "num_tokens": 5916718341.0, "step": 48480 }, { "epoch": 0.06466356281565151, "grad_norm": 1.921875, "learning_rate": 3.233094234974135e-06, "loss": 0.5521312713623047, "num_tokens": 5918971003.0, "step": 48500 }, { "epoch": 0.06469022820237962, "grad_norm": 1.859375, "learning_rate": 3.2344274972001492e-06, "loss": 0.5460955619812011, "num_tokens": 5921274997.0, "step": 48520 }, { "epoch": 0.06471689358910772, "grad_norm": 2.734375, "learning_rate": 3.2357607594261644e-06, "loss": 0.5723299026489258, "num_tokens": 5923784663.0, "step": 48540 }, { "epoch": 0.06474355897583582, "grad_norm": 2.390625, "learning_rate": 3.237094021652179e-06, "loss": 0.5370918273925781, "num_tokens": 5926129305.0, "step": 48560 }, { "epoch": 0.06477022436256392, "grad_norm": 1.7265625, "learning_rate": 3.2384272838781933e-06, "loss": 0.5362760543823242, "num_tokens": 5928376443.0, "step": 48580 }, { "epoch": 0.06479688974929203, "grad_norm": 1.6171875, "learning_rate": 3.239760546104208e-06, "loss": 0.5393124580383301, "num_tokens": 5930554212.0, "step": 48600 }, { "epoch": 0.06482355513602014, "grad_norm": 1.84375, "learning_rate": 3.2410938083302227e-06, "loss": 0.5363768577575684, "num_tokens": 5932966716.0, "step": 48620 }, { "epoch": 0.06485022052274825, "grad_norm": 1.828125, "learning_rate": 3.242427070556237e-06, "loss": 0.5358203887939453, "num_tokens": 5935542569.0, "step": 48640 }, { "epoch": 0.06487688590947635, "grad_norm": 2.078125, "learning_rate": 3.243760332782252e-06, "loss": 0.5353310585021973, "num_tokens": 5937888300.0, "step": 48660 }, { "epoch": 0.06490355129620445, "grad_norm": 2.0, "learning_rate": 3.2450935950082664e-06, "loss": 0.5311526298522949, "num_tokens": 5940444641.0, "step": 48680 }, { "epoch": 0.06493021668293256, "grad_norm": 1.765625, "learning_rate": 3.246426857234281e-06, "loss": 0.5527884006500244, "num_tokens": 5942951916.0, "step": 48700 }, { "epoch": 0.06495688206966066, "grad_norm": 2.171875, "learning_rate": 3.247760119460296e-06, "loss": 0.5453107833862305, "num_tokens": 5945612652.0, "step": 48720 }, { "epoch": 0.06498354745638876, "grad_norm": 1.9453125, "learning_rate": 3.24909338168631e-06, "loss": 0.5434340953826904, "num_tokens": 5948226826.0, "step": 48740 }, { "epoch": 0.06501021284311687, "grad_norm": 2.140625, "learning_rate": 3.250426643912325e-06, "loss": 0.5410999298095703, "num_tokens": 5950621077.0, "step": 48760 }, { "epoch": 0.06503687822984497, "grad_norm": 2.046875, "learning_rate": 3.25175990613834e-06, "loss": 0.5544160842895508, "num_tokens": 5952846730.0, "step": 48780 }, { "epoch": 0.06506354361657307, "grad_norm": 1.59375, "learning_rate": 3.2530931683643542e-06, "loss": 0.541264820098877, "num_tokens": 5955389789.0, "step": 48800 }, { "epoch": 0.06509020900330117, "grad_norm": 2.0625, "learning_rate": 3.254426430590369e-06, "loss": 0.5242452621459961, "num_tokens": 5957781745.0, "step": 48820 }, { "epoch": 0.06511687439002928, "grad_norm": 1.671875, "learning_rate": 3.255759692816383e-06, "loss": 0.5302740573883057, "num_tokens": 5960204427.0, "step": 48840 }, { "epoch": 0.06514353977675738, "grad_norm": 1.6875, "learning_rate": 3.257092955042398e-06, "loss": 0.5472068786621094, "num_tokens": 5962569792.0, "step": 48860 }, { "epoch": 0.06517020516348548, "grad_norm": 1.921875, "learning_rate": 3.2584262172684126e-06, "loss": 0.5260383605957031, "num_tokens": 5965061911.0, "step": 48880 }, { "epoch": 0.06519687055021359, "grad_norm": 2.03125, "learning_rate": 3.259759479494427e-06, "loss": 0.5402394294738769, "num_tokens": 5967638954.0, "step": 48900 }, { "epoch": 0.06522353593694169, "grad_norm": 2.234375, "learning_rate": 3.261092741720442e-06, "loss": 0.5331315040588379, "num_tokens": 5969960922.0, "step": 48920 }, { "epoch": 0.0652502013236698, "grad_norm": 2.09375, "learning_rate": 3.2624260039464567e-06, "loss": 0.5475040435791015, "num_tokens": 5972636707.0, "step": 48940 }, { "epoch": 0.0652768667103979, "grad_norm": 1.8515625, "learning_rate": 3.263759266172471e-06, "loss": 0.5558913707733154, "num_tokens": 5975074755.0, "step": 48960 }, { "epoch": 0.065303532097126, "grad_norm": 2.609375, "learning_rate": 3.2650925283984857e-06, "loss": 0.538751220703125, "num_tokens": 5977497745.0, "step": 48980 }, { "epoch": 0.0653301974838541, "grad_norm": 2.234375, "learning_rate": 3.2664257906245e-06, "loss": 0.5450959205627441, "num_tokens": 5980107157.0, "step": 49000 }, { "epoch": 0.0653568628705822, "grad_norm": 1.765625, "learning_rate": 3.2677590528505147e-06, "loss": 0.5470452308654785, "num_tokens": 5982642090.0, "step": 49020 }, { "epoch": 0.06538352825731032, "grad_norm": 2.25, "learning_rate": 3.2690923150765298e-06, "loss": 0.5292469024658203, "num_tokens": 5985270738.0, "step": 49040 }, { "epoch": 0.06541019364403843, "grad_norm": 2.03125, "learning_rate": 3.270425577302544e-06, "loss": 0.5417457580566406, "num_tokens": 5987676530.0, "step": 49060 }, { "epoch": 0.06543685903076653, "grad_norm": 2.640625, "learning_rate": 3.2717588395285588e-06, "loss": 0.5194435596466065, "num_tokens": 5990108915.0, "step": 49080 }, { "epoch": 0.06546352441749463, "grad_norm": 2.34375, "learning_rate": 3.2730921017545735e-06, "loss": 0.5342529296875, "num_tokens": 5992694177.0, "step": 49100 }, { "epoch": 0.06549018980422273, "grad_norm": 2.34375, "learning_rate": 3.2744253639805877e-06, "loss": 0.5335217952728272, "num_tokens": 5995141217.0, "step": 49120 }, { "epoch": 0.06551685519095084, "grad_norm": 1.671875, "learning_rate": 3.275758626206603e-06, "loss": 0.5415543556213379, "num_tokens": 5997536718.0, "step": 49140 }, { "epoch": 0.06554352057767894, "grad_norm": 2.171875, "learning_rate": 3.277091888432617e-06, "loss": 0.5392942428588867, "num_tokens": 5999988944.0, "step": 49160 }, { "epoch": 0.06557018596440704, "grad_norm": 2.390625, "learning_rate": 3.278425150658632e-06, "loss": 0.5578566074371338, "num_tokens": 6002425447.0, "step": 49180 }, { "epoch": 0.06559685135113515, "grad_norm": 2.234375, "learning_rate": 3.2797584128846465e-06, "loss": 0.542392635345459, "num_tokens": 6004900302.0, "step": 49200 }, { "epoch": 0.06562351673786325, "grad_norm": 1.96875, "learning_rate": 3.281091675110661e-06, "loss": 0.5381040573120117, "num_tokens": 6007232036.0, "step": 49220 }, { "epoch": 0.06565018212459135, "grad_norm": 2.234375, "learning_rate": 3.2824249373366755e-06, "loss": 0.5542178153991699, "num_tokens": 6009632407.0, "step": 49240 }, { "epoch": 0.06567684751131946, "grad_norm": 2.125, "learning_rate": 3.2837581995626907e-06, "loss": 0.5235449790954589, "num_tokens": 6012274996.0, "step": 49260 }, { "epoch": 0.06570351289804756, "grad_norm": 2.40625, "learning_rate": 3.285091461788705e-06, "loss": 0.5436861991882325, "num_tokens": 6015035744.0, "step": 49280 }, { "epoch": 0.06573017828477566, "grad_norm": 1.7890625, "learning_rate": 3.2864247240147196e-06, "loss": 0.534356164932251, "num_tokens": 6017471816.0, "step": 49300 }, { "epoch": 0.06575684367150376, "grad_norm": 2.375, "learning_rate": 3.287757986240734e-06, "loss": 0.5406752109527588, "num_tokens": 6020033454.0, "step": 49320 }, { "epoch": 0.06578350905823187, "grad_norm": 1.78125, "learning_rate": 3.2890912484667486e-06, "loss": 0.5413959503173829, "num_tokens": 6022492083.0, "step": 49340 }, { "epoch": 0.06581017444495997, "grad_norm": 2.5, "learning_rate": 3.2904245106927633e-06, "loss": 0.5382605075836182, "num_tokens": 6024679399.0, "step": 49360 }, { "epoch": 0.06583683983168807, "grad_norm": 2.15625, "learning_rate": 3.2917577729187776e-06, "loss": 0.5314518451690674, "num_tokens": 6027178427.0, "step": 49380 }, { "epoch": 0.06586350521841618, "grad_norm": 1.7734375, "learning_rate": 3.2930910351447927e-06, "loss": 0.543619966506958, "num_tokens": 6029843422.0, "step": 49400 }, { "epoch": 0.06589017060514428, "grad_norm": 1.859375, "learning_rate": 3.2944242973708074e-06, "loss": 0.5367343902587891, "num_tokens": 6032208268.0, "step": 49420 }, { "epoch": 0.0659168359918724, "grad_norm": 2.34375, "learning_rate": 3.2957575595968217e-06, "loss": 0.5397954463958741, "num_tokens": 6034861562.0, "step": 49440 }, { "epoch": 0.0659435013786005, "grad_norm": 1.765625, "learning_rate": 3.2970908218228364e-06, "loss": 0.5235308647155762, "num_tokens": 6037155547.0, "step": 49460 }, { "epoch": 0.0659701667653286, "grad_norm": 1.8203125, "learning_rate": 3.2984240840488507e-06, "loss": 0.5400534629821777, "num_tokens": 6039551664.0, "step": 49480 }, { "epoch": 0.0659968321520567, "grad_norm": 2.015625, "learning_rate": 3.2997573462748654e-06, "loss": 0.5258391380310059, "num_tokens": 6042189201.0, "step": 49500 }, { "epoch": 0.06602349753878481, "grad_norm": 2.15625, "learning_rate": 3.3010906085008805e-06, "loss": 0.5516746520996094, "num_tokens": 6044610965.0, "step": 49520 }, { "epoch": 0.06605016292551291, "grad_norm": 2.21875, "learning_rate": 3.3024238707268948e-06, "loss": 0.5690983772277832, "num_tokens": 6047120353.0, "step": 49540 }, { "epoch": 0.06607682831224101, "grad_norm": 2.140625, "learning_rate": 3.3037571329529095e-06, "loss": 0.542128610610962, "num_tokens": 6049538811.0, "step": 49560 }, { "epoch": 0.06610349369896912, "grad_norm": 2.0625, "learning_rate": 3.305090395178924e-06, "loss": 0.5420652866363526, "num_tokens": 6052120659.0, "step": 49580 }, { "epoch": 0.06613015908569722, "grad_norm": 2.078125, "learning_rate": 3.3064236574049385e-06, "loss": 0.5361234664916992, "num_tokens": 6054540271.0, "step": 49600 }, { "epoch": 0.06615682447242532, "grad_norm": 2.4375, "learning_rate": 3.307756919630953e-06, "loss": 0.5400485992431641, "num_tokens": 6056979187.0, "step": 49620 }, { "epoch": 0.06618348985915343, "grad_norm": 2.125, "learning_rate": 3.3090901818569683e-06, "loss": 0.5313386917114258, "num_tokens": 6059379605.0, "step": 49640 }, { "epoch": 0.06621015524588153, "grad_norm": 2.296875, "learning_rate": 3.3104234440829826e-06, "loss": 0.5389875411987305, "num_tokens": 6061725573.0, "step": 49660 }, { "epoch": 0.06623682063260963, "grad_norm": 2.046875, "learning_rate": 3.3117567063089973e-06, "loss": 0.543066930770874, "num_tokens": 6064083585.0, "step": 49680 }, { "epoch": 0.06626348601933774, "grad_norm": 2.109375, "learning_rate": 3.3130899685350115e-06, "loss": 0.5317746639251709, "num_tokens": 6066574189.0, "step": 49700 }, { "epoch": 0.06629015140606584, "grad_norm": 2.125, "learning_rate": 3.3144232307610262e-06, "loss": 0.5334152698516845, "num_tokens": 6069138360.0, "step": 49720 }, { "epoch": 0.06631681679279394, "grad_norm": 1.984375, "learning_rate": 3.3157564929870414e-06, "loss": 0.5356434345245361, "num_tokens": 6071638127.0, "step": 49740 }, { "epoch": 0.06634348217952205, "grad_norm": 1.6640625, "learning_rate": 3.3170897552130552e-06, "loss": 0.5333372592926026, "num_tokens": 6073931919.0, "step": 49760 }, { "epoch": 0.06637014756625015, "grad_norm": 1.734375, "learning_rate": 3.3184230174390704e-06, "loss": 0.5367679595947266, "num_tokens": 6076505297.0, "step": 49780 }, { "epoch": 0.06639681295297825, "grad_norm": 1.640625, "learning_rate": 3.319756279665085e-06, "loss": 0.5375979900360107, "num_tokens": 6078994052.0, "step": 49800 }, { "epoch": 0.06642347833970635, "grad_norm": 2.640625, "learning_rate": 3.3210895418910993e-06, "loss": 0.532705545425415, "num_tokens": 6081414936.0, "step": 49820 }, { "epoch": 0.06645014372643447, "grad_norm": 2.09375, "learning_rate": 3.322422804117114e-06, "loss": 0.5391650676727295, "num_tokens": 6083863753.0, "step": 49840 }, { "epoch": 0.06647680911316257, "grad_norm": 2.296875, "learning_rate": 3.3237560663431283e-06, "loss": 0.530933952331543, "num_tokens": 6086234237.0, "step": 49860 }, { "epoch": 0.06650347449989068, "grad_norm": 2.140625, "learning_rate": 3.3250893285691434e-06, "loss": 0.5451740741729736, "num_tokens": 6088528606.0, "step": 49880 }, { "epoch": 0.06653013988661878, "grad_norm": 1.7578125, "learning_rate": 3.326422590795158e-06, "loss": 0.5356489658355713, "num_tokens": 6090876684.0, "step": 49900 }, { "epoch": 0.06655680527334688, "grad_norm": 2.296875, "learning_rate": 3.3277558530211724e-06, "loss": 0.5318803787231445, "num_tokens": 6093272345.0, "step": 49920 }, { "epoch": 0.06658347066007499, "grad_norm": 2.1875, "learning_rate": 3.329089115247187e-06, "loss": 0.5519186019897461, "num_tokens": 6095662908.0, "step": 49940 }, { "epoch": 0.06661013604680309, "grad_norm": 1.6953125, "learning_rate": 3.330422377473202e-06, "loss": 0.511446762084961, "num_tokens": 6098046265.0, "step": 49960 }, { "epoch": 0.06663680143353119, "grad_norm": 2.125, "learning_rate": 3.331755639699216e-06, "loss": 0.5272444725036621, "num_tokens": 6100471543.0, "step": 49980 }, { "epoch": 0.0666634668202593, "grad_norm": 2.15625, "learning_rate": 3.3330889019252312e-06, "loss": 0.5399771690368652, "num_tokens": 6102731756.0, "step": 50000 }, { "epoch": 0.0666901322069874, "grad_norm": 2.015625, "learning_rate": 3.3344221641512455e-06, "loss": 0.5401214599609375, "num_tokens": 6105186845.0, "step": 50020 }, { "epoch": 0.0667167975937155, "grad_norm": 2.1875, "learning_rate": 3.33575542637726e-06, "loss": 0.5489205360412598, "num_tokens": 6107442596.0, "step": 50040 }, { "epoch": 0.0667434629804436, "grad_norm": 1.84375, "learning_rate": 3.337088688603275e-06, "loss": 0.5416342735290527, "num_tokens": 6109996673.0, "step": 50060 }, { "epoch": 0.06677012836717171, "grad_norm": 2.25, "learning_rate": 3.338421950829289e-06, "loss": 0.5305962085723877, "num_tokens": 6112547560.0, "step": 50080 }, { "epoch": 0.06679679375389981, "grad_norm": 2.03125, "learning_rate": 3.339755213055304e-06, "loss": 0.5438419342041015, "num_tokens": 6114832506.0, "step": 50100 }, { "epoch": 0.06682345914062791, "grad_norm": 2.390625, "learning_rate": 3.341088475281319e-06, "loss": 0.552715253829956, "num_tokens": 6117181012.0, "step": 50120 }, { "epoch": 0.06685012452735602, "grad_norm": 2.109375, "learning_rate": 3.3424217375073333e-06, "loss": 0.5323488235473632, "num_tokens": 6119519668.0, "step": 50140 }, { "epoch": 0.06687678991408412, "grad_norm": 2.0625, "learning_rate": 3.343754999733348e-06, "loss": 0.5190932273864746, "num_tokens": 6122127815.0, "step": 50160 }, { "epoch": 0.06690345530081222, "grad_norm": 1.8671875, "learning_rate": 3.3450882619593623e-06, "loss": 0.5390474319458007, "num_tokens": 6124673348.0, "step": 50180 }, { "epoch": 0.06693012068754033, "grad_norm": 2.1875, "learning_rate": 3.346421524185377e-06, "loss": 0.5331098556518554, "num_tokens": 6127122522.0, "step": 50200 }, { "epoch": 0.06695678607426843, "grad_norm": 1.9765625, "learning_rate": 3.3477547864113917e-06, "loss": 0.537296199798584, "num_tokens": 6129587964.0, "step": 50220 }, { "epoch": 0.06698345146099653, "grad_norm": 2.125, "learning_rate": 3.349088048637406e-06, "loss": 0.5311196327209473, "num_tokens": 6131866708.0, "step": 50240 }, { "epoch": 0.06701011684772465, "grad_norm": 1.9296875, "learning_rate": 3.350421310863421e-06, "loss": 0.5339411735534668, "num_tokens": 6134252973.0, "step": 50260 }, { "epoch": 0.06703678223445275, "grad_norm": 2.0, "learning_rate": 3.3517545730894358e-06, "loss": 0.5289644718170166, "num_tokens": 6136769896.0, "step": 50280 }, { "epoch": 0.06706344762118086, "grad_norm": 1.890625, "learning_rate": 3.35308783531545e-06, "loss": 0.5395390510559082, "num_tokens": 6139446576.0, "step": 50300 }, { "epoch": 0.06709011300790896, "grad_norm": 1.703125, "learning_rate": 3.3544210975414648e-06, "loss": 0.5436649322509766, "num_tokens": 6141753620.0, "step": 50320 }, { "epoch": 0.06711677839463706, "grad_norm": 2.28125, "learning_rate": 3.355754359767479e-06, "loss": 0.5549263954162598, "num_tokens": 6144223830.0, "step": 50340 }, { "epoch": 0.06714344378136516, "grad_norm": 1.90625, "learning_rate": 3.3570876219934937e-06, "loss": 0.5440001964569092, "num_tokens": 6146564544.0, "step": 50360 }, { "epoch": 0.06717010916809327, "grad_norm": 1.8671875, "learning_rate": 3.358420884219509e-06, "loss": 0.5221111297607421, "num_tokens": 6148917972.0, "step": 50380 }, { "epoch": 0.06719677455482137, "grad_norm": 1.9375, "learning_rate": 3.359754146445523e-06, "loss": 0.5262907028198243, "num_tokens": 6151450664.0, "step": 50400 }, { "epoch": 0.06722343994154947, "grad_norm": 2.03125, "learning_rate": 3.361087408671538e-06, "loss": 0.5438090324401855, "num_tokens": 6153774548.0, "step": 50420 }, { "epoch": 0.06725010532827758, "grad_norm": 1.5078125, "learning_rate": 3.3624206708975525e-06, "loss": 0.5361207008361817, "num_tokens": 6156365859.0, "step": 50440 }, { "epoch": 0.06727677071500568, "grad_norm": 1.7421875, "learning_rate": 3.363753933123567e-06, "loss": 0.539070987701416, "num_tokens": 6158741131.0, "step": 50460 }, { "epoch": 0.06730343610173378, "grad_norm": 2.09375, "learning_rate": 3.3650871953495815e-06, "loss": 0.5652263641357422, "num_tokens": 6161060767.0, "step": 50480 }, { "epoch": 0.06733010148846189, "grad_norm": 1.8203125, "learning_rate": 3.366420457575596e-06, "loss": 0.5453006744384765, "num_tokens": 6163535790.0, "step": 50500 }, { "epoch": 0.06735676687518999, "grad_norm": 1.765625, "learning_rate": 3.367753719801611e-06, "loss": 0.5390388011932373, "num_tokens": 6166178364.0, "step": 50520 }, { "epoch": 0.06738343226191809, "grad_norm": 2.109375, "learning_rate": 3.3690869820276256e-06, "loss": 0.5039910793304443, "num_tokens": 6168438644.0, "step": 50540 }, { "epoch": 0.0674100976486462, "grad_norm": 2.40625, "learning_rate": 3.37042024425364e-06, "loss": 0.5357933521270752, "num_tokens": 6171105415.0, "step": 50560 }, { "epoch": 0.0674367630353743, "grad_norm": 2.140625, "learning_rate": 3.3717535064796546e-06, "loss": 0.5534553527832031, "num_tokens": 6173575224.0, "step": 50580 }, { "epoch": 0.0674634284221024, "grad_norm": 1.7421875, "learning_rate": 3.3730867687056697e-06, "loss": 0.5137944698333741, "num_tokens": 6175937249.0, "step": 50600 }, { "epoch": 0.0674900938088305, "grad_norm": 1.921875, "learning_rate": 3.374420030931684e-06, "loss": 0.5212956428527832, "num_tokens": 6178265665.0, "step": 50620 }, { "epoch": 0.0675167591955586, "grad_norm": 2.09375, "learning_rate": 3.3757532931576987e-06, "loss": 0.5364024639129639, "num_tokens": 6180614897.0, "step": 50640 }, { "epoch": 0.06754342458228672, "grad_norm": 1.890625, "learning_rate": 3.377086555383713e-06, "loss": 0.5398343086242676, "num_tokens": 6183189350.0, "step": 50660 }, { "epoch": 0.06757008996901483, "grad_norm": 1.7421875, "learning_rate": 3.3784198176097277e-06, "loss": 0.5476925373077393, "num_tokens": 6185527230.0, "step": 50680 }, { "epoch": 0.06759675535574293, "grad_norm": 1.796875, "learning_rate": 3.3797530798357424e-06, "loss": 0.5285971641540528, "num_tokens": 6187889901.0, "step": 50700 }, { "epoch": 0.06762342074247103, "grad_norm": 1.9453125, "learning_rate": 3.3810863420617567e-06, "loss": 0.5271186828613281, "num_tokens": 6190217620.0, "step": 50720 }, { "epoch": 0.06765008612919914, "grad_norm": 2.453125, "learning_rate": 3.382419604287772e-06, "loss": 0.5204741954803467, "num_tokens": 6192703462.0, "step": 50740 }, { "epoch": 0.06767675151592724, "grad_norm": 1.984375, "learning_rate": 3.3837528665137865e-06, "loss": 0.5225767135620117, "num_tokens": 6194993411.0, "step": 50760 }, { "epoch": 0.06770341690265534, "grad_norm": 1.71875, "learning_rate": 3.3850861287398008e-06, "loss": 0.5448431968688965, "num_tokens": 6197542960.0, "step": 50780 }, { "epoch": 0.06773008228938344, "grad_norm": 2.21875, "learning_rate": 3.3864193909658155e-06, "loss": 0.5342453956604004, "num_tokens": 6199889749.0, "step": 50800 }, { "epoch": 0.06775674767611155, "grad_norm": 2.046875, "learning_rate": 3.3877526531918297e-06, "loss": 0.5389886379241944, "num_tokens": 6202662612.0, "step": 50820 }, { "epoch": 0.06778341306283965, "grad_norm": 2.3125, "learning_rate": 3.3890859154178444e-06, "loss": 0.5358201503753662, "num_tokens": 6204911643.0, "step": 50840 }, { "epoch": 0.06781007844956775, "grad_norm": 2.0625, "learning_rate": 3.3904191776438596e-06, "loss": 0.5354863166809082, "num_tokens": 6207333239.0, "step": 50860 }, { "epoch": 0.06783674383629586, "grad_norm": 1.921875, "learning_rate": 3.391752439869874e-06, "loss": 0.540461254119873, "num_tokens": 6209727786.0, "step": 50880 }, { "epoch": 0.06786340922302396, "grad_norm": 1.8125, "learning_rate": 3.3930857020958886e-06, "loss": 0.5464599132537842, "num_tokens": 6212127315.0, "step": 50900 }, { "epoch": 0.06789007460975206, "grad_norm": 2.125, "learning_rate": 3.3944189643219033e-06, "loss": 0.5135023117065429, "num_tokens": 6214428000.0, "step": 50920 }, { "epoch": 0.06791673999648017, "grad_norm": 2.125, "learning_rate": 3.3957522265479175e-06, "loss": 0.538272762298584, "num_tokens": 6216932814.0, "step": 50940 }, { "epoch": 0.06794340538320827, "grad_norm": 2.1875, "learning_rate": 3.3970854887739322e-06, "loss": 0.5500303745269776, "num_tokens": 6219159025.0, "step": 50960 }, { "epoch": 0.06797007076993637, "grad_norm": 2.296875, "learning_rate": 3.3984187509999474e-06, "loss": 0.5310359001159668, "num_tokens": 6221535946.0, "step": 50980 }, { "epoch": 0.06799673615666448, "grad_norm": 2.015625, "learning_rate": 3.3997520132259616e-06, "loss": 0.5272633075714112, "num_tokens": 6224068877.0, "step": 51000 }, { "epoch": 0.06802340154339258, "grad_norm": 2.09375, "learning_rate": 3.4010852754519763e-06, "loss": 0.550225830078125, "num_tokens": 6226388656.0, "step": 51020 }, { "epoch": 0.06805006693012068, "grad_norm": 2.15625, "learning_rate": 3.4024185376779906e-06, "loss": 0.5348897933959961, "num_tokens": 6228667674.0, "step": 51040 }, { "epoch": 0.0680767323168488, "grad_norm": 2.09375, "learning_rate": 3.4037517999040053e-06, "loss": 0.5255912780761719, "num_tokens": 6231238882.0, "step": 51060 }, { "epoch": 0.0681033977035769, "grad_norm": 2.125, "learning_rate": 3.40508506213002e-06, "loss": 0.54605712890625, "num_tokens": 6233781721.0, "step": 51080 }, { "epoch": 0.068130063090305, "grad_norm": 2.140625, "learning_rate": 3.4064183243560343e-06, "loss": 0.5244327545166015, "num_tokens": 6236226884.0, "step": 51100 }, { "epoch": 0.06815672847703311, "grad_norm": 2.0625, "learning_rate": 3.4077515865820494e-06, "loss": 0.5306081771850586, "num_tokens": 6238800370.0, "step": 51120 }, { "epoch": 0.06818339386376121, "grad_norm": 2.15625, "learning_rate": 3.409084848808064e-06, "loss": 0.5483811855316162, "num_tokens": 6241361660.0, "step": 51140 }, { "epoch": 0.06821005925048931, "grad_norm": 1.890625, "learning_rate": 3.4104181110340784e-06, "loss": 0.5210658073425293, "num_tokens": 6243709425.0, "step": 51160 }, { "epoch": 0.06823672463721742, "grad_norm": 2.390625, "learning_rate": 3.411751373260093e-06, "loss": 0.5389307022094727, "num_tokens": 6246101885.0, "step": 51180 }, { "epoch": 0.06826339002394552, "grad_norm": 1.7890625, "learning_rate": 3.4130846354861074e-06, "loss": 0.5190773963928222, "num_tokens": 6248793128.0, "step": 51200 }, { "epoch": 0.06829005541067362, "grad_norm": 2.109375, "learning_rate": 3.414417897712122e-06, "loss": 0.5484114646911621, "num_tokens": 6251163251.0, "step": 51220 }, { "epoch": 0.06831672079740173, "grad_norm": 2.296875, "learning_rate": 3.415751159938137e-06, "loss": 0.5284151077270508, "num_tokens": 6253716815.0, "step": 51240 }, { "epoch": 0.06834338618412983, "grad_norm": 1.578125, "learning_rate": 3.4170844221641515e-06, "loss": 0.53983154296875, "num_tokens": 6256393073.0, "step": 51260 }, { "epoch": 0.06837005157085793, "grad_norm": 1.8046875, "learning_rate": 3.418417684390166e-06, "loss": 0.543643569946289, "num_tokens": 6258838328.0, "step": 51280 }, { "epoch": 0.06839671695758603, "grad_norm": 1.984375, "learning_rate": 3.419750946616181e-06, "loss": 0.5212943077087402, "num_tokens": 6261193587.0, "step": 51300 }, { "epoch": 0.06842338234431414, "grad_norm": 2.515625, "learning_rate": 3.421084208842195e-06, "loss": 0.5428386688232422, "num_tokens": 6263448911.0, "step": 51320 }, { "epoch": 0.06845004773104224, "grad_norm": 1.8203125, "learning_rate": 3.4224174710682103e-06, "loss": 0.5420759201049805, "num_tokens": 6265862560.0, "step": 51340 }, { "epoch": 0.06847671311777034, "grad_norm": 2.265625, "learning_rate": 3.4237507332942246e-06, "loss": 0.5300112247467041, "num_tokens": 6268291399.0, "step": 51360 }, { "epoch": 0.06850337850449845, "grad_norm": 2.140625, "learning_rate": 3.4250839955202393e-06, "loss": 0.5144976615905762, "num_tokens": 6270758155.0, "step": 51380 }, { "epoch": 0.06853004389122655, "grad_norm": 1.921875, "learning_rate": 3.426417257746254e-06, "loss": 0.530122184753418, "num_tokens": 6273333044.0, "step": 51400 }, { "epoch": 0.06855670927795465, "grad_norm": 2.078125, "learning_rate": 3.4277505199722683e-06, "loss": 0.5373003005981445, "num_tokens": 6276050981.0, "step": 51420 }, { "epoch": 0.06858337466468276, "grad_norm": 1.578125, "learning_rate": 3.429083782198283e-06, "loss": 0.5274077415466308, "num_tokens": 6278410161.0, "step": 51440 }, { "epoch": 0.06861004005141086, "grad_norm": 1.8046875, "learning_rate": 3.430417044424298e-06, "loss": 0.5364313125610352, "num_tokens": 6280920437.0, "step": 51460 }, { "epoch": 0.06863670543813898, "grad_norm": 1.9609375, "learning_rate": 3.4317503066503124e-06, "loss": 0.5253308773040771, "num_tokens": 6283204390.0, "step": 51480 }, { "epoch": 0.06866337082486708, "grad_norm": 2.0, "learning_rate": 3.433083568876327e-06, "loss": 0.529520320892334, "num_tokens": 6285523190.0, "step": 51500 }, { "epoch": 0.06869003621159518, "grad_norm": 1.984375, "learning_rate": 3.4344168311023413e-06, "loss": 0.527641773223877, "num_tokens": 6287857052.0, "step": 51520 }, { "epoch": 0.06871670159832328, "grad_norm": 2.03125, "learning_rate": 3.435750093328356e-06, "loss": 0.5280624389648437, "num_tokens": 6290313083.0, "step": 51540 }, { "epoch": 0.06874336698505139, "grad_norm": 2.140625, "learning_rate": 3.4370833555543707e-06, "loss": 0.5341241836547852, "num_tokens": 6292843153.0, "step": 51560 }, { "epoch": 0.06877003237177949, "grad_norm": 2.078125, "learning_rate": 3.438416617780385e-06, "loss": 0.5156126022338867, "num_tokens": 6295202950.0, "step": 51580 }, { "epoch": 0.0687966977585076, "grad_norm": 2.015625, "learning_rate": 3.4397498800064e-06, "loss": 0.5342660903930664, "num_tokens": 6297605531.0, "step": 51600 }, { "epoch": 0.0688233631452357, "grad_norm": 1.9140625, "learning_rate": 3.441083142232415e-06, "loss": 0.530165147781372, "num_tokens": 6300075223.0, "step": 51620 }, { "epoch": 0.0688500285319638, "grad_norm": 1.8984375, "learning_rate": 3.442416404458429e-06, "loss": 0.5250254154205323, "num_tokens": 6302557782.0, "step": 51640 }, { "epoch": 0.0688766939186919, "grad_norm": 2.0625, "learning_rate": 3.443749666684444e-06, "loss": 0.5313936710357666, "num_tokens": 6304903373.0, "step": 51660 }, { "epoch": 0.06890335930542, "grad_norm": 1.7890625, "learning_rate": 3.445082928910458e-06, "loss": 0.5272002220153809, "num_tokens": 6307197666.0, "step": 51680 }, { "epoch": 0.06893002469214811, "grad_norm": 1.9375, "learning_rate": 3.446416191136473e-06, "loss": 0.517494010925293, "num_tokens": 6309736579.0, "step": 51700 }, { "epoch": 0.06895669007887621, "grad_norm": 1.78125, "learning_rate": 3.447749453362488e-06, "loss": 0.5222366333007813, "num_tokens": 6312017308.0, "step": 51720 }, { "epoch": 0.06898335546560432, "grad_norm": 1.984375, "learning_rate": 3.449082715588502e-06, "loss": 0.5291889190673829, "num_tokens": 6314477345.0, "step": 51740 }, { "epoch": 0.06901002085233242, "grad_norm": 1.8125, "learning_rate": 3.450415977814517e-06, "loss": 0.5325630664825439, "num_tokens": 6316926558.0, "step": 51760 }, { "epoch": 0.06903668623906052, "grad_norm": 2.4375, "learning_rate": 3.4517492400405316e-06, "loss": 0.5500381469726563, "num_tokens": 6319433056.0, "step": 51780 }, { "epoch": 0.06906335162578862, "grad_norm": 1.984375, "learning_rate": 3.453082502266546e-06, "loss": 0.5352190017700196, "num_tokens": 6321979448.0, "step": 51800 }, { "epoch": 0.06909001701251673, "grad_norm": 2.375, "learning_rate": 3.4544157644925606e-06, "loss": 0.5427927017211914, "num_tokens": 6324143495.0, "step": 51820 }, { "epoch": 0.06911668239924483, "grad_norm": 2.3125, "learning_rate": 3.455749026718575e-06, "loss": 0.5348101615905761, "num_tokens": 6326856021.0, "step": 51840 }, { "epoch": 0.06914334778597293, "grad_norm": 2.25, "learning_rate": 3.45708228894459e-06, "loss": 0.5364373683929443, "num_tokens": 6329495805.0, "step": 51860 }, { "epoch": 0.06917001317270105, "grad_norm": 2.375, "learning_rate": 3.4584155511706047e-06, "loss": 0.5255616188049317, "num_tokens": 6332087861.0, "step": 51880 }, { "epoch": 0.06919667855942915, "grad_norm": 1.78125, "learning_rate": 3.459748813396619e-06, "loss": 0.519011116027832, "num_tokens": 6334491557.0, "step": 51900 }, { "epoch": 0.06922334394615726, "grad_norm": 1.78125, "learning_rate": 3.4610820756226337e-06, "loss": 0.5338984489440918, "num_tokens": 6336854343.0, "step": 51920 }, { "epoch": 0.06925000933288536, "grad_norm": 2.15625, "learning_rate": 3.4624153378486484e-06, "loss": 0.5401854038238525, "num_tokens": 6339175250.0, "step": 51940 }, { "epoch": 0.06927667471961346, "grad_norm": 2.296875, "learning_rate": 3.4637486000746627e-06, "loss": 0.5291834354400635, "num_tokens": 6341740923.0, "step": 51960 }, { "epoch": 0.06930334010634157, "grad_norm": 1.9609375, "learning_rate": 3.4650818623006778e-06, "loss": 0.5459893703460693, "num_tokens": 6344342751.0, "step": 51980 }, { "epoch": 0.06933000549306967, "grad_norm": 2.078125, "learning_rate": 3.466415124526692e-06, "loss": 0.5322757244110108, "num_tokens": 6346790062.0, "step": 52000 }, { "epoch": 0.06935667087979777, "grad_norm": 2.328125, "learning_rate": 3.4677483867527068e-06, "loss": 0.5300116539001465, "num_tokens": 6349113754.0, "step": 52020 }, { "epoch": 0.06938333626652587, "grad_norm": 2.171875, "learning_rate": 3.4690816489787215e-06, "loss": 0.5385175704956054, "num_tokens": 6351751245.0, "step": 52040 }, { "epoch": 0.06941000165325398, "grad_norm": 1.7109375, "learning_rate": 3.4704149112047357e-06, "loss": 0.5387996673583985, "num_tokens": 6354075252.0, "step": 52060 }, { "epoch": 0.06943666703998208, "grad_norm": 2.140625, "learning_rate": 3.471748173430751e-06, "loss": 0.5283732414245605, "num_tokens": 6356504313.0, "step": 52080 }, { "epoch": 0.06946333242671018, "grad_norm": 1.6484375, "learning_rate": 3.4730814356567656e-06, "loss": 0.5266189575195312, "num_tokens": 6359128057.0, "step": 52100 }, { "epoch": 0.06948999781343829, "grad_norm": 1.9765625, "learning_rate": 3.47441469788278e-06, "loss": 0.528836727142334, "num_tokens": 6361571226.0, "step": 52120 }, { "epoch": 0.06951666320016639, "grad_norm": 1.6875, "learning_rate": 3.4757479601087945e-06, "loss": 0.5075318813323975, "num_tokens": 6364235810.0, "step": 52140 }, { "epoch": 0.06954332858689449, "grad_norm": 1.6640625, "learning_rate": 3.477081222334809e-06, "loss": 0.5325857162475586, "num_tokens": 6366884349.0, "step": 52160 }, { "epoch": 0.0695699939736226, "grad_norm": 1.8515625, "learning_rate": 3.4784144845608235e-06, "loss": 0.5483675003051758, "num_tokens": 6369498491.0, "step": 52180 }, { "epoch": 0.0695966593603507, "grad_norm": 1.8359375, "learning_rate": 3.4797477467868386e-06, "loss": 0.5422984600067139, "num_tokens": 6371969729.0, "step": 52200 }, { "epoch": 0.0696233247470788, "grad_norm": 1.921875, "learning_rate": 3.481081009012853e-06, "loss": 0.5339406967163086, "num_tokens": 6374440277.0, "step": 52220 }, { "epoch": 0.0696499901338069, "grad_norm": 2.421875, "learning_rate": 3.4824142712388676e-06, "loss": 0.5331489562988281, "num_tokens": 6376821221.0, "step": 52240 }, { "epoch": 0.06967665552053501, "grad_norm": 2.1875, "learning_rate": 3.4837475334648823e-06, "loss": 0.5229334354400634, "num_tokens": 6379582623.0, "step": 52260 }, { "epoch": 0.06970332090726312, "grad_norm": 2.171875, "learning_rate": 3.4850807956908966e-06, "loss": 0.5375972747802734, "num_tokens": 6382140060.0, "step": 52280 }, { "epoch": 0.06972998629399123, "grad_norm": 1.921875, "learning_rate": 3.4864140579169113e-06, "loss": 0.5326677799224854, "num_tokens": 6384444744.0, "step": 52300 }, { "epoch": 0.06975665168071933, "grad_norm": 1.9609375, "learning_rate": 3.4877473201429256e-06, "loss": 0.5213186264038085, "num_tokens": 6386876213.0, "step": 52320 }, { "epoch": 0.06978331706744743, "grad_norm": 2.15625, "learning_rate": 3.4890805823689407e-06, "loss": 0.5468411445617676, "num_tokens": 6389355157.0, "step": 52340 }, { "epoch": 0.06980998245417554, "grad_norm": 2.015625, "learning_rate": 3.4904138445949554e-06, "loss": 0.5211469650268554, "num_tokens": 6391505604.0, "step": 52360 }, { "epoch": 0.06983664784090364, "grad_norm": 1.8359375, "learning_rate": 3.4917471068209697e-06, "loss": 0.535191535949707, "num_tokens": 6393874936.0, "step": 52380 }, { "epoch": 0.06986331322763174, "grad_norm": 1.953125, "learning_rate": 3.4930803690469844e-06, "loss": 0.5314069747924804, "num_tokens": 6396490149.0, "step": 52400 }, { "epoch": 0.06988997861435985, "grad_norm": 2.34375, "learning_rate": 3.494413631272999e-06, "loss": 0.5179256439208985, "num_tokens": 6398860524.0, "step": 52420 }, { "epoch": 0.06991664400108795, "grad_norm": 2.109375, "learning_rate": 3.4957468934990134e-06, "loss": 0.5397612571716308, "num_tokens": 6401284564.0, "step": 52440 }, { "epoch": 0.06994330938781605, "grad_norm": 1.9609375, "learning_rate": 3.4970801557250285e-06, "loss": 0.545400857925415, "num_tokens": 6403687402.0, "step": 52460 }, { "epoch": 0.06996997477454416, "grad_norm": 1.78125, "learning_rate": 3.498413417951043e-06, "loss": 0.5161273956298829, "num_tokens": 6405884790.0, "step": 52480 }, { "epoch": 0.06999664016127226, "grad_norm": 2.21875, "learning_rate": 3.4997466801770575e-06, "loss": 0.5265392303466797, "num_tokens": 6408463886.0, "step": 52500 }, { "epoch": 0.07002330554800036, "grad_norm": 2.21875, "learning_rate": 3.501079942403072e-06, "loss": 0.5130844116210938, "num_tokens": 6410927458.0, "step": 52520 }, { "epoch": 0.07004997093472846, "grad_norm": 2.015625, "learning_rate": 3.5024132046290865e-06, "loss": 0.5276058197021485, "num_tokens": 6413564943.0, "step": 52540 }, { "epoch": 0.07007663632145657, "grad_norm": 2.078125, "learning_rate": 3.503746466855101e-06, "loss": 0.5299257278442383, "num_tokens": 6415897972.0, "step": 52560 }, { "epoch": 0.07010330170818467, "grad_norm": 2.03125, "learning_rate": 3.5050797290811163e-06, "loss": 0.5248366355895996, "num_tokens": 6418356278.0, "step": 52580 }, { "epoch": 0.07012996709491277, "grad_norm": 1.8984375, "learning_rate": 3.5064129913071306e-06, "loss": 0.544515323638916, "num_tokens": 6420690838.0, "step": 52600 }, { "epoch": 0.07015663248164088, "grad_norm": 1.9921875, "learning_rate": 3.5077462535331453e-06, "loss": 0.5287735939025879, "num_tokens": 6422946778.0, "step": 52620 }, { "epoch": 0.07018329786836898, "grad_norm": 1.78125, "learning_rate": 3.50907951575916e-06, "loss": 0.49872241020202634, "num_tokens": 6425516387.0, "step": 52640 }, { "epoch": 0.07020996325509708, "grad_norm": 2.0, "learning_rate": 3.5104127779851742e-06, "loss": 0.5253126144409179, "num_tokens": 6427914800.0, "step": 52660 }, { "epoch": 0.07023662864182519, "grad_norm": 2.109375, "learning_rate": 3.511746040211189e-06, "loss": 0.519146728515625, "num_tokens": 6430272913.0, "step": 52680 }, { "epoch": 0.0702632940285533, "grad_norm": 2.5, "learning_rate": 3.5130793024372032e-06, "loss": 0.541271162033081, "num_tokens": 6432754878.0, "step": 52700 }, { "epoch": 0.0702899594152814, "grad_norm": 1.6875, "learning_rate": 3.5144125646632183e-06, "loss": 0.518506669998169, "num_tokens": 6435137111.0, "step": 52720 }, { "epoch": 0.07031662480200951, "grad_norm": 1.6484375, "learning_rate": 3.515745826889233e-06, "loss": 0.509766960144043, "num_tokens": 6437712803.0, "step": 52740 }, { "epoch": 0.07034329018873761, "grad_norm": 2.484375, "learning_rate": 3.5170790891152473e-06, "loss": 0.5421776294708252, "num_tokens": 6440393411.0, "step": 52760 }, { "epoch": 0.07036995557546571, "grad_norm": 2.3125, "learning_rate": 3.518412351341262e-06, "loss": 0.5287978649139404, "num_tokens": 6442858183.0, "step": 52780 }, { "epoch": 0.07039662096219382, "grad_norm": 2.046875, "learning_rate": 3.519745613567277e-06, "loss": 0.5185744285583496, "num_tokens": 6445368662.0, "step": 52800 }, { "epoch": 0.07042328634892192, "grad_norm": 2.484375, "learning_rate": 3.521078875793291e-06, "loss": 0.5371851921081543, "num_tokens": 6447627381.0, "step": 52820 }, { "epoch": 0.07044995173565002, "grad_norm": 2.140625, "learning_rate": 3.522412138019306e-06, "loss": 0.5249725341796875, "num_tokens": 6450281503.0, "step": 52840 }, { "epoch": 0.07047661712237813, "grad_norm": 2.28125, "learning_rate": 3.5237454002453204e-06, "loss": 0.5376410484313965, "num_tokens": 6452674394.0, "step": 52860 }, { "epoch": 0.07050328250910623, "grad_norm": 1.703125, "learning_rate": 3.525078662471335e-06, "loss": 0.5282462120056153, "num_tokens": 6455057556.0, "step": 52880 }, { "epoch": 0.07052994789583433, "grad_norm": 2.078125, "learning_rate": 3.52641192469735e-06, "loss": 0.5197206020355225, "num_tokens": 6457632438.0, "step": 52900 }, { "epoch": 0.07055661328256244, "grad_norm": 1.796875, "learning_rate": 3.527745186923364e-06, "loss": 0.509952449798584, "num_tokens": 6460431057.0, "step": 52920 }, { "epoch": 0.07058327866929054, "grad_norm": 1.7421875, "learning_rate": 3.5290784491493792e-06, "loss": 0.5386894226074219, "num_tokens": 6462717262.0, "step": 52940 }, { "epoch": 0.07060994405601864, "grad_norm": 2.265625, "learning_rate": 3.530411711375394e-06, "loss": 0.5230261325836182, "num_tokens": 6465150495.0, "step": 52960 }, { "epoch": 0.07063660944274675, "grad_norm": 1.953125, "learning_rate": 3.531744973601408e-06, "loss": 0.5266946792602539, "num_tokens": 6467499216.0, "step": 52980 }, { "epoch": 0.07066327482947485, "grad_norm": 2.125, "learning_rate": 3.533078235827423e-06, "loss": 0.5231059074401856, "num_tokens": 6469676131.0, "step": 53000 }, { "epoch": 0.07068994021620295, "grad_norm": 2.09375, "learning_rate": 3.534411498053437e-06, "loss": 0.4971872329711914, "num_tokens": 6472110026.0, "step": 53020 }, { "epoch": 0.07071660560293105, "grad_norm": 2.34375, "learning_rate": 3.535744760279452e-06, "loss": 0.5378386497497558, "num_tokens": 6474679079.0, "step": 53040 }, { "epoch": 0.07074327098965916, "grad_norm": 2.109375, "learning_rate": 3.537078022505467e-06, "loss": 0.5471437931060791, "num_tokens": 6477318731.0, "step": 53060 }, { "epoch": 0.07076993637638726, "grad_norm": 2.390625, "learning_rate": 3.5384112847314813e-06, "loss": 0.5328311920166016, "num_tokens": 6479632446.0, "step": 53080 }, { "epoch": 0.07079660176311538, "grad_norm": 1.9296875, "learning_rate": 3.539744546957496e-06, "loss": 0.5208736896514893, "num_tokens": 6481847471.0, "step": 53100 }, { "epoch": 0.07082326714984348, "grad_norm": 1.8984375, "learning_rate": 3.5410778091835107e-06, "loss": 0.5210675239562989, "num_tokens": 6484162592.0, "step": 53120 }, { "epoch": 0.07084993253657158, "grad_norm": 2.078125, "learning_rate": 3.542411071409525e-06, "loss": 0.5240483283996582, "num_tokens": 6486809840.0, "step": 53140 }, { "epoch": 0.07087659792329969, "grad_norm": 1.703125, "learning_rate": 3.5437443336355397e-06, "loss": 0.5506404876708985, "num_tokens": 6489306635.0, "step": 53160 }, { "epoch": 0.07090326331002779, "grad_norm": 1.953125, "learning_rate": 3.545077595861554e-06, "loss": 0.5337660789489747, "num_tokens": 6491687160.0, "step": 53180 }, { "epoch": 0.07092992869675589, "grad_norm": 1.9375, "learning_rate": 3.546410858087569e-06, "loss": 0.5368584632873535, "num_tokens": 6494026189.0, "step": 53200 }, { "epoch": 0.070956594083484, "grad_norm": 2.015625, "learning_rate": 3.5477441203135838e-06, "loss": 0.5300660133361816, "num_tokens": 6496516571.0, "step": 53220 }, { "epoch": 0.0709832594702121, "grad_norm": 1.609375, "learning_rate": 3.549077382539598e-06, "loss": 0.5227825164794921, "num_tokens": 6498964315.0, "step": 53240 }, { "epoch": 0.0710099248569402, "grad_norm": 2.09375, "learning_rate": 3.5504106447656127e-06, "loss": 0.518368911743164, "num_tokens": 6501383666.0, "step": 53260 }, { "epoch": 0.0710365902436683, "grad_norm": 2.03125, "learning_rate": 3.5517439069916274e-06, "loss": 0.5168637275695801, "num_tokens": 6503791941.0, "step": 53280 }, { "epoch": 0.07106325563039641, "grad_norm": 2.046875, "learning_rate": 3.5530771692176417e-06, "loss": 0.5458626747131348, "num_tokens": 6506187006.0, "step": 53300 }, { "epoch": 0.07108992101712451, "grad_norm": 1.7421875, "learning_rate": 3.554410431443657e-06, "loss": 0.5393336296081543, "num_tokens": 6508381646.0, "step": 53320 }, { "epoch": 0.07111658640385261, "grad_norm": 1.734375, "learning_rate": 3.555743693669671e-06, "loss": 0.5114860534667969, "num_tokens": 6510758138.0, "step": 53340 }, { "epoch": 0.07114325179058072, "grad_norm": 2.078125, "learning_rate": 3.557076955895686e-06, "loss": 0.5111816883087158, "num_tokens": 6513424839.0, "step": 53360 }, { "epoch": 0.07116991717730882, "grad_norm": 2.34375, "learning_rate": 3.5584102181217005e-06, "loss": 0.5214189052581787, "num_tokens": 6515900262.0, "step": 53380 }, { "epoch": 0.07119658256403692, "grad_norm": 2.515625, "learning_rate": 3.559743480347715e-06, "loss": 0.5229798316955566, "num_tokens": 6518467885.0, "step": 53400 }, { "epoch": 0.07122324795076503, "grad_norm": 1.65625, "learning_rate": 3.5610767425737295e-06, "loss": 0.5152961730957031, "num_tokens": 6520985510.0, "step": 53420 }, { "epoch": 0.07124991333749313, "grad_norm": 1.8671875, "learning_rate": 3.5624100047997446e-06, "loss": 0.526695442199707, "num_tokens": 6523548258.0, "step": 53440 }, { "epoch": 0.07127657872422123, "grad_norm": 2.046875, "learning_rate": 3.563743267025759e-06, "loss": 0.5144525051116944, "num_tokens": 6526170006.0, "step": 53460 }, { "epoch": 0.07130324411094933, "grad_norm": 2.078125, "learning_rate": 3.5650765292517736e-06, "loss": 0.5168797016143799, "num_tokens": 6528662785.0, "step": 53480 }, { "epoch": 0.07132990949767745, "grad_norm": 2.296875, "learning_rate": 3.566409791477788e-06, "loss": 0.5269142150878906, "num_tokens": 6531071478.0, "step": 53500 }, { "epoch": 0.07135657488440555, "grad_norm": 2.28125, "learning_rate": 3.5677430537038026e-06, "loss": 0.523742389678955, "num_tokens": 6533392272.0, "step": 53520 }, { "epoch": 0.07138324027113366, "grad_norm": 1.9453125, "learning_rate": 3.5690763159298177e-06, "loss": 0.5365725517272949, "num_tokens": 6535605040.0, "step": 53540 }, { "epoch": 0.07140990565786176, "grad_norm": 2.25, "learning_rate": 3.5704095781558316e-06, "loss": 0.5205569267272949, "num_tokens": 6538027476.0, "step": 53560 }, { "epoch": 0.07143657104458986, "grad_norm": 2.0625, "learning_rate": 3.5717428403818467e-06, "loss": 0.5226686000823975, "num_tokens": 6540333138.0, "step": 53580 }, { "epoch": 0.07146323643131797, "grad_norm": 2.59375, "learning_rate": 3.5730761026078614e-06, "loss": 0.49932184219360354, "num_tokens": 6542624647.0, "step": 53600 }, { "epoch": 0.07148990181804607, "grad_norm": 1.90625, "learning_rate": 3.5744093648338757e-06, "loss": 0.5195679664611816, "num_tokens": 6545103803.0, "step": 53620 }, { "epoch": 0.07151656720477417, "grad_norm": 2.46875, "learning_rate": 3.5757426270598904e-06, "loss": 0.5313693046569824, "num_tokens": 6547663535.0, "step": 53640 }, { "epoch": 0.07154323259150228, "grad_norm": 2.0, "learning_rate": 3.5770758892859047e-06, "loss": 0.5278656005859375, "num_tokens": 6550211377.0, "step": 53660 }, { "epoch": 0.07156989797823038, "grad_norm": 1.9140625, "learning_rate": 3.5784091515119198e-06, "loss": 0.5351712226867675, "num_tokens": 6552679601.0, "step": 53680 }, { "epoch": 0.07159656336495848, "grad_norm": 1.625, "learning_rate": 3.5797424137379345e-06, "loss": 0.5356712341308594, "num_tokens": 6555156994.0, "step": 53700 }, { "epoch": 0.07162322875168659, "grad_norm": 2.015625, "learning_rate": 3.5810756759639488e-06, "loss": 0.512370252609253, "num_tokens": 6557545931.0, "step": 53720 }, { "epoch": 0.07164989413841469, "grad_norm": 2.109375, "learning_rate": 3.5824089381899635e-06, "loss": 0.5244126319885254, "num_tokens": 6559960639.0, "step": 53740 }, { "epoch": 0.07167655952514279, "grad_norm": 2.328125, "learning_rate": 3.583742200415978e-06, "loss": 0.507620906829834, "num_tokens": 6562592799.0, "step": 53760 }, { "epoch": 0.0717032249118709, "grad_norm": 2.390625, "learning_rate": 3.5850754626419924e-06, "loss": 0.5208851814270019, "num_tokens": 6564896059.0, "step": 53780 }, { "epoch": 0.071729890298599, "grad_norm": 2.375, "learning_rate": 3.5864087248680076e-06, "loss": 0.5271176815032959, "num_tokens": 6567413916.0, "step": 53800 }, { "epoch": 0.0717565556853271, "grad_norm": 2.25, "learning_rate": 3.5877419870940223e-06, "loss": 0.5291646957397461, "num_tokens": 6569744507.0, "step": 53820 }, { "epoch": 0.0717832210720552, "grad_norm": 1.84375, "learning_rate": 3.5890752493200365e-06, "loss": 0.5126989841461181, "num_tokens": 6572323970.0, "step": 53840 }, { "epoch": 0.0718098864587833, "grad_norm": 1.6484375, "learning_rate": 3.5904085115460513e-06, "loss": 0.5271441459655761, "num_tokens": 6574927325.0, "step": 53860 }, { "epoch": 0.07183655184551141, "grad_norm": 2.09375, "learning_rate": 3.5917417737720655e-06, "loss": 0.5249778747558593, "num_tokens": 6577507284.0, "step": 53880 }, { "epoch": 0.07186321723223951, "grad_norm": 2.171875, "learning_rate": 3.5930750359980802e-06, "loss": 0.531436538696289, "num_tokens": 6579844473.0, "step": 53900 }, { "epoch": 0.07188988261896763, "grad_norm": 1.9765625, "learning_rate": 3.5944082982240954e-06, "loss": 0.5153876781463623, "num_tokens": 6582183139.0, "step": 53920 }, { "epoch": 0.07191654800569573, "grad_norm": 2.125, "learning_rate": 3.5957415604501096e-06, "loss": 0.5239740371704101, "num_tokens": 6584605502.0, "step": 53940 }, { "epoch": 0.07194321339242384, "grad_norm": 2.015625, "learning_rate": 3.5970748226761243e-06, "loss": 0.5178711414337158, "num_tokens": 6586926639.0, "step": 53960 }, { "epoch": 0.07196987877915194, "grad_norm": 1.9453125, "learning_rate": 3.598408084902139e-06, "loss": 0.5138510227203369, "num_tokens": 6589494192.0, "step": 53980 }, { "epoch": 0.07199654416588004, "grad_norm": 1.4921875, "learning_rate": 3.5997413471281533e-06, "loss": 0.5224038124084472, "num_tokens": 6591928942.0, "step": 54000 }, { "epoch": 0.07202320955260814, "grad_norm": 2.28125, "learning_rate": 3.601074609354168e-06, "loss": 0.5181239128112793, "num_tokens": 6594394781.0, "step": 54020 }, { "epoch": 0.07204987493933625, "grad_norm": 1.8671875, "learning_rate": 3.6024078715801823e-06, "loss": 0.5207575798034668, "num_tokens": 6596948960.0, "step": 54040 }, { "epoch": 0.07207654032606435, "grad_norm": 1.7890625, "learning_rate": 3.6037411338061974e-06, "loss": 0.5168529510498047, "num_tokens": 6599489721.0, "step": 54060 }, { "epoch": 0.07210320571279245, "grad_norm": 2.390625, "learning_rate": 3.605074396032212e-06, "loss": 0.5214683532714843, "num_tokens": 6601897823.0, "step": 54080 }, { "epoch": 0.07212987109952056, "grad_norm": 1.9765625, "learning_rate": 3.6064076582582264e-06, "loss": 0.5274959564208984, "num_tokens": 6604041560.0, "step": 54100 }, { "epoch": 0.07215653648624866, "grad_norm": 2.125, "learning_rate": 3.607740920484241e-06, "loss": 0.5322141647338867, "num_tokens": 6606401689.0, "step": 54120 }, { "epoch": 0.07218320187297676, "grad_norm": 1.9140625, "learning_rate": 3.609074182710256e-06, "loss": 0.5239228248596192, "num_tokens": 6609018409.0, "step": 54140 }, { "epoch": 0.07220986725970487, "grad_norm": 2.015625, "learning_rate": 3.61040744493627e-06, "loss": 0.5242129325866699, "num_tokens": 6611446705.0, "step": 54160 }, { "epoch": 0.07223653264643297, "grad_norm": 2.0625, "learning_rate": 3.611740707162285e-06, "loss": 0.525784969329834, "num_tokens": 6613953044.0, "step": 54180 }, { "epoch": 0.07226319803316107, "grad_norm": 2.171875, "learning_rate": 3.6130739693882995e-06, "loss": 0.5091629028320312, "num_tokens": 6616509276.0, "step": 54200 }, { "epoch": 0.07228986341988917, "grad_norm": 2.1875, "learning_rate": 3.614407231614314e-06, "loss": 0.5275533676147461, "num_tokens": 6618964033.0, "step": 54220 }, { "epoch": 0.07231652880661728, "grad_norm": 2.015625, "learning_rate": 3.615740493840329e-06, "loss": 0.5155405044555664, "num_tokens": 6621235795.0, "step": 54240 }, { "epoch": 0.07234319419334538, "grad_norm": 1.71875, "learning_rate": 3.617073756066343e-06, "loss": 0.5196176528930664, "num_tokens": 6623659640.0, "step": 54260 }, { "epoch": 0.07236985958007348, "grad_norm": 1.8671875, "learning_rate": 3.618407018292358e-06, "loss": 0.5252342224121094, "num_tokens": 6626214046.0, "step": 54280 }, { "epoch": 0.07239652496680159, "grad_norm": 1.7734375, "learning_rate": 3.619740280518373e-06, "loss": 0.5182094573974609, "num_tokens": 6628607022.0, "step": 54300 }, { "epoch": 0.0724231903535297, "grad_norm": 2.140625, "learning_rate": 3.6210735427443873e-06, "loss": 0.5254890441894531, "num_tokens": 6631062299.0, "step": 54320 }, { "epoch": 0.07244985574025781, "grad_norm": 2.078125, "learning_rate": 3.622406804970402e-06, "loss": 0.5154942989349365, "num_tokens": 6633314094.0, "step": 54340 }, { "epoch": 0.07247652112698591, "grad_norm": 2.046875, "learning_rate": 3.6237400671964162e-06, "loss": 0.5144772529602051, "num_tokens": 6636070103.0, "step": 54360 }, { "epoch": 0.07250318651371401, "grad_norm": 1.75, "learning_rate": 3.625073329422431e-06, "loss": 0.5308319091796875, "num_tokens": 6638446990.0, "step": 54380 }, { "epoch": 0.07252985190044212, "grad_norm": 2.140625, "learning_rate": 3.626406591648446e-06, "loss": 0.5259249687194825, "num_tokens": 6640918824.0, "step": 54400 }, { "epoch": 0.07255651728717022, "grad_norm": 2.21875, "learning_rate": 3.6277398538744604e-06, "loss": 0.5151149749755859, "num_tokens": 6643289964.0, "step": 54420 }, { "epoch": 0.07258318267389832, "grad_norm": 2.0625, "learning_rate": 3.629073116100475e-06, "loss": 0.5249894142150879, "num_tokens": 6645696000.0, "step": 54440 }, { "epoch": 0.07260984806062643, "grad_norm": 2.015625, "learning_rate": 3.6304063783264898e-06, "loss": 0.517611026763916, "num_tokens": 6648080850.0, "step": 54460 }, { "epoch": 0.07263651344735453, "grad_norm": 1.953125, "learning_rate": 3.631739640552504e-06, "loss": 0.5388753890991211, "num_tokens": 6650490065.0, "step": 54480 }, { "epoch": 0.07266317883408263, "grad_norm": 2.234375, "learning_rate": 3.6330729027785187e-06, "loss": 0.5452832221984864, "num_tokens": 6652917985.0, "step": 54500 }, { "epoch": 0.07268984422081073, "grad_norm": 2.140625, "learning_rate": 3.634406165004533e-06, "loss": 0.5229849815368652, "num_tokens": 6655461497.0, "step": 54520 }, { "epoch": 0.07271650960753884, "grad_norm": 1.6640625, "learning_rate": 3.635739427230548e-06, "loss": 0.5251845359802246, "num_tokens": 6657785143.0, "step": 54540 }, { "epoch": 0.07274317499426694, "grad_norm": 1.9375, "learning_rate": 3.637072689456563e-06, "loss": 0.517825698852539, "num_tokens": 6659999513.0, "step": 54560 }, { "epoch": 0.07276984038099504, "grad_norm": 2.203125, "learning_rate": 3.638405951682577e-06, "loss": 0.5229023933410645, "num_tokens": 6662335413.0, "step": 54580 }, { "epoch": 0.07279650576772315, "grad_norm": 2.3125, "learning_rate": 3.639739213908592e-06, "loss": 0.5092246532440186, "num_tokens": 6664574171.0, "step": 54600 }, { "epoch": 0.07282317115445125, "grad_norm": 1.953125, "learning_rate": 3.6410724761346065e-06, "loss": 0.5140758037567139, "num_tokens": 6667063705.0, "step": 54620 }, { "epoch": 0.07284983654117935, "grad_norm": 2.046875, "learning_rate": 3.642405738360621e-06, "loss": 0.5262406826019287, "num_tokens": 6669575142.0, "step": 54640 }, { "epoch": 0.07287650192790746, "grad_norm": 1.859375, "learning_rate": 3.643739000586636e-06, "loss": 0.5341741561889648, "num_tokens": 6672172831.0, "step": 54660 }, { "epoch": 0.07290316731463556, "grad_norm": 1.71875, "learning_rate": 3.64507226281265e-06, "loss": 0.5001356124877929, "num_tokens": 6674647303.0, "step": 54680 }, { "epoch": 0.07292983270136366, "grad_norm": 2.1875, "learning_rate": 3.646405525038665e-06, "loss": 0.5079076766967774, "num_tokens": 6677344191.0, "step": 54700 }, { "epoch": 0.07295649808809178, "grad_norm": 2.03125, "learning_rate": 3.6477387872646796e-06, "loss": 0.5104122161865234, "num_tokens": 6679797863.0, "step": 54720 }, { "epoch": 0.07298316347481988, "grad_norm": 1.8046875, "learning_rate": 3.649072049490694e-06, "loss": 0.5052909851074219, "num_tokens": 6682027297.0, "step": 54740 }, { "epoch": 0.07300982886154798, "grad_norm": 1.6328125, "learning_rate": 3.6504053117167086e-06, "loss": 0.5326651096343994, "num_tokens": 6684413775.0, "step": 54760 }, { "epoch": 0.07303649424827609, "grad_norm": 2.375, "learning_rate": 3.6517385739427237e-06, "loss": 0.5087228775024414, "num_tokens": 6686558510.0, "step": 54780 }, { "epoch": 0.07306315963500419, "grad_norm": 1.6640625, "learning_rate": 3.653071836168738e-06, "loss": 0.5225750923156738, "num_tokens": 6688922921.0, "step": 54800 }, { "epoch": 0.0730898250217323, "grad_norm": 1.7890625, "learning_rate": 3.6544050983947527e-06, "loss": 0.5218105316162109, "num_tokens": 6691437650.0, "step": 54820 }, { "epoch": 0.0731164904084604, "grad_norm": 1.9296875, "learning_rate": 3.655738360620767e-06, "loss": 0.5047486305236817, "num_tokens": 6693904542.0, "step": 54840 }, { "epoch": 0.0731431557951885, "grad_norm": 1.7421875, "learning_rate": 3.6570716228467817e-06, "loss": 0.5447283744812011, "num_tokens": 6696217286.0, "step": 54860 }, { "epoch": 0.0731698211819166, "grad_norm": 2.109375, "learning_rate": 3.6584048850727964e-06, "loss": 0.5272963047027588, "num_tokens": 6698753264.0, "step": 54880 }, { "epoch": 0.0731964865686447, "grad_norm": 2.140625, "learning_rate": 3.6597381472988106e-06, "loss": 0.5187786102294922, "num_tokens": 6701322872.0, "step": 54900 }, { "epoch": 0.07322315195537281, "grad_norm": 2.015625, "learning_rate": 3.6610714095248258e-06, "loss": 0.5069812774658203, "num_tokens": 6703791814.0, "step": 54920 }, { "epoch": 0.07324981734210091, "grad_norm": 2.09375, "learning_rate": 3.6624046717508405e-06, "loss": 0.5109529018402099, "num_tokens": 6706263214.0, "step": 54940 }, { "epoch": 0.07327648272882901, "grad_norm": 2.25, "learning_rate": 3.6637379339768548e-06, "loss": 0.529175090789795, "num_tokens": 6708831819.0, "step": 54960 }, { "epoch": 0.07330314811555712, "grad_norm": 1.953125, "learning_rate": 3.6650711962028695e-06, "loss": 0.5159867763519287, "num_tokens": 6711153297.0, "step": 54980 }, { "epoch": 0.07332981350228522, "grad_norm": 1.9765625, "learning_rate": 3.6664044584288837e-06, "loss": 0.5291895866394043, "num_tokens": 6713678697.0, "step": 55000 }, { "epoch": 0.07335647888901332, "grad_norm": 2.125, "learning_rate": 3.6677377206548984e-06, "loss": 0.5191746234893799, "num_tokens": 6716168219.0, "step": 55020 }, { "epoch": 0.07338314427574143, "grad_norm": 2.1875, "learning_rate": 3.6690709828809136e-06, "loss": 0.5224302291870118, "num_tokens": 6718790132.0, "step": 55040 }, { "epoch": 0.07340980966246953, "grad_norm": 2.375, "learning_rate": 3.670404245106928e-06, "loss": 0.5067867279052735, "num_tokens": 6721252824.0, "step": 55060 }, { "epoch": 0.07343647504919763, "grad_norm": 1.96875, "learning_rate": 3.6717375073329425e-06, "loss": 0.5204421520233155, "num_tokens": 6723695081.0, "step": 55080 }, { "epoch": 0.07346314043592574, "grad_norm": 2.4375, "learning_rate": 3.6730707695589572e-06, "loss": 0.5137584686279297, "num_tokens": 6726179882.0, "step": 55100 }, { "epoch": 0.07348980582265384, "grad_norm": 2.0625, "learning_rate": 3.6744040317849715e-06, "loss": 0.512301254272461, "num_tokens": 6728492775.0, "step": 55120 }, { "epoch": 0.07351647120938196, "grad_norm": 2.09375, "learning_rate": 3.6757372940109866e-06, "loss": 0.5348315715789795, "num_tokens": 6730902996.0, "step": 55140 }, { "epoch": 0.07354313659611006, "grad_norm": 2.21875, "learning_rate": 3.6770705562370013e-06, "loss": 0.5216197013854981, "num_tokens": 6733646977.0, "step": 55160 }, { "epoch": 0.07356980198283816, "grad_norm": 2.109375, "learning_rate": 3.6784038184630156e-06, "loss": 0.518770694732666, "num_tokens": 6735720023.0, "step": 55180 }, { "epoch": 0.07359646736956627, "grad_norm": 2.125, "learning_rate": 3.6797370806890303e-06, "loss": 0.5274873733520508, "num_tokens": 6738234311.0, "step": 55200 }, { "epoch": 0.07362313275629437, "grad_norm": 1.828125, "learning_rate": 3.6810703429150446e-06, "loss": 0.5279041767120362, "num_tokens": 6740473840.0, "step": 55220 }, { "epoch": 0.07364979814302247, "grad_norm": 2.53125, "learning_rate": 3.6824036051410593e-06, "loss": 0.5141175746917724, "num_tokens": 6742925704.0, "step": 55240 }, { "epoch": 0.07367646352975057, "grad_norm": 1.90625, "learning_rate": 3.6837368673670744e-06, "loss": 0.5148724555969239, "num_tokens": 6745387053.0, "step": 55260 }, { "epoch": 0.07370312891647868, "grad_norm": 1.765625, "learning_rate": 3.6850701295930887e-06, "loss": 0.5184622764587402, "num_tokens": 6747815164.0, "step": 55280 }, { "epoch": 0.07372979430320678, "grad_norm": 2.28125, "learning_rate": 3.6864033918191034e-06, "loss": 0.5174633502960205, "num_tokens": 6750258638.0, "step": 55300 }, { "epoch": 0.07375645968993488, "grad_norm": 2.5625, "learning_rate": 3.687736654045118e-06, "loss": 0.5107656955718994, "num_tokens": 6752915768.0, "step": 55320 }, { "epoch": 0.07378312507666299, "grad_norm": 1.859375, "learning_rate": 3.6890699162711324e-06, "loss": 0.515708065032959, "num_tokens": 6755397438.0, "step": 55340 }, { "epoch": 0.07380979046339109, "grad_norm": 2.0625, "learning_rate": 3.690403178497147e-06, "loss": 0.5090631484985352, "num_tokens": 6758035086.0, "step": 55360 }, { "epoch": 0.07383645585011919, "grad_norm": 1.734375, "learning_rate": 3.6917364407231614e-06, "loss": 0.520066213607788, "num_tokens": 6760633216.0, "step": 55380 }, { "epoch": 0.0738631212368473, "grad_norm": 2.140625, "learning_rate": 3.6930697029491765e-06, "loss": 0.5126689434051513, "num_tokens": 6763110835.0, "step": 55400 }, { "epoch": 0.0738897866235754, "grad_norm": 2.09375, "learning_rate": 3.694402965175191e-06, "loss": 0.5099668502807617, "num_tokens": 6765570917.0, "step": 55420 }, { "epoch": 0.0739164520103035, "grad_norm": 2.21875, "learning_rate": 3.6957362274012055e-06, "loss": 0.5132957458496094, "num_tokens": 6767944634.0, "step": 55440 }, { "epoch": 0.0739431173970316, "grad_norm": 1.8515625, "learning_rate": 3.69706948962722e-06, "loss": 0.517216968536377, "num_tokens": 6770177568.0, "step": 55460 }, { "epoch": 0.07396978278375971, "grad_norm": 2.46875, "learning_rate": 3.698402751853235e-06, "loss": 0.5248989582061767, "num_tokens": 6772510084.0, "step": 55480 }, { "epoch": 0.07399644817048781, "grad_norm": 1.7109375, "learning_rate": 3.699736014079249e-06, "loss": 0.5116249561309815, "num_tokens": 6774917808.0, "step": 55500 }, { "epoch": 0.07402311355721591, "grad_norm": 1.8671875, "learning_rate": 3.7010692763052643e-06, "loss": 0.5306506633758545, "num_tokens": 6777265736.0, "step": 55520 }, { "epoch": 0.07404977894394403, "grad_norm": 1.8515625, "learning_rate": 3.7024025385312786e-06, "loss": 0.503981351852417, "num_tokens": 6779808612.0, "step": 55540 }, { "epoch": 0.07407644433067213, "grad_norm": 1.7734375, "learning_rate": 3.7037358007572933e-06, "loss": 0.5191157341003418, "num_tokens": 6782140626.0, "step": 55560 }, { "epoch": 0.07410310971740024, "grad_norm": 1.875, "learning_rate": 3.705069062983308e-06, "loss": 0.5165977478027344, "num_tokens": 6784728301.0, "step": 55580 }, { "epoch": 0.07412977510412834, "grad_norm": 2.046875, "learning_rate": 3.7064023252093222e-06, "loss": 0.5189807891845704, "num_tokens": 6787387152.0, "step": 55600 }, { "epoch": 0.07415644049085644, "grad_norm": 1.7734375, "learning_rate": 3.707735587435337e-06, "loss": 0.5121592521667481, "num_tokens": 6790065108.0, "step": 55620 }, { "epoch": 0.07418310587758455, "grad_norm": 1.796875, "learning_rate": 3.709068849661352e-06, "loss": 0.5096944332122803, "num_tokens": 6792516625.0, "step": 55640 }, { "epoch": 0.07420977126431265, "grad_norm": 1.90625, "learning_rate": 3.7104021118873663e-06, "loss": 0.5297706604003907, "num_tokens": 6794746948.0, "step": 55660 }, { "epoch": 0.07423643665104075, "grad_norm": 2.3125, "learning_rate": 3.711735374113381e-06, "loss": 0.5128941059112548, "num_tokens": 6797275928.0, "step": 55680 }, { "epoch": 0.07426310203776885, "grad_norm": 2.25, "learning_rate": 3.7130686363393953e-06, "loss": 0.5217805862426758, "num_tokens": 6799645795.0, "step": 55700 }, { "epoch": 0.07428976742449696, "grad_norm": 1.84375, "learning_rate": 3.71440189856541e-06, "loss": 0.5008817672729492, "num_tokens": 6802010351.0, "step": 55720 }, { "epoch": 0.07431643281122506, "grad_norm": 1.9921875, "learning_rate": 3.7157351607914247e-06, "loss": 0.5079159259796142, "num_tokens": 6804493852.0, "step": 55740 }, { "epoch": 0.07434309819795316, "grad_norm": 2.09375, "learning_rate": 3.717068423017439e-06, "loss": 0.5113275527954102, "num_tokens": 6806875040.0, "step": 55760 }, { "epoch": 0.07436976358468127, "grad_norm": 2.609375, "learning_rate": 3.718401685243454e-06, "loss": 0.5141411781311035, "num_tokens": 6809293634.0, "step": 55780 }, { "epoch": 0.07439642897140937, "grad_norm": 1.421875, "learning_rate": 3.719734947469469e-06, "loss": 0.5249348163604737, "num_tokens": 6811765631.0, "step": 55800 }, { "epoch": 0.07442309435813747, "grad_norm": 1.9921875, "learning_rate": 3.721068209695483e-06, "loss": 0.5113595962524414, "num_tokens": 6814304966.0, "step": 55820 }, { "epoch": 0.07444975974486558, "grad_norm": 2.0625, "learning_rate": 3.722401471921498e-06, "loss": 0.5275816917419434, "num_tokens": 6816880670.0, "step": 55840 }, { "epoch": 0.07447642513159368, "grad_norm": 2.65625, "learning_rate": 3.723734734147512e-06, "loss": 0.5204320907592773, "num_tokens": 6819497887.0, "step": 55860 }, { "epoch": 0.07450309051832178, "grad_norm": 1.6171875, "learning_rate": 3.725067996373527e-06, "loss": 0.5052334785461425, "num_tokens": 6821940937.0, "step": 55880 }, { "epoch": 0.07452975590504989, "grad_norm": 1.8046875, "learning_rate": 3.726401258599542e-06, "loss": 0.5090674877166748, "num_tokens": 6824103483.0, "step": 55900 }, { "epoch": 0.07455642129177799, "grad_norm": 2.3125, "learning_rate": 3.727734520825556e-06, "loss": 0.5348964691162109, "num_tokens": 6826469682.0, "step": 55920 }, { "epoch": 0.0745830866785061, "grad_norm": 2.390625, "learning_rate": 3.729067783051571e-06, "loss": 0.5278614044189454, "num_tokens": 6828899011.0, "step": 55940 }, { "epoch": 0.07460975206523421, "grad_norm": 1.7109375, "learning_rate": 3.7304010452775856e-06, "loss": 0.5323129177093506, "num_tokens": 6831428425.0, "step": 55960 }, { "epoch": 0.07463641745196231, "grad_norm": 1.671875, "learning_rate": 3.7317343075036e-06, "loss": 0.5141315460205078, "num_tokens": 6834064306.0, "step": 55980 }, { "epoch": 0.07466308283869041, "grad_norm": 1.984375, "learning_rate": 3.733067569729615e-06, "loss": 0.5124815940856934, "num_tokens": 6836433682.0, "step": 56000 }, { "epoch": 0.07468974822541852, "grad_norm": 2.125, "learning_rate": 3.7344008319556293e-06, "loss": 0.5178313255310059, "num_tokens": 6839028888.0, "step": 56020 }, { "epoch": 0.07471641361214662, "grad_norm": 2.484375, "learning_rate": 3.735734094181644e-06, "loss": 0.5003844261169433, "num_tokens": 6841710642.0, "step": 56040 }, { "epoch": 0.07474307899887472, "grad_norm": 2.078125, "learning_rate": 3.7370673564076587e-06, "loss": 0.513584041595459, "num_tokens": 6844374221.0, "step": 56060 }, { "epoch": 0.07476974438560283, "grad_norm": 2.703125, "learning_rate": 3.738400618633673e-06, "loss": 0.5096826553344727, "num_tokens": 6846586351.0, "step": 56080 }, { "epoch": 0.07479640977233093, "grad_norm": 1.5234375, "learning_rate": 3.7397338808596877e-06, "loss": 0.5283381462097168, "num_tokens": 6848854578.0, "step": 56100 }, { "epoch": 0.07482307515905903, "grad_norm": 2.203125, "learning_rate": 3.7410671430857028e-06, "loss": 0.5215910911560059, "num_tokens": 6851314064.0, "step": 56120 }, { "epoch": 0.07484974054578714, "grad_norm": 2.109375, "learning_rate": 3.742400405311717e-06, "loss": 0.5202279090881348, "num_tokens": 6853659162.0, "step": 56140 }, { "epoch": 0.07487640593251524, "grad_norm": 2.09375, "learning_rate": 3.7437336675377318e-06, "loss": 0.5224614143371582, "num_tokens": 6856041077.0, "step": 56160 }, { "epoch": 0.07490307131924334, "grad_norm": 1.5625, "learning_rate": 3.745066929763746e-06, "loss": 0.5078905105590821, "num_tokens": 6858451864.0, "step": 56180 }, { "epoch": 0.07492973670597144, "grad_norm": 2.0, "learning_rate": 3.7464001919897607e-06, "loss": 0.5170731544494629, "num_tokens": 6860814440.0, "step": 56200 }, { "epoch": 0.07495640209269955, "grad_norm": 1.90625, "learning_rate": 3.7477334542157754e-06, "loss": 0.5110003471374511, "num_tokens": 6863126267.0, "step": 56220 }, { "epoch": 0.07498306747942765, "grad_norm": 1.9453125, "learning_rate": 3.7490667164417897e-06, "loss": 0.5116930961608886, "num_tokens": 6865562689.0, "step": 56240 }, { "epoch": 0.07500973286615575, "grad_norm": 2.109375, "learning_rate": 3.750399978667805e-06, "loss": 0.5278284549713135, "num_tokens": 6868156667.0, "step": 56260 }, { "epoch": 0.07503639825288386, "grad_norm": 2.15625, "learning_rate": 3.7517332408938195e-06, "loss": 0.49966940879821775, "num_tokens": 6870576603.0, "step": 56280 }, { "epoch": 0.07506306363961196, "grad_norm": 2.453125, "learning_rate": 3.753066503119834e-06, "loss": 0.5083417892456055, "num_tokens": 6873013277.0, "step": 56300 }, { "epoch": 0.07508972902634006, "grad_norm": 2.21875, "learning_rate": 3.7543997653458485e-06, "loss": 0.5283677101135253, "num_tokens": 6875449481.0, "step": 56320 }, { "epoch": 0.07511639441306817, "grad_norm": 2.0625, "learning_rate": 3.755733027571863e-06, "loss": 0.5187078475952148, "num_tokens": 6878065784.0, "step": 56340 }, { "epoch": 0.07514305979979628, "grad_norm": 2.1875, "learning_rate": 3.7570662897978775e-06, "loss": 0.5131229400634766, "num_tokens": 6880473833.0, "step": 56360 }, { "epoch": 0.07516972518652439, "grad_norm": 1.6953125, "learning_rate": 3.7583995520238926e-06, "loss": 0.5248525619506836, "num_tokens": 6882940330.0, "step": 56380 }, { "epoch": 0.07519639057325249, "grad_norm": 2.171875, "learning_rate": 3.759732814249907e-06, "loss": 0.5022263050079345, "num_tokens": 6885341517.0, "step": 56400 }, { "epoch": 0.07522305595998059, "grad_norm": 1.921875, "learning_rate": 3.7610660764759216e-06, "loss": 0.5228209495544434, "num_tokens": 6887577661.0, "step": 56420 }, { "epoch": 0.0752497213467087, "grad_norm": 1.7578125, "learning_rate": 3.7623993387019363e-06, "loss": 0.5114493370056152, "num_tokens": 6889874851.0, "step": 56440 }, { "epoch": 0.0752763867334368, "grad_norm": 1.8046875, "learning_rate": 3.7637326009279506e-06, "loss": 0.503522539138794, "num_tokens": 6892425629.0, "step": 56460 }, { "epoch": 0.0753030521201649, "grad_norm": 2.015625, "learning_rate": 3.7650658631539653e-06, "loss": 0.5038588523864747, "num_tokens": 6894731667.0, "step": 56480 }, { "epoch": 0.075329717506893, "grad_norm": 1.6171875, "learning_rate": 3.7663991253799804e-06, "loss": 0.5116384506225586, "num_tokens": 6897272403.0, "step": 56500 }, { "epoch": 0.07535638289362111, "grad_norm": 1.7265625, "learning_rate": 3.7677323876059947e-06, "loss": 0.5088501930236816, "num_tokens": 6899796182.0, "step": 56520 }, { "epoch": 0.07538304828034921, "grad_norm": 2.015625, "learning_rate": 3.7690656498320094e-06, "loss": 0.5251652717590332, "num_tokens": 6902293464.0, "step": 56540 }, { "epoch": 0.07540971366707731, "grad_norm": 2.25, "learning_rate": 3.7703989120580237e-06, "loss": 0.5166553020477295, "num_tokens": 6904806187.0, "step": 56560 }, { "epoch": 0.07543637905380542, "grad_norm": 1.96875, "learning_rate": 3.7717321742840384e-06, "loss": 0.5011692523956299, "num_tokens": 6907371081.0, "step": 56580 }, { "epoch": 0.07546304444053352, "grad_norm": 2.203125, "learning_rate": 3.7730654365100535e-06, "loss": 0.510533332824707, "num_tokens": 6909805259.0, "step": 56600 }, { "epoch": 0.07548970982726162, "grad_norm": 1.78125, "learning_rate": 3.7743986987360678e-06, "loss": 0.5138731956481933, "num_tokens": 6912432836.0, "step": 56620 }, { "epoch": 0.07551637521398973, "grad_norm": 1.9375, "learning_rate": 3.7757319609620825e-06, "loss": 0.5196608543395996, "num_tokens": 6915037696.0, "step": 56640 }, { "epoch": 0.07554304060071783, "grad_norm": 1.953125, "learning_rate": 3.777065223188097e-06, "loss": 0.5075504302978515, "num_tokens": 6917523537.0, "step": 56660 }, { "epoch": 0.07556970598744593, "grad_norm": 2.140625, "learning_rate": 3.7783984854141115e-06, "loss": 0.5020659446716309, "num_tokens": 6919842964.0, "step": 56680 }, { "epoch": 0.07559637137417403, "grad_norm": 1.8203125, "learning_rate": 3.779731747640126e-06, "loss": 0.5153459548950196, "num_tokens": 6922265639.0, "step": 56700 }, { "epoch": 0.07562303676090214, "grad_norm": 2.046875, "learning_rate": 3.7810650098661404e-06, "loss": 0.5231188774108887, "num_tokens": 6924741176.0, "step": 56720 }, { "epoch": 0.07564970214763024, "grad_norm": 1.8671875, "learning_rate": 3.7823982720921556e-06, "loss": 0.515117597579956, "num_tokens": 6927310050.0, "step": 56740 }, { "epoch": 0.07567636753435836, "grad_norm": 2.140625, "learning_rate": 3.7837315343181703e-06, "loss": 0.5058890342712402, "num_tokens": 6929726084.0, "step": 56760 }, { "epoch": 0.07570303292108646, "grad_norm": 2.0625, "learning_rate": 3.7850647965441845e-06, "loss": 0.5046196937561035, "num_tokens": 6932233327.0, "step": 56780 }, { "epoch": 0.07572969830781456, "grad_norm": 2.234375, "learning_rate": 3.7863980587701992e-06, "loss": 0.4998835563659668, "num_tokens": 6935004483.0, "step": 56800 }, { "epoch": 0.07575636369454267, "grad_norm": 1.90625, "learning_rate": 3.787731320996214e-06, "loss": 0.5228589534759521, "num_tokens": 6937595441.0, "step": 56820 }, { "epoch": 0.07578302908127077, "grad_norm": 2.09375, "learning_rate": 3.7890645832222282e-06, "loss": 0.5168106079101562, "num_tokens": 6939883048.0, "step": 56840 }, { "epoch": 0.07580969446799887, "grad_norm": 2.3125, "learning_rate": 3.7903978454482433e-06, "loss": 0.5170625686645508, "num_tokens": 6942399137.0, "step": 56860 }, { "epoch": 0.07583635985472698, "grad_norm": 2.0625, "learning_rate": 3.7917311076742576e-06, "loss": 0.5248114585876464, "num_tokens": 6944933934.0, "step": 56880 }, { "epoch": 0.07586302524145508, "grad_norm": 1.8828125, "learning_rate": 3.7930643699002723e-06, "loss": 0.5060820579528809, "num_tokens": 6947200362.0, "step": 56900 }, { "epoch": 0.07588969062818318, "grad_norm": 1.9296875, "learning_rate": 3.794397632126287e-06, "loss": 0.5137314319610595, "num_tokens": 6949776356.0, "step": 56920 }, { "epoch": 0.07591635601491128, "grad_norm": 2.140625, "learning_rate": 3.7957308943523013e-06, "loss": 0.504967737197876, "num_tokens": 6952038120.0, "step": 56940 }, { "epoch": 0.07594302140163939, "grad_norm": 2.28125, "learning_rate": 3.797064156578316e-06, "loss": 0.5018168449401855, "num_tokens": 6954689335.0, "step": 56960 }, { "epoch": 0.07596968678836749, "grad_norm": 2.34375, "learning_rate": 3.798397418804331e-06, "loss": 0.5163461685180664, "num_tokens": 6956965357.0, "step": 56980 }, { "epoch": 0.0759963521750956, "grad_norm": 2.328125, "learning_rate": 3.7997306810303454e-06, "loss": 0.4915772438049316, "num_tokens": 6959406718.0, "step": 57000 }, { "epoch": 0.0760230175618237, "grad_norm": 2.109375, "learning_rate": 3.80106394325636e-06, "loss": 0.5081088066101074, "num_tokens": 6961779582.0, "step": 57020 }, { "epoch": 0.0760496829485518, "grad_norm": 1.9765625, "learning_rate": 3.8023972054823744e-06, "loss": 0.5075229644775391, "num_tokens": 6964098558.0, "step": 57040 }, { "epoch": 0.0760763483352799, "grad_norm": 1.9453125, "learning_rate": 3.803730467708389e-06, "loss": 0.5215786457061767, "num_tokens": 6966369960.0, "step": 57060 }, { "epoch": 0.076103013722008, "grad_norm": 2.484375, "learning_rate": 3.805063729934404e-06, "loss": 0.5164341449737548, "num_tokens": 6968946935.0, "step": 57080 }, { "epoch": 0.07612967910873611, "grad_norm": 1.7734375, "learning_rate": 3.806396992160418e-06, "loss": 0.5214421272277832, "num_tokens": 6971212647.0, "step": 57100 }, { "epoch": 0.07615634449546421, "grad_norm": 2.234375, "learning_rate": 3.807730254386433e-06, "loss": 0.5094182014465332, "num_tokens": 6973602509.0, "step": 57120 }, { "epoch": 0.07618300988219232, "grad_norm": 2.71875, "learning_rate": 3.809063516612448e-06, "loss": 0.49088315963745116, "num_tokens": 6976153026.0, "step": 57140 }, { "epoch": 0.07620967526892043, "grad_norm": 2.078125, "learning_rate": 3.810396778838462e-06, "loss": 0.49393138885498045, "num_tokens": 6978470686.0, "step": 57160 }, { "epoch": 0.07623634065564854, "grad_norm": 2.140625, "learning_rate": 3.811730041064477e-06, "loss": 0.5077683925628662, "num_tokens": 6980860991.0, "step": 57180 }, { "epoch": 0.07626300604237664, "grad_norm": 1.7421875, "learning_rate": 3.813063303290491e-06, "loss": 0.5180803298950195, "num_tokens": 6983302586.0, "step": 57200 }, { "epoch": 0.07628967142910474, "grad_norm": 2.125, "learning_rate": 3.814396565516506e-06, "loss": 0.5014069557189942, "num_tokens": 6985757942.0, "step": 57220 }, { "epoch": 0.07631633681583284, "grad_norm": 2.015625, "learning_rate": 3.8157298277425206e-06, "loss": 0.5079381465911865, "num_tokens": 6988082667.0, "step": 57240 }, { "epoch": 0.07634300220256095, "grad_norm": 2.515625, "learning_rate": 3.817063089968535e-06, "loss": 0.5135651588439941, "num_tokens": 6990533089.0, "step": 57260 }, { "epoch": 0.07636966758928905, "grad_norm": 2.234375, "learning_rate": 3.81839635219455e-06, "loss": 0.519443941116333, "num_tokens": 6992876140.0, "step": 57280 }, { "epoch": 0.07639633297601715, "grad_norm": 2.03125, "learning_rate": 3.819729614420565e-06, "loss": 0.5213600158691406, "num_tokens": 6995208333.0, "step": 57300 }, { "epoch": 0.07642299836274526, "grad_norm": 2.09375, "learning_rate": 3.821062876646579e-06, "loss": 0.5061657428741455, "num_tokens": 6997519774.0, "step": 57320 }, { "epoch": 0.07644966374947336, "grad_norm": 2.140625, "learning_rate": 3.822396138872594e-06, "loss": 0.525202751159668, "num_tokens": 7000070343.0, "step": 57340 }, { "epoch": 0.07647632913620146, "grad_norm": 1.6640625, "learning_rate": 3.823729401098608e-06, "loss": 0.49925856590270995, "num_tokens": 7002417184.0, "step": 57360 }, { "epoch": 0.07650299452292957, "grad_norm": 3.0, "learning_rate": 3.825062663324623e-06, "loss": 0.49517288208007815, "num_tokens": 7004798589.0, "step": 57380 }, { "epoch": 0.07652965990965767, "grad_norm": 1.6328125, "learning_rate": 3.826395925550638e-06, "loss": 0.5105988025665283, "num_tokens": 7007430367.0, "step": 57400 }, { "epoch": 0.07655632529638577, "grad_norm": 1.953125, "learning_rate": 3.827729187776652e-06, "loss": 0.5159186363220215, "num_tokens": 7009705324.0, "step": 57420 }, { "epoch": 0.07658299068311387, "grad_norm": 2.0, "learning_rate": 3.829062450002667e-06, "loss": 0.49479103088378906, "num_tokens": 7012021103.0, "step": 57440 }, { "epoch": 0.07660965606984198, "grad_norm": 2.21875, "learning_rate": 3.8303957122286814e-06, "loss": 0.5010213375091552, "num_tokens": 7014720890.0, "step": 57460 }, { "epoch": 0.07663632145657008, "grad_norm": 2.0625, "learning_rate": 3.831728974454696e-06, "loss": 0.5073648452758789, "num_tokens": 7017283288.0, "step": 57480 }, { "epoch": 0.07666298684329818, "grad_norm": 2.015625, "learning_rate": 3.833062236680711e-06, "loss": 0.49999170303344725, "num_tokens": 7019963141.0, "step": 57500 }, { "epoch": 0.07668965223002629, "grad_norm": 1.78125, "learning_rate": 3.834395498906725e-06, "loss": 0.5060944557189941, "num_tokens": 7022462399.0, "step": 57520 }, { "epoch": 0.07671631761675439, "grad_norm": 1.8984375, "learning_rate": 3.83572876113274e-06, "loss": 0.5084094047546387, "num_tokens": 7024787398.0, "step": 57540 }, { "epoch": 0.07674298300348249, "grad_norm": 2.171875, "learning_rate": 3.837062023358755e-06, "loss": 0.5153656959533691, "num_tokens": 7027313261.0, "step": 57560 }, { "epoch": 0.07676964839021061, "grad_norm": 1.875, "learning_rate": 3.838395285584769e-06, "loss": 0.497255277633667, "num_tokens": 7029763119.0, "step": 57580 }, { "epoch": 0.07679631377693871, "grad_norm": 1.84375, "learning_rate": 3.8397285478107835e-06, "loss": 0.5222907543182373, "num_tokens": 7032136750.0, "step": 57600 }, { "epoch": 0.07682297916366682, "grad_norm": 1.6171875, "learning_rate": 3.841061810036798e-06, "loss": 0.5100398063659668, "num_tokens": 7034623044.0, "step": 57620 }, { "epoch": 0.07684964455039492, "grad_norm": 2.125, "learning_rate": 3.842395072262813e-06, "loss": 0.5029925346374512, "num_tokens": 7037174979.0, "step": 57640 }, { "epoch": 0.07687630993712302, "grad_norm": 2.1875, "learning_rate": 3.843728334488828e-06, "loss": 0.47943458557128904, "num_tokens": 7039696873.0, "step": 57660 }, { "epoch": 0.07690297532385112, "grad_norm": 1.828125, "learning_rate": 3.845061596714842e-06, "loss": 0.5079731464385986, "num_tokens": 7042272130.0, "step": 57680 }, { "epoch": 0.07692964071057923, "grad_norm": 2.25, "learning_rate": 3.846394858940857e-06, "loss": 0.5093145370483398, "num_tokens": 7044729612.0, "step": 57700 }, { "epoch": 0.07695630609730733, "grad_norm": 1.96875, "learning_rate": 3.847728121166872e-06, "loss": 0.49602270126342773, "num_tokens": 7047197701.0, "step": 57720 }, { "epoch": 0.07698297148403543, "grad_norm": 2.1875, "learning_rate": 3.8490613833928856e-06, "loss": 0.5080383777618408, "num_tokens": 7049549689.0, "step": 57740 }, { "epoch": 0.07700963687076354, "grad_norm": 1.90625, "learning_rate": 3.8503946456189e-06, "loss": 0.501335620880127, "num_tokens": 7051980816.0, "step": 57760 }, { "epoch": 0.07703630225749164, "grad_norm": 1.90625, "learning_rate": 3.851727907844916e-06, "loss": 0.494138765335083, "num_tokens": 7054478771.0, "step": 57780 }, { "epoch": 0.07706296764421974, "grad_norm": 1.4765625, "learning_rate": 3.85306117007093e-06, "loss": 0.5133513927459716, "num_tokens": 7056928632.0, "step": 57800 }, { "epoch": 0.07708963303094785, "grad_norm": 2.28125, "learning_rate": 3.854394432296944e-06, "loss": 0.5114939689636231, "num_tokens": 7059500478.0, "step": 57820 }, { "epoch": 0.07711629841767595, "grad_norm": 1.734375, "learning_rate": 3.855727694522959e-06, "loss": 0.5075160980224609, "num_tokens": 7061881931.0, "step": 57840 }, { "epoch": 0.07714296380440405, "grad_norm": 2.0, "learning_rate": 3.857060956748974e-06, "loss": 0.5206337928771972, "num_tokens": 7064348748.0, "step": 57860 }, { "epoch": 0.07716962919113216, "grad_norm": 1.7109375, "learning_rate": 3.8583942189749885e-06, "loss": 0.49280896186828616, "num_tokens": 7066901968.0, "step": 57880 }, { "epoch": 0.07719629457786026, "grad_norm": 2.046875, "learning_rate": 3.859727481201002e-06, "loss": 0.5077455520629883, "num_tokens": 7069193743.0, "step": 57900 }, { "epoch": 0.07722295996458836, "grad_norm": 1.625, "learning_rate": 3.861060743427018e-06, "loss": 0.49437966346740725, "num_tokens": 7071763117.0, "step": 57920 }, { "epoch": 0.07724962535131646, "grad_norm": 1.7421875, "learning_rate": 3.8623940056530326e-06, "loss": 0.5099650382995605, "num_tokens": 7074213909.0, "step": 57940 }, { "epoch": 0.07727629073804457, "grad_norm": 1.7890625, "learning_rate": 3.8637272678790464e-06, "loss": 0.5200443267822266, "num_tokens": 7076504290.0, "step": 57960 }, { "epoch": 0.07730295612477268, "grad_norm": 1.9609375, "learning_rate": 3.865060530105061e-06, "loss": 0.5004170417785645, "num_tokens": 7079006517.0, "step": 57980 }, { "epoch": 0.07732962151150079, "grad_norm": 2.203125, "learning_rate": 3.866393792331077e-06, "loss": 0.5230165481567383, "num_tokens": 7081487098.0, "step": 58000 }, { "epoch": 0.07735628689822889, "grad_norm": 1.8359375, "learning_rate": 3.8677270545570905e-06, "loss": 0.49112844467163086, "num_tokens": 7083994808.0, "step": 58020 }, { "epoch": 0.077382952284957, "grad_norm": 2.109375, "learning_rate": 3.869060316783105e-06, "loss": 0.4979252815246582, "num_tokens": 7086538678.0, "step": 58040 }, { "epoch": 0.0774096176716851, "grad_norm": 1.8203125, "learning_rate": 3.87039357900912e-06, "loss": 0.5125235557556153, "num_tokens": 7089080357.0, "step": 58060 }, { "epoch": 0.0774362830584132, "grad_norm": 2.421875, "learning_rate": 3.871726841235135e-06, "loss": 0.491774320602417, "num_tokens": 7091613379.0, "step": 58080 }, { "epoch": 0.0774629484451413, "grad_norm": 2.203125, "learning_rate": 3.873060103461149e-06, "loss": 0.5081217765808106, "num_tokens": 7094102632.0, "step": 58100 }, { "epoch": 0.0774896138318694, "grad_norm": 1.796875, "learning_rate": 3.874393365687163e-06, "loss": 0.5088118553161621, "num_tokens": 7096447934.0, "step": 58120 }, { "epoch": 0.07751627921859751, "grad_norm": 2.3125, "learning_rate": 3.875726627913179e-06, "loss": 0.5171767234802246, "num_tokens": 7099047579.0, "step": 58140 }, { "epoch": 0.07754294460532561, "grad_norm": 2.015625, "learning_rate": 3.8770598901391934e-06, "loss": 0.5117918491363526, "num_tokens": 7101356754.0, "step": 58160 }, { "epoch": 0.07756960999205371, "grad_norm": 1.7578125, "learning_rate": 3.878393152365207e-06, "loss": 0.49740896224975584, "num_tokens": 7103609199.0, "step": 58180 }, { "epoch": 0.07759627537878182, "grad_norm": 1.84375, "learning_rate": 3.879726414591222e-06, "loss": 0.5137558937072754, "num_tokens": 7106149423.0, "step": 58200 }, { "epoch": 0.07762294076550992, "grad_norm": 1.7578125, "learning_rate": 3.881059676817237e-06, "loss": 0.5032986164093017, "num_tokens": 7108501415.0, "step": 58220 }, { "epoch": 0.07764960615223802, "grad_norm": 1.90625, "learning_rate": 3.882392939043251e-06, "loss": 0.49193944931030276, "num_tokens": 7110870528.0, "step": 58240 }, { "epoch": 0.07767627153896613, "grad_norm": 2.359375, "learning_rate": 3.883726201269266e-06, "loss": 0.5019082069396973, "num_tokens": 7113400447.0, "step": 58260 }, { "epoch": 0.07770293692569423, "grad_norm": 1.8125, "learning_rate": 3.885059463495281e-06, "loss": 0.5215615272521973, "num_tokens": 7115966216.0, "step": 58280 }, { "epoch": 0.07772960231242233, "grad_norm": 2.1875, "learning_rate": 3.8863927257212955e-06, "loss": 0.519472312927246, "num_tokens": 7118415964.0, "step": 58300 }, { "epoch": 0.07775626769915044, "grad_norm": 2.078125, "learning_rate": 3.88772598794731e-06, "loss": 0.5229007720947265, "num_tokens": 7120964364.0, "step": 58320 }, { "epoch": 0.07778293308587854, "grad_norm": 2.203125, "learning_rate": 3.889059250173324e-06, "loss": 0.5258786678314209, "num_tokens": 7123542321.0, "step": 58340 }, { "epoch": 0.07780959847260664, "grad_norm": 1.8125, "learning_rate": 3.890392512399339e-06, "loss": 0.49497370719909667, "num_tokens": 7125916750.0, "step": 58360 }, { "epoch": 0.07783626385933475, "grad_norm": 1.828125, "learning_rate": 3.8917257746253535e-06, "loss": 0.49052982330322265, "num_tokens": 7128482784.0, "step": 58380 }, { "epoch": 0.07786292924606286, "grad_norm": 2.125, "learning_rate": 3.893059036851368e-06, "loss": 0.5050625324249267, "num_tokens": 7130935584.0, "step": 58400 }, { "epoch": 0.07788959463279096, "grad_norm": 1.546875, "learning_rate": 3.894392299077383e-06, "loss": 0.509345531463623, "num_tokens": 7133294177.0, "step": 58420 }, { "epoch": 0.07791626001951907, "grad_norm": 1.765625, "learning_rate": 3.8957255613033976e-06, "loss": 0.5048933982849121, "num_tokens": 7135404046.0, "step": 58440 }, { "epoch": 0.07794292540624717, "grad_norm": 2.234375, "learning_rate": 3.897058823529412e-06, "loss": 0.5124787807464599, "num_tokens": 7137949374.0, "step": 58460 }, { "epoch": 0.07796959079297527, "grad_norm": 2.296875, "learning_rate": 3.898392085755427e-06, "loss": 0.5043936729431152, "num_tokens": 7140324051.0, "step": 58480 }, { "epoch": 0.07799625617970338, "grad_norm": 2.015625, "learning_rate": 3.899725347981441e-06, "loss": 0.49231777191162107, "num_tokens": 7142606155.0, "step": 58500 }, { "epoch": 0.07802292156643148, "grad_norm": 1.984375, "learning_rate": 3.901058610207456e-06, "loss": 0.52381591796875, "num_tokens": 7145071526.0, "step": 58520 }, { "epoch": 0.07804958695315958, "grad_norm": 2.421875, "learning_rate": 3.90239187243347e-06, "loss": 0.5020318031311035, "num_tokens": 7147456491.0, "step": 58540 }, { "epoch": 0.07807625233988769, "grad_norm": 1.734375, "learning_rate": 3.903725134659485e-06, "loss": 0.5172972202301025, "num_tokens": 7150062802.0, "step": 58560 }, { "epoch": 0.07810291772661579, "grad_norm": 2.375, "learning_rate": 3.9050583968855e-06, "loss": 0.5059842109680176, "num_tokens": 7152591489.0, "step": 58580 }, { "epoch": 0.07812958311334389, "grad_norm": 1.9609375, "learning_rate": 3.906391659111514e-06, "loss": 0.5056340217590332, "num_tokens": 7154894088.0, "step": 58600 }, { "epoch": 0.078156248500072, "grad_norm": 2.40625, "learning_rate": 3.907724921337529e-06, "loss": 0.5200740814208984, "num_tokens": 7157060719.0, "step": 58620 }, { "epoch": 0.0781829138868001, "grad_norm": 2.09375, "learning_rate": 3.909058183563544e-06, "loss": 0.5021446704864502, "num_tokens": 7159736935.0, "step": 58640 }, { "epoch": 0.0782095792735282, "grad_norm": 1.7890625, "learning_rate": 3.9103914457895584e-06, "loss": 0.5101417541503906, "num_tokens": 7162212294.0, "step": 58660 }, { "epoch": 0.0782362446602563, "grad_norm": 1.7265625, "learning_rate": 3.911724708015573e-06, "loss": 0.4844826698303223, "num_tokens": 7164721313.0, "step": 58680 }, { "epoch": 0.07826291004698441, "grad_norm": 2.109375, "learning_rate": 3.913057970241587e-06, "loss": 0.5028193473815918, "num_tokens": 7167144026.0, "step": 58700 }, { "epoch": 0.07828957543371251, "grad_norm": 2.390625, "learning_rate": 3.914391232467602e-06, "loss": 0.4981559753417969, "num_tokens": 7169526350.0, "step": 58720 }, { "epoch": 0.07831624082044061, "grad_norm": 2.53125, "learning_rate": 3.915724494693616e-06, "loss": 0.5057503700256347, "num_tokens": 7172024340.0, "step": 58740 }, { "epoch": 0.07834290620716872, "grad_norm": 1.9765625, "learning_rate": 3.917057756919631e-06, "loss": 0.483916187286377, "num_tokens": 7174704349.0, "step": 58760 }, { "epoch": 0.07836957159389682, "grad_norm": 1.84375, "learning_rate": 3.918391019145646e-06, "loss": 0.5105930805206299, "num_tokens": 7177076102.0, "step": 58780 }, { "epoch": 0.07839623698062494, "grad_norm": 2.078125, "learning_rate": 3.9197242813716605e-06, "loss": 0.5188358306884766, "num_tokens": 7179615200.0, "step": 58800 }, { "epoch": 0.07842290236735304, "grad_norm": 2.046875, "learning_rate": 3.921057543597675e-06, "loss": 0.5077427864074707, "num_tokens": 7182144613.0, "step": 58820 }, { "epoch": 0.07844956775408114, "grad_norm": 2.109375, "learning_rate": 3.92239080582369e-06, "loss": 0.49254550933837893, "num_tokens": 7184593246.0, "step": 58840 }, { "epoch": 0.07847623314080925, "grad_norm": 1.7890625, "learning_rate": 3.923724068049704e-06, "loss": 0.5024696826934815, "num_tokens": 7186988134.0, "step": 58860 }, { "epoch": 0.07850289852753735, "grad_norm": 1.921875, "learning_rate": 3.925057330275719e-06, "loss": 0.4918798923492432, "num_tokens": 7189589435.0, "step": 58880 }, { "epoch": 0.07852956391426545, "grad_norm": 2.078125, "learning_rate": 3.926390592501734e-06, "loss": 0.5093095779418946, "num_tokens": 7192056769.0, "step": 58900 }, { "epoch": 0.07855622930099355, "grad_norm": 2.203125, "learning_rate": 3.927723854727748e-06, "loss": 0.4947789192199707, "num_tokens": 7194509498.0, "step": 58920 }, { "epoch": 0.07858289468772166, "grad_norm": 2.171875, "learning_rate": 3.9290571169537626e-06, "loss": 0.5053280353546142, "num_tokens": 7196890103.0, "step": 58940 }, { "epoch": 0.07860956007444976, "grad_norm": 1.8828125, "learning_rate": 3.930390379179777e-06, "loss": 0.5021146774291992, "num_tokens": 7199362264.0, "step": 58960 }, { "epoch": 0.07863622546117786, "grad_norm": 2.25, "learning_rate": 3.931723641405792e-06, "loss": 0.5000916481018066, "num_tokens": 7202083293.0, "step": 58980 }, { "epoch": 0.07866289084790597, "grad_norm": 2.40625, "learning_rate": 3.933056903631807e-06, "loss": 0.4896890640258789, "num_tokens": 7204189914.0, "step": 59000 }, { "epoch": 0.07868955623463407, "grad_norm": 2.09375, "learning_rate": 3.934390165857821e-06, "loss": 0.4876368999481201, "num_tokens": 7206592352.0, "step": 59020 }, { "epoch": 0.07871622162136217, "grad_norm": 2.109375, "learning_rate": 3.935723428083836e-06, "loss": 0.5063961505889892, "num_tokens": 7209026621.0, "step": 59040 }, { "epoch": 0.07874288700809028, "grad_norm": 2.140625, "learning_rate": 3.937056690309851e-06, "loss": 0.5061742782592773, "num_tokens": 7211580547.0, "step": 59060 }, { "epoch": 0.07876955239481838, "grad_norm": 2.296875, "learning_rate": 3.938389952535865e-06, "loss": 0.49808568954467775, "num_tokens": 7213976471.0, "step": 59080 }, { "epoch": 0.07879621778154648, "grad_norm": 2.296875, "learning_rate": 3.939723214761879e-06, "loss": 0.4962602615356445, "num_tokens": 7216438071.0, "step": 59100 }, { "epoch": 0.07882288316827459, "grad_norm": 2.171875, "learning_rate": 3.941056476987895e-06, "loss": 0.4992239952087402, "num_tokens": 7218892917.0, "step": 59120 }, { "epoch": 0.07884954855500269, "grad_norm": 2.21875, "learning_rate": 3.942389739213909e-06, "loss": 0.5066969871520997, "num_tokens": 7221267935.0, "step": 59140 }, { "epoch": 0.07887621394173079, "grad_norm": 2.09375, "learning_rate": 3.9437230014399234e-06, "loss": 0.5178412437438965, "num_tokens": 7223710160.0, "step": 59160 }, { "epoch": 0.0789028793284589, "grad_norm": 2.203125, "learning_rate": 3.945056263665938e-06, "loss": 0.5041680335998535, "num_tokens": 7226197572.0, "step": 59180 }, { "epoch": 0.07892954471518701, "grad_norm": 2.21875, "learning_rate": 3.946389525891953e-06, "loss": 0.4947017192840576, "num_tokens": 7228731104.0, "step": 59200 }, { "epoch": 0.07895621010191511, "grad_norm": 1.9765625, "learning_rate": 3.9477227881179675e-06, "loss": 0.49725770950317383, "num_tokens": 7231359921.0, "step": 59220 }, { "epoch": 0.07898287548864322, "grad_norm": 1.96875, "learning_rate": 3.949056050343981e-06, "loss": 0.49711060523986816, "num_tokens": 7233715106.0, "step": 59240 }, { "epoch": 0.07900954087537132, "grad_norm": 1.890625, "learning_rate": 3.950389312569997e-06, "loss": 0.5105415344238281, "num_tokens": 7236023331.0, "step": 59260 }, { "epoch": 0.07903620626209942, "grad_norm": 1.921875, "learning_rate": 3.951722574796012e-06, "loss": 0.5020304679870605, "num_tokens": 7238368365.0, "step": 59280 }, { "epoch": 0.07906287164882753, "grad_norm": 2.125, "learning_rate": 3.9530558370220255e-06, "loss": 0.5178645610809326, "num_tokens": 7240897724.0, "step": 59300 }, { "epoch": 0.07908953703555563, "grad_norm": 2.15625, "learning_rate": 3.95438909924804e-06, "loss": 0.47561216354370117, "num_tokens": 7243414093.0, "step": 59320 }, { "epoch": 0.07911620242228373, "grad_norm": 2.03125, "learning_rate": 3.955722361474055e-06, "loss": 0.5174359321594239, "num_tokens": 7245849656.0, "step": 59340 }, { "epoch": 0.07914286780901184, "grad_norm": 2.1875, "learning_rate": 3.95705562370007e-06, "loss": 0.4994783878326416, "num_tokens": 7248265072.0, "step": 59360 }, { "epoch": 0.07916953319573994, "grad_norm": 2.1875, "learning_rate": 3.958388885926084e-06, "loss": 0.5033786773681641, "num_tokens": 7250643615.0, "step": 59380 }, { "epoch": 0.07919619858246804, "grad_norm": 1.953125, "learning_rate": 3.959722148152099e-06, "loss": 0.5059736251831055, "num_tokens": 7252959419.0, "step": 59400 }, { "epoch": 0.07922286396919614, "grad_norm": 1.9921875, "learning_rate": 3.961055410378114e-06, "loss": 0.4989775657653809, "num_tokens": 7255719924.0, "step": 59420 }, { "epoch": 0.07924952935592425, "grad_norm": 1.96875, "learning_rate": 3.962388672604128e-06, "loss": 0.483699893951416, "num_tokens": 7258091289.0, "step": 59440 }, { "epoch": 0.07927619474265235, "grad_norm": 1.8359375, "learning_rate": 3.963721934830142e-06, "loss": 0.5028521060943604, "num_tokens": 7260630466.0, "step": 59460 }, { "epoch": 0.07930286012938045, "grad_norm": 2.296875, "learning_rate": 3.965055197056157e-06, "loss": 0.4949656963348389, "num_tokens": 7263100192.0, "step": 59480 }, { "epoch": 0.07932952551610856, "grad_norm": 2.015625, "learning_rate": 3.9663884592821725e-06, "loss": 0.49677190780639646, "num_tokens": 7265463691.0, "step": 59500 }, { "epoch": 0.07935619090283666, "grad_norm": 2.109375, "learning_rate": 3.967721721508186e-06, "loss": 0.4785449981689453, "num_tokens": 7267866044.0, "step": 59520 }, { "epoch": 0.07938285628956476, "grad_norm": 1.890625, "learning_rate": 3.969054983734201e-06, "loss": 0.5020563125610351, "num_tokens": 7270347932.0, "step": 59540 }, { "epoch": 0.07940952167629287, "grad_norm": 1.9453125, "learning_rate": 3.970388245960216e-06, "loss": 0.5031272888183593, "num_tokens": 7272781681.0, "step": 59560 }, { "epoch": 0.07943618706302097, "grad_norm": 2.09375, "learning_rate": 3.9717215081862305e-06, "loss": 0.5017830848693847, "num_tokens": 7275242202.0, "step": 59580 }, { "epoch": 0.07946285244974907, "grad_norm": 2.453125, "learning_rate": 3.973054770412245e-06, "loss": 0.5014398574829102, "num_tokens": 7277642149.0, "step": 59600 }, { "epoch": 0.07948951783647719, "grad_norm": 2.4375, "learning_rate": 3.974388032638259e-06, "loss": 0.47413053512573244, "num_tokens": 7280132353.0, "step": 59620 }, { "epoch": 0.07951618322320529, "grad_norm": 2.0, "learning_rate": 3.975721294864275e-06, "loss": 0.4854647159576416, "num_tokens": 7282436583.0, "step": 59640 }, { "epoch": 0.0795428486099334, "grad_norm": 2.21875, "learning_rate": 3.977054557090289e-06, "loss": 0.5053890705108642, "num_tokens": 7284965001.0, "step": 59660 }, { "epoch": 0.0795695139966615, "grad_norm": 1.90625, "learning_rate": 3.978387819316303e-06, "loss": 0.46790037155151365, "num_tokens": 7287515836.0, "step": 59680 }, { "epoch": 0.0795961793833896, "grad_norm": 2.21875, "learning_rate": 3.979721081542318e-06, "loss": 0.4855684280395508, "num_tokens": 7290082885.0, "step": 59700 }, { "epoch": 0.0796228447701177, "grad_norm": 1.75, "learning_rate": 3.9810543437683325e-06, "loss": 0.5031570434570313, "num_tokens": 7292418976.0, "step": 59720 }, { "epoch": 0.0796495101568458, "grad_norm": 1.703125, "learning_rate": 3.982387605994347e-06, "loss": 0.5028633117675781, "num_tokens": 7294763477.0, "step": 59740 }, { "epoch": 0.07967617554357391, "grad_norm": 2.046875, "learning_rate": 3.983720868220362e-06, "loss": 0.5010732650756836, "num_tokens": 7297352771.0, "step": 59760 }, { "epoch": 0.07970284093030201, "grad_norm": 2.234375, "learning_rate": 3.985054130446377e-06, "loss": 0.49401540756225587, "num_tokens": 7299653297.0, "step": 59780 }, { "epoch": 0.07972950631703012, "grad_norm": 1.8984375, "learning_rate": 3.986387392672391e-06, "loss": 0.48491792678833007, "num_tokens": 7301986818.0, "step": 59800 }, { "epoch": 0.07975617170375822, "grad_norm": 1.796875, "learning_rate": 3.987720654898406e-06, "loss": 0.5146007061004638, "num_tokens": 7304441853.0, "step": 59820 }, { "epoch": 0.07978283709048632, "grad_norm": 1.734375, "learning_rate": 3.98905391712442e-06, "loss": 0.4831378936767578, "num_tokens": 7306651085.0, "step": 59840 }, { "epoch": 0.07980950247721443, "grad_norm": 1.984375, "learning_rate": 3.9903871793504354e-06, "loss": 0.4936079025268555, "num_tokens": 7309216306.0, "step": 59860 }, { "epoch": 0.07983616786394253, "grad_norm": 2.515625, "learning_rate": 3.991720441576449e-06, "loss": 0.48314924240112306, "num_tokens": 7311715390.0, "step": 59880 }, { "epoch": 0.07986283325067063, "grad_norm": 1.7890625, "learning_rate": 3.993053703802464e-06, "loss": 0.4917910575866699, "num_tokens": 7314043326.0, "step": 59900 }, { "epoch": 0.07988949863739873, "grad_norm": 2.328125, "learning_rate": 3.994386966028479e-06, "loss": 0.4830923557281494, "num_tokens": 7316436922.0, "step": 59920 }, { "epoch": 0.07991616402412684, "grad_norm": 2.3125, "learning_rate": 3.995720228254493e-06, "loss": 0.5036594867706299, "num_tokens": 7318953937.0, "step": 59940 }, { "epoch": 0.07994282941085494, "grad_norm": 2.125, "learning_rate": 3.997053490480508e-06, "loss": 0.510953712463379, "num_tokens": 7321383725.0, "step": 59960 }, { "epoch": 0.07996949479758304, "grad_norm": 1.9296875, "learning_rate": 3.998386752706523e-06, "loss": 0.49158191680908203, "num_tokens": 7323767365.0, "step": 59980 }, { "epoch": 0.07999616018431115, "grad_norm": 2.21875, "learning_rate": 3.9997200149325375e-06, "loss": 0.49001383781433105, "num_tokens": 7326095581.0, "step": 60000 }, { "epoch": 0.08002282557103926, "grad_norm": 2.234375, "learning_rate": 4.001053277158552e-06, "loss": 0.516841459274292, "num_tokens": 7328638398.0, "step": 60020 }, { "epoch": 0.08004949095776737, "grad_norm": 2.015625, "learning_rate": 4.002386539384566e-06, "loss": 0.5048473834991455, "num_tokens": 7330842830.0, "step": 60040 }, { "epoch": 0.08007615634449547, "grad_norm": 2.25, "learning_rate": 4.003719801610581e-06, "loss": 0.4983854293823242, "num_tokens": 7333429721.0, "step": 60060 }, { "epoch": 0.08010282173122357, "grad_norm": 1.984375, "learning_rate": 4.0050530638365955e-06, "loss": 0.5056562423706055, "num_tokens": 7335957854.0, "step": 60080 }, { "epoch": 0.08012948711795168, "grad_norm": 1.9765625, "learning_rate": 4.00638632606261e-06, "loss": 0.49296083450317385, "num_tokens": 7338467113.0, "step": 60100 }, { "epoch": 0.08015615250467978, "grad_norm": 2.015625, "learning_rate": 4.007719588288625e-06, "loss": 0.5119024753570557, "num_tokens": 7340792925.0, "step": 60120 }, { "epoch": 0.08018281789140788, "grad_norm": 2.171875, "learning_rate": 4.0090528505146396e-06, "loss": 0.49470839500427244, "num_tokens": 7343478453.0, "step": 60140 }, { "epoch": 0.08020948327813598, "grad_norm": 2.265625, "learning_rate": 4.010386112740654e-06, "loss": 0.49071836471557617, "num_tokens": 7345837624.0, "step": 60160 }, { "epoch": 0.08023614866486409, "grad_norm": 2.25, "learning_rate": 4.011719374966669e-06, "loss": 0.4928267478942871, "num_tokens": 7348257020.0, "step": 60180 }, { "epoch": 0.08026281405159219, "grad_norm": 2.5625, "learning_rate": 4.013052637192683e-06, "loss": 0.500309944152832, "num_tokens": 7350553928.0, "step": 60200 }, { "epoch": 0.0802894794383203, "grad_norm": 2.046875, "learning_rate": 4.0143858994186975e-06, "loss": 0.516607141494751, "num_tokens": 7352860394.0, "step": 60220 }, { "epoch": 0.0803161448250484, "grad_norm": 1.953125, "learning_rate": 4.015719161644713e-06, "loss": 0.4945819854736328, "num_tokens": 7355505051.0, "step": 60240 }, { "epoch": 0.0803428102117765, "grad_norm": 2.09375, "learning_rate": 4.017052423870727e-06, "loss": 0.4985953330993652, "num_tokens": 7358080039.0, "step": 60260 }, { "epoch": 0.0803694755985046, "grad_norm": 1.984375, "learning_rate": 4.018385686096742e-06, "loss": 0.4949221134185791, "num_tokens": 7360394163.0, "step": 60280 }, { "epoch": 0.0803961409852327, "grad_norm": 2.046875, "learning_rate": 4.019718948322756e-06, "loss": 0.49460477828979493, "num_tokens": 7362846510.0, "step": 60300 }, { "epoch": 0.08042280637196081, "grad_norm": 1.96875, "learning_rate": 4.021052210548771e-06, "loss": 0.5005491256713868, "num_tokens": 7365158623.0, "step": 60320 }, { "epoch": 0.08044947175868891, "grad_norm": 1.8203125, "learning_rate": 4.022385472774786e-06, "loss": 0.5034080505371094, "num_tokens": 7367927038.0, "step": 60340 }, { "epoch": 0.08047613714541701, "grad_norm": 1.7578125, "learning_rate": 4.0237187350008e-06, "loss": 0.4920352935791016, "num_tokens": 7370328001.0, "step": 60360 }, { "epoch": 0.08050280253214512, "grad_norm": 2.0, "learning_rate": 4.025051997226815e-06, "loss": 0.4911354064941406, "num_tokens": 7372960591.0, "step": 60380 }, { "epoch": 0.08052946791887322, "grad_norm": 2.046875, "learning_rate": 4.02638525945283e-06, "loss": 0.51223726272583, "num_tokens": 7375308188.0, "step": 60400 }, { "epoch": 0.08055613330560134, "grad_norm": 1.96875, "learning_rate": 4.027718521678844e-06, "loss": 0.49521074295043943, "num_tokens": 7377779079.0, "step": 60420 }, { "epoch": 0.08058279869232944, "grad_norm": 1.65625, "learning_rate": 4.029051783904858e-06, "loss": 0.4868584156036377, "num_tokens": 7380432957.0, "step": 60440 }, { "epoch": 0.08060946407905754, "grad_norm": 1.921875, "learning_rate": 4.030385046130874e-06, "loss": 0.4885383605957031, "num_tokens": 7382860309.0, "step": 60460 }, { "epoch": 0.08063612946578565, "grad_norm": 1.8828125, "learning_rate": 4.031718308356888e-06, "loss": 0.4983521461486816, "num_tokens": 7385129493.0, "step": 60480 }, { "epoch": 0.08066279485251375, "grad_norm": 2.0625, "learning_rate": 4.0330515705829025e-06, "loss": 0.5004489898681641, "num_tokens": 7387495153.0, "step": 60500 }, { "epoch": 0.08068946023924185, "grad_norm": 1.59375, "learning_rate": 4.034384832808917e-06, "loss": 0.5115861892700195, "num_tokens": 7389852633.0, "step": 60520 }, { "epoch": 0.08071612562596996, "grad_norm": 2.078125, "learning_rate": 4.035718095034932e-06, "loss": 0.5010309219360352, "num_tokens": 7392352379.0, "step": 60540 }, { "epoch": 0.08074279101269806, "grad_norm": 2.03125, "learning_rate": 4.037051357260947e-06, "loss": 0.4868632793426514, "num_tokens": 7394543266.0, "step": 60560 }, { "epoch": 0.08076945639942616, "grad_norm": 1.984375, "learning_rate": 4.0383846194869605e-06, "loss": 0.48191165924072266, "num_tokens": 7397182056.0, "step": 60580 }, { "epoch": 0.08079612178615427, "grad_norm": 2.171875, "learning_rate": 4.039717881712976e-06, "loss": 0.495297908782959, "num_tokens": 7399712611.0, "step": 60600 }, { "epoch": 0.08082278717288237, "grad_norm": 1.9375, "learning_rate": 4.041051143938991e-06, "loss": 0.48251924514770506, "num_tokens": 7402058134.0, "step": 60620 }, { "epoch": 0.08084945255961047, "grad_norm": 1.9921875, "learning_rate": 4.0423844061650046e-06, "loss": 0.4998322486877441, "num_tokens": 7404541156.0, "step": 60640 }, { "epoch": 0.08087611794633857, "grad_norm": 2.453125, "learning_rate": 4.043717668391019e-06, "loss": 0.4720316886901855, "num_tokens": 7407007265.0, "step": 60660 }, { "epoch": 0.08090278333306668, "grad_norm": 2.125, "learning_rate": 4.045050930617034e-06, "loss": 0.49438161849975587, "num_tokens": 7409623771.0, "step": 60680 }, { "epoch": 0.08092944871979478, "grad_norm": 2.25, "learning_rate": 4.046384192843049e-06, "loss": 0.4927361488342285, "num_tokens": 7411994470.0, "step": 60700 }, { "epoch": 0.08095611410652288, "grad_norm": 1.7109375, "learning_rate": 4.047717455069063e-06, "loss": 0.4780630111694336, "num_tokens": 7414312246.0, "step": 60720 }, { "epoch": 0.08098277949325099, "grad_norm": 1.875, "learning_rate": 4.049050717295078e-06, "loss": 0.49844655990600584, "num_tokens": 7417004618.0, "step": 60740 }, { "epoch": 0.08100944487997909, "grad_norm": 2.34375, "learning_rate": 4.050383979521093e-06, "loss": 0.4879286766052246, "num_tokens": 7419534140.0, "step": 60760 }, { "epoch": 0.08103611026670719, "grad_norm": 2.09375, "learning_rate": 4.0517172417471075e-06, "loss": 0.48793907165527345, "num_tokens": 7422025301.0, "step": 60780 }, { "epoch": 0.0810627756534353, "grad_norm": 2.078125, "learning_rate": 4.053050503973121e-06, "loss": 0.4932390213012695, "num_tokens": 7424690633.0, "step": 60800 }, { "epoch": 0.0810894410401634, "grad_norm": 2.125, "learning_rate": 4.054383766199136e-06, "loss": 0.47890186309814453, "num_tokens": 7427163754.0, "step": 60820 }, { "epoch": 0.08111610642689152, "grad_norm": 1.8203125, "learning_rate": 4.055717028425152e-06, "loss": 0.5032333374023438, "num_tokens": 7429649783.0, "step": 60840 }, { "epoch": 0.08114277181361962, "grad_norm": 2.359375, "learning_rate": 4.0570502906511654e-06, "loss": 0.5062587261199951, "num_tokens": 7432035615.0, "step": 60860 }, { "epoch": 0.08116943720034772, "grad_norm": 2.25, "learning_rate": 4.05838355287718e-06, "loss": 0.5038694381713867, "num_tokens": 7434425518.0, "step": 60880 }, { "epoch": 0.08119610258707582, "grad_norm": 2.03125, "learning_rate": 4.059716815103195e-06, "loss": 0.49529328346252444, "num_tokens": 7436882417.0, "step": 60900 }, { "epoch": 0.08122276797380393, "grad_norm": 2.28125, "learning_rate": 4.0610500773292095e-06, "loss": 0.49167828559875487, "num_tokens": 7439538493.0, "step": 60920 }, { "epoch": 0.08124943336053203, "grad_norm": 2.0625, "learning_rate": 4.062383339555224e-06, "loss": 0.5091489791870117, "num_tokens": 7442046970.0, "step": 60940 }, { "epoch": 0.08127609874726013, "grad_norm": 2.15625, "learning_rate": 4.063716601781238e-06, "loss": 0.4960649490356445, "num_tokens": 7444549464.0, "step": 60960 }, { "epoch": 0.08130276413398824, "grad_norm": 2.109375, "learning_rate": 4.065049864007254e-06, "loss": 0.5039226531982421, "num_tokens": 7446862167.0, "step": 60980 }, { "epoch": 0.08132942952071634, "grad_norm": 1.7734375, "learning_rate": 4.066383126233268e-06, "loss": 0.5075363159179688, "num_tokens": 7449224133.0, "step": 61000 }, { "epoch": 0.08135609490744444, "grad_norm": 1.984375, "learning_rate": 4.067716388459282e-06, "loss": 0.4981637954711914, "num_tokens": 7451669961.0, "step": 61020 }, { "epoch": 0.08138276029417255, "grad_norm": 1.578125, "learning_rate": 4.069049650685297e-06, "loss": 0.4982942581176758, "num_tokens": 7454214288.0, "step": 61040 }, { "epoch": 0.08140942568090065, "grad_norm": 2.328125, "learning_rate": 4.070382912911312e-06, "loss": 0.5055695533752441, "num_tokens": 7456605625.0, "step": 61060 }, { "epoch": 0.08143609106762875, "grad_norm": 2.0625, "learning_rate": 4.071716175137326e-06, "loss": 0.4933763980865479, "num_tokens": 7458917406.0, "step": 61080 }, { "epoch": 0.08146275645435685, "grad_norm": 1.796875, "learning_rate": 4.073049437363341e-06, "loss": 0.5038667678833008, "num_tokens": 7461242860.0, "step": 61100 }, { "epoch": 0.08148942184108496, "grad_norm": 1.7421875, "learning_rate": 4.074382699589356e-06, "loss": 0.5254841804504394, "num_tokens": 7463705489.0, "step": 61120 }, { "epoch": 0.08151608722781306, "grad_norm": 1.5546875, "learning_rate": 4.07571596181537e-06, "loss": 0.49582653045654296, "num_tokens": 7466166718.0, "step": 61140 }, { "epoch": 0.08154275261454116, "grad_norm": 1.90625, "learning_rate": 4.077049224041385e-06, "loss": 0.47833914756774903, "num_tokens": 7468665278.0, "step": 61160 }, { "epoch": 0.08156941800126927, "grad_norm": 2.265625, "learning_rate": 4.078382486267399e-06, "loss": 0.47908821105957033, "num_tokens": 7471211817.0, "step": 61180 }, { "epoch": 0.08159608338799737, "grad_norm": 2.203125, "learning_rate": 4.0797157484934145e-06, "loss": 0.4914699554443359, "num_tokens": 7473613957.0, "step": 61200 }, { "epoch": 0.08162274877472547, "grad_norm": 1.953125, "learning_rate": 4.081049010719428e-06, "loss": 0.4784365653991699, "num_tokens": 7476147987.0, "step": 61220 }, { "epoch": 0.08164941416145359, "grad_norm": 2.375, "learning_rate": 4.082382272945443e-06, "loss": 0.4869736671447754, "num_tokens": 7478505558.0, "step": 61240 }, { "epoch": 0.0816760795481817, "grad_norm": 2.09375, "learning_rate": 4.083715535171458e-06, "loss": 0.4856569290161133, "num_tokens": 7480989028.0, "step": 61260 }, { "epoch": 0.0817027449349098, "grad_norm": 1.8203125, "learning_rate": 4.0850487973974725e-06, "loss": 0.4967471122741699, "num_tokens": 7483494993.0, "step": 61280 }, { "epoch": 0.0817294103216379, "grad_norm": 2.03125, "learning_rate": 4.086382059623487e-06, "loss": 0.492112922668457, "num_tokens": 7486011150.0, "step": 61300 }, { "epoch": 0.081756075708366, "grad_norm": 2.453125, "learning_rate": 4.087715321849502e-06, "loss": 0.5005480289459229, "num_tokens": 7488587113.0, "step": 61320 }, { "epoch": 0.0817827410950941, "grad_norm": 2.15625, "learning_rate": 4.089048584075517e-06, "loss": 0.4809379577636719, "num_tokens": 7491015756.0, "step": 61340 }, { "epoch": 0.08180940648182221, "grad_norm": 2.140625, "learning_rate": 4.090381846301531e-06, "loss": 0.4889378070831299, "num_tokens": 7493640344.0, "step": 61360 }, { "epoch": 0.08183607186855031, "grad_norm": 1.9375, "learning_rate": 4.091715108527545e-06, "loss": 0.49079465866088867, "num_tokens": 7496003942.0, "step": 61380 }, { "epoch": 0.08186273725527841, "grad_norm": 1.796875, "learning_rate": 4.09304837075356e-06, "loss": 0.4874702453613281, "num_tokens": 7498502516.0, "step": 61400 }, { "epoch": 0.08188940264200652, "grad_norm": 1.9296875, "learning_rate": 4.0943816329795745e-06, "loss": 0.4968867778778076, "num_tokens": 7500954572.0, "step": 61420 }, { "epoch": 0.08191606802873462, "grad_norm": 1.8671875, "learning_rate": 4.095714895205589e-06, "loss": 0.5038209915161133, "num_tokens": 7503438234.0, "step": 61440 }, { "epoch": 0.08194273341546272, "grad_norm": 2.34375, "learning_rate": 4.097048157431604e-06, "loss": 0.5015531539916992, "num_tokens": 7505908778.0, "step": 61460 }, { "epoch": 0.08196939880219083, "grad_norm": 2.203125, "learning_rate": 4.098381419657619e-06, "loss": 0.49442305564880373, "num_tokens": 7508089268.0, "step": 61480 }, { "epoch": 0.08199606418891893, "grad_norm": 1.796875, "learning_rate": 4.099714681883633e-06, "loss": 0.4943387031555176, "num_tokens": 7510678069.0, "step": 61500 }, { "epoch": 0.08202272957564703, "grad_norm": 2.125, "learning_rate": 4.101047944109648e-06, "loss": 0.48520240783691404, "num_tokens": 7513085923.0, "step": 61520 }, { "epoch": 0.08204939496237514, "grad_norm": 2.171875, "learning_rate": 4.102381206335662e-06, "loss": 0.48686561584472654, "num_tokens": 7515609996.0, "step": 61540 }, { "epoch": 0.08207606034910324, "grad_norm": 2.03125, "learning_rate": 4.103714468561677e-06, "loss": 0.4910012722015381, "num_tokens": 7517999469.0, "step": 61560 }, { "epoch": 0.08210272573583134, "grad_norm": 2.015625, "learning_rate": 4.105047730787692e-06, "loss": 0.4999356746673584, "num_tokens": 7520685049.0, "step": 61580 }, { "epoch": 0.08212939112255944, "grad_norm": 2.234375, "learning_rate": 4.106380993013706e-06, "loss": 0.4993494987487793, "num_tokens": 7523237105.0, "step": 61600 }, { "epoch": 0.08215605650928755, "grad_norm": 2.34375, "learning_rate": 4.107714255239721e-06, "loss": 0.4971149444580078, "num_tokens": 7525626367.0, "step": 61620 }, { "epoch": 0.08218272189601566, "grad_norm": 1.6796875, "learning_rate": 4.109047517465735e-06, "loss": 0.48338623046875, "num_tokens": 7528076832.0, "step": 61640 }, { "epoch": 0.08220938728274377, "grad_norm": 2.09375, "learning_rate": 4.11038077969175e-06, "loss": 0.4919157028198242, "num_tokens": 7530768759.0, "step": 61660 }, { "epoch": 0.08223605266947187, "grad_norm": 2.140625, "learning_rate": 4.111714041917765e-06, "loss": 0.4932861328125, "num_tokens": 7533193538.0, "step": 61680 }, { "epoch": 0.08226271805619997, "grad_norm": 2.125, "learning_rate": 4.113047304143779e-06, "loss": 0.4837645053863525, "num_tokens": 7535585732.0, "step": 61700 }, { "epoch": 0.08228938344292808, "grad_norm": 2.28125, "learning_rate": 4.114380566369794e-06, "loss": 0.49776315689086914, "num_tokens": 7538048698.0, "step": 61720 }, { "epoch": 0.08231604882965618, "grad_norm": 2.53125, "learning_rate": 4.115713828595809e-06, "loss": 0.5019602298736572, "num_tokens": 7540509796.0, "step": 61740 }, { "epoch": 0.08234271421638428, "grad_norm": 1.90625, "learning_rate": 4.117047090821823e-06, "loss": 0.5009790897369385, "num_tokens": 7542890315.0, "step": 61760 }, { "epoch": 0.08236937960311239, "grad_norm": 2.296875, "learning_rate": 4.1183803530478375e-06, "loss": 0.4785783767700195, "num_tokens": 7545200999.0, "step": 61780 }, { "epoch": 0.08239604498984049, "grad_norm": 1.984375, "learning_rate": 4.119713615273853e-06, "loss": 0.4959857940673828, "num_tokens": 7547639201.0, "step": 61800 }, { "epoch": 0.08242271037656859, "grad_norm": 1.9609375, "learning_rate": 4.121046877499867e-06, "loss": 0.48705625534057617, "num_tokens": 7550162973.0, "step": 61820 }, { "epoch": 0.0824493757632967, "grad_norm": 1.96875, "learning_rate": 4.122380139725882e-06, "loss": 0.4980666160583496, "num_tokens": 7552391292.0, "step": 61840 }, { "epoch": 0.0824760411500248, "grad_norm": 1.640625, "learning_rate": 4.123713401951896e-06, "loss": 0.49673123359680177, "num_tokens": 7554769622.0, "step": 61860 }, { "epoch": 0.0825027065367529, "grad_norm": 1.6953125, "learning_rate": 4.125046664177911e-06, "loss": 0.4781668663024902, "num_tokens": 7557442928.0, "step": 61880 }, { "epoch": 0.082529371923481, "grad_norm": 2.109375, "learning_rate": 4.126379926403926e-06, "loss": 0.4820090293884277, "num_tokens": 7559623110.0, "step": 61900 }, { "epoch": 0.08255603731020911, "grad_norm": 2.03125, "learning_rate": 4.1277131886299395e-06, "loss": 0.49952969551086424, "num_tokens": 7562027205.0, "step": 61920 }, { "epoch": 0.08258270269693721, "grad_norm": 2.125, "learning_rate": 4.129046450855955e-06, "loss": 0.4973934173583984, "num_tokens": 7564481353.0, "step": 61940 }, { "epoch": 0.08260936808366531, "grad_norm": 1.984375, "learning_rate": 4.13037971308197e-06, "loss": 0.494505786895752, "num_tokens": 7566990842.0, "step": 61960 }, { "epoch": 0.08263603347039342, "grad_norm": 2.203125, "learning_rate": 4.131712975307984e-06, "loss": 0.491669750213623, "num_tokens": 7569475648.0, "step": 61980 }, { "epoch": 0.08266269885712152, "grad_norm": 2.640625, "learning_rate": 4.133046237533998e-06, "loss": 0.4808187484741211, "num_tokens": 7571970489.0, "step": 62000 }, { "epoch": 0.08268936424384962, "grad_norm": 1.9453125, "learning_rate": 4.134379499760013e-06, "loss": 0.4687476634979248, "num_tokens": 7574313275.0, "step": 62020 }, { "epoch": 0.08271602963057773, "grad_norm": 1.7578125, "learning_rate": 4.135712761986028e-06, "loss": 0.5041427612304688, "num_tokens": 7576785162.0, "step": 62040 }, { "epoch": 0.08274269501730584, "grad_norm": 2.203125, "learning_rate": 4.1370460242120424e-06, "loss": 0.49311280250549316, "num_tokens": 7579243760.0, "step": 62060 }, { "epoch": 0.08276936040403395, "grad_norm": 2.046875, "learning_rate": 4.138379286438057e-06, "loss": 0.5015636920928955, "num_tokens": 7581600464.0, "step": 62080 }, { "epoch": 0.08279602579076205, "grad_norm": 1.6796875, "learning_rate": 4.139712548664072e-06, "loss": 0.505433177947998, "num_tokens": 7584024424.0, "step": 62100 }, { "epoch": 0.08282269117749015, "grad_norm": 1.734375, "learning_rate": 4.1410458108900866e-06, "loss": 0.4983847618103027, "num_tokens": 7586437258.0, "step": 62120 }, { "epoch": 0.08284935656421825, "grad_norm": 2.328125, "learning_rate": 4.1423790731161e-06, "loss": 0.49223990440368653, "num_tokens": 7589057316.0, "step": 62140 }, { "epoch": 0.08287602195094636, "grad_norm": 2.28125, "learning_rate": 4.143712335342115e-06, "loss": 0.4913745880126953, "num_tokens": 7591594027.0, "step": 62160 }, { "epoch": 0.08290268733767446, "grad_norm": 2.109375, "learning_rate": 4.145045597568131e-06, "loss": 0.5014698505401611, "num_tokens": 7594138371.0, "step": 62180 }, { "epoch": 0.08292935272440256, "grad_norm": 1.9296875, "learning_rate": 4.1463788597941445e-06, "loss": 0.48837876319885254, "num_tokens": 7596434383.0, "step": 62200 }, { "epoch": 0.08295601811113067, "grad_norm": 2.40625, "learning_rate": 4.147712122020159e-06, "loss": 0.487350606918335, "num_tokens": 7598967816.0, "step": 62220 }, { "epoch": 0.08298268349785877, "grad_norm": 1.9609375, "learning_rate": 4.149045384246174e-06, "loss": 0.4887885093688965, "num_tokens": 7601540115.0, "step": 62240 }, { "epoch": 0.08300934888458687, "grad_norm": 2.59375, "learning_rate": 4.150378646472189e-06, "loss": 0.4821589469909668, "num_tokens": 7603977245.0, "step": 62260 }, { "epoch": 0.08303601427131498, "grad_norm": 2.546875, "learning_rate": 4.151711908698203e-06, "loss": 0.48940067291259765, "num_tokens": 7606447626.0, "step": 62280 }, { "epoch": 0.08306267965804308, "grad_norm": 1.96875, "learning_rate": 4.153045170924217e-06, "loss": 0.49280567169189454, "num_tokens": 7608956112.0, "step": 62300 }, { "epoch": 0.08308934504477118, "grad_norm": 2.078125, "learning_rate": 4.154378433150233e-06, "loss": 0.48311386108398435, "num_tokens": 7611402781.0, "step": 62320 }, { "epoch": 0.08311601043149928, "grad_norm": 1.8828125, "learning_rate": 4.1557116953762474e-06, "loss": 0.4896872043609619, "num_tokens": 7613844583.0, "step": 62340 }, { "epoch": 0.08314267581822739, "grad_norm": 2.125, "learning_rate": 4.157044957602261e-06, "loss": 0.4886924743652344, "num_tokens": 7616248022.0, "step": 62360 }, { "epoch": 0.08316934120495549, "grad_norm": 2.125, "learning_rate": 4.158378219828276e-06, "loss": 0.4742110252380371, "num_tokens": 7618631507.0, "step": 62380 }, { "epoch": 0.0831960065916836, "grad_norm": 2.046875, "learning_rate": 4.159711482054291e-06, "loss": 0.490172004699707, "num_tokens": 7621011903.0, "step": 62400 }, { "epoch": 0.0832226719784117, "grad_norm": 1.84375, "learning_rate": 4.161044744280305e-06, "loss": 0.49224581718444826, "num_tokens": 7623569769.0, "step": 62420 }, { "epoch": 0.0832493373651398, "grad_norm": 1.453125, "learning_rate": 4.16237800650632e-06, "loss": 0.4746662139892578, "num_tokens": 7626012516.0, "step": 62440 }, { "epoch": 0.08327600275186792, "grad_norm": 2.484375, "learning_rate": 4.163711268732335e-06, "loss": 0.4955441474914551, "num_tokens": 7628428996.0, "step": 62460 }, { "epoch": 0.08330266813859602, "grad_norm": 2.0625, "learning_rate": 4.1650445309583495e-06, "loss": 0.47840085029602053, "num_tokens": 7630812553.0, "step": 62480 }, { "epoch": 0.08332933352532412, "grad_norm": 2.703125, "learning_rate": 4.166377793184364e-06, "loss": 0.49433155059814454, "num_tokens": 7633236127.0, "step": 62500 }, { "epoch": 0.08335599891205223, "grad_norm": 1.96875, "learning_rate": 4.167711055410378e-06, "loss": 0.4789741039276123, "num_tokens": 7635600212.0, "step": 62520 }, { "epoch": 0.08338266429878033, "grad_norm": 1.6640625, "learning_rate": 4.169044317636393e-06, "loss": 0.4862213134765625, "num_tokens": 7637985367.0, "step": 62540 }, { "epoch": 0.08340932968550843, "grad_norm": 2.078125, "learning_rate": 4.1703775798624074e-06, "loss": 0.48271894454956055, "num_tokens": 7640476363.0, "step": 62560 }, { "epoch": 0.08343599507223654, "grad_norm": 2.171875, "learning_rate": 4.171710842088422e-06, "loss": 0.48974199295043946, "num_tokens": 7642882610.0, "step": 62580 }, { "epoch": 0.08346266045896464, "grad_norm": 2.3125, "learning_rate": 4.173044104314437e-06, "loss": 0.48484106063842775, "num_tokens": 7645147542.0, "step": 62600 }, { "epoch": 0.08348932584569274, "grad_norm": 2.25, "learning_rate": 4.1743773665404516e-06, "loss": 0.49628539085388185, "num_tokens": 7647567012.0, "step": 62620 }, { "epoch": 0.08351599123242084, "grad_norm": 2.3125, "learning_rate": 4.175710628766466e-06, "loss": 0.46930875778198244, "num_tokens": 7649999935.0, "step": 62640 }, { "epoch": 0.08354265661914895, "grad_norm": 2.59375, "learning_rate": 4.177043890992481e-06, "loss": 0.46649961471557616, "num_tokens": 7652439590.0, "step": 62660 }, { "epoch": 0.08356932200587705, "grad_norm": 1.8359375, "learning_rate": 4.178377153218496e-06, "loss": 0.4931499481201172, "num_tokens": 7655006587.0, "step": 62680 }, { "epoch": 0.08359598739260515, "grad_norm": 1.9765625, "learning_rate": 4.17971041544451e-06, "loss": 0.49365577697753904, "num_tokens": 7657445728.0, "step": 62700 }, { "epoch": 0.08362265277933326, "grad_norm": 1.84375, "learning_rate": 4.181043677670524e-06, "loss": 0.48056831359863283, "num_tokens": 7659943925.0, "step": 62720 }, { "epoch": 0.08364931816606136, "grad_norm": 1.9609375, "learning_rate": 4.182376939896539e-06, "loss": 0.48052315711975097, "num_tokens": 7662615206.0, "step": 62740 }, { "epoch": 0.08367598355278946, "grad_norm": 1.4921875, "learning_rate": 4.183710202122554e-06, "loss": 0.503524923324585, "num_tokens": 7664899061.0, "step": 62760 }, { "epoch": 0.08370264893951757, "grad_norm": 2.03125, "learning_rate": 4.185043464348568e-06, "loss": 0.48451056480407717, "num_tokens": 7667457685.0, "step": 62780 }, { "epoch": 0.08372931432624567, "grad_norm": 1.7421875, "learning_rate": 4.186376726574583e-06, "loss": 0.4984583854675293, "num_tokens": 7669743666.0, "step": 62800 }, { "epoch": 0.08375597971297377, "grad_norm": 2.40625, "learning_rate": 4.187709988800598e-06, "loss": 0.4806025505065918, "num_tokens": 7672370757.0, "step": 62820 }, { "epoch": 0.08378264509970187, "grad_norm": 1.8984375, "learning_rate": 4.189043251026612e-06, "loss": 0.5036988258361816, "num_tokens": 7674747575.0, "step": 62840 }, { "epoch": 0.08380931048642999, "grad_norm": 2.296875, "learning_rate": 4.190376513252627e-06, "loss": 0.49725685119628904, "num_tokens": 7677349941.0, "step": 62860 }, { "epoch": 0.0838359758731581, "grad_norm": 2.03125, "learning_rate": 4.191709775478641e-06, "loss": 0.4907033443450928, "num_tokens": 7679765063.0, "step": 62880 }, { "epoch": 0.0838626412598862, "grad_norm": 1.7421875, "learning_rate": 4.193043037704656e-06, "loss": 0.49203004837036135, "num_tokens": 7682322186.0, "step": 62900 }, { "epoch": 0.0838893066466143, "grad_norm": 2.5, "learning_rate": 4.194376299930671e-06, "loss": 0.49682273864746096, "num_tokens": 7684759813.0, "step": 62920 }, { "epoch": 0.0839159720333424, "grad_norm": 2.140625, "learning_rate": 4.195709562156685e-06, "loss": 0.495974063873291, "num_tokens": 7687139146.0, "step": 62940 }, { "epoch": 0.0839426374200705, "grad_norm": 1.84375, "learning_rate": 4.1970428243827e-06, "loss": 0.48198866844177246, "num_tokens": 7689636723.0, "step": 62960 }, { "epoch": 0.08396930280679861, "grad_norm": 2.25, "learning_rate": 4.1983760866087145e-06, "loss": 0.49915332794189454, "num_tokens": 7691733247.0, "step": 62980 }, { "epoch": 0.08399596819352671, "grad_norm": 1.765625, "learning_rate": 4.199709348834729e-06, "loss": 0.49057846069335936, "num_tokens": 7694380083.0, "step": 63000 }, { "epoch": 0.08402263358025482, "grad_norm": 2.0, "learning_rate": 4.201042611060744e-06, "loss": 0.4838283061981201, "num_tokens": 7696665221.0, "step": 63020 }, { "epoch": 0.08404929896698292, "grad_norm": 2.28125, "learning_rate": 4.202375873286758e-06, "loss": 0.486993408203125, "num_tokens": 7699057519.0, "step": 63040 }, { "epoch": 0.08407596435371102, "grad_norm": 2.546875, "learning_rate": 4.203709135512773e-06, "loss": 0.4973881244659424, "num_tokens": 7701546284.0, "step": 63060 }, { "epoch": 0.08410262974043912, "grad_norm": 1.9609375, "learning_rate": 4.205042397738788e-06, "loss": 0.4967776298522949, "num_tokens": 7704004870.0, "step": 63080 }, { "epoch": 0.08412929512716723, "grad_norm": 2.203125, "learning_rate": 4.206375659964802e-06, "loss": 0.501395320892334, "num_tokens": 7706460308.0, "step": 63100 }, { "epoch": 0.08415596051389533, "grad_norm": 2.140625, "learning_rate": 4.2077089221908165e-06, "loss": 0.47112326622009276, "num_tokens": 7708681180.0, "step": 63120 }, { "epoch": 0.08418262590062343, "grad_norm": 1.8125, "learning_rate": 4.209042184416831e-06, "loss": 0.490887451171875, "num_tokens": 7710843649.0, "step": 63140 }, { "epoch": 0.08420929128735154, "grad_norm": 2.453125, "learning_rate": 4.210375446642846e-06, "loss": 0.4844989776611328, "num_tokens": 7713250116.0, "step": 63160 }, { "epoch": 0.08423595667407964, "grad_norm": 1.6484375, "learning_rate": 4.211708708868861e-06, "loss": 0.4779962539672852, "num_tokens": 7715657869.0, "step": 63180 }, { "epoch": 0.08426262206080774, "grad_norm": 1.8203125, "learning_rate": 4.213041971094875e-06, "loss": 0.5036233901977539, "num_tokens": 7718181635.0, "step": 63200 }, { "epoch": 0.08428928744753585, "grad_norm": 1.640625, "learning_rate": 4.21437523332089e-06, "loss": 0.4744864463806152, "num_tokens": 7720776249.0, "step": 63220 }, { "epoch": 0.08431595283426395, "grad_norm": 2.125, "learning_rate": 4.215708495546905e-06, "loss": 0.48690214157104494, "num_tokens": 7723077934.0, "step": 63240 }, { "epoch": 0.08434261822099205, "grad_norm": 1.90625, "learning_rate": 4.217041757772919e-06, "loss": 0.515131664276123, "num_tokens": 7725447827.0, "step": 63260 }, { "epoch": 0.08436928360772017, "grad_norm": 2.03125, "learning_rate": 4.218375019998933e-06, "loss": 0.48669824600219724, "num_tokens": 7727996322.0, "step": 63280 }, { "epoch": 0.08439594899444827, "grad_norm": 1.8515625, "learning_rate": 4.219708282224949e-06, "loss": 0.5009878158569336, "num_tokens": 7730061056.0, "step": 63300 }, { "epoch": 0.08442261438117638, "grad_norm": 1.859375, "learning_rate": 4.221041544450963e-06, "loss": 0.4721534252166748, "num_tokens": 7732439434.0, "step": 63320 }, { "epoch": 0.08444927976790448, "grad_norm": 2.03125, "learning_rate": 4.222374806676977e-06, "loss": 0.4956791877746582, "num_tokens": 7735164437.0, "step": 63340 }, { "epoch": 0.08447594515463258, "grad_norm": 2.15625, "learning_rate": 4.223708068902992e-06, "loss": 0.4777385711669922, "num_tokens": 7737497041.0, "step": 63360 }, { "epoch": 0.08450261054136068, "grad_norm": 1.8125, "learning_rate": 4.225041331129007e-06, "loss": 0.47711548805236814, "num_tokens": 7740053780.0, "step": 63380 }, { "epoch": 0.08452927592808879, "grad_norm": 2.203125, "learning_rate": 4.2263745933550215e-06, "loss": 0.489012622833252, "num_tokens": 7742567174.0, "step": 63400 }, { "epoch": 0.08455594131481689, "grad_norm": 2.140625, "learning_rate": 4.227707855581036e-06, "loss": 0.4915167808532715, "num_tokens": 7745173556.0, "step": 63420 }, { "epoch": 0.084582606701545, "grad_norm": 1.9921875, "learning_rate": 4.229041117807051e-06, "loss": 0.5173891544342041, "num_tokens": 7747767193.0, "step": 63440 }, { "epoch": 0.0846092720882731, "grad_norm": 1.546875, "learning_rate": 4.230374380033066e-06, "loss": 0.4773097038269043, "num_tokens": 7750286756.0, "step": 63460 }, { "epoch": 0.0846359374750012, "grad_norm": 2.140625, "learning_rate": 4.2317076422590795e-06, "loss": 0.47415990829467775, "num_tokens": 7752632438.0, "step": 63480 }, { "epoch": 0.0846626028617293, "grad_norm": 1.5078125, "learning_rate": 4.233040904485094e-06, "loss": 0.4816089630126953, "num_tokens": 7755123529.0, "step": 63500 }, { "epoch": 0.0846892682484574, "grad_norm": 1.8515625, "learning_rate": 4.23437416671111e-06, "loss": 0.47490415573120115, "num_tokens": 7757586699.0, "step": 63520 }, { "epoch": 0.08471593363518551, "grad_norm": 2.0, "learning_rate": 4.235707428937124e-06, "loss": 0.4823190689086914, "num_tokens": 7760207507.0, "step": 63540 }, { "epoch": 0.08474259902191361, "grad_norm": 1.8203125, "learning_rate": 4.237040691163138e-06, "loss": 0.5043483734130859, "num_tokens": 7762808824.0, "step": 63560 }, { "epoch": 0.08476926440864171, "grad_norm": 1.765625, "learning_rate": 4.238373953389153e-06, "loss": 0.4905510425567627, "num_tokens": 7765132142.0, "step": 63580 }, { "epoch": 0.08479592979536982, "grad_norm": 1.9375, "learning_rate": 4.239707215615168e-06, "loss": 0.4931620121002197, "num_tokens": 7767570585.0, "step": 63600 }, { "epoch": 0.08482259518209792, "grad_norm": 1.9140625, "learning_rate": 4.241040477841182e-06, "loss": 0.47661781311035156, "num_tokens": 7770036995.0, "step": 63620 }, { "epoch": 0.08484926056882602, "grad_norm": 1.875, "learning_rate": 4.242373740067196e-06, "loss": 0.5102083206176757, "num_tokens": 7772592373.0, "step": 63640 }, { "epoch": 0.08487592595555413, "grad_norm": 1.875, "learning_rate": 4.243707002293212e-06, "loss": 0.4787607669830322, "num_tokens": 7775056688.0, "step": 63660 }, { "epoch": 0.08490259134228224, "grad_norm": 1.9765625, "learning_rate": 4.2450402645192265e-06, "loss": 0.4777054309844971, "num_tokens": 7777354298.0, "step": 63680 }, { "epoch": 0.08492925672901035, "grad_norm": 2.34375, "learning_rate": 4.24637352674524e-06, "loss": 0.495526123046875, "num_tokens": 7779770593.0, "step": 63700 }, { "epoch": 0.08495592211573845, "grad_norm": 1.8046875, "learning_rate": 4.247706788971255e-06, "loss": 0.4786182403564453, "num_tokens": 7782070156.0, "step": 63720 }, { "epoch": 0.08498258750246655, "grad_norm": 1.8984375, "learning_rate": 4.24904005119727e-06, "loss": 0.4976628303527832, "num_tokens": 7784503548.0, "step": 63740 }, { "epoch": 0.08500925288919466, "grad_norm": 2.078125, "learning_rate": 4.2503733134232845e-06, "loss": 0.491852855682373, "num_tokens": 7786888986.0, "step": 63760 }, { "epoch": 0.08503591827592276, "grad_norm": 1.8046875, "learning_rate": 4.251706575649299e-06, "loss": 0.48781728744506836, "num_tokens": 7789522008.0, "step": 63780 }, { "epoch": 0.08506258366265086, "grad_norm": 1.5546875, "learning_rate": 4.253039837875314e-06, "loss": 0.5066676139831543, "num_tokens": 7791924813.0, "step": 63800 }, { "epoch": 0.08508924904937896, "grad_norm": 2.078125, "learning_rate": 4.2543731001013286e-06, "loss": 0.45569849014282227, "num_tokens": 7794334975.0, "step": 63820 }, { "epoch": 0.08511591443610707, "grad_norm": 2.125, "learning_rate": 4.255706362327343e-06, "loss": 0.4992047309875488, "num_tokens": 7796796360.0, "step": 63840 }, { "epoch": 0.08514257982283517, "grad_norm": 1.9765625, "learning_rate": 4.257039624553357e-06, "loss": 0.4777881145477295, "num_tokens": 7799220123.0, "step": 63860 }, { "epoch": 0.08516924520956327, "grad_norm": 2.25, "learning_rate": 4.258372886779372e-06, "loss": 0.47680044174194336, "num_tokens": 7801562988.0, "step": 63880 }, { "epoch": 0.08519591059629138, "grad_norm": 2.21875, "learning_rate": 4.2597061490053865e-06, "loss": 0.5032878875732422, "num_tokens": 7804158326.0, "step": 63900 }, { "epoch": 0.08522257598301948, "grad_norm": 2.25, "learning_rate": 4.261039411231401e-06, "loss": 0.4805899143218994, "num_tokens": 7806654802.0, "step": 63920 }, { "epoch": 0.08524924136974758, "grad_norm": 2.125, "learning_rate": 4.262372673457416e-06, "loss": 0.4813284397125244, "num_tokens": 7809289032.0, "step": 63940 }, { "epoch": 0.08527590675647569, "grad_norm": 2.328125, "learning_rate": 4.263705935683431e-06, "loss": 0.47965087890625, "num_tokens": 7811919451.0, "step": 63960 }, { "epoch": 0.08530257214320379, "grad_norm": 2.59375, "learning_rate": 4.265039197909445e-06, "loss": 0.4771135330200195, "num_tokens": 7814348147.0, "step": 63980 }, { "epoch": 0.08532923752993189, "grad_norm": 1.9921875, "learning_rate": 4.26637246013546e-06, "loss": 0.47842607498168943, "num_tokens": 7816797174.0, "step": 64000 }, { "epoch": 0.08535590291666, "grad_norm": 2.296875, "learning_rate": 4.267705722361474e-06, "loss": 0.4895878791809082, "num_tokens": 7819408504.0, "step": 64020 }, { "epoch": 0.0853825683033881, "grad_norm": 2.125, "learning_rate": 4.2690389845874894e-06, "loss": 0.4693427085876465, "num_tokens": 7821819834.0, "step": 64040 }, { "epoch": 0.0854092336901162, "grad_norm": 1.7265625, "learning_rate": 4.270372246813503e-06, "loss": 0.46613388061523436, "num_tokens": 7824214448.0, "step": 64060 }, { "epoch": 0.08543589907684432, "grad_norm": 2.1875, "learning_rate": 4.271705509039518e-06, "loss": 0.4925544261932373, "num_tokens": 7826548201.0, "step": 64080 }, { "epoch": 0.08546256446357242, "grad_norm": 1.7734375, "learning_rate": 4.273038771265533e-06, "loss": 0.4741021156311035, "num_tokens": 7829044446.0, "step": 64100 }, { "epoch": 0.08548922985030052, "grad_norm": 2.484375, "learning_rate": 4.274372033491547e-06, "loss": 0.4874286651611328, "num_tokens": 7831286248.0, "step": 64120 }, { "epoch": 0.08551589523702863, "grad_norm": 1.9375, "learning_rate": 4.275705295717562e-06, "loss": 0.4800933837890625, "num_tokens": 7833742959.0, "step": 64140 }, { "epoch": 0.08554256062375673, "grad_norm": 2.03125, "learning_rate": 4.277038557943577e-06, "loss": 0.4872264385223389, "num_tokens": 7836029414.0, "step": 64160 }, { "epoch": 0.08556922601048483, "grad_norm": 2.28125, "learning_rate": 4.2783718201695915e-06, "loss": 0.47401766777038573, "num_tokens": 7838663206.0, "step": 64180 }, { "epoch": 0.08559589139721294, "grad_norm": 1.9296875, "learning_rate": 4.279705082395606e-06, "loss": 0.4933058261871338, "num_tokens": 7841062781.0, "step": 64200 }, { "epoch": 0.08562255678394104, "grad_norm": 2.046875, "learning_rate": 4.28103834462162e-06, "loss": 0.4876065254211426, "num_tokens": 7843550412.0, "step": 64220 }, { "epoch": 0.08564922217066914, "grad_norm": 2.4375, "learning_rate": 4.282371606847635e-06, "loss": 0.47478647232055665, "num_tokens": 7846234026.0, "step": 64240 }, { "epoch": 0.08567588755739725, "grad_norm": 2.078125, "learning_rate": 4.28370486907365e-06, "loss": 0.494202709197998, "num_tokens": 7848882518.0, "step": 64260 }, { "epoch": 0.08570255294412535, "grad_norm": 1.8125, "learning_rate": 4.285038131299664e-06, "loss": 0.46530637741088865, "num_tokens": 7851290638.0, "step": 64280 }, { "epoch": 0.08572921833085345, "grad_norm": 2.140625, "learning_rate": 4.286371393525679e-06, "loss": 0.48767595291137694, "num_tokens": 7853562089.0, "step": 64300 }, { "epoch": 0.08575588371758155, "grad_norm": 1.8984375, "learning_rate": 4.2877046557516936e-06, "loss": 0.4926017761230469, "num_tokens": 7855887759.0, "step": 64320 }, { "epoch": 0.08578254910430966, "grad_norm": 2.28125, "learning_rate": 4.289037917977708e-06, "loss": 0.48479576110839845, "num_tokens": 7858249332.0, "step": 64340 }, { "epoch": 0.08580921449103776, "grad_norm": 1.921875, "learning_rate": 4.290371180203723e-06, "loss": 0.49046812057495115, "num_tokens": 7860835655.0, "step": 64360 }, { "epoch": 0.08583587987776586, "grad_norm": 1.9765625, "learning_rate": 4.291704442429737e-06, "loss": 0.47922239303588865, "num_tokens": 7863196274.0, "step": 64380 }, { "epoch": 0.08586254526449397, "grad_norm": 2.34375, "learning_rate": 4.293037704655752e-06, "loss": 0.493292236328125, "num_tokens": 7865902733.0, "step": 64400 }, { "epoch": 0.08588921065122207, "grad_norm": 2.328125, "learning_rate": 4.294370966881767e-06, "loss": 0.47192916870117185, "num_tokens": 7868288859.0, "step": 64420 }, { "epoch": 0.08591587603795017, "grad_norm": 2.078125, "learning_rate": 4.295704229107781e-06, "loss": 0.474039363861084, "num_tokens": 7870707937.0, "step": 64440 }, { "epoch": 0.08594254142467828, "grad_norm": 2.328125, "learning_rate": 4.297037491333796e-06, "loss": 0.47994747161865237, "num_tokens": 7873162043.0, "step": 64460 }, { "epoch": 0.08596920681140638, "grad_norm": 1.8359375, "learning_rate": 4.29837075355981e-06, "loss": 0.48867149353027345, "num_tokens": 7875384326.0, "step": 64480 }, { "epoch": 0.0859958721981345, "grad_norm": 1.703125, "learning_rate": 4.299704015785825e-06, "loss": 0.4881932258605957, "num_tokens": 7877916322.0, "step": 64500 }, { "epoch": 0.0860225375848626, "grad_norm": 1.7265625, "learning_rate": 4.30103727801184e-06, "loss": 0.48690128326416016, "num_tokens": 7880242726.0, "step": 64520 }, { "epoch": 0.0860492029715907, "grad_norm": 1.5, "learning_rate": 4.3023705402378544e-06, "loss": 0.4819291591644287, "num_tokens": 7882598614.0, "step": 64540 }, { "epoch": 0.0860758683583188, "grad_norm": 1.9453125, "learning_rate": 4.303703802463869e-06, "loss": 0.47106194496154785, "num_tokens": 7885302503.0, "step": 64560 }, { "epoch": 0.08610253374504691, "grad_norm": 1.984375, "learning_rate": 4.305037064689884e-06, "loss": 0.48836164474487304, "num_tokens": 7887661178.0, "step": 64580 }, { "epoch": 0.08612919913177501, "grad_norm": 1.640625, "learning_rate": 4.306370326915898e-06, "loss": 0.4661564826965332, "num_tokens": 7890328826.0, "step": 64600 }, { "epoch": 0.08615586451850311, "grad_norm": 1.75, "learning_rate": 4.307703589141912e-06, "loss": 0.5063909530639649, "num_tokens": 7892715367.0, "step": 64620 }, { "epoch": 0.08618252990523122, "grad_norm": 1.671875, "learning_rate": 4.309036851367928e-06, "loss": 0.4820252418518066, "num_tokens": 7895232252.0, "step": 64640 }, { "epoch": 0.08620919529195932, "grad_norm": 2.3125, "learning_rate": 4.310370113593942e-06, "loss": 0.47802934646606443, "num_tokens": 7897697551.0, "step": 64660 }, { "epoch": 0.08623586067868742, "grad_norm": 1.8125, "learning_rate": 4.3117033758199565e-06, "loss": 0.48091630935668944, "num_tokens": 7900025187.0, "step": 64680 }, { "epoch": 0.08626252606541553, "grad_norm": 2.234375, "learning_rate": 4.313036638045971e-06, "loss": 0.5117968082427978, "num_tokens": 7902390914.0, "step": 64700 }, { "epoch": 0.08628919145214363, "grad_norm": 1.84375, "learning_rate": 4.314369900271986e-06, "loss": 0.4888933181762695, "num_tokens": 7904765208.0, "step": 64720 }, { "epoch": 0.08631585683887173, "grad_norm": 1.9296875, "learning_rate": 4.315703162498001e-06, "loss": 0.48660888671875, "num_tokens": 7907126027.0, "step": 64740 }, { "epoch": 0.08634252222559984, "grad_norm": 2.515625, "learning_rate": 4.3170364247240144e-06, "loss": 0.4737410068511963, "num_tokens": 7909581127.0, "step": 64760 }, { "epoch": 0.08636918761232794, "grad_norm": 2.046875, "learning_rate": 4.31836968695003e-06, "loss": 0.46738629341125487, "num_tokens": 7912045213.0, "step": 64780 }, { "epoch": 0.08639585299905604, "grad_norm": 1.9609375, "learning_rate": 4.319702949176045e-06, "loss": 0.4709151268005371, "num_tokens": 7914335662.0, "step": 64800 }, { "epoch": 0.08642251838578414, "grad_norm": 1.9375, "learning_rate": 4.3210362114020586e-06, "loss": 0.4650296211242676, "num_tokens": 7916556554.0, "step": 64820 }, { "epoch": 0.08644918377251225, "grad_norm": 1.8046875, "learning_rate": 4.322369473628073e-06, "loss": 0.48622827529907225, "num_tokens": 7918972061.0, "step": 64840 }, { "epoch": 0.08647584915924035, "grad_norm": 1.71875, "learning_rate": 4.323702735854089e-06, "loss": 0.488702392578125, "num_tokens": 7921460715.0, "step": 64860 }, { "epoch": 0.08650251454596845, "grad_norm": 1.6640625, "learning_rate": 4.325035998080103e-06, "loss": 0.4722916126251221, "num_tokens": 7923781484.0, "step": 64880 }, { "epoch": 0.08652917993269657, "grad_norm": 2.21875, "learning_rate": 4.326369260306117e-06, "loss": 0.47966656684875486, "num_tokens": 7926228889.0, "step": 64900 }, { "epoch": 0.08655584531942467, "grad_norm": 1.9453125, "learning_rate": 4.327702522532132e-06, "loss": 0.48850178718566895, "num_tokens": 7928449126.0, "step": 64920 }, { "epoch": 0.08658251070615278, "grad_norm": 1.8359375, "learning_rate": 4.329035784758147e-06, "loss": 0.48940653800964357, "num_tokens": 7930950767.0, "step": 64940 }, { "epoch": 0.08660917609288088, "grad_norm": 2.53125, "learning_rate": 4.3303690469841615e-06, "loss": 0.4829439163208008, "num_tokens": 7933197851.0, "step": 64960 }, { "epoch": 0.08663584147960898, "grad_norm": 2.09375, "learning_rate": 4.331702309210175e-06, "loss": 0.4805262565612793, "num_tokens": 7935399676.0, "step": 64980 }, { "epoch": 0.08666250686633709, "grad_norm": 1.9609375, "learning_rate": 4.333035571436191e-06, "loss": 0.4979850769042969, "num_tokens": 7937805624.0, "step": 65000 }, { "epoch": 0.08668917225306519, "grad_norm": 2.296875, "learning_rate": 4.3343688336622056e-06, "loss": 0.47906084060668946, "num_tokens": 7940377636.0, "step": 65020 }, { "epoch": 0.08671583763979329, "grad_norm": 2.015625, "learning_rate": 4.3357020958882194e-06, "loss": 0.4782205581665039, "num_tokens": 7942826887.0, "step": 65040 }, { "epoch": 0.0867425030265214, "grad_norm": 2.21875, "learning_rate": 4.337035358114234e-06, "loss": 0.48410959243774415, "num_tokens": 7945198467.0, "step": 65060 }, { "epoch": 0.0867691684132495, "grad_norm": 2.453125, "learning_rate": 4.338368620340249e-06, "loss": 0.47243189811706543, "num_tokens": 7947622310.0, "step": 65080 }, { "epoch": 0.0867958337999776, "grad_norm": 2.171875, "learning_rate": 4.3397018825662635e-06, "loss": 0.48694934844970705, "num_tokens": 7950259897.0, "step": 65100 }, { "epoch": 0.0868224991867057, "grad_norm": 1.890625, "learning_rate": 4.341035144792278e-06, "loss": 0.4732816696166992, "num_tokens": 7952803432.0, "step": 65120 }, { "epoch": 0.0868491645734338, "grad_norm": 1.7890625, "learning_rate": 4.342368407018293e-06, "loss": 0.47645263671875, "num_tokens": 7955240697.0, "step": 65140 }, { "epoch": 0.08687582996016191, "grad_norm": 1.84375, "learning_rate": 4.343701669244308e-06, "loss": 0.4709156036376953, "num_tokens": 7957671901.0, "step": 65160 }, { "epoch": 0.08690249534689001, "grad_norm": 1.9140625, "learning_rate": 4.345034931470322e-06, "loss": 0.4567234516143799, "num_tokens": 7960051217.0, "step": 65180 }, { "epoch": 0.08692916073361812, "grad_norm": 2.375, "learning_rate": 4.346368193696336e-06, "loss": 0.48200092315673826, "num_tokens": 7962518703.0, "step": 65200 }, { "epoch": 0.08695582612034622, "grad_norm": 2.203125, "learning_rate": 4.347701455922351e-06, "loss": 0.48901982307434083, "num_tokens": 7964921640.0, "step": 65220 }, { "epoch": 0.08698249150707432, "grad_norm": 2.671875, "learning_rate": 4.349034718148366e-06, "loss": 0.49094343185424805, "num_tokens": 7967397092.0, "step": 65240 }, { "epoch": 0.08700915689380243, "grad_norm": 2.234375, "learning_rate": 4.35036798037438e-06, "loss": 0.4751227855682373, "num_tokens": 7969808498.0, "step": 65260 }, { "epoch": 0.08703582228053053, "grad_norm": 1.8046875, "learning_rate": 4.351701242600395e-06, "loss": 0.48766508102416994, "num_tokens": 7972207677.0, "step": 65280 }, { "epoch": 0.08706248766725865, "grad_norm": 2.0625, "learning_rate": 4.35303450482641e-06, "loss": 0.49447269439697267, "num_tokens": 7974596470.0, "step": 65300 }, { "epoch": 0.08708915305398675, "grad_norm": 1.96875, "learning_rate": 4.354367767052424e-06, "loss": 0.47165212631225584, "num_tokens": 7977111039.0, "step": 65320 }, { "epoch": 0.08711581844071485, "grad_norm": 2.03125, "learning_rate": 4.355701029278439e-06, "loss": 0.4762692451477051, "num_tokens": 7979383057.0, "step": 65340 }, { "epoch": 0.08714248382744295, "grad_norm": 2.140625, "learning_rate": 4.357034291504453e-06, "loss": 0.47945055961608884, "num_tokens": 7981629483.0, "step": 65360 }, { "epoch": 0.08716914921417106, "grad_norm": 1.78125, "learning_rate": 4.3583675537304685e-06, "loss": 0.48549351692199705, "num_tokens": 7983960269.0, "step": 65380 }, { "epoch": 0.08719581460089916, "grad_norm": 1.546875, "learning_rate": 4.359700815956482e-06, "loss": 0.481157112121582, "num_tokens": 7986365512.0, "step": 65400 }, { "epoch": 0.08722247998762726, "grad_norm": 2.5625, "learning_rate": 4.361034078182497e-06, "loss": 0.47196016311645506, "num_tokens": 7988811756.0, "step": 65420 }, { "epoch": 0.08724914537435537, "grad_norm": 2.328125, "learning_rate": 4.362367340408512e-06, "loss": 0.47890987396240237, "num_tokens": 7991323963.0, "step": 65440 }, { "epoch": 0.08727581076108347, "grad_norm": 2.203125, "learning_rate": 4.3637006026345265e-06, "loss": 0.4941703319549561, "num_tokens": 7993648160.0, "step": 65460 }, { "epoch": 0.08730247614781157, "grad_norm": 1.8984375, "learning_rate": 4.365033864860541e-06, "loss": 0.47319507598876953, "num_tokens": 7995860843.0, "step": 65480 }, { "epoch": 0.08732914153453968, "grad_norm": 2.09375, "learning_rate": 4.366367127086556e-06, "loss": 0.46346230506896974, "num_tokens": 7998131494.0, "step": 65500 }, { "epoch": 0.08735580692126778, "grad_norm": 1.6484375, "learning_rate": 4.3677003893125706e-06, "loss": 0.4713125228881836, "num_tokens": 8000677998.0, "step": 65520 }, { "epoch": 0.08738247230799588, "grad_norm": 2.421875, "learning_rate": 4.369033651538585e-06, "loss": 0.4738302707672119, "num_tokens": 8003150978.0, "step": 65540 }, { "epoch": 0.08740913769472398, "grad_norm": 1.6875, "learning_rate": 4.370366913764599e-06, "loss": 0.465228271484375, "num_tokens": 8005579383.0, "step": 65560 }, { "epoch": 0.08743580308145209, "grad_norm": 2.0, "learning_rate": 4.371700175990614e-06, "loss": 0.47072582244873046, "num_tokens": 8008093116.0, "step": 65580 }, { "epoch": 0.08746246846818019, "grad_norm": 1.6953125, "learning_rate": 4.373033438216629e-06, "loss": 0.46575002670288085, "num_tokens": 8010579148.0, "step": 65600 }, { "epoch": 0.0874891338549083, "grad_norm": 1.71875, "learning_rate": 4.374366700442643e-06, "loss": 0.4729331970214844, "num_tokens": 8013018797.0, "step": 65620 }, { "epoch": 0.0875157992416364, "grad_norm": 2.609375, "learning_rate": 4.375699962668658e-06, "loss": 0.4856115341186523, "num_tokens": 8015392353.0, "step": 65640 }, { "epoch": 0.0875424646283645, "grad_norm": 1.8125, "learning_rate": 4.377033224894673e-06, "loss": 0.4682131767272949, "num_tokens": 8017756339.0, "step": 65660 }, { "epoch": 0.0875691300150926, "grad_norm": 2.046875, "learning_rate": 4.378366487120687e-06, "loss": 0.48053936958312987, "num_tokens": 8020042026.0, "step": 65680 }, { "epoch": 0.0875957954018207, "grad_norm": 1.921875, "learning_rate": 4.379699749346702e-06, "loss": 0.48080921173095703, "num_tokens": 8022615130.0, "step": 65700 }, { "epoch": 0.08762246078854882, "grad_norm": 1.875, "learning_rate": 4.381033011572716e-06, "loss": 0.4633049011230469, "num_tokens": 8025166070.0, "step": 65720 }, { "epoch": 0.08764912617527693, "grad_norm": 1.9765625, "learning_rate": 4.3823662737987314e-06, "loss": 0.4717757225036621, "num_tokens": 8027599462.0, "step": 65740 }, { "epoch": 0.08767579156200503, "grad_norm": 2.03125, "learning_rate": 4.383699536024746e-06, "loss": 0.46137247085571287, "num_tokens": 8029944227.0, "step": 65760 }, { "epoch": 0.08770245694873313, "grad_norm": 1.90625, "learning_rate": 4.38503279825076e-06, "loss": 0.47395954132080076, "num_tokens": 8032309905.0, "step": 65780 }, { "epoch": 0.08772912233546123, "grad_norm": 2.09375, "learning_rate": 4.386366060476775e-06, "loss": 0.47332196235656737, "num_tokens": 8034732412.0, "step": 65800 }, { "epoch": 0.08775578772218934, "grad_norm": 2.546875, "learning_rate": 4.387699322702789e-06, "loss": 0.48159022331237794, "num_tokens": 8037281629.0, "step": 65820 }, { "epoch": 0.08778245310891744, "grad_norm": 1.7734375, "learning_rate": 4.389032584928804e-06, "loss": 0.4668092727661133, "num_tokens": 8039792980.0, "step": 65840 }, { "epoch": 0.08780911849564554, "grad_norm": 1.9375, "learning_rate": 4.390365847154819e-06, "loss": 0.46351327896118166, "num_tokens": 8042288778.0, "step": 65860 }, { "epoch": 0.08783578388237365, "grad_norm": 2.203125, "learning_rate": 4.3916991093808335e-06, "loss": 0.4917177200317383, "num_tokens": 8044697968.0, "step": 65880 }, { "epoch": 0.08786244926910175, "grad_norm": 2.734375, "learning_rate": 4.393032371606848e-06, "loss": 0.46550979614257815, "num_tokens": 8047144538.0, "step": 65900 }, { "epoch": 0.08788911465582985, "grad_norm": 1.6484375, "learning_rate": 4.394365633832863e-06, "loss": 0.4934104919433594, "num_tokens": 8049617156.0, "step": 65920 }, { "epoch": 0.08791578004255796, "grad_norm": 1.9609375, "learning_rate": 4.395698896058877e-06, "loss": 0.46850290298461916, "num_tokens": 8051955615.0, "step": 65940 }, { "epoch": 0.08794244542928606, "grad_norm": 1.734375, "learning_rate": 4.3970321582848915e-06, "loss": 0.48098058700561525, "num_tokens": 8054284963.0, "step": 65960 }, { "epoch": 0.08796911081601416, "grad_norm": 2.90625, "learning_rate": 4.398365420510907e-06, "loss": 0.47416296005249026, "num_tokens": 8056849859.0, "step": 65980 }, { "epoch": 0.08799577620274227, "grad_norm": 1.96875, "learning_rate": 4.399698682736921e-06, "loss": 0.49472479820251464, "num_tokens": 8059311592.0, "step": 66000 }, { "epoch": 0.08802244158947037, "grad_norm": 2.203125, "learning_rate": 4.4010319449629356e-06, "loss": 0.48088831901550294, "num_tokens": 8061823036.0, "step": 66020 }, { "epoch": 0.08804910697619847, "grad_norm": 1.7265625, "learning_rate": 4.40236520718895e-06, "loss": 0.47415966987609864, "num_tokens": 8064073930.0, "step": 66040 }, { "epoch": 0.08807577236292657, "grad_norm": 2.21875, "learning_rate": 4.403698469414965e-06, "loss": 0.471370792388916, "num_tokens": 8066672389.0, "step": 66060 }, { "epoch": 0.08810243774965468, "grad_norm": 2.4375, "learning_rate": 4.40503173164098e-06, "loss": 0.49532294273376465, "num_tokens": 8068968307.0, "step": 66080 }, { "epoch": 0.08812910313638278, "grad_norm": 1.640625, "learning_rate": 4.4063649938669935e-06, "loss": 0.4701725482940674, "num_tokens": 8071301537.0, "step": 66100 }, { "epoch": 0.0881557685231109, "grad_norm": 2.53125, "learning_rate": 4.407698256093009e-06, "loss": 0.4696096897125244, "num_tokens": 8073748926.0, "step": 66120 }, { "epoch": 0.088182433909839, "grad_norm": 1.9140625, "learning_rate": 4.409031518319024e-06, "loss": 0.4890585422515869, "num_tokens": 8076284444.0, "step": 66140 }, { "epoch": 0.0882090992965671, "grad_norm": 2.578125, "learning_rate": 4.410364780545038e-06, "loss": 0.47333426475524903, "num_tokens": 8078467981.0, "step": 66160 }, { "epoch": 0.0882357646832952, "grad_norm": 1.796875, "learning_rate": 4.411698042771052e-06, "loss": 0.472761344909668, "num_tokens": 8080931322.0, "step": 66180 }, { "epoch": 0.08826243007002331, "grad_norm": 1.9140625, "learning_rate": 4.413031304997067e-06, "loss": 0.48446145057678225, "num_tokens": 8083508559.0, "step": 66200 }, { "epoch": 0.08828909545675141, "grad_norm": 2.109375, "learning_rate": 4.414364567223082e-06, "loss": 0.4775350570678711, "num_tokens": 8085811622.0, "step": 66220 }, { "epoch": 0.08831576084347952, "grad_norm": 2.703125, "learning_rate": 4.4156978294490964e-06, "loss": 0.48003425598144533, "num_tokens": 8088403548.0, "step": 66240 }, { "epoch": 0.08834242623020762, "grad_norm": 2.359375, "learning_rate": 4.417031091675111e-06, "loss": 0.4886218547821045, "num_tokens": 8090961367.0, "step": 66260 }, { "epoch": 0.08836909161693572, "grad_norm": 1.953125, "learning_rate": 4.418364353901126e-06, "loss": 0.4579345226287842, "num_tokens": 8093263328.0, "step": 66280 }, { "epoch": 0.08839575700366382, "grad_norm": 2.4375, "learning_rate": 4.4196976161271405e-06, "loss": 0.48343758583068847, "num_tokens": 8095664586.0, "step": 66300 }, { "epoch": 0.08842242239039193, "grad_norm": 2.078125, "learning_rate": 4.421030878353154e-06, "loss": 0.4968564987182617, "num_tokens": 8098138100.0, "step": 66320 }, { "epoch": 0.08844908777712003, "grad_norm": 2.375, "learning_rate": 4.42236414057917e-06, "loss": 0.45878067016601565, "num_tokens": 8100866346.0, "step": 66340 }, { "epoch": 0.08847575316384813, "grad_norm": 2.296875, "learning_rate": 4.423697402805185e-06, "loss": 0.48673205375671386, "num_tokens": 8103253728.0, "step": 66360 }, { "epoch": 0.08850241855057624, "grad_norm": 2.09375, "learning_rate": 4.4250306650311985e-06, "loss": 0.47052655220031736, "num_tokens": 8105656992.0, "step": 66380 }, { "epoch": 0.08852908393730434, "grad_norm": 1.703125, "learning_rate": 4.426363927257213e-06, "loss": 0.4623411178588867, "num_tokens": 8108157773.0, "step": 66400 }, { "epoch": 0.08855574932403244, "grad_norm": 1.875, "learning_rate": 4.427697189483228e-06, "loss": 0.46661930084228515, "num_tokens": 8110529436.0, "step": 66420 }, { "epoch": 0.08858241471076055, "grad_norm": 2.09375, "learning_rate": 4.429030451709243e-06, "loss": 0.46876373291015627, "num_tokens": 8112752467.0, "step": 66440 }, { "epoch": 0.08860908009748865, "grad_norm": 2.0625, "learning_rate": 4.430363713935257e-06, "loss": 0.46248998641967776, "num_tokens": 8115112308.0, "step": 66460 }, { "epoch": 0.08863574548421675, "grad_norm": 2.828125, "learning_rate": 4.431696976161272e-06, "loss": 0.473065185546875, "num_tokens": 8117578586.0, "step": 66480 }, { "epoch": 0.08866241087094485, "grad_norm": 2.15625, "learning_rate": 4.433030238387287e-06, "loss": 0.4560199737548828, "num_tokens": 8119938552.0, "step": 66500 }, { "epoch": 0.08868907625767297, "grad_norm": 2.390625, "learning_rate": 4.434363500613301e-06, "loss": 0.4804524421691895, "num_tokens": 8122406942.0, "step": 66520 }, { "epoch": 0.08871574164440107, "grad_norm": 1.5625, "learning_rate": 4.435696762839315e-06, "loss": 0.47324414253234864, "num_tokens": 8124962222.0, "step": 66540 }, { "epoch": 0.08874240703112918, "grad_norm": 2.234375, "learning_rate": 4.43703002506533e-06, "loss": 0.47286176681518555, "num_tokens": 8127425848.0, "step": 66560 }, { "epoch": 0.08876907241785728, "grad_norm": 1.828125, "learning_rate": 4.438363287291345e-06, "loss": 0.4728400707244873, "num_tokens": 8129996946.0, "step": 66580 }, { "epoch": 0.08879573780458538, "grad_norm": 2.46875, "learning_rate": 4.439696549517359e-06, "loss": 0.48218569755554197, "num_tokens": 8132342923.0, "step": 66600 }, { "epoch": 0.08882240319131349, "grad_norm": 2.125, "learning_rate": 4.441029811743374e-06, "loss": 0.4786513805389404, "num_tokens": 8134679077.0, "step": 66620 }, { "epoch": 0.08884906857804159, "grad_norm": 1.8984375, "learning_rate": 4.442363073969389e-06, "loss": 0.48326311111450193, "num_tokens": 8137166704.0, "step": 66640 }, { "epoch": 0.0888757339647697, "grad_norm": 1.9921875, "learning_rate": 4.4436963361954035e-06, "loss": 0.48135900497436523, "num_tokens": 8139715513.0, "step": 66660 }, { "epoch": 0.0889023993514978, "grad_norm": 1.9609375, "learning_rate": 4.445029598421418e-06, "loss": 0.4591250419616699, "num_tokens": 8142221183.0, "step": 66680 }, { "epoch": 0.0889290647382259, "grad_norm": 1.8125, "learning_rate": 4.446362860647432e-06, "loss": 0.4679133415222168, "num_tokens": 8144558180.0, "step": 66700 }, { "epoch": 0.088955730124954, "grad_norm": 1.7890625, "learning_rate": 4.4476961228734476e-06, "loss": 0.46327056884765627, "num_tokens": 8146887855.0, "step": 66720 }, { "epoch": 0.0889823955116821, "grad_norm": 2.296875, "learning_rate": 4.4490293850994614e-06, "loss": 0.47261486053466795, "num_tokens": 8149268891.0, "step": 66740 }, { "epoch": 0.08900906089841021, "grad_norm": 1.8671875, "learning_rate": 4.450362647325476e-06, "loss": 0.4750836372375488, "num_tokens": 8151775943.0, "step": 66760 }, { "epoch": 0.08903572628513831, "grad_norm": 2.0, "learning_rate": 4.451695909551491e-06, "loss": 0.46856842041015623, "num_tokens": 8154307408.0, "step": 66780 }, { "epoch": 0.08906239167186641, "grad_norm": 2.109375, "learning_rate": 4.4530291717775055e-06, "loss": 0.46344733238220215, "num_tokens": 8156909007.0, "step": 66800 }, { "epoch": 0.08908905705859452, "grad_norm": 2.046875, "learning_rate": 4.45436243400352e-06, "loss": 0.4855930328369141, "num_tokens": 8159408533.0, "step": 66820 }, { "epoch": 0.08911572244532262, "grad_norm": 1.8359375, "learning_rate": 4.455695696229535e-06, "loss": 0.4895639419555664, "num_tokens": 8161576413.0, "step": 66840 }, { "epoch": 0.08914238783205072, "grad_norm": 1.9609375, "learning_rate": 4.45702895845555e-06, "loss": 0.4867586135864258, "num_tokens": 8164006464.0, "step": 66860 }, { "epoch": 0.08916905321877883, "grad_norm": 2.125, "learning_rate": 4.458362220681564e-06, "loss": 0.4623612880706787, "num_tokens": 8166533108.0, "step": 66880 }, { "epoch": 0.08919571860550693, "grad_norm": 1.9765625, "learning_rate": 4.459695482907578e-06, "loss": 0.4842100143432617, "num_tokens": 8168913228.0, "step": 66900 }, { "epoch": 0.08922238399223503, "grad_norm": 2.171875, "learning_rate": 4.461028745133593e-06, "loss": 0.48193941116333006, "num_tokens": 8171230263.0, "step": 66920 }, { "epoch": 0.08924904937896315, "grad_norm": 2.234375, "learning_rate": 4.462362007359608e-06, "loss": 0.46253581047058107, "num_tokens": 8173812836.0, "step": 66940 }, { "epoch": 0.08927571476569125, "grad_norm": 2.234375, "learning_rate": 4.463695269585622e-06, "loss": 0.4642483711242676, "num_tokens": 8176381934.0, "step": 66960 }, { "epoch": 0.08930238015241936, "grad_norm": 1.8828125, "learning_rate": 4.465028531811637e-06, "loss": 0.4697371006011963, "num_tokens": 8178838360.0, "step": 66980 }, { "epoch": 0.08932904553914746, "grad_norm": 2.140625, "learning_rate": 4.466361794037652e-06, "loss": 0.4650888442993164, "num_tokens": 8181220714.0, "step": 67000 }, { "epoch": 0.08935571092587556, "grad_norm": 1.765625, "learning_rate": 4.467695056263666e-06, "loss": 0.4817510604858398, "num_tokens": 8183680669.0, "step": 67020 }, { "epoch": 0.08938237631260366, "grad_norm": 2.1875, "learning_rate": 4.469028318489681e-06, "loss": 0.46868534088134767, "num_tokens": 8186084572.0, "step": 67040 }, { "epoch": 0.08940904169933177, "grad_norm": 2.015625, "learning_rate": 4.470361580715695e-06, "loss": 0.4582971572875977, "num_tokens": 8188754207.0, "step": 67060 }, { "epoch": 0.08943570708605987, "grad_norm": 1.796875, "learning_rate": 4.47169484294171e-06, "loss": 0.47246885299682617, "num_tokens": 8191196431.0, "step": 67080 }, { "epoch": 0.08946237247278797, "grad_norm": 1.609375, "learning_rate": 4.473028105167725e-06, "loss": 0.4756758689880371, "num_tokens": 8193646983.0, "step": 67100 }, { "epoch": 0.08948903785951608, "grad_norm": 1.8984375, "learning_rate": 4.474361367393739e-06, "loss": 0.4670370578765869, "num_tokens": 8196306051.0, "step": 67120 }, { "epoch": 0.08951570324624418, "grad_norm": 1.765625, "learning_rate": 4.475694629619754e-06, "loss": 0.47095184326171874, "num_tokens": 8198858197.0, "step": 67140 }, { "epoch": 0.08954236863297228, "grad_norm": 2.078125, "learning_rate": 4.4770278918457685e-06, "loss": 0.4723982810974121, "num_tokens": 8201139749.0, "step": 67160 }, { "epoch": 0.08956903401970039, "grad_norm": 1.890625, "learning_rate": 4.478361154071783e-06, "loss": 0.4586506366729736, "num_tokens": 8203719954.0, "step": 67180 }, { "epoch": 0.08959569940642849, "grad_norm": 1.8359375, "learning_rate": 4.479694416297798e-06, "loss": 0.469834041595459, "num_tokens": 8206020778.0, "step": 67200 }, { "epoch": 0.08962236479315659, "grad_norm": 2.03125, "learning_rate": 4.4810276785238126e-06, "loss": 0.4672880172729492, "num_tokens": 8208557386.0, "step": 67220 }, { "epoch": 0.0896490301798847, "grad_norm": 1.9765625, "learning_rate": 4.482360940749827e-06, "loss": 0.4687229633331299, "num_tokens": 8210956536.0, "step": 67240 }, { "epoch": 0.0896756955666128, "grad_norm": 2.09375, "learning_rate": 4.483694202975842e-06, "loss": 0.48566594123840334, "num_tokens": 8213326202.0, "step": 67260 }, { "epoch": 0.0897023609533409, "grad_norm": 2.09375, "learning_rate": 4.485027465201856e-06, "loss": 0.46633205413818357, "num_tokens": 8215549004.0, "step": 67280 }, { "epoch": 0.089729026340069, "grad_norm": 2.03125, "learning_rate": 4.4863607274278705e-06, "loss": 0.472976016998291, "num_tokens": 8218074383.0, "step": 67300 }, { "epoch": 0.08975569172679711, "grad_norm": 2.328125, "learning_rate": 4.487693989653886e-06, "loss": 0.4802032470703125, "num_tokens": 8220480811.0, "step": 67320 }, { "epoch": 0.08978235711352522, "grad_norm": 2.078125, "learning_rate": 4.4890272518799e-06, "loss": 0.4821528434753418, "num_tokens": 8223002906.0, "step": 67340 }, { "epoch": 0.08980902250025333, "grad_norm": 2.140625, "learning_rate": 4.490360514105915e-06, "loss": 0.48584375381469724, "num_tokens": 8225493309.0, "step": 67360 }, { "epoch": 0.08983568788698143, "grad_norm": 1.9609375, "learning_rate": 4.491693776331929e-06, "loss": 0.47031517028808595, "num_tokens": 8227727931.0, "step": 67380 }, { "epoch": 0.08986235327370953, "grad_norm": 1.7734375, "learning_rate": 4.493027038557944e-06, "loss": 0.4700364112854004, "num_tokens": 8229929061.0, "step": 67400 }, { "epoch": 0.08988901866043764, "grad_norm": 2.09375, "learning_rate": 4.494360300783959e-06, "loss": 0.4728670597076416, "num_tokens": 8232085619.0, "step": 67420 }, { "epoch": 0.08991568404716574, "grad_norm": 2.0, "learning_rate": 4.495693563009973e-06, "loss": 0.4581727981567383, "num_tokens": 8234474942.0, "step": 67440 }, { "epoch": 0.08994234943389384, "grad_norm": 2.25, "learning_rate": 4.497026825235988e-06, "loss": 0.4638678073883057, "num_tokens": 8236981069.0, "step": 67460 }, { "epoch": 0.08996901482062195, "grad_norm": 1.921875, "learning_rate": 4.498360087462003e-06, "loss": 0.4974085807800293, "num_tokens": 8239307591.0, "step": 67480 }, { "epoch": 0.08999568020735005, "grad_norm": 2.109375, "learning_rate": 4.499693349688017e-06, "loss": 0.46377086639404297, "num_tokens": 8241662572.0, "step": 67500 }, { "epoch": 0.09002234559407815, "grad_norm": 1.9609375, "learning_rate": 4.501026611914031e-06, "loss": 0.47470884323120116, "num_tokens": 8244026821.0, "step": 67520 }, { "epoch": 0.09004901098080625, "grad_norm": 1.9375, "learning_rate": 4.502359874140046e-06, "loss": 0.4732543468475342, "num_tokens": 8246692724.0, "step": 67540 }, { "epoch": 0.09007567636753436, "grad_norm": 2.3125, "learning_rate": 4.503693136366061e-06, "loss": 0.4720296382904053, "num_tokens": 8249107491.0, "step": 67560 }, { "epoch": 0.09010234175426246, "grad_norm": 2.390625, "learning_rate": 4.5050263985920755e-06, "loss": 0.4729470252990723, "num_tokens": 8251495953.0, "step": 67580 }, { "epoch": 0.09012900714099056, "grad_norm": 1.796875, "learning_rate": 4.50635966081809e-06, "loss": 0.477418851852417, "num_tokens": 8253831726.0, "step": 67600 }, { "epoch": 0.09015567252771867, "grad_norm": 1.7109375, "learning_rate": 4.507692923044105e-06, "loss": 0.46826705932617185, "num_tokens": 8256354429.0, "step": 67620 }, { "epoch": 0.09018233791444677, "grad_norm": 1.765625, "learning_rate": 4.50902618527012e-06, "loss": 0.4896021842956543, "num_tokens": 8258860878.0, "step": 67640 }, { "epoch": 0.09020900330117487, "grad_norm": 2.4375, "learning_rate": 4.5103594474961335e-06, "loss": 0.46542158126831057, "num_tokens": 8261162008.0, "step": 67660 }, { "epoch": 0.09023566868790298, "grad_norm": 1.8828125, "learning_rate": 4.511692709722148e-06, "loss": 0.47281970977783205, "num_tokens": 8263672946.0, "step": 67680 }, { "epoch": 0.09026233407463108, "grad_norm": 2.859375, "learning_rate": 4.513025971948164e-06, "loss": 0.4761789798736572, "num_tokens": 8266277359.0, "step": 67700 }, { "epoch": 0.09028899946135918, "grad_norm": 1.8125, "learning_rate": 4.5143592341741776e-06, "loss": 0.46120548248291016, "num_tokens": 8268626444.0, "step": 67720 }, { "epoch": 0.0903156648480873, "grad_norm": 1.578125, "learning_rate": 4.515692496400192e-06, "loss": 0.49382696151733396, "num_tokens": 8270991806.0, "step": 67740 }, { "epoch": 0.0903423302348154, "grad_norm": 1.9140625, "learning_rate": 4.517025758626207e-06, "loss": 0.46623821258544923, "num_tokens": 8273530450.0, "step": 67760 }, { "epoch": 0.0903689956215435, "grad_norm": 2.15625, "learning_rate": 4.518359020852222e-06, "loss": 0.46619291305541993, "num_tokens": 8276074938.0, "step": 67780 }, { "epoch": 0.09039566100827161, "grad_norm": 2.234375, "learning_rate": 4.519692283078236e-06, "loss": 0.47214040756225584, "num_tokens": 8278323838.0, "step": 67800 }, { "epoch": 0.09042232639499971, "grad_norm": 1.9453125, "learning_rate": 4.52102554530425e-06, "loss": 0.4797680854797363, "num_tokens": 8280652625.0, "step": 67820 }, { "epoch": 0.09044899178172781, "grad_norm": 1.984375, "learning_rate": 4.522358807530266e-06, "loss": 0.46259918212890627, "num_tokens": 8283245456.0, "step": 67840 }, { "epoch": 0.09047565716845592, "grad_norm": 1.9140625, "learning_rate": 4.5236920697562805e-06, "loss": 0.4810516834259033, "num_tokens": 8285673383.0, "step": 67860 }, { "epoch": 0.09050232255518402, "grad_norm": 1.953125, "learning_rate": 4.525025331982294e-06, "loss": 0.47280054092407225, "num_tokens": 8287943002.0, "step": 67880 }, { "epoch": 0.09052898794191212, "grad_norm": 2.046875, "learning_rate": 4.526358594208309e-06, "loss": 0.48154678344726565, "num_tokens": 8290357759.0, "step": 67900 }, { "epoch": 0.09055565332864023, "grad_norm": 2.078125, "learning_rate": 4.527691856434324e-06, "loss": 0.4888195514678955, "num_tokens": 8292812288.0, "step": 67920 }, { "epoch": 0.09058231871536833, "grad_norm": 2.328125, "learning_rate": 4.5290251186603384e-06, "loss": 0.47923789024353025, "num_tokens": 8295116522.0, "step": 67940 }, { "epoch": 0.09060898410209643, "grad_norm": 2.234375, "learning_rate": 4.530358380886353e-06, "loss": 0.44936408996582033, "num_tokens": 8297493436.0, "step": 67960 }, { "epoch": 0.09063564948882454, "grad_norm": 2.109375, "learning_rate": 4.531691643112368e-06, "loss": 0.48786029815673826, "num_tokens": 8299893212.0, "step": 67980 }, { "epoch": 0.09066231487555264, "grad_norm": 2.125, "learning_rate": 4.5330249053383825e-06, "loss": 0.4820260047912598, "num_tokens": 8302218078.0, "step": 68000 }, { "epoch": 0.09068898026228074, "grad_norm": 2.25, "learning_rate": 4.534358167564397e-06, "loss": 0.4615958213806152, "num_tokens": 8304691222.0, "step": 68020 }, { "epoch": 0.09071564564900884, "grad_norm": 1.8046875, "learning_rate": 4.535691429790411e-06, "loss": 0.4840442657470703, "num_tokens": 8307239401.0, "step": 68040 }, { "epoch": 0.09074231103573695, "grad_norm": 1.9765625, "learning_rate": 4.537024692016427e-06, "loss": 0.46475830078125, "num_tokens": 8309341913.0, "step": 68060 }, { "epoch": 0.09076897642246505, "grad_norm": 2.03125, "learning_rate": 4.5383579542424405e-06, "loss": 0.4512629985809326, "num_tokens": 8311562787.0, "step": 68080 }, { "epoch": 0.09079564180919315, "grad_norm": 1.90625, "learning_rate": 4.539691216468455e-06, "loss": 0.4650458812713623, "num_tokens": 8314116432.0, "step": 68100 }, { "epoch": 0.09082230719592126, "grad_norm": 2.28125, "learning_rate": 4.54102447869447e-06, "loss": 0.4576726913452148, "num_tokens": 8316515487.0, "step": 68120 }, { "epoch": 0.09084897258264936, "grad_norm": 1.9375, "learning_rate": 4.542357740920485e-06, "loss": 0.46497297286987305, "num_tokens": 8319066677.0, "step": 68140 }, { "epoch": 0.09087563796937748, "grad_norm": 1.8125, "learning_rate": 4.543691003146499e-06, "loss": 0.475173282623291, "num_tokens": 8321490208.0, "step": 68160 }, { "epoch": 0.09090230335610558, "grad_norm": 1.9609375, "learning_rate": 4.545024265372514e-06, "loss": 0.45932278633117674, "num_tokens": 8323910691.0, "step": 68180 }, { "epoch": 0.09092896874283368, "grad_norm": 2.28125, "learning_rate": 4.546357527598529e-06, "loss": 0.44936723709106446, "num_tokens": 8326365267.0, "step": 68200 }, { "epoch": 0.09095563412956179, "grad_norm": 2.28125, "learning_rate": 4.547690789824543e-06, "loss": 0.46503515243530275, "num_tokens": 8328618868.0, "step": 68220 }, { "epoch": 0.09098229951628989, "grad_norm": 2.15625, "learning_rate": 4.549024052050557e-06, "loss": 0.4644940853118896, "num_tokens": 8330910322.0, "step": 68240 }, { "epoch": 0.09100896490301799, "grad_norm": 2.453125, "learning_rate": 4.550357314276572e-06, "loss": 0.4620547294616699, "num_tokens": 8333282055.0, "step": 68260 }, { "epoch": 0.0910356302897461, "grad_norm": 2.703125, "learning_rate": 4.551690576502587e-06, "loss": 0.45900373458862304, "num_tokens": 8335769854.0, "step": 68280 }, { "epoch": 0.0910622956764742, "grad_norm": 1.7109375, "learning_rate": 4.553023838728601e-06, "loss": 0.4483960151672363, "num_tokens": 8338432245.0, "step": 68300 }, { "epoch": 0.0910889610632023, "grad_norm": 2.5, "learning_rate": 4.554357100954616e-06, "loss": 0.47574143409729003, "num_tokens": 8340795687.0, "step": 68320 }, { "epoch": 0.0911156264499304, "grad_norm": 2.265625, "learning_rate": 4.555690363180631e-06, "loss": 0.48441371917724607, "num_tokens": 8343338457.0, "step": 68340 }, { "epoch": 0.0911422918366585, "grad_norm": 2.03125, "learning_rate": 4.5570236254066455e-06, "loss": 0.4662959575653076, "num_tokens": 8345829872.0, "step": 68360 }, { "epoch": 0.09116895722338661, "grad_norm": 1.6484375, "learning_rate": 4.55835688763266e-06, "loss": 0.45929679870605467, "num_tokens": 8348167503.0, "step": 68380 }, { "epoch": 0.09119562261011471, "grad_norm": 2.03125, "learning_rate": 4.559690149858674e-06, "loss": 0.4773733615875244, "num_tokens": 8350436475.0, "step": 68400 }, { "epoch": 0.09122228799684282, "grad_norm": 2.109375, "learning_rate": 4.561023412084689e-06, "loss": 0.4552305221557617, "num_tokens": 8352883893.0, "step": 68420 }, { "epoch": 0.09124895338357092, "grad_norm": 2.171875, "learning_rate": 4.562356674310704e-06, "loss": 0.4646748542785645, "num_tokens": 8355389376.0, "step": 68440 }, { "epoch": 0.09127561877029902, "grad_norm": 1.7890625, "learning_rate": 4.563689936536718e-06, "loss": 0.45307245254516604, "num_tokens": 8357584337.0, "step": 68460 }, { "epoch": 0.09130228415702712, "grad_norm": 1.7265625, "learning_rate": 4.565023198762733e-06, "loss": 0.45818614959716797, "num_tokens": 8359840780.0, "step": 68480 }, { "epoch": 0.09132894954375523, "grad_norm": 2.03125, "learning_rate": 4.5663564609887475e-06, "loss": 0.45378694534301756, "num_tokens": 8362488292.0, "step": 68500 }, { "epoch": 0.09135561493048333, "grad_norm": 1.921875, "learning_rate": 4.567689723214762e-06, "loss": 0.4779416561126709, "num_tokens": 8364989802.0, "step": 68520 }, { "epoch": 0.09138228031721143, "grad_norm": 2.28125, "learning_rate": 4.569022985440777e-06, "loss": 0.4964109420776367, "num_tokens": 8367324614.0, "step": 68540 }, { "epoch": 0.09140894570393955, "grad_norm": 2.0, "learning_rate": 4.570356247666791e-06, "loss": 0.4546483039855957, "num_tokens": 8369805687.0, "step": 68560 }, { "epoch": 0.09143561109066765, "grad_norm": 2.21875, "learning_rate": 4.571689509892806e-06, "loss": 0.45699152946472166, "num_tokens": 8372259038.0, "step": 68580 }, { "epoch": 0.09146227647739576, "grad_norm": 2.21875, "learning_rate": 4.573022772118821e-06, "loss": 0.46630287170410156, "num_tokens": 8374768956.0, "step": 68600 }, { "epoch": 0.09148894186412386, "grad_norm": 1.8203125, "learning_rate": 4.574356034344835e-06, "loss": 0.47353763580322267, "num_tokens": 8377185738.0, "step": 68620 }, { "epoch": 0.09151560725085196, "grad_norm": 1.71875, "learning_rate": 4.57568929657085e-06, "loss": 0.4727948665618896, "num_tokens": 8379597465.0, "step": 68640 }, { "epoch": 0.09154227263758007, "grad_norm": 2.84375, "learning_rate": 4.577022558796865e-06, "loss": 0.456640625, "num_tokens": 8382072532.0, "step": 68660 }, { "epoch": 0.09156893802430817, "grad_norm": 2.109375, "learning_rate": 4.578355821022879e-06, "loss": 0.45852813720703123, "num_tokens": 8384375827.0, "step": 68680 }, { "epoch": 0.09159560341103627, "grad_norm": 2.328125, "learning_rate": 4.579689083248894e-06, "loss": 0.4609355449676514, "num_tokens": 8386925210.0, "step": 68700 }, { "epoch": 0.09162226879776438, "grad_norm": 2.203125, "learning_rate": 4.581022345474908e-06, "loss": 0.46622347831726074, "num_tokens": 8389422753.0, "step": 68720 }, { "epoch": 0.09164893418449248, "grad_norm": 2.28125, "learning_rate": 4.582355607700923e-06, "loss": 0.4693140029907227, "num_tokens": 8391715429.0, "step": 68740 }, { "epoch": 0.09167559957122058, "grad_norm": 1.734375, "learning_rate": 4.583688869926938e-06, "loss": 0.4585277557373047, "num_tokens": 8394151681.0, "step": 68760 }, { "epoch": 0.09170226495794868, "grad_norm": 2.140625, "learning_rate": 4.585022132152952e-06, "loss": 0.4492523193359375, "num_tokens": 8396601629.0, "step": 68780 }, { "epoch": 0.09172893034467679, "grad_norm": 1.78125, "learning_rate": 4.586355394378967e-06, "loss": 0.47282862663269043, "num_tokens": 8399240063.0, "step": 68800 }, { "epoch": 0.09175559573140489, "grad_norm": 2.15625, "learning_rate": 4.587688656604982e-06, "loss": 0.4647053718566895, "num_tokens": 8401925592.0, "step": 68820 }, { "epoch": 0.091782261118133, "grad_norm": 1.8828125, "learning_rate": 4.589021918830996e-06, "loss": 0.4689767837524414, "num_tokens": 8404212551.0, "step": 68840 }, { "epoch": 0.0918089265048611, "grad_norm": 1.734375, "learning_rate": 4.5903551810570105e-06, "loss": 0.47183594703674314, "num_tokens": 8406997024.0, "step": 68860 }, { "epoch": 0.0918355918915892, "grad_norm": 2.484375, "learning_rate": 4.591688443283025e-06, "loss": 0.46715664863586426, "num_tokens": 8409475709.0, "step": 68880 }, { "epoch": 0.0918622572783173, "grad_norm": 2.03125, "learning_rate": 4.59302170550904e-06, "loss": 0.4675562858581543, "num_tokens": 8412184626.0, "step": 68900 }, { "epoch": 0.0918889226650454, "grad_norm": 1.8515625, "learning_rate": 4.594354967735055e-06, "loss": 0.46471586227416994, "num_tokens": 8414533309.0, "step": 68920 }, { "epoch": 0.09191558805177351, "grad_norm": 2.25, "learning_rate": 4.595688229961069e-06, "loss": 0.4824225902557373, "num_tokens": 8416911651.0, "step": 68940 }, { "epoch": 0.09194225343850163, "grad_norm": 2.09375, "learning_rate": 4.597021492187084e-06, "loss": 0.4809579849243164, "num_tokens": 8419238783.0, "step": 68960 }, { "epoch": 0.09196891882522973, "grad_norm": 1.7265625, "learning_rate": 4.598354754413099e-06, "loss": 0.45931205749511717, "num_tokens": 8421767182.0, "step": 68980 }, { "epoch": 0.09199558421195783, "grad_norm": 2.0, "learning_rate": 4.5996880166391125e-06, "loss": 0.4616701126098633, "num_tokens": 8424178559.0, "step": 69000 }, { "epoch": 0.09202224959868593, "grad_norm": 2.3125, "learning_rate": 4.601021278865127e-06, "loss": 0.4633225917816162, "num_tokens": 8426525890.0, "step": 69020 }, { "epoch": 0.09204891498541404, "grad_norm": 1.7890625, "learning_rate": 4.602354541091143e-06, "loss": 0.4712948799133301, "num_tokens": 8428676518.0, "step": 69040 }, { "epoch": 0.09207558037214214, "grad_norm": 2.046875, "learning_rate": 4.603687803317157e-06, "loss": 0.4449955940246582, "num_tokens": 8431126081.0, "step": 69060 }, { "epoch": 0.09210224575887024, "grad_norm": 1.796875, "learning_rate": 4.605021065543171e-06, "loss": 0.47833895683288574, "num_tokens": 8433432571.0, "step": 69080 }, { "epoch": 0.09212891114559835, "grad_norm": 2.0625, "learning_rate": 4.606354327769186e-06, "loss": 0.463975715637207, "num_tokens": 8435757827.0, "step": 69100 }, { "epoch": 0.09215557653232645, "grad_norm": 2.328125, "learning_rate": 4.607687589995201e-06, "loss": 0.45847606658935547, "num_tokens": 8438044462.0, "step": 69120 }, { "epoch": 0.09218224191905455, "grad_norm": 1.984375, "learning_rate": 4.6090208522212154e-06, "loss": 0.4622491836547852, "num_tokens": 8440660792.0, "step": 69140 }, { "epoch": 0.09220890730578266, "grad_norm": 1.9140625, "learning_rate": 4.610354114447229e-06, "loss": 0.4686256408691406, "num_tokens": 8443223784.0, "step": 69160 }, { "epoch": 0.09223557269251076, "grad_norm": 2.046875, "learning_rate": 4.611687376673245e-06, "loss": 0.46522321701049807, "num_tokens": 8445640008.0, "step": 69180 }, { "epoch": 0.09226223807923886, "grad_norm": 1.78125, "learning_rate": 4.6130206388992596e-06, "loss": 0.47040462493896484, "num_tokens": 8447854749.0, "step": 69200 }, { "epoch": 0.09228890346596696, "grad_norm": 1.9453125, "learning_rate": 4.614353901125273e-06, "loss": 0.47243590354919435, "num_tokens": 8450274449.0, "step": 69220 }, { "epoch": 0.09231556885269507, "grad_norm": 1.859375, "learning_rate": 4.615687163351288e-06, "loss": 0.4548171043395996, "num_tokens": 8452581459.0, "step": 69240 }, { "epoch": 0.09234223423942317, "grad_norm": 2.21875, "learning_rate": 4.617020425577303e-06, "loss": 0.48103604316711424, "num_tokens": 8454957185.0, "step": 69260 }, { "epoch": 0.09236889962615127, "grad_norm": 1.75, "learning_rate": 4.6183536878033175e-06, "loss": 0.4564816474914551, "num_tokens": 8457308635.0, "step": 69280 }, { "epoch": 0.09239556501287938, "grad_norm": 1.484375, "learning_rate": 4.619686950029332e-06, "loss": 0.456851863861084, "num_tokens": 8459695365.0, "step": 69300 }, { "epoch": 0.09242223039960748, "grad_norm": 2.328125, "learning_rate": 4.621020212255347e-06, "loss": 0.4689070701599121, "num_tokens": 8462310181.0, "step": 69320 }, { "epoch": 0.09244889578633558, "grad_norm": 1.8359375, "learning_rate": 4.622353474481362e-06, "loss": 0.45935959815979005, "num_tokens": 8464659593.0, "step": 69340 }, { "epoch": 0.09247556117306369, "grad_norm": 2.015625, "learning_rate": 4.623686736707376e-06, "loss": 0.4802551746368408, "num_tokens": 8467097639.0, "step": 69360 }, { "epoch": 0.0925022265597918, "grad_norm": 2.125, "learning_rate": 4.62501999893339e-06, "loss": 0.4628382682800293, "num_tokens": 8469558039.0, "step": 69380 }, { "epoch": 0.0925288919465199, "grad_norm": 2.296875, "learning_rate": 4.626353261159406e-06, "loss": 0.46364240646362304, "num_tokens": 8471945062.0, "step": 69400 }, { "epoch": 0.09255555733324801, "grad_norm": 2.265625, "learning_rate": 4.6276865233854196e-06, "loss": 0.4709444046020508, "num_tokens": 8474483358.0, "step": 69420 }, { "epoch": 0.09258222271997611, "grad_norm": 1.8203125, "learning_rate": 4.629019785611434e-06, "loss": 0.46860408782958984, "num_tokens": 8476967810.0, "step": 69440 }, { "epoch": 0.09260888810670422, "grad_norm": 2.34375, "learning_rate": 4.630353047837449e-06, "loss": 0.4583578586578369, "num_tokens": 8479435221.0, "step": 69460 }, { "epoch": 0.09263555349343232, "grad_norm": 1.84375, "learning_rate": 4.631686310063464e-06, "loss": 0.48043384552001955, "num_tokens": 8482247485.0, "step": 69480 }, { "epoch": 0.09266221888016042, "grad_norm": 1.8125, "learning_rate": 4.633019572289478e-06, "loss": 0.43978114128112794, "num_tokens": 8484625759.0, "step": 69500 }, { "epoch": 0.09268888426688852, "grad_norm": 2.234375, "learning_rate": 4.634352834515493e-06, "loss": 0.45179271697998047, "num_tokens": 8486765243.0, "step": 69520 }, { "epoch": 0.09271554965361663, "grad_norm": 2.09375, "learning_rate": 4.635686096741508e-06, "loss": 0.4619856834411621, "num_tokens": 8489392995.0, "step": 69540 }, { "epoch": 0.09274221504034473, "grad_norm": 2.125, "learning_rate": 4.6370193589675225e-06, "loss": 0.4462436676025391, "num_tokens": 8491675583.0, "step": 69560 }, { "epoch": 0.09276888042707283, "grad_norm": 1.65625, "learning_rate": 4.638352621193536e-06, "loss": 0.4761656284332275, "num_tokens": 8494019105.0, "step": 69580 }, { "epoch": 0.09279554581380094, "grad_norm": 2.0, "learning_rate": 4.639685883419551e-06, "loss": 0.4474672794342041, "num_tokens": 8496433187.0, "step": 69600 }, { "epoch": 0.09282221120052904, "grad_norm": 1.9609375, "learning_rate": 4.641019145645566e-06, "loss": 0.4776414394378662, "num_tokens": 8498968833.0, "step": 69620 }, { "epoch": 0.09284887658725714, "grad_norm": 2.296875, "learning_rate": 4.6423524078715804e-06, "loss": 0.473723316192627, "num_tokens": 8501470286.0, "step": 69640 }, { "epoch": 0.09287554197398525, "grad_norm": 2.09375, "learning_rate": 4.643685670097595e-06, "loss": 0.46184682846069336, "num_tokens": 8503806177.0, "step": 69660 }, { "epoch": 0.09290220736071335, "grad_norm": 2.234375, "learning_rate": 4.64501893232361e-06, "loss": 0.4597681999206543, "num_tokens": 8506074044.0, "step": 69680 }, { "epoch": 0.09292887274744145, "grad_norm": 1.9296875, "learning_rate": 4.6463521945496245e-06, "loss": 0.4471959114074707, "num_tokens": 8508556432.0, "step": 69700 }, { "epoch": 0.09295553813416955, "grad_norm": 1.9296875, "learning_rate": 4.647685456775639e-06, "loss": 0.4863309860229492, "num_tokens": 8511011022.0, "step": 69720 }, { "epoch": 0.09298220352089766, "grad_norm": 1.8984375, "learning_rate": 4.649018719001653e-06, "loss": 0.4555243492126465, "num_tokens": 8513602770.0, "step": 69740 }, { "epoch": 0.09300886890762576, "grad_norm": 2.453125, "learning_rate": 4.650351981227668e-06, "loss": 0.46270103454589845, "num_tokens": 8515876296.0, "step": 69760 }, { "epoch": 0.09303553429435388, "grad_norm": 1.828125, "learning_rate": 4.651685243453683e-06, "loss": 0.46596612930297854, "num_tokens": 8518356048.0, "step": 69780 }, { "epoch": 0.09306219968108198, "grad_norm": 1.828125, "learning_rate": 4.653018505679697e-06, "loss": 0.45772724151611327, "num_tokens": 8520648754.0, "step": 69800 }, { "epoch": 0.09308886506781008, "grad_norm": 1.71875, "learning_rate": 4.654351767905712e-06, "loss": 0.4599897861480713, "num_tokens": 8522953484.0, "step": 69820 }, { "epoch": 0.09311553045453819, "grad_norm": 2.078125, "learning_rate": 4.655685030131727e-06, "loss": 0.4554804801940918, "num_tokens": 8525192335.0, "step": 69840 }, { "epoch": 0.09314219584126629, "grad_norm": 2.140625, "learning_rate": 4.657018292357741e-06, "loss": 0.45326547622680663, "num_tokens": 8527587234.0, "step": 69860 }, { "epoch": 0.09316886122799439, "grad_norm": 1.9453125, "learning_rate": 4.658351554583756e-06, "loss": 0.4551382064819336, "num_tokens": 8530069611.0, "step": 69880 }, { "epoch": 0.0931955266147225, "grad_norm": 2.15625, "learning_rate": 4.65968481680977e-06, "loss": 0.47298760414123536, "num_tokens": 8532534103.0, "step": 69900 }, { "epoch": 0.0932221920014506, "grad_norm": 2.0625, "learning_rate": 4.661018079035785e-06, "loss": 0.46585354804992674, "num_tokens": 8535114110.0, "step": 69920 }, { "epoch": 0.0932488573881787, "grad_norm": 2.640625, "learning_rate": 4.6623513412618e-06, "loss": 0.4607856273651123, "num_tokens": 8537541394.0, "step": 69940 }, { "epoch": 0.0932755227749068, "grad_norm": 1.578125, "learning_rate": 4.663684603487814e-06, "loss": 0.4520700931549072, "num_tokens": 8540005322.0, "step": 69960 }, { "epoch": 0.09330218816163491, "grad_norm": 1.9921875, "learning_rate": 4.665017865713829e-06, "loss": 0.47408208847045896, "num_tokens": 8542533993.0, "step": 69980 }, { "epoch": 0.09332885354836301, "grad_norm": 1.984375, "learning_rate": 4.666351127939843e-06, "loss": 0.4751143455505371, "num_tokens": 8545088066.0, "step": 70000 }, { "epoch": 0.09335551893509111, "grad_norm": 2.109375, "learning_rate": 4.667684390165858e-06, "loss": 0.4668177604675293, "num_tokens": 8547392598.0, "step": 70020 }, { "epoch": 0.09338218432181922, "grad_norm": 1.9140625, "learning_rate": 4.669017652391873e-06, "loss": 0.4544715881347656, "num_tokens": 8549996089.0, "step": 70040 }, { "epoch": 0.09340884970854732, "grad_norm": 2.234375, "learning_rate": 4.6703509146178875e-06, "loss": 0.4612995147705078, "num_tokens": 8552380186.0, "step": 70060 }, { "epoch": 0.09343551509527542, "grad_norm": 1.7890625, "learning_rate": 4.671684176843902e-06, "loss": 0.4547071933746338, "num_tokens": 8554744705.0, "step": 70080 }, { "epoch": 0.09346218048200353, "grad_norm": 2.25, "learning_rate": 4.673017439069917e-06, "loss": 0.4647663116455078, "num_tokens": 8557409143.0, "step": 70100 }, { "epoch": 0.09348884586873163, "grad_norm": 2.15625, "learning_rate": 4.674350701295931e-06, "loss": 0.4596426486968994, "num_tokens": 8559871000.0, "step": 70120 }, { "epoch": 0.09351551125545973, "grad_norm": 1.5546875, "learning_rate": 4.675683963521946e-06, "loss": 0.4574114799499512, "num_tokens": 8562394817.0, "step": 70140 }, { "epoch": 0.09354217664218784, "grad_norm": 2.453125, "learning_rate": 4.677017225747961e-06, "loss": 0.465790843963623, "num_tokens": 8564796110.0, "step": 70160 }, { "epoch": 0.09356884202891595, "grad_norm": 1.65625, "learning_rate": 4.678350487973975e-06, "loss": 0.46671409606933595, "num_tokens": 8567380103.0, "step": 70180 }, { "epoch": 0.09359550741564406, "grad_norm": 2.40625, "learning_rate": 4.6796837501999895e-06, "loss": 0.4475258350372314, "num_tokens": 8569820751.0, "step": 70200 }, { "epoch": 0.09362217280237216, "grad_norm": 2.296875, "learning_rate": 4.681017012426004e-06, "loss": 0.4721802234649658, "num_tokens": 8572370934.0, "step": 70220 }, { "epoch": 0.09364883818910026, "grad_norm": 2.09375, "learning_rate": 4.682350274652019e-06, "loss": 0.4639269351959229, "num_tokens": 8574827507.0, "step": 70240 }, { "epoch": 0.09367550357582836, "grad_norm": 1.7890625, "learning_rate": 4.683683536878034e-06, "loss": 0.47648167610168457, "num_tokens": 8577282375.0, "step": 70260 }, { "epoch": 0.09370216896255647, "grad_norm": 2.5, "learning_rate": 4.685016799104048e-06, "loss": 0.460714054107666, "num_tokens": 8579826578.0, "step": 70280 }, { "epoch": 0.09372883434928457, "grad_norm": 2.03125, "learning_rate": 4.686350061330063e-06, "loss": 0.4513659954071045, "num_tokens": 8582233814.0, "step": 70300 }, { "epoch": 0.09375549973601267, "grad_norm": 1.96875, "learning_rate": 4.687683323556078e-06, "loss": 0.47107319831848143, "num_tokens": 8584836698.0, "step": 70320 }, { "epoch": 0.09378216512274078, "grad_norm": 1.640625, "learning_rate": 4.689016585782092e-06, "loss": 0.47990641593933103, "num_tokens": 8587105845.0, "step": 70340 }, { "epoch": 0.09380883050946888, "grad_norm": 2.03125, "learning_rate": 4.690349848008106e-06, "loss": 0.4609681129455566, "num_tokens": 8589425987.0, "step": 70360 }, { "epoch": 0.09383549589619698, "grad_norm": 1.859375, "learning_rate": 4.691683110234122e-06, "loss": 0.46774682998657224, "num_tokens": 8592032925.0, "step": 70380 }, { "epoch": 0.09386216128292509, "grad_norm": 1.8984375, "learning_rate": 4.693016372460136e-06, "loss": 0.46231751441955565, "num_tokens": 8594331718.0, "step": 70400 }, { "epoch": 0.09388882666965319, "grad_norm": 1.8828125, "learning_rate": 4.69434963468615e-06, "loss": 0.4487753391265869, "num_tokens": 8596816911.0, "step": 70420 }, { "epoch": 0.09391549205638129, "grad_norm": 1.8125, "learning_rate": 4.695682896912165e-06, "loss": 0.44982213973999025, "num_tokens": 8599206388.0, "step": 70440 }, { "epoch": 0.0939421574431094, "grad_norm": 2.078125, "learning_rate": 4.69701615913818e-06, "loss": 0.45334591865539553, "num_tokens": 8601420255.0, "step": 70460 }, { "epoch": 0.0939688228298375, "grad_norm": 2.125, "learning_rate": 4.6983494213641945e-06, "loss": 0.47407875061035154, "num_tokens": 8603894088.0, "step": 70480 }, { "epoch": 0.0939954882165656, "grad_norm": 1.984375, "learning_rate": 4.699682683590208e-06, "loss": 0.471739673614502, "num_tokens": 8606020525.0, "step": 70500 }, { "epoch": 0.0940221536032937, "grad_norm": 1.8828125, "learning_rate": 4.701015945816224e-06, "loss": 0.478150463104248, "num_tokens": 8608585443.0, "step": 70520 }, { "epoch": 0.0940488189900218, "grad_norm": 2.046875, "learning_rate": 4.702349208042239e-06, "loss": 0.4614903450012207, "num_tokens": 8610891113.0, "step": 70540 }, { "epoch": 0.09407548437674991, "grad_norm": 1.6875, "learning_rate": 4.7036824702682525e-06, "loss": 0.45998544692993165, "num_tokens": 8613083578.0, "step": 70560 }, { "epoch": 0.09410214976347801, "grad_norm": 1.8125, "learning_rate": 4.705015732494267e-06, "loss": 0.4482487678527832, "num_tokens": 8615707849.0, "step": 70580 }, { "epoch": 0.09412881515020613, "grad_norm": 2.515625, "learning_rate": 4.706348994720282e-06, "loss": 0.4491249084472656, "num_tokens": 8618226482.0, "step": 70600 }, { "epoch": 0.09415548053693423, "grad_norm": 2.1875, "learning_rate": 4.707682256946297e-06, "loss": 0.4509617805480957, "num_tokens": 8620598396.0, "step": 70620 }, { "epoch": 0.09418214592366234, "grad_norm": 1.9140625, "learning_rate": 4.709015519172311e-06, "loss": 0.4634251594543457, "num_tokens": 8623073500.0, "step": 70640 }, { "epoch": 0.09420881131039044, "grad_norm": 2.046875, "learning_rate": 4.710348781398326e-06, "loss": 0.4668569087982178, "num_tokens": 8625511654.0, "step": 70660 }, { "epoch": 0.09423547669711854, "grad_norm": 1.4765625, "learning_rate": 4.711682043624341e-06, "loss": 0.4519962787628174, "num_tokens": 8628086268.0, "step": 70680 }, { "epoch": 0.09426214208384665, "grad_norm": 1.9140625, "learning_rate": 4.713015305850355e-06, "loss": 0.4521437168121338, "num_tokens": 8630547329.0, "step": 70700 }, { "epoch": 0.09428880747057475, "grad_norm": 2.265625, "learning_rate": 4.714348568076369e-06, "loss": 0.4580547332763672, "num_tokens": 8633024665.0, "step": 70720 }, { "epoch": 0.09431547285730285, "grad_norm": 1.90625, "learning_rate": 4.715681830302384e-06, "loss": 0.47472076416015624, "num_tokens": 8635469013.0, "step": 70740 }, { "epoch": 0.09434213824403095, "grad_norm": 2.296875, "learning_rate": 4.717015092528399e-06, "loss": 0.4620808601379395, "num_tokens": 8637647122.0, "step": 70760 }, { "epoch": 0.09436880363075906, "grad_norm": 1.96875, "learning_rate": 4.718348354754413e-06, "loss": 0.4704929828643799, "num_tokens": 8640213350.0, "step": 70780 }, { "epoch": 0.09439546901748716, "grad_norm": 1.96875, "learning_rate": 4.719681616980428e-06, "loss": 0.44956326484680176, "num_tokens": 8642504460.0, "step": 70800 }, { "epoch": 0.09442213440421526, "grad_norm": 2.046875, "learning_rate": 4.721014879206443e-06, "loss": 0.47204742431640623, "num_tokens": 8644939806.0, "step": 70820 }, { "epoch": 0.09444879979094337, "grad_norm": 1.609375, "learning_rate": 4.7223481414324575e-06, "loss": 0.4565784454345703, "num_tokens": 8647365003.0, "step": 70840 }, { "epoch": 0.09447546517767147, "grad_norm": 2.1875, "learning_rate": 4.723681403658472e-06, "loss": 0.4491114139556885, "num_tokens": 8649894995.0, "step": 70860 }, { "epoch": 0.09450213056439957, "grad_norm": 2.65625, "learning_rate": 4.725014665884486e-06, "loss": 0.45517578125, "num_tokens": 8652270402.0, "step": 70880 }, { "epoch": 0.09452879595112768, "grad_norm": 2.21875, "learning_rate": 4.7263479281105016e-06, "loss": 0.4474630832672119, "num_tokens": 8654558368.0, "step": 70900 }, { "epoch": 0.09455546133785578, "grad_norm": 2.28125, "learning_rate": 4.727681190336515e-06, "loss": 0.46206226348876955, "num_tokens": 8657152281.0, "step": 70920 }, { "epoch": 0.09458212672458388, "grad_norm": 1.984375, "learning_rate": 4.72901445256253e-06, "loss": 0.44942331314086914, "num_tokens": 8659649815.0, "step": 70940 }, { "epoch": 0.09460879211131198, "grad_norm": 2.0, "learning_rate": 4.730347714788545e-06, "loss": 0.4664185047149658, "num_tokens": 8661986185.0, "step": 70960 }, { "epoch": 0.09463545749804009, "grad_norm": 2.796875, "learning_rate": 4.7316809770145595e-06, "loss": 0.4458589553833008, "num_tokens": 8664351386.0, "step": 70980 }, { "epoch": 0.0946621228847682, "grad_norm": 1.921875, "learning_rate": 4.733014239240574e-06, "loss": 0.4630718231201172, "num_tokens": 8666490308.0, "step": 71000 }, { "epoch": 0.09468878827149631, "grad_norm": 2.09375, "learning_rate": 4.734347501466589e-06, "loss": 0.46440944671630857, "num_tokens": 8669078476.0, "step": 71020 }, { "epoch": 0.09471545365822441, "grad_norm": 2.140625, "learning_rate": 4.735680763692604e-06, "loss": 0.44917588233947753, "num_tokens": 8671395425.0, "step": 71040 }, { "epoch": 0.09474211904495251, "grad_norm": 1.875, "learning_rate": 4.737014025918618e-06, "loss": 0.457182788848877, "num_tokens": 8673545087.0, "step": 71060 }, { "epoch": 0.09476878443168062, "grad_norm": 1.921875, "learning_rate": 4.738347288144632e-06, "loss": 0.4688117027282715, "num_tokens": 8675978436.0, "step": 71080 }, { "epoch": 0.09479544981840872, "grad_norm": 1.7734375, "learning_rate": 4.739680550370647e-06, "loss": 0.46703352928161623, "num_tokens": 8678260562.0, "step": 71100 }, { "epoch": 0.09482211520513682, "grad_norm": 1.8046875, "learning_rate": 4.7410138125966624e-06, "loss": 0.46849470138549804, "num_tokens": 8680568343.0, "step": 71120 }, { "epoch": 0.09484878059186493, "grad_norm": 1.9453125, "learning_rate": 4.742347074822676e-06, "loss": 0.4582244873046875, "num_tokens": 8683185927.0, "step": 71140 }, { "epoch": 0.09487544597859303, "grad_norm": 1.59375, "learning_rate": 4.743680337048691e-06, "loss": 0.44721412658691406, "num_tokens": 8685720870.0, "step": 71160 }, { "epoch": 0.09490211136532113, "grad_norm": 1.734375, "learning_rate": 4.745013599274706e-06, "loss": 0.45498085021972656, "num_tokens": 8688193311.0, "step": 71180 }, { "epoch": 0.09492877675204923, "grad_norm": 1.65625, "learning_rate": 4.74634686150072e-06, "loss": 0.45522403717041016, "num_tokens": 8690697673.0, "step": 71200 }, { "epoch": 0.09495544213877734, "grad_norm": 2.234375, "learning_rate": 4.747680123726735e-06, "loss": 0.45331487655639646, "num_tokens": 8693157912.0, "step": 71220 }, { "epoch": 0.09498210752550544, "grad_norm": 2.203125, "learning_rate": 4.749013385952749e-06, "loss": 0.448210334777832, "num_tokens": 8695769605.0, "step": 71240 }, { "epoch": 0.09500877291223354, "grad_norm": 2.015625, "learning_rate": 4.7503466481787645e-06, "loss": 0.44929656982421873, "num_tokens": 8698316576.0, "step": 71260 }, { "epoch": 0.09503543829896165, "grad_norm": 2.0, "learning_rate": 4.751679910404779e-06, "loss": 0.45014543533325196, "num_tokens": 8700854348.0, "step": 71280 }, { "epoch": 0.09506210368568975, "grad_norm": 2.34375, "learning_rate": 4.753013172630793e-06, "loss": 0.4464887142181396, "num_tokens": 8703433512.0, "step": 71300 }, { "epoch": 0.09508876907241785, "grad_norm": 1.875, "learning_rate": 4.754346434856808e-06, "loss": 0.46580753326416013, "num_tokens": 8705947428.0, "step": 71320 }, { "epoch": 0.09511543445914596, "grad_norm": 1.8671875, "learning_rate": 4.7556796970828224e-06, "loss": 0.4398662567138672, "num_tokens": 8708376364.0, "step": 71340 }, { "epoch": 0.09514209984587406, "grad_norm": 1.7890625, "learning_rate": 4.757012959308837e-06, "loss": 0.44844532012939453, "num_tokens": 8710788037.0, "step": 71360 }, { "epoch": 0.09516876523260216, "grad_norm": 2.328125, "learning_rate": 4.758346221534852e-06, "loss": 0.47469544410705566, "num_tokens": 8713121804.0, "step": 71380 }, { "epoch": 0.09519543061933028, "grad_norm": 1.828125, "learning_rate": 4.7596794837608666e-06, "loss": 0.46013078689575193, "num_tokens": 8715444734.0, "step": 71400 }, { "epoch": 0.09522209600605838, "grad_norm": 1.8359375, "learning_rate": 4.761012745986881e-06, "loss": 0.46816864013671877, "num_tokens": 8717770182.0, "step": 71420 }, { "epoch": 0.09524876139278649, "grad_norm": 1.9375, "learning_rate": 4.762346008212896e-06, "loss": 0.45957651138305666, "num_tokens": 8720276079.0, "step": 71440 }, { "epoch": 0.09527542677951459, "grad_norm": 1.875, "learning_rate": 4.76367927043891e-06, "loss": 0.45220484733581545, "num_tokens": 8722926181.0, "step": 71460 }, { "epoch": 0.09530209216624269, "grad_norm": 1.6484375, "learning_rate": 4.7650125326649245e-06, "loss": 0.46314563751220705, "num_tokens": 8725438581.0, "step": 71480 }, { "epoch": 0.0953287575529708, "grad_norm": 2.1875, "learning_rate": 4.76634579489094e-06, "loss": 0.4469954490661621, "num_tokens": 8727965256.0, "step": 71500 }, { "epoch": 0.0953554229396989, "grad_norm": 2.078125, "learning_rate": 4.767679057116954e-06, "loss": 0.4629838943481445, "num_tokens": 8730585079.0, "step": 71520 }, { "epoch": 0.095382088326427, "grad_norm": 2.25, "learning_rate": 4.769012319342969e-06, "loss": 0.46401028633117675, "num_tokens": 8733069277.0, "step": 71540 }, { "epoch": 0.0954087537131551, "grad_norm": 2.109375, "learning_rate": 4.770345581568983e-06, "loss": 0.46347770690917967, "num_tokens": 8735638458.0, "step": 71560 }, { "epoch": 0.0954354190998832, "grad_norm": 1.9453125, "learning_rate": 4.771678843794998e-06, "loss": 0.471506929397583, "num_tokens": 8738153842.0, "step": 71580 }, { "epoch": 0.09546208448661131, "grad_norm": 2.234375, "learning_rate": 4.773012106021013e-06, "loss": 0.4851221084594727, "num_tokens": 8740489731.0, "step": 71600 }, { "epoch": 0.09548874987333941, "grad_norm": 2.28125, "learning_rate": 4.774345368247027e-06, "loss": 0.44456777572631834, "num_tokens": 8742847478.0, "step": 71620 }, { "epoch": 0.09551541526006752, "grad_norm": 1.5390625, "learning_rate": 4.775678630473042e-06, "loss": 0.4519216537475586, "num_tokens": 8745154343.0, "step": 71640 }, { "epoch": 0.09554208064679562, "grad_norm": 2.359375, "learning_rate": 4.777011892699057e-06, "loss": 0.47147130966186523, "num_tokens": 8747631961.0, "step": 71660 }, { "epoch": 0.09556874603352372, "grad_norm": 1.6875, "learning_rate": 4.778345154925071e-06, "loss": 0.4489900588989258, "num_tokens": 8750119235.0, "step": 71680 }, { "epoch": 0.09559541142025182, "grad_norm": 2.484375, "learning_rate": 4.779678417151085e-06, "loss": 0.4562647819519043, "num_tokens": 8752651320.0, "step": 71700 }, { "epoch": 0.09562207680697993, "grad_norm": 1.828125, "learning_rate": 4.781011679377101e-06, "loss": 0.4442176818847656, "num_tokens": 8755000454.0, "step": 71720 }, { "epoch": 0.09564874219370803, "grad_norm": 1.984375, "learning_rate": 4.782344941603115e-06, "loss": 0.46350917816162107, "num_tokens": 8757241637.0, "step": 71740 }, { "epoch": 0.09567540758043613, "grad_norm": 1.6875, "learning_rate": 4.7836782038291295e-06, "loss": 0.4667309284210205, "num_tokens": 8759746716.0, "step": 71760 }, { "epoch": 0.09570207296716424, "grad_norm": 2.46875, "learning_rate": 4.785011466055144e-06, "loss": 0.4752981185913086, "num_tokens": 8762189544.0, "step": 71780 }, { "epoch": 0.09572873835389234, "grad_norm": 2.015625, "learning_rate": 4.786344728281159e-06, "loss": 0.4592705726623535, "num_tokens": 8764577800.0, "step": 71800 }, { "epoch": 0.09575540374062046, "grad_norm": 1.8671875, "learning_rate": 4.787677990507174e-06, "loss": 0.43850183486938477, "num_tokens": 8766882110.0, "step": 71820 }, { "epoch": 0.09578206912734856, "grad_norm": 2.046875, "learning_rate": 4.7890112527331874e-06, "loss": 0.46123013496398924, "num_tokens": 8769456040.0, "step": 71840 }, { "epoch": 0.09580873451407666, "grad_norm": 1.734375, "learning_rate": 4.790344514959203e-06, "loss": 0.4564612865447998, "num_tokens": 8771946840.0, "step": 71860 }, { "epoch": 0.09583539990080477, "grad_norm": 1.640625, "learning_rate": 4.791677777185218e-06, "loss": 0.4600202560424805, "num_tokens": 8774341187.0, "step": 71880 }, { "epoch": 0.09586206528753287, "grad_norm": 1.984375, "learning_rate": 4.7930110394112316e-06, "loss": 0.4642399787902832, "num_tokens": 8776861783.0, "step": 71900 }, { "epoch": 0.09588873067426097, "grad_norm": 1.5625, "learning_rate": 4.794344301637246e-06, "loss": 0.45069260597229005, "num_tokens": 8779156426.0, "step": 71920 }, { "epoch": 0.09591539606098907, "grad_norm": 2.078125, "learning_rate": 4.795677563863261e-06, "loss": 0.46326723098754885, "num_tokens": 8781430820.0, "step": 71940 }, { "epoch": 0.09594206144771718, "grad_norm": 1.859375, "learning_rate": 4.797010826089276e-06, "loss": 0.4711332321166992, "num_tokens": 8783954980.0, "step": 71960 }, { "epoch": 0.09596872683444528, "grad_norm": 2.171875, "learning_rate": 4.79834408831529e-06, "loss": 0.45427618026733396, "num_tokens": 8786425534.0, "step": 71980 }, { "epoch": 0.09599539222117338, "grad_norm": 1.9140625, "learning_rate": 4.799677350541305e-06, "loss": 0.4704731464385986, "num_tokens": 8788976592.0, "step": 72000 }, { "epoch": 0.09602205760790149, "grad_norm": 1.7421875, "learning_rate": 4.80101061276732e-06, "loss": 0.45082740783691405, "num_tokens": 8791379889.0, "step": 72020 }, { "epoch": 0.09604872299462959, "grad_norm": 1.7265625, "learning_rate": 4.8023438749933345e-06, "loss": 0.4620457649230957, "num_tokens": 8793781970.0, "step": 72040 }, { "epoch": 0.0960753883813577, "grad_norm": 1.59375, "learning_rate": 4.803677137219348e-06, "loss": 0.4761186122894287, "num_tokens": 8796208552.0, "step": 72060 }, { "epoch": 0.0961020537680858, "grad_norm": 1.6640625, "learning_rate": 4.805010399445363e-06, "loss": 0.4440771102905273, "num_tokens": 8798509695.0, "step": 72080 }, { "epoch": 0.0961287191548139, "grad_norm": 1.6953125, "learning_rate": 4.806343661671378e-06, "loss": 0.46810150146484375, "num_tokens": 8801072774.0, "step": 72100 }, { "epoch": 0.096155384541542, "grad_norm": 2.46875, "learning_rate": 4.807676923897392e-06, "loss": 0.4605240345001221, "num_tokens": 8803328423.0, "step": 72120 }, { "epoch": 0.0961820499282701, "grad_norm": 2.078125, "learning_rate": 4.809010186123407e-06, "loss": 0.4531947135925293, "num_tokens": 8805826157.0, "step": 72140 }, { "epoch": 0.09620871531499821, "grad_norm": 2.296875, "learning_rate": 4.810343448349422e-06, "loss": 0.47174954414367676, "num_tokens": 8808366030.0, "step": 72160 }, { "epoch": 0.09623538070172631, "grad_norm": 2.0625, "learning_rate": 4.8116767105754365e-06, "loss": 0.4537498474121094, "num_tokens": 8810913357.0, "step": 72180 }, { "epoch": 0.09626204608845441, "grad_norm": 1.8203125, "learning_rate": 4.813009972801451e-06, "loss": 0.4638975143432617, "num_tokens": 8813619160.0, "step": 72200 }, { "epoch": 0.09628871147518253, "grad_norm": 1.9453125, "learning_rate": 4.814343235027465e-06, "loss": 0.44709024429321287, "num_tokens": 8815980959.0, "step": 72220 }, { "epoch": 0.09631537686191063, "grad_norm": 1.9921875, "learning_rate": 4.815676497253481e-06, "loss": 0.4441171169281006, "num_tokens": 8818303826.0, "step": 72240 }, { "epoch": 0.09634204224863874, "grad_norm": 2.125, "learning_rate": 4.8170097594794945e-06, "loss": 0.4548459053039551, "num_tokens": 8820872169.0, "step": 72260 }, { "epoch": 0.09636870763536684, "grad_norm": 2.21875, "learning_rate": 4.818343021705509e-06, "loss": 0.46005783081054685, "num_tokens": 8823203081.0, "step": 72280 }, { "epoch": 0.09639537302209494, "grad_norm": 2.015625, "learning_rate": 4.819676283931524e-06, "loss": 0.44860334396362306, "num_tokens": 8825680787.0, "step": 72300 }, { "epoch": 0.09642203840882305, "grad_norm": 2.046875, "learning_rate": 4.821009546157539e-06, "loss": 0.4486656188964844, "num_tokens": 8828015992.0, "step": 72320 }, { "epoch": 0.09644870379555115, "grad_norm": 1.6015625, "learning_rate": 4.822342808383553e-06, "loss": 0.45796971321105956, "num_tokens": 8830750741.0, "step": 72340 }, { "epoch": 0.09647536918227925, "grad_norm": 2.359375, "learning_rate": 4.823676070609568e-06, "loss": 0.44590420722961427, "num_tokens": 8833045110.0, "step": 72360 }, { "epoch": 0.09650203456900736, "grad_norm": 1.8828125, "learning_rate": 4.825009332835583e-06, "loss": 0.4579498291015625, "num_tokens": 8835432210.0, "step": 72380 }, { "epoch": 0.09652869995573546, "grad_norm": 1.9375, "learning_rate": 4.826342595061597e-06, "loss": 0.4462145805358887, "num_tokens": 8837923835.0, "step": 72400 }, { "epoch": 0.09655536534246356, "grad_norm": 2.125, "learning_rate": 4.827675857287611e-06, "loss": 0.45461101531982423, "num_tokens": 8840213985.0, "step": 72420 }, { "epoch": 0.09658203072919166, "grad_norm": 2.171875, "learning_rate": 4.829009119513626e-06, "loss": 0.4590146064758301, "num_tokens": 8842610256.0, "step": 72440 }, { "epoch": 0.09660869611591977, "grad_norm": 2.171875, "learning_rate": 4.8303423817396415e-06, "loss": 0.437618350982666, "num_tokens": 8844757916.0, "step": 72460 }, { "epoch": 0.09663536150264787, "grad_norm": 1.984375, "learning_rate": 4.831675643965655e-06, "loss": 0.45915994644165037, "num_tokens": 8847247996.0, "step": 72480 }, { "epoch": 0.09666202688937597, "grad_norm": 1.65625, "learning_rate": 4.83300890619167e-06, "loss": 0.45941758155822754, "num_tokens": 8849754137.0, "step": 72500 }, { "epoch": 0.09668869227610408, "grad_norm": 1.7578125, "learning_rate": 4.834342168417685e-06, "loss": 0.47487363815307615, "num_tokens": 8852239221.0, "step": 72520 }, { "epoch": 0.09671535766283218, "grad_norm": 2.03125, "learning_rate": 4.8356754306436995e-06, "loss": 0.46442208290100095, "num_tokens": 8854828358.0, "step": 72540 }, { "epoch": 0.09674202304956028, "grad_norm": 2.140625, "learning_rate": 4.837008692869714e-06, "loss": 0.4471580982208252, "num_tokens": 8857465316.0, "step": 72560 }, { "epoch": 0.09676868843628839, "grad_norm": 1.9140625, "learning_rate": 4.838341955095728e-06, "loss": 0.46345205307006837, "num_tokens": 8860136295.0, "step": 72580 }, { "epoch": 0.09679535382301649, "grad_norm": 1.5625, "learning_rate": 4.8396752173217436e-06, "loss": 0.44291157722473146, "num_tokens": 8862554786.0, "step": 72600 }, { "epoch": 0.0968220192097446, "grad_norm": 1.5625, "learning_rate": 4.841008479547758e-06, "loss": 0.43462634086608887, "num_tokens": 8865128606.0, "step": 72620 }, { "epoch": 0.09684868459647271, "grad_norm": 2.0625, "learning_rate": 4.842341741773772e-06, "loss": 0.4453136920928955, "num_tokens": 8867714691.0, "step": 72640 }, { "epoch": 0.09687534998320081, "grad_norm": 2.109375, "learning_rate": 4.843675003999787e-06, "loss": 0.44529018402099607, "num_tokens": 8870171348.0, "step": 72660 }, { "epoch": 0.09690201536992891, "grad_norm": 2.25, "learning_rate": 4.8450082662258015e-06, "loss": 0.4548483848571777, "num_tokens": 8872572671.0, "step": 72680 }, { "epoch": 0.09692868075665702, "grad_norm": 2.59375, "learning_rate": 4.846341528451816e-06, "loss": 0.4536560535430908, "num_tokens": 8874798925.0, "step": 72700 }, { "epoch": 0.09695534614338512, "grad_norm": 1.703125, "learning_rate": 4.847674790677831e-06, "loss": 0.4620392322540283, "num_tokens": 8877306893.0, "step": 72720 }, { "epoch": 0.09698201153011322, "grad_norm": 1.9609375, "learning_rate": 4.849008052903846e-06, "loss": 0.4500420093536377, "num_tokens": 8879873619.0, "step": 72740 }, { "epoch": 0.09700867691684133, "grad_norm": 2.21875, "learning_rate": 4.85034131512986e-06, "loss": 0.46125946044921873, "num_tokens": 8882030052.0, "step": 72760 }, { "epoch": 0.09703534230356943, "grad_norm": 1.9375, "learning_rate": 4.851674577355875e-06, "loss": 0.46205549240112304, "num_tokens": 8884536319.0, "step": 72780 }, { "epoch": 0.09706200769029753, "grad_norm": 1.9140625, "learning_rate": 4.853007839581889e-06, "loss": 0.4612597942352295, "num_tokens": 8886947081.0, "step": 72800 }, { "epoch": 0.09708867307702564, "grad_norm": 2.046875, "learning_rate": 4.854341101807904e-06, "loss": 0.4470070838928223, "num_tokens": 8889413587.0, "step": 72820 }, { "epoch": 0.09711533846375374, "grad_norm": 1.8828125, "learning_rate": 4.855674364033919e-06, "loss": 0.4514348030090332, "num_tokens": 8892078744.0, "step": 72840 }, { "epoch": 0.09714200385048184, "grad_norm": 1.65625, "learning_rate": 4.857007626259933e-06, "loss": 0.47417163848876953, "num_tokens": 8894693672.0, "step": 72860 }, { "epoch": 0.09716866923720995, "grad_norm": 1.75, "learning_rate": 4.858340888485948e-06, "loss": 0.44813966751098633, "num_tokens": 8897024338.0, "step": 72880 }, { "epoch": 0.09719533462393805, "grad_norm": 2.34375, "learning_rate": 4.859674150711962e-06, "loss": 0.45264291763305664, "num_tokens": 8899585224.0, "step": 72900 }, { "epoch": 0.09722200001066615, "grad_norm": 2.515625, "learning_rate": 4.861007412937977e-06, "loss": 0.45151557922363283, "num_tokens": 8901899075.0, "step": 72920 }, { "epoch": 0.09724866539739425, "grad_norm": 1.6953125, "learning_rate": 4.862340675163992e-06, "loss": 0.45311856269836426, "num_tokens": 8904251821.0, "step": 72940 }, { "epoch": 0.09727533078412236, "grad_norm": 2.296875, "learning_rate": 4.863673937390006e-06, "loss": 0.46073141098022463, "num_tokens": 8906722299.0, "step": 72960 }, { "epoch": 0.09730199617085046, "grad_norm": 1.9609375, "learning_rate": 4.865007199616021e-06, "loss": 0.4398029327392578, "num_tokens": 8909100721.0, "step": 72980 }, { "epoch": 0.09732866155757856, "grad_norm": 2.125, "learning_rate": 4.866340461842036e-06, "loss": 0.45383520126342775, "num_tokens": 8911605130.0, "step": 73000 }, { "epoch": 0.09735532694430667, "grad_norm": 1.8359375, "learning_rate": 4.86767372406805e-06, "loss": 0.45047636032104493, "num_tokens": 8913941579.0, "step": 73020 }, { "epoch": 0.09738199233103478, "grad_norm": 2.328125, "learning_rate": 4.8690069862940645e-06, "loss": 0.4681849479675293, "num_tokens": 8916037421.0, "step": 73040 }, { "epoch": 0.09740865771776289, "grad_norm": 1.28125, "learning_rate": 4.87034024852008e-06, "loss": 0.44623165130615233, "num_tokens": 8918471067.0, "step": 73060 }, { "epoch": 0.09743532310449099, "grad_norm": 1.9921875, "learning_rate": 4.871673510746094e-06, "loss": 0.4383037567138672, "num_tokens": 8920910526.0, "step": 73080 }, { "epoch": 0.09746198849121909, "grad_norm": 2.03125, "learning_rate": 4.8730067729721086e-06, "loss": 0.44255571365356444, "num_tokens": 8923634821.0, "step": 73100 }, { "epoch": 0.0974886538779472, "grad_norm": 1.8828125, "learning_rate": 4.874340035198123e-06, "loss": 0.4601734161376953, "num_tokens": 8926258126.0, "step": 73120 }, { "epoch": 0.0975153192646753, "grad_norm": 1.8359375, "learning_rate": 4.875673297424138e-06, "loss": 0.46515369415283203, "num_tokens": 8928767877.0, "step": 73140 }, { "epoch": 0.0975419846514034, "grad_norm": 1.7890625, "learning_rate": 4.877006559650153e-06, "loss": 0.45171413421630857, "num_tokens": 8931346262.0, "step": 73160 }, { "epoch": 0.0975686500381315, "grad_norm": 1.859375, "learning_rate": 4.8783398218761665e-06, "loss": 0.4556800365447998, "num_tokens": 8933780378.0, "step": 73180 }, { "epoch": 0.09759531542485961, "grad_norm": 1.7421875, "learning_rate": 4.879673084102182e-06, "loss": 0.4621072292327881, "num_tokens": 8936149221.0, "step": 73200 }, { "epoch": 0.09762198081158771, "grad_norm": 2.5625, "learning_rate": 4.881006346328197e-06, "loss": 0.44101266860961913, "num_tokens": 8938694477.0, "step": 73220 }, { "epoch": 0.09764864619831581, "grad_norm": 2.0, "learning_rate": 4.882339608554211e-06, "loss": 0.44672770500183107, "num_tokens": 8941059860.0, "step": 73240 }, { "epoch": 0.09767531158504392, "grad_norm": 2.03125, "learning_rate": 4.883672870780225e-06, "loss": 0.45323958396911623, "num_tokens": 8943490994.0, "step": 73260 }, { "epoch": 0.09770197697177202, "grad_norm": 2.1875, "learning_rate": 4.88500613300624e-06, "loss": 0.45037240982055665, "num_tokens": 8945885462.0, "step": 73280 }, { "epoch": 0.09772864235850012, "grad_norm": 2.078125, "learning_rate": 4.886339395232255e-06, "loss": 0.4604645252227783, "num_tokens": 8948417238.0, "step": 73300 }, { "epoch": 0.09775530774522823, "grad_norm": 2.015625, "learning_rate": 4.8876726574582694e-06, "loss": 0.43811473846435545, "num_tokens": 8950980750.0, "step": 73320 }, { "epoch": 0.09778197313195633, "grad_norm": 2.140625, "learning_rate": 4.889005919684284e-06, "loss": 0.4515837669372559, "num_tokens": 8953538397.0, "step": 73340 }, { "epoch": 0.09780863851868443, "grad_norm": 1.890625, "learning_rate": 4.890339181910299e-06, "loss": 0.4472977638244629, "num_tokens": 8956070000.0, "step": 73360 }, { "epoch": 0.09783530390541254, "grad_norm": 2.0625, "learning_rate": 4.8916724441363135e-06, "loss": 0.4485520362854004, "num_tokens": 8958525055.0, "step": 73380 }, { "epoch": 0.09786196929214064, "grad_norm": 2.390625, "learning_rate": 4.893005706362327e-06, "loss": 0.4552407741546631, "num_tokens": 8960951811.0, "step": 73400 }, { "epoch": 0.09788863467886874, "grad_norm": 1.734375, "learning_rate": 4.894338968588342e-06, "loss": 0.4497408866882324, "num_tokens": 8963401519.0, "step": 73420 }, { "epoch": 0.09791530006559686, "grad_norm": 1.875, "learning_rate": 4.895672230814357e-06, "loss": 0.44817204475402833, "num_tokens": 8966122047.0, "step": 73440 }, { "epoch": 0.09794196545232496, "grad_norm": 1.84375, "learning_rate": 4.8970054930403715e-06, "loss": 0.43938627243041994, "num_tokens": 8968443159.0, "step": 73460 }, { "epoch": 0.09796863083905306, "grad_norm": 1.96875, "learning_rate": 4.898338755266386e-06, "loss": 0.46764497756958007, "num_tokens": 8971036214.0, "step": 73480 }, { "epoch": 0.09799529622578117, "grad_norm": 2.078125, "learning_rate": 4.899672017492401e-06, "loss": 0.45990982055664065, "num_tokens": 8973591857.0, "step": 73500 }, { "epoch": 0.09802196161250927, "grad_norm": 1.7890625, "learning_rate": 4.901005279718416e-06, "loss": 0.44754719734191895, "num_tokens": 8975943925.0, "step": 73520 }, { "epoch": 0.09804862699923737, "grad_norm": 1.984375, "learning_rate": 4.90233854194443e-06, "loss": 0.4489278793334961, "num_tokens": 8978302557.0, "step": 73540 }, { "epoch": 0.09807529238596548, "grad_norm": 2.140625, "learning_rate": 4.903671804170444e-06, "loss": 0.4562373638153076, "num_tokens": 8980860385.0, "step": 73560 }, { "epoch": 0.09810195777269358, "grad_norm": 1.9609375, "learning_rate": 4.90500506639646e-06, "loss": 0.44191617965698243, "num_tokens": 8983250302.0, "step": 73580 }, { "epoch": 0.09812862315942168, "grad_norm": 1.6328125, "learning_rate": 4.9063383286224736e-06, "loss": 0.4386930465698242, "num_tokens": 8985812335.0, "step": 73600 }, { "epoch": 0.09815528854614979, "grad_norm": 2.046875, "learning_rate": 4.907671590848488e-06, "loss": 0.4533107757568359, "num_tokens": 8987962138.0, "step": 73620 }, { "epoch": 0.09818195393287789, "grad_norm": 2.25, "learning_rate": 4.909004853074503e-06, "loss": 0.43828797340393066, "num_tokens": 8990426647.0, "step": 73640 }, { "epoch": 0.09820861931960599, "grad_norm": 1.7890625, "learning_rate": 4.910338115300518e-06, "loss": 0.43169417381286623, "num_tokens": 8992674098.0, "step": 73660 }, { "epoch": 0.0982352847063341, "grad_norm": 2.28125, "learning_rate": 4.911671377526532e-06, "loss": 0.44927663803100587, "num_tokens": 8995104638.0, "step": 73680 }, { "epoch": 0.0982619500930622, "grad_norm": 2.09375, "learning_rate": 4.913004639752547e-06, "loss": 0.44601998329162595, "num_tokens": 8997677629.0, "step": 73700 }, { "epoch": 0.0982886154797903, "grad_norm": 2.09375, "learning_rate": 4.914337901978562e-06, "loss": 0.44162817001342775, "num_tokens": 9000155942.0, "step": 73720 }, { "epoch": 0.0983152808665184, "grad_norm": 2.15625, "learning_rate": 4.9156711642045765e-06, "loss": 0.45455131530761717, "num_tokens": 9002810882.0, "step": 73740 }, { "epoch": 0.0983419462532465, "grad_norm": 1.703125, "learning_rate": 4.91700442643059e-06, "loss": 0.4418339729309082, "num_tokens": 9005508962.0, "step": 73760 }, { "epoch": 0.09836861163997461, "grad_norm": 2.078125, "learning_rate": 4.918337688656605e-06, "loss": 0.4476015567779541, "num_tokens": 9007881171.0, "step": 73780 }, { "epoch": 0.09839527702670271, "grad_norm": 2.140625, "learning_rate": 4.91967095088262e-06, "loss": 0.4426567077636719, "num_tokens": 9010263710.0, "step": 73800 }, { "epoch": 0.09842194241343082, "grad_norm": 1.859375, "learning_rate": 4.9210042131086344e-06, "loss": 0.43954830169677733, "num_tokens": 9012555959.0, "step": 73820 }, { "epoch": 0.09844860780015892, "grad_norm": 1.796875, "learning_rate": 4.922337475334649e-06, "loss": 0.46604342460632325, "num_tokens": 9015186113.0, "step": 73840 }, { "epoch": 0.09847527318688704, "grad_norm": 1.734375, "learning_rate": 4.923670737560664e-06, "loss": 0.4558852195739746, "num_tokens": 9017536140.0, "step": 73860 }, { "epoch": 0.09850193857361514, "grad_norm": 2.59375, "learning_rate": 4.9250039997866785e-06, "loss": 0.4528553009033203, "num_tokens": 9019800734.0, "step": 73880 }, { "epoch": 0.09852860396034324, "grad_norm": 1.828125, "learning_rate": 4.926337262012693e-06, "loss": 0.4667649269104004, "num_tokens": 9022370226.0, "step": 73900 }, { "epoch": 0.09855526934707134, "grad_norm": 1.890625, "learning_rate": 4.927670524238707e-06, "loss": 0.4355876922607422, "num_tokens": 9024781678.0, "step": 73920 }, { "epoch": 0.09858193473379945, "grad_norm": 2.109375, "learning_rate": 4.929003786464723e-06, "loss": 0.4529151916503906, "num_tokens": 9027340124.0, "step": 73940 }, { "epoch": 0.09860860012052755, "grad_norm": 2.703125, "learning_rate": 4.930337048690737e-06, "loss": 0.45296320915222166, "num_tokens": 9029824333.0, "step": 73960 }, { "epoch": 0.09863526550725565, "grad_norm": 1.7421875, "learning_rate": 4.931670310916751e-06, "loss": 0.45113887786865237, "num_tokens": 9032294208.0, "step": 73980 }, { "epoch": 0.09866193089398376, "grad_norm": 1.7578125, "learning_rate": 4.933003573142766e-06, "loss": 0.45632328987121584, "num_tokens": 9034725678.0, "step": 74000 }, { "epoch": 0.09868859628071186, "grad_norm": 2.203125, "learning_rate": 4.934336835368781e-06, "loss": 0.4442568778991699, "num_tokens": 9037103158.0, "step": 74020 }, { "epoch": 0.09871526166743996, "grad_norm": 2.15625, "learning_rate": 4.935670097594795e-06, "loss": 0.4509085178375244, "num_tokens": 9039575800.0, "step": 74040 }, { "epoch": 0.09874192705416807, "grad_norm": 2.703125, "learning_rate": 4.93700335982081e-06, "loss": 0.43612380027770997, "num_tokens": 9041976033.0, "step": 74060 }, { "epoch": 0.09876859244089617, "grad_norm": 1.9765625, "learning_rate": 4.938336622046825e-06, "loss": 0.4510974884033203, "num_tokens": 9044459661.0, "step": 74080 }, { "epoch": 0.09879525782762427, "grad_norm": 1.734375, "learning_rate": 4.939669884272839e-06, "loss": 0.4400203704833984, "num_tokens": 9046624440.0, "step": 74100 }, { "epoch": 0.09882192321435238, "grad_norm": 2.25, "learning_rate": 4.941003146498854e-06, "loss": 0.45558962821960447, "num_tokens": 9049037248.0, "step": 74120 }, { "epoch": 0.09884858860108048, "grad_norm": 2.3125, "learning_rate": 4.942336408724868e-06, "loss": 0.43468337059020995, "num_tokens": 9051591093.0, "step": 74140 }, { "epoch": 0.09887525398780858, "grad_norm": 2.03125, "learning_rate": 4.943669670950883e-06, "loss": 0.45093536376953125, "num_tokens": 9053900420.0, "step": 74160 }, { "epoch": 0.09890191937453668, "grad_norm": 2.015625, "learning_rate": 4.945002933176898e-06, "loss": 0.44687600135803224, "num_tokens": 9056270054.0, "step": 74180 }, { "epoch": 0.09892858476126479, "grad_norm": 2.046875, "learning_rate": 4.946336195402912e-06, "loss": 0.4572334289550781, "num_tokens": 9058832340.0, "step": 74200 }, { "epoch": 0.09895525014799289, "grad_norm": 1.9453125, "learning_rate": 4.947669457628927e-06, "loss": 0.44785375595092775, "num_tokens": 9061411907.0, "step": 74220 }, { "epoch": 0.098981915534721, "grad_norm": 2.0625, "learning_rate": 4.9490027198549415e-06, "loss": 0.45508155822753904, "num_tokens": 9063763384.0, "step": 74240 }, { "epoch": 0.09900858092144911, "grad_norm": 2.015625, "learning_rate": 4.950335982080956e-06, "loss": 0.45556211471557617, "num_tokens": 9065985451.0, "step": 74260 }, { "epoch": 0.09903524630817721, "grad_norm": 1.6953125, "learning_rate": 4.951669244306971e-06, "loss": 0.44979381561279297, "num_tokens": 9068542573.0, "step": 74280 }, { "epoch": 0.09906191169490532, "grad_norm": 2.5, "learning_rate": 4.953002506532985e-06, "loss": 0.4337045192718506, "num_tokens": 9070952188.0, "step": 74300 }, { "epoch": 0.09908857708163342, "grad_norm": 2.15625, "learning_rate": 4.954335768759e-06, "loss": 0.4513555526733398, "num_tokens": 9073235225.0, "step": 74320 }, { "epoch": 0.09911524246836152, "grad_norm": 1.7578125, "learning_rate": 4.955669030985015e-06, "loss": 0.4500643253326416, "num_tokens": 9075484824.0, "step": 74340 }, { "epoch": 0.09914190785508963, "grad_norm": 1.7578125, "learning_rate": 4.957002293211029e-06, "loss": 0.44966955184936525, "num_tokens": 9077705689.0, "step": 74360 }, { "epoch": 0.09916857324181773, "grad_norm": 2.71875, "learning_rate": 4.9583355554370435e-06, "loss": 0.45299696922302246, "num_tokens": 9080113159.0, "step": 74380 }, { "epoch": 0.09919523862854583, "grad_norm": 1.6953125, "learning_rate": 4.959668817663058e-06, "loss": 0.4434059143066406, "num_tokens": 9082751678.0, "step": 74400 }, { "epoch": 0.09922190401527393, "grad_norm": 1.8984375, "learning_rate": 4.961002079889073e-06, "loss": 0.43634843826293945, "num_tokens": 9085129683.0, "step": 74420 }, { "epoch": 0.09924856940200204, "grad_norm": 2.03125, "learning_rate": 4.962335342115088e-06, "loss": 0.44409332275390623, "num_tokens": 9087515842.0, "step": 74440 }, { "epoch": 0.09927523478873014, "grad_norm": 1.828125, "learning_rate": 4.963668604341102e-06, "loss": 0.42771182060241697, "num_tokens": 9089861362.0, "step": 74460 }, { "epoch": 0.09930190017545824, "grad_norm": 1.765625, "learning_rate": 4.965001866567117e-06, "loss": 0.4408436298370361, "num_tokens": 9092359908.0, "step": 74480 }, { "epoch": 0.09932856556218635, "grad_norm": 2.484375, "learning_rate": 4.966335128793132e-06, "loss": 0.4633146286010742, "num_tokens": 9094695537.0, "step": 74500 }, { "epoch": 0.09935523094891445, "grad_norm": 2.09375, "learning_rate": 4.967668391019146e-06, "loss": 0.4334749698638916, "num_tokens": 9096999624.0, "step": 74520 }, { "epoch": 0.09938189633564255, "grad_norm": 1.9609375, "learning_rate": 4.96900165324516e-06, "loss": 0.4447360038757324, "num_tokens": 9099488361.0, "step": 74540 }, { "epoch": 0.09940856172237066, "grad_norm": 2.546875, "learning_rate": 4.970334915471176e-06, "loss": 0.46237382888793943, "num_tokens": 9101830523.0, "step": 74560 }, { "epoch": 0.09943522710909876, "grad_norm": 1.7734375, "learning_rate": 4.97166817769719e-06, "loss": 0.4390972137451172, "num_tokens": 9104142738.0, "step": 74580 }, { "epoch": 0.09946189249582686, "grad_norm": 2.015625, "learning_rate": 4.973001439923204e-06, "loss": 0.45824265480041504, "num_tokens": 9106638401.0, "step": 74600 }, { "epoch": 0.09948855788255496, "grad_norm": 2.234375, "learning_rate": 4.974334702149219e-06, "loss": 0.4415532112121582, "num_tokens": 9109134191.0, "step": 74620 }, { "epoch": 0.09951522326928307, "grad_norm": 2.125, "learning_rate": 4.975667964375234e-06, "loss": 0.4434762954711914, "num_tokens": 9111575405.0, "step": 74640 }, { "epoch": 0.09954188865601118, "grad_norm": 1.5390625, "learning_rate": 4.9770012266012485e-06, "loss": 0.4556590557098389, "num_tokens": 9114023605.0, "step": 74660 }, { "epoch": 0.09956855404273929, "grad_norm": 1.6484375, "learning_rate": 4.978334488827262e-06, "loss": 0.44504971504211427, "num_tokens": 9116428429.0, "step": 74680 }, { "epoch": 0.09959521942946739, "grad_norm": 1.6328125, "learning_rate": 4.979667751053278e-06, "loss": 0.44008970260620117, "num_tokens": 9118942550.0, "step": 74700 }, { "epoch": 0.0996218848161955, "grad_norm": 2.125, "learning_rate": 4.981001013279293e-06, "loss": 0.4466695308685303, "num_tokens": 9121249700.0, "step": 74720 }, { "epoch": 0.0996485502029236, "grad_norm": 2.140625, "learning_rate": 4.9823342755053065e-06, "loss": 0.4476443290710449, "num_tokens": 9123777004.0, "step": 74740 }, { "epoch": 0.0996752155896517, "grad_norm": 2.109375, "learning_rate": 4.983667537731321e-06, "loss": 0.452911376953125, "num_tokens": 9126198404.0, "step": 74760 }, { "epoch": 0.0997018809763798, "grad_norm": 2.6875, "learning_rate": 4.985000799957336e-06, "loss": 0.4380695343017578, "num_tokens": 9128749384.0, "step": 74780 }, { "epoch": 0.0997285463631079, "grad_norm": 1.9921875, "learning_rate": 4.9863340621833506e-06, "loss": 0.4522047996520996, "num_tokens": 9131346621.0, "step": 74800 }, { "epoch": 0.09975521174983601, "grad_norm": 1.421875, "learning_rate": 4.987667324409365e-06, "loss": 0.4513514518737793, "num_tokens": 9133755704.0, "step": 74820 }, { "epoch": 0.09978187713656411, "grad_norm": 2.203125, "learning_rate": 4.98900058663538e-06, "loss": 0.430128288269043, "num_tokens": 9135767207.0, "step": 74840 }, { "epoch": 0.09980854252329222, "grad_norm": 2.0625, "learning_rate": 4.990333848861395e-06, "loss": 0.44039225578308105, "num_tokens": 9138193780.0, "step": 74860 }, { "epoch": 0.09983520791002032, "grad_norm": 2.046875, "learning_rate": 4.991667111087409e-06, "loss": 0.44595584869384763, "num_tokens": 9140708099.0, "step": 74880 }, { "epoch": 0.09986187329674842, "grad_norm": 2.375, "learning_rate": 4.993000373313423e-06, "loss": 0.4411149501800537, "num_tokens": 9143248155.0, "step": 74900 }, { "epoch": 0.09988853868347652, "grad_norm": 2.140625, "learning_rate": 4.994333635539439e-06, "loss": 0.4631045341491699, "num_tokens": 9145733431.0, "step": 74920 }, { "epoch": 0.09991520407020463, "grad_norm": 2.109375, "learning_rate": 4.995666897765453e-06, "loss": 0.4529073238372803, "num_tokens": 9148084528.0, "step": 74940 }, { "epoch": 0.09994186945693273, "grad_norm": 1.9296875, "learning_rate": 4.997000159991467e-06, "loss": 0.43645830154418946, "num_tokens": 9150526575.0, "step": 74960 }, { "epoch": 0.09996853484366083, "grad_norm": 2.375, "learning_rate": 4.998333422217482e-06, "loss": 0.4282383918762207, "num_tokens": 9153089563.0, "step": 74980 }, { "epoch": 0.09999520023038894, "grad_norm": 2.453125, "learning_rate": 4.999666684443497e-06, "loss": 0.46367411613464354, "num_tokens": 9155336426.0, "step": 75000 }, { "epoch": 0.10002186561711704, "grad_norm": 2.09375, "learning_rate": 4.999999945174063e-06, "loss": 0.45351400375366213, "num_tokens": 9157774832.0, "step": 75020 }, { "epoch": 0.10004853100384514, "grad_norm": 1.8828125, "learning_rate": 4.999999701503238e-06, "loss": 0.4521155834197998, "num_tokens": 9160167256.0, "step": 75040 }, { "epoch": 0.10007519639057325, "grad_norm": 1.5625, "learning_rate": 4.999999262895771e-06, "loss": 0.44585280418395995, "num_tokens": 9162470261.0, "step": 75060 }, { "epoch": 0.10010186177730136, "grad_norm": 1.515625, "learning_rate": 4.999998629351698e-06, "loss": 0.4534342288970947, "num_tokens": 9165027366.0, "step": 75080 }, { "epoch": 0.10012852716402947, "grad_norm": 1.953125, "learning_rate": 4.999997800871067e-06, "loss": 0.44684672355651855, "num_tokens": 9167423935.0, "step": 75100 }, { "epoch": 0.10015519255075757, "grad_norm": 1.71875, "learning_rate": 4.999996777453944e-06, "loss": 0.430431604385376, "num_tokens": 9169886697.0, "step": 75120 }, { "epoch": 0.10018185793748567, "grad_norm": 1.9765625, "learning_rate": 4.999995559100408e-06, "loss": 0.45201616287231444, "num_tokens": 9172355978.0, "step": 75140 }, { "epoch": 0.10020852332421377, "grad_norm": 1.9296875, "learning_rate": 4.999994145810554e-06, "loss": 0.44694061279296876, "num_tokens": 9174918217.0, "step": 75160 }, { "epoch": 0.10023518871094188, "grad_norm": 1.9375, "learning_rate": 4.9999925375844925e-06, "loss": 0.44635515213012694, "num_tokens": 9177423701.0, "step": 75180 }, { "epoch": 0.10026185409766998, "grad_norm": 2.34375, "learning_rate": 4.999990734422349e-06, "loss": 0.4433812141418457, "num_tokens": 9179614732.0, "step": 75200 }, { "epoch": 0.10028851948439808, "grad_norm": 1.859375, "learning_rate": 4.999988736324264e-06, "loss": 0.44518375396728516, "num_tokens": 9181797273.0, "step": 75220 }, { "epoch": 0.10031518487112619, "grad_norm": 2.0625, "learning_rate": 4.999986543290393e-06, "loss": 0.4376828193664551, "num_tokens": 9184111221.0, "step": 75240 }, { "epoch": 0.10034185025785429, "grad_norm": 2.265625, "learning_rate": 4.9999841553209075e-06, "loss": 0.4504563808441162, "num_tokens": 9186686970.0, "step": 75260 }, { "epoch": 0.10036851564458239, "grad_norm": 2.296875, "learning_rate": 4.999981572415993e-06, "loss": 0.4541130065917969, "num_tokens": 9189308299.0, "step": 75280 }, { "epoch": 0.1003951810313105, "grad_norm": 2.15625, "learning_rate": 4.9999787945758515e-06, "loss": 0.4371979236602783, "num_tokens": 9191880138.0, "step": 75300 }, { "epoch": 0.1004218464180386, "grad_norm": 1.7421875, "learning_rate": 4.9999758218007e-06, "loss": 0.43806257247924807, "num_tokens": 9194335493.0, "step": 75320 }, { "epoch": 0.1004485118047667, "grad_norm": 2.4375, "learning_rate": 4.999972654090769e-06, "loss": 0.4597421646118164, "num_tokens": 9196820153.0, "step": 75340 }, { "epoch": 0.1004751771914948, "grad_norm": 2.234375, "learning_rate": 4.999969291446306e-06, "loss": 0.4447322845458984, "num_tokens": 9199233685.0, "step": 75360 }, { "epoch": 0.10050184257822291, "grad_norm": 1.65625, "learning_rate": 4.999965733867574e-06, "loss": 0.46172122955322265, "num_tokens": 9201498897.0, "step": 75380 }, { "epoch": 0.10052850796495101, "grad_norm": 2.25, "learning_rate": 4.99996198135485e-06, "loss": 0.4531271934509277, "num_tokens": 9204094738.0, "step": 75400 }, { "epoch": 0.10055517335167911, "grad_norm": 2.15625, "learning_rate": 4.999958033908425e-06, "loss": 0.4540103912353516, "num_tokens": 9206422386.0, "step": 75420 }, { "epoch": 0.10058183873840722, "grad_norm": 2.15625, "learning_rate": 4.99995389152861e-06, "loss": 0.43105735778808596, "num_tokens": 9208839562.0, "step": 75440 }, { "epoch": 0.10060850412513532, "grad_norm": 1.8515625, "learning_rate": 4.999949554215725e-06, "loss": 0.4298699378967285, "num_tokens": 9211287069.0, "step": 75460 }, { "epoch": 0.10063516951186344, "grad_norm": 1.890625, "learning_rate": 4.99994502197011e-06, "loss": 0.45281643867492677, "num_tokens": 9213743534.0, "step": 75480 }, { "epoch": 0.10066183489859154, "grad_norm": 2.09375, "learning_rate": 4.999940294792118e-06, "loss": 0.43570585250854493, "num_tokens": 9216263644.0, "step": 75500 }, { "epoch": 0.10068850028531964, "grad_norm": 2.21875, "learning_rate": 4.999935372682117e-06, "loss": 0.4466911792755127, "num_tokens": 9218652722.0, "step": 75520 }, { "epoch": 0.10071516567204775, "grad_norm": 2.140625, "learning_rate": 4.999930255640491e-06, "loss": 0.43085498809814454, "num_tokens": 9221473572.0, "step": 75540 }, { "epoch": 0.10074183105877585, "grad_norm": 2.125, "learning_rate": 4.99992494366764e-06, "loss": 0.4444171905517578, "num_tokens": 9223906887.0, "step": 75560 }, { "epoch": 0.10076849644550395, "grad_norm": 1.8203125, "learning_rate": 4.999919436763977e-06, "loss": 0.4644841194152832, "num_tokens": 9226367490.0, "step": 75580 }, { "epoch": 0.10079516183223206, "grad_norm": 1.90625, "learning_rate": 4.9999137349299315e-06, "loss": 0.44019289016723634, "num_tokens": 9228934935.0, "step": 75600 }, { "epoch": 0.10082182721896016, "grad_norm": 2.59375, "learning_rate": 4.999907838165948e-06, "loss": 0.4414208889007568, "num_tokens": 9231353216.0, "step": 75620 }, { "epoch": 0.10084849260568826, "grad_norm": 2.375, "learning_rate": 4.999901746472488e-06, "loss": 0.44505014419555666, "num_tokens": 9233632658.0, "step": 75640 }, { "epoch": 0.10087515799241636, "grad_norm": 1.875, "learning_rate": 4.999895459850025e-06, "loss": 0.4425031661987305, "num_tokens": 9236021464.0, "step": 75660 }, { "epoch": 0.10090182337914447, "grad_norm": 2.234375, "learning_rate": 4.999888978299048e-06, "loss": 0.43900346755981445, "num_tokens": 9238402134.0, "step": 75680 }, { "epoch": 0.10092848876587257, "grad_norm": 1.78125, "learning_rate": 4.999882301820065e-06, "loss": 0.43866991996765137, "num_tokens": 9240976912.0, "step": 75700 }, { "epoch": 0.10095515415260067, "grad_norm": 1.875, "learning_rate": 4.999875430413595e-06, "loss": 0.4180473327636719, "num_tokens": 9243440459.0, "step": 75720 }, { "epoch": 0.10098181953932878, "grad_norm": 2.34375, "learning_rate": 4.999868364080175e-06, "loss": 0.42583913803100587, "num_tokens": 9246026895.0, "step": 75740 }, { "epoch": 0.10100848492605688, "grad_norm": 1.875, "learning_rate": 4.999861102820355e-06, "loss": 0.4519039154052734, "num_tokens": 9248384523.0, "step": 75760 }, { "epoch": 0.10103515031278498, "grad_norm": 1.84375, "learning_rate": 4.9998536466347e-06, "loss": 0.43792428970336916, "num_tokens": 9250667716.0, "step": 75780 }, { "epoch": 0.10106181569951309, "grad_norm": 2.65625, "learning_rate": 4.999845995523794e-06, "loss": 0.4627485752105713, "num_tokens": 9253162842.0, "step": 75800 }, { "epoch": 0.10108848108624119, "grad_norm": 2.109375, "learning_rate": 4.9998381494882325e-06, "loss": 0.4530280113220215, "num_tokens": 9255827707.0, "step": 75820 }, { "epoch": 0.10111514647296929, "grad_norm": 2.0625, "learning_rate": 4.999830108528627e-06, "loss": 0.44154868125915525, "num_tokens": 9258275751.0, "step": 75840 }, { "epoch": 0.1011418118596974, "grad_norm": 2.1875, "learning_rate": 4.999821872645604e-06, "loss": 0.43239831924438477, "num_tokens": 9260735962.0, "step": 75860 }, { "epoch": 0.10116847724642551, "grad_norm": 2.078125, "learning_rate": 4.999813441839807e-06, "loss": 0.4700496673583984, "num_tokens": 9263071357.0, "step": 75880 }, { "epoch": 0.10119514263315361, "grad_norm": 2.453125, "learning_rate": 4.999804816111893e-06, "loss": 0.4498763084411621, "num_tokens": 9265371638.0, "step": 75900 }, { "epoch": 0.10122180801988172, "grad_norm": 2.25, "learning_rate": 4.999795995462533e-06, "loss": 0.4341480255126953, "num_tokens": 9267862086.0, "step": 75920 }, { "epoch": 0.10124847340660982, "grad_norm": 2.125, "learning_rate": 4.999786979892417e-06, "loss": 0.43462982177734377, "num_tokens": 9270628163.0, "step": 75940 }, { "epoch": 0.10127513879333792, "grad_norm": 2.21875, "learning_rate": 4.999777769402248e-06, "loss": 0.4473854064941406, "num_tokens": 9273184959.0, "step": 75960 }, { "epoch": 0.10130180418006603, "grad_norm": 1.7109375, "learning_rate": 4.999768363992742e-06, "loss": 0.47499747276306153, "num_tokens": 9275496822.0, "step": 75980 }, { "epoch": 0.10132846956679413, "grad_norm": 2.125, "learning_rate": 4.999758763664634e-06, "loss": 0.44842185974121096, "num_tokens": 9278066287.0, "step": 76000 }, { "epoch": 0.10135513495352223, "grad_norm": 1.8984375, "learning_rate": 4.999748968418673e-06, "loss": 0.4306044578552246, "num_tokens": 9280689422.0, "step": 76020 }, { "epoch": 0.10138180034025034, "grad_norm": 2.4375, "learning_rate": 4.9997389782556205e-06, "loss": 0.45325298309326173, "num_tokens": 9283038936.0, "step": 76040 }, { "epoch": 0.10140846572697844, "grad_norm": 1.6484375, "learning_rate": 4.999728793176258e-06, "loss": 0.4443732738494873, "num_tokens": 9285331875.0, "step": 76060 }, { "epoch": 0.10143513111370654, "grad_norm": 2.28125, "learning_rate": 4.999718413181378e-06, "loss": 0.437407112121582, "num_tokens": 9287664775.0, "step": 76080 }, { "epoch": 0.10146179650043465, "grad_norm": 2.109375, "learning_rate": 4.999707838271791e-06, "loss": 0.4273425579071045, "num_tokens": 9290196353.0, "step": 76100 }, { "epoch": 0.10148846188716275, "grad_norm": 1.375, "learning_rate": 4.9996970684483215e-06, "loss": 0.4500901699066162, "num_tokens": 9292570852.0, "step": 76120 }, { "epoch": 0.10151512727389085, "grad_norm": 2.125, "learning_rate": 4.999686103711808e-06, "loss": 0.45130481719970705, "num_tokens": 9295095366.0, "step": 76140 }, { "epoch": 0.10154179266061895, "grad_norm": 2.15625, "learning_rate": 4.999674944063108e-06, "loss": 0.44304594993591306, "num_tokens": 9297519809.0, "step": 76160 }, { "epoch": 0.10156845804734706, "grad_norm": 1.875, "learning_rate": 4.999663589503089e-06, "loss": 0.4417325496673584, "num_tokens": 9299936916.0, "step": 76180 }, { "epoch": 0.10159512343407516, "grad_norm": 2.0625, "learning_rate": 4.999652040032637e-06, "loss": 0.4327810764312744, "num_tokens": 9302467953.0, "step": 76200 }, { "epoch": 0.10162178882080326, "grad_norm": 1.703125, "learning_rate": 4.999640295652654e-06, "loss": 0.43763341903686526, "num_tokens": 9304792094.0, "step": 76220 }, { "epoch": 0.10164845420753137, "grad_norm": 2.15625, "learning_rate": 4.999628356364053e-06, "loss": 0.4365379810333252, "num_tokens": 9307235099.0, "step": 76240 }, { "epoch": 0.10167511959425947, "grad_norm": 2.03125, "learning_rate": 4.999616222167769e-06, "loss": 0.44299702644348143, "num_tokens": 9309647484.0, "step": 76260 }, { "epoch": 0.10170178498098757, "grad_norm": 1.65625, "learning_rate": 4.999603893064745e-06, "loss": 0.44130425453186034, "num_tokens": 9312112739.0, "step": 76280 }, { "epoch": 0.10172845036771569, "grad_norm": 2.078125, "learning_rate": 4.999591369055943e-06, "loss": 0.44649453163146974, "num_tokens": 9314376165.0, "step": 76300 }, { "epoch": 0.10175511575444379, "grad_norm": 2.015625, "learning_rate": 4.9995786501423395e-06, "loss": 0.4280745506286621, "num_tokens": 9316942448.0, "step": 76320 }, { "epoch": 0.1017817811411719, "grad_norm": 2.296875, "learning_rate": 4.999565736324927e-06, "loss": 0.4373776912689209, "num_tokens": 9319628157.0, "step": 76340 }, { "epoch": 0.1018084465279, "grad_norm": 2.078125, "learning_rate": 4.999552627604711e-06, "loss": 0.4519608497619629, "num_tokens": 9321957721.0, "step": 76360 }, { "epoch": 0.1018351119146281, "grad_norm": 2.0625, "learning_rate": 4.999539323982716e-06, "loss": 0.45219712257385253, "num_tokens": 9324257881.0, "step": 76380 }, { "epoch": 0.1018617773013562, "grad_norm": 2.0625, "learning_rate": 4.999525825459978e-06, "loss": 0.4349200248718262, "num_tokens": 9326801318.0, "step": 76400 }, { "epoch": 0.10188844268808431, "grad_norm": 1.984375, "learning_rate": 4.999512132037549e-06, "loss": 0.44939723014831545, "num_tokens": 9329145341.0, "step": 76420 }, { "epoch": 0.10191510807481241, "grad_norm": 2.125, "learning_rate": 4.9994982437164965e-06, "loss": 0.43317742347717286, "num_tokens": 9331316956.0, "step": 76440 }, { "epoch": 0.10194177346154051, "grad_norm": 1.953125, "learning_rate": 4.999484160497905e-06, "loss": 0.44275951385498047, "num_tokens": 9333827429.0, "step": 76460 }, { "epoch": 0.10196843884826862, "grad_norm": 1.96875, "learning_rate": 4.9994698823828716e-06, "loss": 0.444139289855957, "num_tokens": 9336198009.0, "step": 76480 }, { "epoch": 0.10199510423499672, "grad_norm": 2.375, "learning_rate": 4.99945540937251e-06, "loss": 0.4248545169830322, "num_tokens": 9338656917.0, "step": 76500 }, { "epoch": 0.10202176962172482, "grad_norm": 2.109375, "learning_rate": 4.9994407414679495e-06, "loss": 0.448819637298584, "num_tokens": 9341027288.0, "step": 76520 }, { "epoch": 0.10204843500845293, "grad_norm": 2.125, "learning_rate": 4.999425878670332e-06, "loss": 0.4411346912384033, "num_tokens": 9343371898.0, "step": 76540 }, { "epoch": 0.10207510039518103, "grad_norm": 2.375, "learning_rate": 4.9994108209808165e-06, "loss": 0.44338836669921877, "num_tokens": 9345753344.0, "step": 76560 }, { "epoch": 0.10210176578190913, "grad_norm": 2.046875, "learning_rate": 4.999395568400579e-06, "loss": 0.4314098358154297, "num_tokens": 9348321945.0, "step": 76580 }, { "epoch": 0.10212843116863723, "grad_norm": 1.9296875, "learning_rate": 4.999380120930808e-06, "loss": 0.4433177947998047, "num_tokens": 9350981402.0, "step": 76600 }, { "epoch": 0.10215509655536534, "grad_norm": 2.625, "learning_rate": 4.999364478572708e-06, "loss": 0.43088488578796386, "num_tokens": 9353250432.0, "step": 76620 }, { "epoch": 0.10218176194209344, "grad_norm": 2.046875, "learning_rate": 4.999348641327497e-06, "loss": 0.44809746742248535, "num_tokens": 9355585938.0, "step": 76640 }, { "epoch": 0.10220842732882154, "grad_norm": 2.34375, "learning_rate": 4.999332609196413e-06, "loss": 0.4201189994812012, "num_tokens": 9358224505.0, "step": 76660 }, { "epoch": 0.10223509271554965, "grad_norm": 1.8515625, "learning_rate": 4.9993163821807035e-06, "loss": 0.44737548828125, "num_tokens": 9360768844.0, "step": 76680 }, { "epoch": 0.10226175810227776, "grad_norm": 2.375, "learning_rate": 4.999299960281635e-06, "loss": 0.4181852340698242, "num_tokens": 9363399337.0, "step": 76700 }, { "epoch": 0.10228842348900587, "grad_norm": 2.34375, "learning_rate": 4.999283343500488e-06, "loss": 0.4410722255706787, "num_tokens": 9365862827.0, "step": 76720 }, { "epoch": 0.10231508887573397, "grad_norm": 2.25, "learning_rate": 4.999266531838558e-06, "loss": 0.4413330078125, "num_tokens": 9368573251.0, "step": 76740 }, { "epoch": 0.10234175426246207, "grad_norm": 1.9296875, "learning_rate": 4.999249525297156e-06, "loss": 0.44298591613769533, "num_tokens": 9371050974.0, "step": 76760 }, { "epoch": 0.10236841964919018, "grad_norm": 1.96875, "learning_rate": 4.999232323877607e-06, "loss": 0.43608970642089845, "num_tokens": 9373400455.0, "step": 76780 }, { "epoch": 0.10239508503591828, "grad_norm": 1.84375, "learning_rate": 4.999214927581254e-06, "loss": 0.45105628967285155, "num_tokens": 9375762321.0, "step": 76800 }, { "epoch": 0.10242175042264638, "grad_norm": 2.15625, "learning_rate": 4.999197336409453e-06, "loss": 0.4380503177642822, "num_tokens": 9378412942.0, "step": 76820 }, { "epoch": 0.10244841580937449, "grad_norm": 2.125, "learning_rate": 4.999179550363575e-06, "loss": 0.42907190322875977, "num_tokens": 9380846341.0, "step": 76840 }, { "epoch": 0.10247508119610259, "grad_norm": 1.953125, "learning_rate": 4.9991615694450076e-06, "loss": 0.43144826889038085, "num_tokens": 9383660447.0, "step": 76860 }, { "epoch": 0.10250174658283069, "grad_norm": 2.3125, "learning_rate": 4.9991433936551515e-06, "loss": 0.4410713195800781, "num_tokens": 9386006748.0, "step": 76880 }, { "epoch": 0.1025284119695588, "grad_norm": 1.75, "learning_rate": 4.999125022995426e-06, "loss": 0.4400278091430664, "num_tokens": 9388437026.0, "step": 76900 }, { "epoch": 0.1025550773562869, "grad_norm": 2.171875, "learning_rate": 4.999106457467262e-06, "loss": 0.44514169692993166, "num_tokens": 9391071397.0, "step": 76920 }, { "epoch": 0.102581742743015, "grad_norm": 2.25, "learning_rate": 4.999087697072107e-06, "loss": 0.44701123237609863, "num_tokens": 9393410247.0, "step": 76940 }, { "epoch": 0.1026084081297431, "grad_norm": 1.953125, "learning_rate": 4.999068741811425e-06, "loss": 0.43406200408935547, "num_tokens": 9395868242.0, "step": 76960 }, { "epoch": 0.1026350735164712, "grad_norm": 2.125, "learning_rate": 4.999049591686693e-06, "loss": 0.43551025390625, "num_tokens": 9398213280.0, "step": 76980 }, { "epoch": 0.10266173890319931, "grad_norm": 1.8125, "learning_rate": 4.999030246699406e-06, "loss": 0.4282036781311035, "num_tokens": 9400756744.0, "step": 77000 }, { "epoch": 0.10268840428992741, "grad_norm": 1.765625, "learning_rate": 4.9990107068510695e-06, "loss": 0.43900766372680666, "num_tokens": 9403382154.0, "step": 77020 }, { "epoch": 0.10271506967665552, "grad_norm": 1.953125, "learning_rate": 4.998990972143209e-06, "loss": 0.40858922004699705, "num_tokens": 9406021372.0, "step": 77040 }, { "epoch": 0.10274173506338362, "grad_norm": 1.625, "learning_rate": 4.9989710425773644e-06, "loss": 0.42800226211547854, "num_tokens": 9408320023.0, "step": 77060 }, { "epoch": 0.10276840045011172, "grad_norm": 2.46875, "learning_rate": 4.9989509181550875e-06, "loss": 0.4452977180480957, "num_tokens": 9410941500.0, "step": 77080 }, { "epoch": 0.10279506583683984, "grad_norm": 2.03125, "learning_rate": 4.998930598877948e-06, "loss": 0.4389211177825928, "num_tokens": 9413470627.0, "step": 77100 }, { "epoch": 0.10282173122356794, "grad_norm": 1.7265625, "learning_rate": 4.998910084747531e-06, "loss": 0.42699851989746096, "num_tokens": 9415830036.0, "step": 77120 }, { "epoch": 0.10284839661029604, "grad_norm": 2.328125, "learning_rate": 4.9988893757654355e-06, "loss": 0.4244386196136475, "num_tokens": 9418321198.0, "step": 77140 }, { "epoch": 0.10287506199702415, "grad_norm": 2.203125, "learning_rate": 4.998868471933277e-06, "loss": 0.43913469314575193, "num_tokens": 9420772316.0, "step": 77160 }, { "epoch": 0.10290172738375225, "grad_norm": 2.171875, "learning_rate": 4.9988473732526844e-06, "loss": 0.4412506103515625, "num_tokens": 9423178389.0, "step": 77180 }, { "epoch": 0.10292839277048035, "grad_norm": 2.21875, "learning_rate": 4.998826079725303e-06, "loss": 0.4313943862915039, "num_tokens": 9425658485.0, "step": 77200 }, { "epoch": 0.10295505815720846, "grad_norm": 1.96875, "learning_rate": 4.998804591352794e-06, "loss": 0.4426749229431152, "num_tokens": 9428112084.0, "step": 77220 }, { "epoch": 0.10298172354393656, "grad_norm": 2.125, "learning_rate": 4.998782908136834e-06, "loss": 0.43147764205932615, "num_tokens": 9430726003.0, "step": 77240 }, { "epoch": 0.10300838893066466, "grad_norm": 2.171875, "learning_rate": 4.99876103007911e-06, "loss": 0.4323246955871582, "num_tokens": 9433466717.0, "step": 77260 }, { "epoch": 0.10303505431739277, "grad_norm": 1.9140625, "learning_rate": 4.998738957181331e-06, "loss": 0.44600276947021483, "num_tokens": 9436045257.0, "step": 77280 }, { "epoch": 0.10306171970412087, "grad_norm": 2.15625, "learning_rate": 4.9987166894452174e-06, "loss": 0.43157329559326174, "num_tokens": 9438429432.0, "step": 77300 }, { "epoch": 0.10308838509084897, "grad_norm": 2.359375, "learning_rate": 4.998694226872505e-06, "loss": 0.4521948337554932, "num_tokens": 9440501309.0, "step": 77320 }, { "epoch": 0.10311505047757707, "grad_norm": 2.015625, "learning_rate": 4.998671569464947e-06, "loss": 0.44008169174194334, "num_tokens": 9442867972.0, "step": 77340 }, { "epoch": 0.10314171586430518, "grad_norm": 2.125, "learning_rate": 4.998648717224307e-06, "loss": 0.43975143432617186, "num_tokens": 9445331284.0, "step": 77360 }, { "epoch": 0.10316838125103328, "grad_norm": 2.140625, "learning_rate": 4.99862567015237e-06, "loss": 0.4444732666015625, "num_tokens": 9447642550.0, "step": 77380 }, { "epoch": 0.10319504663776138, "grad_norm": 1.71875, "learning_rate": 4.998602428250932e-06, "loss": 0.44199790954589846, "num_tokens": 9450045021.0, "step": 77400 }, { "epoch": 0.10322171202448949, "grad_norm": 1.953125, "learning_rate": 4.998578991521805e-06, "loss": 0.435956335067749, "num_tokens": 9452493050.0, "step": 77420 }, { "epoch": 0.10324837741121759, "grad_norm": 1.796875, "learning_rate": 4.9985553599668155e-06, "loss": 0.4275201797485352, "num_tokens": 9454880315.0, "step": 77440 }, { "epoch": 0.1032750427979457, "grad_norm": 2.15625, "learning_rate": 4.998531533587808e-06, "loss": 0.4299768924713135, "num_tokens": 9457223044.0, "step": 77460 }, { "epoch": 0.1033017081846738, "grad_norm": 2.0625, "learning_rate": 4.99850751238664e-06, "loss": 0.4392963409423828, "num_tokens": 9459686285.0, "step": 77480 }, { "epoch": 0.1033283735714019, "grad_norm": 1.609375, "learning_rate": 4.998483296365184e-06, "loss": 0.444685697555542, "num_tokens": 9462161495.0, "step": 77500 }, { "epoch": 0.10335503895813002, "grad_norm": 2.1875, "learning_rate": 4.998458885525328e-06, "loss": 0.43706398010253905, "num_tokens": 9464775556.0, "step": 77520 }, { "epoch": 0.10338170434485812, "grad_norm": 2.15625, "learning_rate": 4.998434279868976e-06, "loss": 0.445654296875, "num_tokens": 9467319074.0, "step": 77540 }, { "epoch": 0.10340836973158622, "grad_norm": 1.75, "learning_rate": 4.998409479398047e-06, "loss": 0.4420157909393311, "num_tokens": 9470056347.0, "step": 77560 }, { "epoch": 0.10343503511831433, "grad_norm": 2.078125, "learning_rate": 4.9983844841144734e-06, "loss": 0.4364800930023193, "num_tokens": 9472500700.0, "step": 77580 }, { "epoch": 0.10346170050504243, "grad_norm": 1.890625, "learning_rate": 4.998359294020205e-06, "loss": 0.42749814987182616, "num_tokens": 9474878791.0, "step": 77600 }, { "epoch": 0.10348836589177053, "grad_norm": 1.5, "learning_rate": 4.998333909117207e-06, "loss": 0.43386154174804686, "num_tokens": 9477452513.0, "step": 77620 }, { "epoch": 0.10351503127849863, "grad_norm": 2.015625, "learning_rate": 4.998308329407456e-06, "loss": 0.4482755661010742, "num_tokens": 9479740788.0, "step": 77640 }, { "epoch": 0.10354169666522674, "grad_norm": 2.203125, "learning_rate": 4.998282554892951e-06, "loss": 0.4467187404632568, "num_tokens": 9482067563.0, "step": 77660 }, { "epoch": 0.10356836205195484, "grad_norm": 1.7421875, "learning_rate": 4.998256585575697e-06, "loss": 0.43619441986083984, "num_tokens": 9484410272.0, "step": 77680 }, { "epoch": 0.10359502743868294, "grad_norm": 2.390625, "learning_rate": 4.998230421457721e-06, "loss": 0.4457918643951416, "num_tokens": 9486818893.0, "step": 77700 }, { "epoch": 0.10362169282541105, "grad_norm": 1.859375, "learning_rate": 4.998204062541065e-06, "loss": 0.4277618408203125, "num_tokens": 9489145036.0, "step": 77720 }, { "epoch": 0.10364835821213915, "grad_norm": 1.9296875, "learning_rate": 4.998177508827781e-06, "loss": 0.43373804092407225, "num_tokens": 9491561623.0, "step": 77740 }, { "epoch": 0.10367502359886725, "grad_norm": 2.375, "learning_rate": 4.998150760319941e-06, "loss": 0.43139057159423827, "num_tokens": 9493859633.0, "step": 77760 }, { "epoch": 0.10370168898559536, "grad_norm": 2.34375, "learning_rate": 4.998123817019632e-06, "loss": 0.4252308368682861, "num_tokens": 9496396046.0, "step": 77780 }, { "epoch": 0.10372835437232346, "grad_norm": 1.671875, "learning_rate": 4.9980966789289525e-06, "loss": 0.4360051155090332, "num_tokens": 9498680090.0, "step": 77800 }, { "epoch": 0.10375501975905156, "grad_norm": 1.890625, "learning_rate": 4.998069346050021e-06, "loss": 0.45298404693603517, "num_tokens": 9501387826.0, "step": 77820 }, { "epoch": 0.10378168514577966, "grad_norm": 1.6953125, "learning_rate": 4.9980418183849664e-06, "loss": 0.4145974636077881, "num_tokens": 9503879481.0, "step": 77840 }, { "epoch": 0.10380835053250777, "grad_norm": 1.703125, "learning_rate": 4.998014095935936e-06, "loss": 0.4558383941650391, "num_tokens": 9506285723.0, "step": 77860 }, { "epoch": 0.10383501591923587, "grad_norm": 1.5859375, "learning_rate": 4.997986178705093e-06, "loss": 0.43911151885986327, "num_tokens": 9508983872.0, "step": 77880 }, { "epoch": 0.10386168130596397, "grad_norm": 1.859375, "learning_rate": 4.997958066694612e-06, "loss": 0.43233547210693357, "num_tokens": 9511447412.0, "step": 77900 }, { "epoch": 0.10388834669269209, "grad_norm": 2.21875, "learning_rate": 4.997929759906687e-06, "loss": 0.4306196689605713, "num_tokens": 9513764508.0, "step": 77920 }, { "epoch": 0.1039150120794202, "grad_norm": 1.9765625, "learning_rate": 4.997901258343523e-06, "loss": 0.4391219139099121, "num_tokens": 9516156233.0, "step": 77940 }, { "epoch": 0.1039416774661483, "grad_norm": 1.8828125, "learning_rate": 4.997872562007344e-06, "loss": 0.4347228050231934, "num_tokens": 9518488138.0, "step": 77960 }, { "epoch": 0.1039683428528764, "grad_norm": 1.609375, "learning_rate": 4.997843670900388e-06, "loss": 0.44086384773254395, "num_tokens": 9521065619.0, "step": 77980 }, { "epoch": 0.1039950082396045, "grad_norm": 2.4375, "learning_rate": 4.997814585024906e-06, "loss": 0.44179954528808596, "num_tokens": 9523356842.0, "step": 78000 }, { "epoch": 0.1040216736263326, "grad_norm": 2.625, "learning_rate": 4.997785304383167e-06, "loss": 0.4405229568481445, "num_tokens": 9525856730.0, "step": 78020 }, { "epoch": 0.10404833901306071, "grad_norm": 2.140625, "learning_rate": 4.997755828977454e-06, "loss": 0.4429435729980469, "num_tokens": 9528198528.0, "step": 78040 }, { "epoch": 0.10407500439978881, "grad_norm": 2.25, "learning_rate": 4.997726158810066e-06, "loss": 0.4618067264556885, "num_tokens": 9530629219.0, "step": 78060 }, { "epoch": 0.10410166978651691, "grad_norm": 2.140625, "learning_rate": 4.997696293883315e-06, "loss": 0.43023028373718264, "num_tokens": 9533099012.0, "step": 78080 }, { "epoch": 0.10412833517324502, "grad_norm": 1.8671875, "learning_rate": 4.997666234199532e-06, "loss": 0.42745275497436525, "num_tokens": 9535338025.0, "step": 78100 }, { "epoch": 0.10415500055997312, "grad_norm": 2.0, "learning_rate": 4.997635979761059e-06, "loss": 0.4332015037536621, "num_tokens": 9537871357.0, "step": 78120 }, { "epoch": 0.10418166594670122, "grad_norm": 1.796875, "learning_rate": 4.997605530570255e-06, "loss": 0.42841362953186035, "num_tokens": 9540294892.0, "step": 78140 }, { "epoch": 0.10420833133342933, "grad_norm": 2.171875, "learning_rate": 4.9975748866294945e-06, "loss": 0.4336076259613037, "num_tokens": 9542842415.0, "step": 78160 }, { "epoch": 0.10423499672015743, "grad_norm": 1.78125, "learning_rate": 4.997544047941168e-06, "loss": 0.43490076065063477, "num_tokens": 9545369677.0, "step": 78180 }, { "epoch": 0.10426166210688553, "grad_norm": 2.296875, "learning_rate": 4.99751301450768e-06, "loss": 0.430598783493042, "num_tokens": 9547875417.0, "step": 78200 }, { "epoch": 0.10428832749361364, "grad_norm": 1.375, "learning_rate": 4.99748178633145e-06, "loss": 0.43195810317993166, "num_tokens": 9550401191.0, "step": 78220 }, { "epoch": 0.10431499288034174, "grad_norm": 1.578125, "learning_rate": 4.997450363414912e-06, "loss": 0.4140301704406738, "num_tokens": 9552921186.0, "step": 78240 }, { "epoch": 0.10434165826706984, "grad_norm": 2.71875, "learning_rate": 4.997418745760517e-06, "loss": 0.45087194442749023, "num_tokens": 9555260843.0, "step": 78260 }, { "epoch": 0.10436832365379795, "grad_norm": 2.109375, "learning_rate": 4.99738693337073e-06, "loss": 0.44694318771362307, "num_tokens": 9557910287.0, "step": 78280 }, { "epoch": 0.10439498904052605, "grad_norm": 2.171875, "learning_rate": 4.997354926248033e-06, "loss": 0.4407051563262939, "num_tokens": 9560359451.0, "step": 78300 }, { "epoch": 0.10442165442725417, "grad_norm": 1.6796875, "learning_rate": 4.99732272439492e-06, "loss": 0.43947858810424806, "num_tokens": 9562945323.0, "step": 78320 }, { "epoch": 0.10444831981398227, "grad_norm": 1.84375, "learning_rate": 4.997290327813903e-06, "loss": 0.42840213775634767, "num_tokens": 9565129738.0, "step": 78340 }, { "epoch": 0.10447498520071037, "grad_norm": 1.9375, "learning_rate": 4.997257736507507e-06, "loss": 0.4520416259765625, "num_tokens": 9567447593.0, "step": 78360 }, { "epoch": 0.10450165058743847, "grad_norm": 2.078125, "learning_rate": 4.997224950478274e-06, "loss": 0.4405784606933594, "num_tokens": 9569976411.0, "step": 78380 }, { "epoch": 0.10452831597416658, "grad_norm": 1.6796875, "learning_rate": 4.997191969728762e-06, "loss": 0.44123039245605467, "num_tokens": 9572375842.0, "step": 78400 }, { "epoch": 0.10455498136089468, "grad_norm": 2.171875, "learning_rate": 4.99715879426154e-06, "loss": 0.43821134567260744, "num_tokens": 9574698071.0, "step": 78420 }, { "epoch": 0.10458164674762278, "grad_norm": 1.859375, "learning_rate": 4.997125424079196e-06, "loss": 0.4480457305908203, "num_tokens": 9576926563.0, "step": 78440 }, { "epoch": 0.10460831213435089, "grad_norm": 1.734375, "learning_rate": 4.997091859184332e-06, "loss": 0.43538479804992675, "num_tokens": 9579312703.0, "step": 78460 }, { "epoch": 0.10463497752107899, "grad_norm": 1.8515625, "learning_rate": 4.997058099579565e-06, "loss": 0.4230968952178955, "num_tokens": 9581781713.0, "step": 78480 }, { "epoch": 0.10466164290780709, "grad_norm": 2.078125, "learning_rate": 4.997024145267528e-06, "loss": 0.42496376037597655, "num_tokens": 9584448451.0, "step": 78500 }, { "epoch": 0.1046883082945352, "grad_norm": 2.234375, "learning_rate": 4.996989996250869e-06, "loss": 0.43116559982299807, "num_tokens": 9587167468.0, "step": 78520 }, { "epoch": 0.1047149736812633, "grad_norm": 2.0625, "learning_rate": 4.996955652532249e-06, "loss": 0.43377127647399905, "num_tokens": 9589537245.0, "step": 78540 }, { "epoch": 0.1047416390679914, "grad_norm": 1.890625, "learning_rate": 4.996921114114346e-06, "loss": 0.44220705032348634, "num_tokens": 9592033148.0, "step": 78560 }, { "epoch": 0.1047683044547195, "grad_norm": 1.828125, "learning_rate": 4.996886380999855e-06, "loss": 0.44888129234313967, "num_tokens": 9594665190.0, "step": 78580 }, { "epoch": 0.10479496984144761, "grad_norm": 2.09375, "learning_rate": 4.996851453191483e-06, "loss": 0.4413937568664551, "num_tokens": 9597290812.0, "step": 78600 }, { "epoch": 0.10482163522817571, "grad_norm": 2.140625, "learning_rate": 4.996816330691955e-06, "loss": 0.439070463180542, "num_tokens": 9599965445.0, "step": 78620 }, { "epoch": 0.10484830061490381, "grad_norm": 1.7734375, "learning_rate": 4.996781013504007e-06, "loss": 0.4324623107910156, "num_tokens": 9602710014.0, "step": 78640 }, { "epoch": 0.10487496600163192, "grad_norm": 1.7890625, "learning_rate": 4.9967455016303955e-06, "loss": 0.4610661506652832, "num_tokens": 9605232991.0, "step": 78660 }, { "epoch": 0.10490163138836002, "grad_norm": 2.53125, "learning_rate": 4.9967097950738875e-06, "loss": 0.43199634552001953, "num_tokens": 9607503592.0, "step": 78680 }, { "epoch": 0.10492829677508812, "grad_norm": 1.7265625, "learning_rate": 4.996673893837268e-06, "loss": 0.4306187152862549, "num_tokens": 9609885502.0, "step": 78700 }, { "epoch": 0.10495496216181623, "grad_norm": 1.78125, "learning_rate": 4.9966377979233374e-06, "loss": 0.436671781539917, "num_tokens": 9612344747.0, "step": 78720 }, { "epoch": 0.10498162754854434, "grad_norm": 2.015625, "learning_rate": 4.996601507334909e-06, "loss": 0.4363370895385742, "num_tokens": 9614997547.0, "step": 78740 }, { "epoch": 0.10500829293527245, "grad_norm": 1.765625, "learning_rate": 4.996565022074812e-06, "loss": 0.43189706802368166, "num_tokens": 9617699279.0, "step": 78760 }, { "epoch": 0.10503495832200055, "grad_norm": 2.453125, "learning_rate": 4.996528342145893e-06, "loss": 0.4302024841308594, "num_tokens": 9620229756.0, "step": 78780 }, { "epoch": 0.10506162370872865, "grad_norm": 1.984375, "learning_rate": 4.996491467551011e-06, "loss": 0.42591047286987305, "num_tokens": 9622843716.0, "step": 78800 }, { "epoch": 0.10508828909545676, "grad_norm": 1.703125, "learning_rate": 4.9964543982930415e-06, "loss": 0.43080930709838866, "num_tokens": 9625090232.0, "step": 78820 }, { "epoch": 0.10511495448218486, "grad_norm": 2.671875, "learning_rate": 4.996417134374875e-06, "loss": 0.4323223114013672, "num_tokens": 9627460230.0, "step": 78840 }, { "epoch": 0.10514161986891296, "grad_norm": 1.9140625, "learning_rate": 4.996379675799417e-06, "loss": 0.4242420196533203, "num_tokens": 9629964527.0, "step": 78860 }, { "epoch": 0.10516828525564106, "grad_norm": 2.328125, "learning_rate": 4.996342022569589e-06, "loss": 0.42591233253479005, "num_tokens": 9632395985.0, "step": 78880 }, { "epoch": 0.10519495064236917, "grad_norm": 2.734375, "learning_rate": 4.996304174688326e-06, "loss": 0.4178632736206055, "num_tokens": 9634998789.0, "step": 78900 }, { "epoch": 0.10522161602909727, "grad_norm": 2.5, "learning_rate": 4.99626613215858e-06, "loss": 0.4426276206970215, "num_tokens": 9637464388.0, "step": 78920 }, { "epoch": 0.10524828141582537, "grad_norm": 1.9375, "learning_rate": 4.9962278949833175e-06, "loss": 0.44005470275878905, "num_tokens": 9639892959.0, "step": 78940 }, { "epoch": 0.10527494680255348, "grad_norm": 1.875, "learning_rate": 4.996189463165518e-06, "loss": 0.4433740139007568, "num_tokens": 9642162115.0, "step": 78960 }, { "epoch": 0.10530161218928158, "grad_norm": 2.6875, "learning_rate": 4.996150836708181e-06, "loss": 0.4343711853027344, "num_tokens": 9644572803.0, "step": 78980 }, { "epoch": 0.10532827757600968, "grad_norm": 1.984375, "learning_rate": 4.996112015614317e-06, "loss": 0.43445544242858886, "num_tokens": 9647062059.0, "step": 79000 }, { "epoch": 0.10535494296273779, "grad_norm": 1.84375, "learning_rate": 4.996072999886953e-06, "loss": 0.4218132019042969, "num_tokens": 9649423442.0, "step": 79020 }, { "epoch": 0.10538160834946589, "grad_norm": 1.796875, "learning_rate": 4.996033789529132e-06, "loss": 0.4431025505065918, "num_tokens": 9651945734.0, "step": 79040 }, { "epoch": 0.10540827373619399, "grad_norm": 2.09375, "learning_rate": 4.99599438454391e-06, "loss": 0.4415114402770996, "num_tokens": 9654322256.0, "step": 79060 }, { "epoch": 0.1054349391229221, "grad_norm": 1.984375, "learning_rate": 4.995954784934362e-06, "loss": 0.42457165718078616, "num_tokens": 9656762045.0, "step": 79080 }, { "epoch": 0.1054616045096502, "grad_norm": 2.59375, "learning_rate": 4.995914990703573e-06, "loss": 0.43890132904052737, "num_tokens": 9659216285.0, "step": 79100 }, { "epoch": 0.1054882698963783, "grad_norm": 1.9921875, "learning_rate": 4.995875001854646e-06, "loss": 0.4459707260131836, "num_tokens": 9661542492.0, "step": 79120 }, { "epoch": 0.10551493528310642, "grad_norm": 2.359375, "learning_rate": 4.995834818390702e-06, "loss": 0.4165870189666748, "num_tokens": 9664094261.0, "step": 79140 }, { "epoch": 0.10554160066983452, "grad_norm": 1.8671875, "learning_rate": 4.9957944403148725e-06, "loss": 0.43532733917236327, "num_tokens": 9666492680.0, "step": 79160 }, { "epoch": 0.10556826605656262, "grad_norm": 2.1875, "learning_rate": 4.995753867630305e-06, "loss": 0.4180733680725098, "num_tokens": 9669235526.0, "step": 79180 }, { "epoch": 0.10559493144329073, "grad_norm": 1.96875, "learning_rate": 4.995713100340166e-06, "loss": 0.42737603187561035, "num_tokens": 9671667198.0, "step": 79200 }, { "epoch": 0.10562159683001883, "grad_norm": 1.6875, "learning_rate": 4.9956721384476305e-06, "loss": 0.4316230773925781, "num_tokens": 9674166039.0, "step": 79220 }, { "epoch": 0.10564826221674693, "grad_norm": 1.9453125, "learning_rate": 4.995630981955896e-06, "loss": 0.4190321445465088, "num_tokens": 9676825953.0, "step": 79240 }, { "epoch": 0.10567492760347504, "grad_norm": 2.125, "learning_rate": 4.995589630868169e-06, "loss": 0.4462893962860107, "num_tokens": 9679396702.0, "step": 79260 }, { "epoch": 0.10570159299020314, "grad_norm": 2.046875, "learning_rate": 4.995548085187675e-06, "loss": 0.425870418548584, "num_tokens": 9681924768.0, "step": 79280 }, { "epoch": 0.10572825837693124, "grad_norm": 1.890625, "learning_rate": 4.995506344917654e-06, "loss": 0.4337456703186035, "num_tokens": 9684408254.0, "step": 79300 }, { "epoch": 0.10575492376365934, "grad_norm": 2.1875, "learning_rate": 4.99546441006136e-06, "loss": 0.44940910339355467, "num_tokens": 9686715627.0, "step": 79320 }, { "epoch": 0.10578158915038745, "grad_norm": 2.625, "learning_rate": 4.9954222806220635e-06, "loss": 0.4224327564239502, "num_tokens": 9689051833.0, "step": 79340 }, { "epoch": 0.10580825453711555, "grad_norm": 1.7890625, "learning_rate": 4.995379956603049e-06, "loss": 0.43487372398376467, "num_tokens": 9691586029.0, "step": 79360 }, { "epoch": 0.10583491992384365, "grad_norm": 2.125, "learning_rate": 4.995337438007616e-06, "loss": 0.425124454498291, "num_tokens": 9694008609.0, "step": 79380 }, { "epoch": 0.10586158531057176, "grad_norm": 2.375, "learning_rate": 4.995294724839081e-06, "loss": 0.43076462745666505, "num_tokens": 9696715347.0, "step": 79400 }, { "epoch": 0.10588825069729986, "grad_norm": 1.96875, "learning_rate": 4.995251817100774e-06, "loss": 0.43151345252990725, "num_tokens": 9699050601.0, "step": 79420 }, { "epoch": 0.10591491608402796, "grad_norm": 2.0625, "learning_rate": 4.995208714796042e-06, "loss": 0.42684144973754884, "num_tokens": 9701470843.0, "step": 79440 }, { "epoch": 0.10594158147075607, "grad_norm": 1.8671875, "learning_rate": 4.995165417928244e-06, "loss": 0.43745875358581543, "num_tokens": 9704172061.0, "step": 79460 }, { "epoch": 0.10596824685748417, "grad_norm": 2.109375, "learning_rate": 4.9951219265007565e-06, "loss": 0.4432551383972168, "num_tokens": 9706629317.0, "step": 79480 }, { "epoch": 0.10599491224421227, "grad_norm": 1.6796875, "learning_rate": 4.995078240516971e-06, "loss": 0.43706235885620115, "num_tokens": 9709007851.0, "step": 79500 }, { "epoch": 0.10602157763094038, "grad_norm": 2.4375, "learning_rate": 4.995034359980294e-06, "loss": 0.43030462265014646, "num_tokens": 9711344148.0, "step": 79520 }, { "epoch": 0.10604824301766849, "grad_norm": 1.8515625, "learning_rate": 4.994990284894148e-06, "loss": 0.4324657440185547, "num_tokens": 9713566763.0, "step": 79540 }, { "epoch": 0.1060749084043966, "grad_norm": 1.84375, "learning_rate": 4.994946015261969e-06, "loss": 0.43410654067993165, "num_tokens": 9715988428.0, "step": 79560 }, { "epoch": 0.1061015737911247, "grad_norm": 2.40625, "learning_rate": 4.994901551087207e-06, "loss": 0.4469090461730957, "num_tokens": 9718641755.0, "step": 79580 }, { "epoch": 0.1061282391778528, "grad_norm": 2.203125, "learning_rate": 4.9948568923733325e-06, "loss": 0.43288140296936034, "num_tokens": 9721076466.0, "step": 79600 }, { "epoch": 0.1061549045645809, "grad_norm": 2.515625, "learning_rate": 4.9948120391238245e-06, "loss": 0.44042177200317384, "num_tokens": 9723406319.0, "step": 79620 }, { "epoch": 0.10618156995130901, "grad_norm": 1.8359375, "learning_rate": 4.994766991342183e-06, "loss": 0.43449997901916504, "num_tokens": 9726020281.0, "step": 79640 }, { "epoch": 0.10620823533803711, "grad_norm": 2.328125, "learning_rate": 4.9947217490319186e-06, "loss": 0.42908663749694825, "num_tokens": 9728599799.0, "step": 79660 }, { "epoch": 0.10623490072476521, "grad_norm": 2.265625, "learning_rate": 4.99467631219656e-06, "loss": 0.4310178756713867, "num_tokens": 9730768921.0, "step": 79680 }, { "epoch": 0.10626156611149332, "grad_norm": 2.171875, "learning_rate": 4.994630680839652e-06, "loss": 0.4317909240722656, "num_tokens": 9733125559.0, "step": 79700 }, { "epoch": 0.10628823149822142, "grad_norm": 1.96875, "learning_rate": 4.994584854964748e-06, "loss": 0.4371167182922363, "num_tokens": 9735602813.0, "step": 79720 }, { "epoch": 0.10631489688494952, "grad_norm": 2.234375, "learning_rate": 4.994538834575426e-06, "loss": 0.4466825485229492, "num_tokens": 9737984572.0, "step": 79740 }, { "epoch": 0.10634156227167763, "grad_norm": 1.578125, "learning_rate": 4.994492619675272e-06, "loss": 0.4272343635559082, "num_tokens": 9740434465.0, "step": 79760 }, { "epoch": 0.10636822765840573, "grad_norm": 2.015625, "learning_rate": 4.99444621026789e-06, "loss": 0.4294009208679199, "num_tokens": 9743180238.0, "step": 79780 }, { "epoch": 0.10639489304513383, "grad_norm": 2.453125, "learning_rate": 4.994399606356899e-06, "loss": 0.4385566234588623, "num_tokens": 9745644354.0, "step": 79800 }, { "epoch": 0.10642155843186193, "grad_norm": 2.171875, "learning_rate": 4.994352807945932e-06, "loss": 0.4288773059844971, "num_tokens": 9748097518.0, "step": 79820 }, { "epoch": 0.10644822381859004, "grad_norm": 2.03125, "learning_rate": 4.99430581503864e-06, "loss": 0.4498795986175537, "num_tokens": 9750616014.0, "step": 79840 }, { "epoch": 0.10647488920531814, "grad_norm": 2.1875, "learning_rate": 4.9942586276386854e-06, "loss": 0.4399550437927246, "num_tokens": 9753346499.0, "step": 79860 }, { "epoch": 0.10650155459204624, "grad_norm": 2.375, "learning_rate": 4.994211245749748e-06, "loss": 0.438754940032959, "num_tokens": 9755802696.0, "step": 79880 }, { "epoch": 0.10652821997877435, "grad_norm": 1.765625, "learning_rate": 4.994163669375523e-06, "loss": 0.4484394073486328, "num_tokens": 9758288903.0, "step": 79900 }, { "epoch": 0.10655488536550245, "grad_norm": 2.28125, "learning_rate": 4.99411589851972e-06, "loss": 0.41542558670043944, "num_tokens": 9760675791.0, "step": 79920 }, { "epoch": 0.10658155075223055, "grad_norm": 1.609375, "learning_rate": 4.994067933186064e-06, "loss": 0.4295989990234375, "num_tokens": 9763192548.0, "step": 79940 }, { "epoch": 0.10660821613895867, "grad_norm": 1.828125, "learning_rate": 4.994019773378295e-06, "loss": 0.4498439311981201, "num_tokens": 9765530759.0, "step": 79960 }, { "epoch": 0.10663488152568677, "grad_norm": 1.90625, "learning_rate": 4.993971419100167e-06, "loss": 0.4295699119567871, "num_tokens": 9767700412.0, "step": 79980 }, { "epoch": 0.10666154691241488, "grad_norm": 2.1875, "learning_rate": 4.993922870355453e-06, "loss": 0.42812376022338866, "num_tokens": 9770050844.0, "step": 80000 }, { "epoch": 0.10668821229914298, "grad_norm": 2.375, "learning_rate": 4.9938741271479364e-06, "loss": 0.428402042388916, "num_tokens": 9772360376.0, "step": 80020 }, { "epoch": 0.10671487768587108, "grad_norm": 1.84375, "learning_rate": 4.993825189481418e-06, "loss": 0.4468088626861572, "num_tokens": 9774693926.0, "step": 80040 }, { "epoch": 0.10674154307259918, "grad_norm": 1.8828125, "learning_rate": 4.993776057359715e-06, "loss": 0.4334266185760498, "num_tokens": 9777227351.0, "step": 80060 }, { "epoch": 0.10676820845932729, "grad_norm": 1.90625, "learning_rate": 4.993726730786658e-06, "loss": 0.4216335296630859, "num_tokens": 9779490174.0, "step": 80080 }, { "epoch": 0.10679487384605539, "grad_norm": 1.59375, "learning_rate": 4.993677209766092e-06, "loss": 0.4287853717803955, "num_tokens": 9782009592.0, "step": 80100 }, { "epoch": 0.1068215392327835, "grad_norm": 2.125, "learning_rate": 4.99362749430188e-06, "loss": 0.43221349716186525, "num_tokens": 9784599142.0, "step": 80120 }, { "epoch": 0.1068482046195116, "grad_norm": 2.09375, "learning_rate": 4.993577584397898e-06, "loss": 0.43059144020080564, "num_tokens": 9787085797.0, "step": 80140 }, { "epoch": 0.1068748700062397, "grad_norm": 1.953125, "learning_rate": 4.993527480058037e-06, "loss": 0.4456002712249756, "num_tokens": 9789659097.0, "step": 80160 }, { "epoch": 0.1069015353929678, "grad_norm": 1.9765625, "learning_rate": 4.9934771812862055e-06, "loss": 0.43592352867126466, "num_tokens": 9792184574.0, "step": 80180 }, { "epoch": 0.1069282007796959, "grad_norm": 1.7578125, "learning_rate": 4.993426688086323e-06, "loss": 0.4453399658203125, "num_tokens": 9794334296.0, "step": 80200 }, { "epoch": 0.10695486616642401, "grad_norm": 2.546875, "learning_rate": 4.99337600046233e-06, "loss": 0.4192070484161377, "num_tokens": 9796726667.0, "step": 80220 }, { "epoch": 0.10698153155315211, "grad_norm": 2.109375, "learning_rate": 4.993325118418177e-06, "loss": 0.4260509490966797, "num_tokens": 9799179973.0, "step": 80240 }, { "epoch": 0.10700819693988022, "grad_norm": 1.921875, "learning_rate": 4.993274041957831e-06, "loss": 0.4280524730682373, "num_tokens": 9801519665.0, "step": 80260 }, { "epoch": 0.10703486232660832, "grad_norm": 1.8671875, "learning_rate": 4.993222771085275e-06, "loss": 0.43198504447937014, "num_tokens": 9803843272.0, "step": 80280 }, { "epoch": 0.10706152771333642, "grad_norm": 2.53125, "learning_rate": 4.993171305804507e-06, "loss": 0.4223592281341553, "num_tokens": 9806298873.0, "step": 80300 }, { "epoch": 0.10708819310006452, "grad_norm": 2.40625, "learning_rate": 4.99311964611954e-06, "loss": 0.4321390151977539, "num_tokens": 9808603522.0, "step": 80320 }, { "epoch": 0.10711485848679263, "grad_norm": 2.03125, "learning_rate": 4.993067792034403e-06, "loss": 0.4403054237365723, "num_tokens": 9811116836.0, "step": 80340 }, { "epoch": 0.10714152387352074, "grad_norm": 2.09375, "learning_rate": 4.993015743553138e-06, "loss": 0.4326303005218506, "num_tokens": 9813449651.0, "step": 80360 }, { "epoch": 0.10716818926024885, "grad_norm": 2.015625, "learning_rate": 4.992963500679804e-06, "loss": 0.4222977638244629, "num_tokens": 9816133257.0, "step": 80380 }, { "epoch": 0.10719485464697695, "grad_norm": 1.9140625, "learning_rate": 4.992911063418474e-06, "loss": 0.43145174980163575, "num_tokens": 9818789344.0, "step": 80400 }, { "epoch": 0.10722152003370505, "grad_norm": 1.6640625, "learning_rate": 4.992858431773238e-06, "loss": 0.43490238189697267, "num_tokens": 9821436598.0, "step": 80420 }, { "epoch": 0.10724818542043316, "grad_norm": 1.7109375, "learning_rate": 4.9928056057482e-06, "loss": 0.4355599403381348, "num_tokens": 9824045326.0, "step": 80440 }, { "epoch": 0.10727485080716126, "grad_norm": 1.734375, "learning_rate": 4.992752585347478e-06, "loss": 0.44939570426940917, "num_tokens": 9826497139.0, "step": 80460 }, { "epoch": 0.10730151619388936, "grad_norm": 1.8515625, "learning_rate": 4.992699370575206e-06, "loss": 0.43570237159729003, "num_tokens": 9828623864.0, "step": 80480 }, { "epoch": 0.10732818158061747, "grad_norm": 1.734375, "learning_rate": 4.992645961435535e-06, "loss": 0.4254964828491211, "num_tokens": 9831149306.0, "step": 80500 }, { "epoch": 0.10735484696734557, "grad_norm": 1.9921875, "learning_rate": 4.992592357932628e-06, "loss": 0.4307896614074707, "num_tokens": 9833503070.0, "step": 80520 }, { "epoch": 0.10738151235407367, "grad_norm": 1.84375, "learning_rate": 4.992538560070666e-06, "loss": 0.44017419815063474, "num_tokens": 9835983432.0, "step": 80540 }, { "epoch": 0.10740817774080177, "grad_norm": 2.171875, "learning_rate": 4.992484567853842e-06, "loss": 0.4382582664489746, "num_tokens": 9838538658.0, "step": 80560 }, { "epoch": 0.10743484312752988, "grad_norm": 2.234375, "learning_rate": 4.992430381286368e-06, "loss": 0.429413890838623, "num_tokens": 9841163962.0, "step": 80580 }, { "epoch": 0.10746150851425798, "grad_norm": 2.25, "learning_rate": 4.992376000372468e-06, "loss": 0.4390713214874268, "num_tokens": 9843511072.0, "step": 80600 }, { "epoch": 0.10748817390098608, "grad_norm": 1.8046875, "learning_rate": 4.992321425116383e-06, "loss": 0.4280061721801758, "num_tokens": 9846076138.0, "step": 80620 }, { "epoch": 0.10751483928771419, "grad_norm": 2.203125, "learning_rate": 4.992266655522369e-06, "loss": 0.42532005310058596, "num_tokens": 9848467931.0, "step": 80640 }, { "epoch": 0.10754150467444229, "grad_norm": 2.359375, "learning_rate": 4.992211691594695e-06, "loss": 0.4441507339477539, "num_tokens": 9850866070.0, "step": 80660 }, { "epoch": 0.10756817006117039, "grad_norm": 2.421875, "learning_rate": 4.992156533337648e-06, "loss": 0.4295328140258789, "num_tokens": 9853243747.0, "step": 80680 }, { "epoch": 0.1075948354478985, "grad_norm": 2.359375, "learning_rate": 4.9921011807555285e-06, "loss": 0.43352270126342773, "num_tokens": 9855682296.0, "step": 80700 }, { "epoch": 0.1076215008346266, "grad_norm": 2.21875, "learning_rate": 4.992045633852653e-06, "loss": 0.43114728927612306, "num_tokens": 9858308786.0, "step": 80720 }, { "epoch": 0.1076481662213547, "grad_norm": 1.890625, "learning_rate": 4.991989892633351e-06, "loss": 0.4285404205322266, "num_tokens": 9860737484.0, "step": 80740 }, { "epoch": 0.10767483160808282, "grad_norm": 1.515625, "learning_rate": 4.9919339571019724e-06, "loss": 0.4443691253662109, "num_tokens": 9863002718.0, "step": 80760 }, { "epoch": 0.10770149699481092, "grad_norm": 1.96875, "learning_rate": 4.9918778272628756e-06, "loss": 0.44170446395874025, "num_tokens": 9865682414.0, "step": 80780 }, { "epoch": 0.10772816238153902, "grad_norm": 2.34375, "learning_rate": 4.991821503120439e-06, "loss": 0.4208557605743408, "num_tokens": 9867916354.0, "step": 80800 }, { "epoch": 0.10775482776826713, "grad_norm": 2.03125, "learning_rate": 4.991764984679053e-06, "loss": 0.45616874694824217, "num_tokens": 9870436097.0, "step": 80820 }, { "epoch": 0.10778149315499523, "grad_norm": 1.7578125, "learning_rate": 4.9917082719431255e-06, "loss": 0.4361401557922363, "num_tokens": 9873105643.0, "step": 80840 }, { "epoch": 0.10780815854172333, "grad_norm": 1.671875, "learning_rate": 4.991651364917079e-06, "loss": 0.4487736225128174, "num_tokens": 9875835839.0, "step": 80860 }, { "epoch": 0.10783482392845144, "grad_norm": 2.234375, "learning_rate": 4.991594263605351e-06, "loss": 0.43054947853088377, "num_tokens": 9878108261.0, "step": 80880 }, { "epoch": 0.10786148931517954, "grad_norm": 1.875, "learning_rate": 4.991536968012394e-06, "loss": 0.4227121353149414, "num_tokens": 9880568692.0, "step": 80900 }, { "epoch": 0.10788815470190764, "grad_norm": 1.8515625, "learning_rate": 4.991479478142673e-06, "loss": 0.4306969165802002, "num_tokens": 9882974372.0, "step": 80920 }, { "epoch": 0.10791482008863575, "grad_norm": 2.25, "learning_rate": 4.9914217940006745e-06, "loss": 0.44278898239135744, "num_tokens": 9885426655.0, "step": 80940 }, { "epoch": 0.10794148547536385, "grad_norm": 1.8515625, "learning_rate": 4.991363915590894e-06, "loss": 0.4313981056213379, "num_tokens": 9887753113.0, "step": 80960 }, { "epoch": 0.10796815086209195, "grad_norm": 1.9921875, "learning_rate": 4.9913058429178455e-06, "loss": 0.4479363441467285, "num_tokens": 9890386973.0, "step": 80980 }, { "epoch": 0.10799481624882006, "grad_norm": 1.921875, "learning_rate": 4.991247575986056e-06, "loss": 0.41526145935058595, "num_tokens": 9892944229.0, "step": 81000 }, { "epoch": 0.10802148163554816, "grad_norm": 1.5546875, "learning_rate": 4.99118911480007e-06, "loss": 0.42731704711914065, "num_tokens": 9895191447.0, "step": 81020 }, { "epoch": 0.10804814702227626, "grad_norm": 2.328125, "learning_rate": 4.991130459364447e-06, "loss": 0.43489513397216795, "num_tokens": 9897473245.0, "step": 81040 }, { "epoch": 0.10807481240900436, "grad_norm": 2.09375, "learning_rate": 4.991071609683758e-06, "loss": 0.42478013038635254, "num_tokens": 9899938934.0, "step": 81060 }, { "epoch": 0.10810147779573247, "grad_norm": 1.96875, "learning_rate": 4.991012565762594e-06, "loss": 0.4384599208831787, "num_tokens": 9902354435.0, "step": 81080 }, { "epoch": 0.10812814318246057, "grad_norm": 1.8828125, "learning_rate": 4.990953327605557e-06, "loss": 0.4295368194580078, "num_tokens": 9904852545.0, "step": 81100 }, { "epoch": 0.10815480856918867, "grad_norm": 1.9375, "learning_rate": 4.9908938952172685e-06, "loss": 0.43814449310302733, "num_tokens": 9907134792.0, "step": 81120 }, { "epoch": 0.10818147395591678, "grad_norm": 1.890625, "learning_rate": 4.9908342686023615e-06, "loss": 0.4288168430328369, "num_tokens": 9909798547.0, "step": 81140 }, { "epoch": 0.10820813934264488, "grad_norm": 2.171875, "learning_rate": 4.990774447765484e-06, "loss": 0.4332524299621582, "num_tokens": 9912085828.0, "step": 81160 }, { "epoch": 0.108234804729373, "grad_norm": 1.8359375, "learning_rate": 4.990714432711303e-06, "loss": 0.4168119430541992, "num_tokens": 9914400171.0, "step": 81180 }, { "epoch": 0.1082614701161011, "grad_norm": 2.046875, "learning_rate": 4.990654223444496e-06, "loss": 0.438970947265625, "num_tokens": 9917146159.0, "step": 81200 }, { "epoch": 0.1082881355028292, "grad_norm": 2.125, "learning_rate": 4.990593819969759e-06, "loss": 0.4263947010040283, "num_tokens": 9919388075.0, "step": 81220 }, { "epoch": 0.1083148008895573, "grad_norm": 2.40625, "learning_rate": 4.990533222291802e-06, "loss": 0.4376528739929199, "num_tokens": 9921663276.0, "step": 81240 }, { "epoch": 0.10834146627628541, "grad_norm": 1.828125, "learning_rate": 4.9904724304153494e-06, "loss": 0.4208376884460449, "num_tokens": 9924051883.0, "step": 81260 }, { "epoch": 0.10836813166301351, "grad_norm": 1.609375, "learning_rate": 4.990411444345142e-06, "loss": 0.4322836875915527, "num_tokens": 9926366961.0, "step": 81280 }, { "epoch": 0.10839479704974161, "grad_norm": 2.421875, "learning_rate": 4.9903502640859346e-06, "loss": 0.4292752265930176, "num_tokens": 9928636596.0, "step": 81300 }, { "epoch": 0.10842146243646972, "grad_norm": 1.640625, "learning_rate": 4.990288889642498e-06, "loss": 0.41578149795532227, "num_tokens": 9930888133.0, "step": 81320 }, { "epoch": 0.10844812782319782, "grad_norm": 1.9375, "learning_rate": 4.9902273210196185e-06, "loss": 0.43649959564208984, "num_tokens": 9933321861.0, "step": 81340 }, { "epoch": 0.10847479320992592, "grad_norm": 2.140625, "learning_rate": 4.990165558222096e-06, "loss": 0.4311631202697754, "num_tokens": 9935830580.0, "step": 81360 }, { "epoch": 0.10850145859665403, "grad_norm": 1.7109375, "learning_rate": 4.990103601254747e-06, "loss": 0.42888216972351073, "num_tokens": 9938487788.0, "step": 81380 }, { "epoch": 0.10852812398338213, "grad_norm": 1.625, "learning_rate": 4.990041450122402e-06, "loss": 0.43996829986572267, "num_tokens": 9940739531.0, "step": 81400 }, { "epoch": 0.10855478937011023, "grad_norm": 1.9609375, "learning_rate": 4.989979104829908e-06, "loss": 0.4288339614868164, "num_tokens": 9942935258.0, "step": 81420 }, { "epoch": 0.10858145475683834, "grad_norm": 1.9921875, "learning_rate": 4.9899165653821255e-06, "loss": 0.4291213512420654, "num_tokens": 9945285349.0, "step": 81440 }, { "epoch": 0.10860812014356644, "grad_norm": 2.140625, "learning_rate": 4.989853831783931e-06, "loss": 0.43849964141845704, "num_tokens": 9947617900.0, "step": 81460 }, { "epoch": 0.10863478553029454, "grad_norm": 2.421875, "learning_rate": 4.989790904040217e-06, "loss": 0.437428617477417, "num_tokens": 9949925553.0, "step": 81480 }, { "epoch": 0.10866145091702265, "grad_norm": 2.015625, "learning_rate": 4.98972778215589e-06, "loss": 0.4379903793334961, "num_tokens": 9952321617.0, "step": 81500 }, { "epoch": 0.10868811630375075, "grad_norm": 1.78125, "learning_rate": 4.98966446613587e-06, "loss": 0.4182014465332031, "num_tokens": 9954846765.0, "step": 81520 }, { "epoch": 0.10871478169047885, "grad_norm": 1.953125, "learning_rate": 4.9896009559850974e-06, "loss": 0.44071149826049805, "num_tokens": 9957334604.0, "step": 81540 }, { "epoch": 0.10874144707720695, "grad_norm": 1.84375, "learning_rate": 4.989537251708523e-06, "loss": 0.4358081817626953, "num_tokens": 9959745081.0, "step": 81560 }, { "epoch": 0.10876811246393507, "grad_norm": 1.84375, "learning_rate": 4.989473353311113e-06, "loss": 0.41521692276000977, "num_tokens": 9961917996.0, "step": 81580 }, { "epoch": 0.10879477785066317, "grad_norm": 2.21875, "learning_rate": 4.989409260797851e-06, "loss": 0.4222237586975098, "num_tokens": 9964518297.0, "step": 81600 }, { "epoch": 0.10882144323739128, "grad_norm": 2.078125, "learning_rate": 4.989344974173733e-06, "loss": 0.4381516456604004, "num_tokens": 9966947829.0, "step": 81620 }, { "epoch": 0.10884810862411938, "grad_norm": 2.046875, "learning_rate": 4.989280493443774e-06, "loss": 0.43738322257995604, "num_tokens": 9969419466.0, "step": 81640 }, { "epoch": 0.10887477401084748, "grad_norm": 1.8046875, "learning_rate": 4.989215818613001e-06, "loss": 0.42960119247436523, "num_tokens": 9972000246.0, "step": 81660 }, { "epoch": 0.10890143939757559, "grad_norm": 1.2578125, "learning_rate": 4.989150949686458e-06, "loss": 0.43113040924072266, "num_tokens": 9974638635.0, "step": 81680 }, { "epoch": 0.10892810478430369, "grad_norm": 2.25, "learning_rate": 4.9890858866692e-06, "loss": 0.4207949638366699, "num_tokens": 9977252753.0, "step": 81700 }, { "epoch": 0.10895477017103179, "grad_norm": 1.9375, "learning_rate": 4.9890206295663036e-06, "loss": 0.42765159606933595, "num_tokens": 9979645782.0, "step": 81720 }, { "epoch": 0.1089814355577599, "grad_norm": 1.828125, "learning_rate": 4.988955178382855e-06, "loss": 0.40776753425598145, "num_tokens": 9982248198.0, "step": 81740 }, { "epoch": 0.109008100944488, "grad_norm": 1.7734375, "learning_rate": 4.988889533123958e-06, "loss": 0.43888378143310547, "num_tokens": 9984687752.0, "step": 81760 }, { "epoch": 0.1090347663312161, "grad_norm": 1.6953125, "learning_rate": 4.988823693794734e-06, "loss": 0.4423541069030762, "num_tokens": 9987077910.0, "step": 81780 }, { "epoch": 0.1090614317179442, "grad_norm": 2.109375, "learning_rate": 4.9887576604003126e-06, "loss": 0.4335629463195801, "num_tokens": 9989485611.0, "step": 81800 }, { "epoch": 0.10908809710467231, "grad_norm": 1.8515625, "learning_rate": 4.988691432945846e-06, "loss": 0.4445277214050293, "num_tokens": 9991648818.0, "step": 81820 }, { "epoch": 0.10911476249140041, "grad_norm": 1.8984375, "learning_rate": 4.988625011436497e-06, "loss": 0.42432303428649903, "num_tokens": 9994030741.0, "step": 81840 }, { "epoch": 0.10914142787812851, "grad_norm": 2.625, "learning_rate": 4.988558395877444e-06, "loss": 0.4452059745788574, "num_tokens": 9996547172.0, "step": 81860 }, { "epoch": 0.10916809326485662, "grad_norm": 1.71875, "learning_rate": 4.988491586273883e-06, "loss": 0.43840579986572265, "num_tokens": 9998988966.0, "step": 81880 }, { "epoch": 0.10919475865158472, "grad_norm": 2.0625, "learning_rate": 4.988424582631023e-06, "loss": 0.4263430595397949, "num_tokens": 10001528136.0, "step": 81900 }, { "epoch": 0.10922142403831282, "grad_norm": 2.140625, "learning_rate": 4.988357384954087e-06, "loss": 0.44140968322753904, "num_tokens": 10003957881.0, "step": 81920 }, { "epoch": 0.10924808942504093, "grad_norm": 2.03125, "learning_rate": 4.988289993248317e-06, "loss": 0.4273377418518066, "num_tokens": 10006414242.0, "step": 81940 }, { "epoch": 0.10927475481176903, "grad_norm": 1.96875, "learning_rate": 4.988222407518966e-06, "loss": 0.42452545166015626, "num_tokens": 10008889508.0, "step": 81960 }, { "epoch": 0.10930142019849715, "grad_norm": 2.25, "learning_rate": 4.988154627771305e-06, "loss": 0.4087838649749756, "num_tokens": 10011271130.0, "step": 81980 }, { "epoch": 0.10932808558522525, "grad_norm": 2.265625, "learning_rate": 4.988086654010619e-06, "loss": 0.42397117614746094, "num_tokens": 10013451473.0, "step": 82000 }, { "epoch": 0.10935475097195335, "grad_norm": 2.203125, "learning_rate": 4.988018486242207e-06, "loss": 0.43246707916259763, "num_tokens": 10015803510.0, "step": 82020 }, { "epoch": 0.10938141635868145, "grad_norm": 2.375, "learning_rate": 4.987950124471388e-06, "loss": 0.43207492828369143, "num_tokens": 10018355747.0, "step": 82040 }, { "epoch": 0.10940808174540956, "grad_norm": 2.640625, "learning_rate": 4.987881568703487e-06, "loss": 0.44092421531677245, "num_tokens": 10020805635.0, "step": 82060 }, { "epoch": 0.10943474713213766, "grad_norm": 2.03125, "learning_rate": 4.987812818943854e-06, "loss": 0.425091552734375, "num_tokens": 10023200504.0, "step": 82080 }, { "epoch": 0.10946141251886576, "grad_norm": 1.828125, "learning_rate": 4.987743875197849e-06, "loss": 0.45308332443237304, "num_tokens": 10025595048.0, "step": 82100 }, { "epoch": 0.10948807790559387, "grad_norm": 1.8671875, "learning_rate": 4.987674737470847e-06, "loss": 0.43205862045288085, "num_tokens": 10028310297.0, "step": 82120 }, { "epoch": 0.10951474329232197, "grad_norm": 2.0, "learning_rate": 4.987605405768238e-06, "loss": 0.43001422882080076, "num_tokens": 10030766595.0, "step": 82140 }, { "epoch": 0.10954140867905007, "grad_norm": 1.8125, "learning_rate": 4.98753588009543e-06, "loss": 0.4292281627655029, "num_tokens": 10033223754.0, "step": 82160 }, { "epoch": 0.10956807406577818, "grad_norm": 1.6875, "learning_rate": 4.9874661604578435e-06, "loss": 0.419648551940918, "num_tokens": 10035626770.0, "step": 82180 }, { "epoch": 0.10959473945250628, "grad_norm": 2.078125, "learning_rate": 4.987396246860915e-06, "loss": 0.42934532165527345, "num_tokens": 10038062709.0, "step": 82200 }, { "epoch": 0.10962140483923438, "grad_norm": 2.4375, "learning_rate": 4.987326139310095e-06, "loss": 0.4308168888092041, "num_tokens": 10040373778.0, "step": 82220 }, { "epoch": 0.10964807022596249, "grad_norm": 1.9453125, "learning_rate": 4.987255837810851e-06, "loss": 0.4293966770172119, "num_tokens": 10042553526.0, "step": 82240 }, { "epoch": 0.10967473561269059, "grad_norm": 2.328125, "learning_rate": 4.987185342368666e-06, "loss": 0.425931453704834, "num_tokens": 10044660725.0, "step": 82260 }, { "epoch": 0.10970140099941869, "grad_norm": 2.59375, "learning_rate": 4.987114652989035e-06, "loss": 0.41420793533325195, "num_tokens": 10047092365.0, "step": 82280 }, { "epoch": 0.1097280663861468, "grad_norm": 1.921875, "learning_rate": 4.98704376967747e-06, "loss": 0.42132568359375, "num_tokens": 10049646155.0, "step": 82300 }, { "epoch": 0.1097547317728749, "grad_norm": 2.03125, "learning_rate": 4.9869726924394995e-06, "loss": 0.4382833480834961, "num_tokens": 10052277246.0, "step": 82320 }, { "epoch": 0.109781397159603, "grad_norm": 1.859375, "learning_rate": 4.986901421280664e-06, "loss": 0.43799629211425783, "num_tokens": 10054669030.0, "step": 82340 }, { "epoch": 0.1098080625463311, "grad_norm": 2.15625, "learning_rate": 4.986829956206523e-06, "loss": 0.42246570587158205, "num_tokens": 10056956190.0, "step": 82360 }, { "epoch": 0.1098347279330592, "grad_norm": 2.15625, "learning_rate": 4.986758297222646e-06, "loss": 0.41544151306152344, "num_tokens": 10059549361.0, "step": 82380 }, { "epoch": 0.10986139331978732, "grad_norm": 2.09375, "learning_rate": 4.986686444334623e-06, "loss": 0.4237339019775391, "num_tokens": 10061883242.0, "step": 82400 }, { "epoch": 0.10988805870651543, "grad_norm": 1.6796875, "learning_rate": 4.986614397548056e-06, "loss": 0.41975884437561034, "num_tokens": 10064465876.0, "step": 82420 }, { "epoch": 0.10991472409324353, "grad_norm": 2.078125, "learning_rate": 4.986542156868563e-06, "loss": 0.41580915451049805, "num_tokens": 10066855754.0, "step": 82440 }, { "epoch": 0.10994138947997163, "grad_norm": 1.671875, "learning_rate": 4.986469722301777e-06, "loss": 0.43229355812072756, "num_tokens": 10069531197.0, "step": 82460 }, { "epoch": 0.10996805486669974, "grad_norm": 1.5859375, "learning_rate": 4.986397093853346e-06, "loss": 0.43904819488525393, "num_tokens": 10072037681.0, "step": 82480 }, { "epoch": 0.10999472025342784, "grad_norm": 1.9765625, "learning_rate": 4.986324271528932e-06, "loss": 0.44724483489990235, "num_tokens": 10074288502.0, "step": 82500 }, { "epoch": 0.11002138564015594, "grad_norm": 2.390625, "learning_rate": 4.9862512553342145e-06, "loss": 0.4424687385559082, "num_tokens": 10076736607.0, "step": 82520 }, { "epoch": 0.11004805102688404, "grad_norm": 2.03125, "learning_rate": 4.986178045274887e-06, "loss": 0.4199268341064453, "num_tokens": 10079188040.0, "step": 82540 }, { "epoch": 0.11007471641361215, "grad_norm": 1.703125, "learning_rate": 4.986104641356657e-06, "loss": 0.4548486709594727, "num_tokens": 10081362326.0, "step": 82560 }, { "epoch": 0.11010138180034025, "grad_norm": 1.7578125, "learning_rate": 4.98603104358525e-06, "loss": 0.4252124309539795, "num_tokens": 10083740117.0, "step": 82580 }, { "epoch": 0.11012804718706835, "grad_norm": 1.4453125, "learning_rate": 4.985957251966404e-06, "loss": 0.4390096664428711, "num_tokens": 10086384012.0, "step": 82600 }, { "epoch": 0.11015471257379646, "grad_norm": 1.8671875, "learning_rate": 4.985883266505871e-06, "loss": 0.43425650596618653, "num_tokens": 10088936490.0, "step": 82620 }, { "epoch": 0.11018137796052456, "grad_norm": 2.015625, "learning_rate": 4.985809087209422e-06, "loss": 0.42763495445251465, "num_tokens": 10091273318.0, "step": 82640 }, { "epoch": 0.11020804334725266, "grad_norm": 1.796875, "learning_rate": 4.9857347140828406e-06, "loss": 0.4519392967224121, "num_tokens": 10093772711.0, "step": 82660 }, { "epoch": 0.11023470873398077, "grad_norm": 1.65625, "learning_rate": 4.985660147131927e-06, "loss": 0.4258440971374512, "num_tokens": 10096344585.0, "step": 82680 }, { "epoch": 0.11026137412070887, "grad_norm": 1.8359375, "learning_rate": 4.985585386362493e-06, "loss": 0.4457763671875, "num_tokens": 10098625743.0, "step": 82700 }, { "epoch": 0.11028803950743697, "grad_norm": 2.046875, "learning_rate": 4.985510431780371e-06, "loss": 0.4240588188171387, "num_tokens": 10101084840.0, "step": 82720 }, { "epoch": 0.11031470489416507, "grad_norm": 1.828125, "learning_rate": 4.985435283391403e-06, "loss": 0.4584170341491699, "num_tokens": 10103730368.0, "step": 82740 }, { "epoch": 0.11034137028089318, "grad_norm": 2.453125, "learning_rate": 4.985359941201451e-06, "loss": 0.42935476303100584, "num_tokens": 10106080252.0, "step": 82760 }, { "epoch": 0.11036803566762128, "grad_norm": 1.53125, "learning_rate": 4.985284405216388e-06, "loss": 0.41903152465820315, "num_tokens": 10108435007.0, "step": 82780 }, { "epoch": 0.1103947010543494, "grad_norm": 2.125, "learning_rate": 4.985208675442103e-06, "loss": 0.4362794876098633, "num_tokens": 10110917262.0, "step": 82800 }, { "epoch": 0.1104213664410775, "grad_norm": 1.8671875, "learning_rate": 4.985132751884505e-06, "loss": 0.443471622467041, "num_tokens": 10113469690.0, "step": 82820 }, { "epoch": 0.1104480318278056, "grad_norm": 2.421875, "learning_rate": 4.98505663454951e-06, "loss": 0.43199634552001953, "num_tokens": 10115912563.0, "step": 82840 }, { "epoch": 0.11047469721453371, "grad_norm": 2.328125, "learning_rate": 4.984980323443055e-06, "loss": 0.4201032638549805, "num_tokens": 10118365121.0, "step": 82860 }, { "epoch": 0.11050136260126181, "grad_norm": 2.234375, "learning_rate": 4.984903818571091e-06, "loss": 0.42989530563354494, "num_tokens": 10120645044.0, "step": 82880 }, { "epoch": 0.11052802798798991, "grad_norm": 1.9296875, "learning_rate": 4.984827119939582e-06, "loss": 0.4172780513763428, "num_tokens": 10123019362.0, "step": 82900 }, { "epoch": 0.11055469337471802, "grad_norm": 2.0, "learning_rate": 4.98475022755451e-06, "loss": 0.43558921813964846, "num_tokens": 10125492067.0, "step": 82920 }, { "epoch": 0.11058135876144612, "grad_norm": 2.1875, "learning_rate": 4.984673141421869e-06, "loss": 0.43567986488342286, "num_tokens": 10127819270.0, "step": 82940 }, { "epoch": 0.11060802414817422, "grad_norm": 2.09375, "learning_rate": 4.984595861547671e-06, "loss": 0.42829389572143556, "num_tokens": 10130298117.0, "step": 82960 }, { "epoch": 0.11063468953490233, "grad_norm": 2.328125, "learning_rate": 4.984518387937941e-06, "loss": 0.44730892181396487, "num_tokens": 10132889461.0, "step": 82980 }, { "epoch": 0.11066135492163043, "grad_norm": 1.96875, "learning_rate": 4.984440720598721e-06, "loss": 0.43263974189758303, "num_tokens": 10135244887.0, "step": 83000 }, { "epoch": 0.11068802030835853, "grad_norm": 1.953125, "learning_rate": 4.984362859536066e-06, "loss": 0.42815065383911133, "num_tokens": 10137678937.0, "step": 83020 }, { "epoch": 0.11071468569508663, "grad_norm": 2.28125, "learning_rate": 4.984284804756048e-06, "loss": 0.4268337726593018, "num_tokens": 10140299953.0, "step": 83040 }, { "epoch": 0.11074135108181474, "grad_norm": 2.03125, "learning_rate": 4.984206556264754e-06, "loss": 0.43758525848388674, "num_tokens": 10142614188.0, "step": 83060 }, { "epoch": 0.11076801646854284, "grad_norm": 2.421875, "learning_rate": 4.984128114068283e-06, "loss": 0.4294093132019043, "num_tokens": 10145106658.0, "step": 83080 }, { "epoch": 0.11079468185527094, "grad_norm": 1.9375, "learning_rate": 4.984049478172753e-06, "loss": 0.42421913146972656, "num_tokens": 10147608912.0, "step": 83100 }, { "epoch": 0.11082134724199905, "grad_norm": 2.046875, "learning_rate": 4.9839706485842955e-06, "loss": 0.4301266670227051, "num_tokens": 10150051971.0, "step": 83120 }, { "epoch": 0.11084801262872715, "grad_norm": 1.7734375, "learning_rate": 4.983891625309058e-06, "loss": 0.42649307250976565, "num_tokens": 10152426179.0, "step": 83140 }, { "epoch": 0.11087467801545525, "grad_norm": 2.34375, "learning_rate": 4.9838124083532015e-06, "loss": 0.437177848815918, "num_tokens": 10155048862.0, "step": 83160 }, { "epoch": 0.11090134340218336, "grad_norm": 1.7578125, "learning_rate": 4.983732997722902e-06, "loss": 0.4275347232818604, "num_tokens": 10157522448.0, "step": 83180 }, { "epoch": 0.11092800878891147, "grad_norm": 2.484375, "learning_rate": 4.983653393424353e-06, "loss": 0.4295187950134277, "num_tokens": 10159875349.0, "step": 83200 }, { "epoch": 0.11095467417563958, "grad_norm": 1.9296875, "learning_rate": 4.9835735954637625e-06, "loss": 0.4370394706726074, "num_tokens": 10162127366.0, "step": 83220 }, { "epoch": 0.11098133956236768, "grad_norm": 1.828125, "learning_rate": 4.983493603847349e-06, "loss": 0.4136075019836426, "num_tokens": 10164604405.0, "step": 83240 }, { "epoch": 0.11100800494909578, "grad_norm": 2.140625, "learning_rate": 4.983413418581354e-06, "loss": 0.41222577095031737, "num_tokens": 10167282241.0, "step": 83260 }, { "epoch": 0.11103467033582388, "grad_norm": 2.078125, "learning_rate": 4.983333039672027e-06, "loss": 0.4297691822052002, "num_tokens": 10169841246.0, "step": 83280 }, { "epoch": 0.11106133572255199, "grad_norm": 1.9921875, "learning_rate": 4.983252467125637e-06, "loss": 0.42850966453552247, "num_tokens": 10172057005.0, "step": 83300 }, { "epoch": 0.11108800110928009, "grad_norm": 1.8125, "learning_rate": 4.983171700948466e-06, "loss": 0.42741870880126953, "num_tokens": 10174622665.0, "step": 83320 }, { "epoch": 0.1111146664960082, "grad_norm": 2.65625, "learning_rate": 4.983090741146812e-06, "loss": 0.45256738662719725, "num_tokens": 10176831174.0, "step": 83340 }, { "epoch": 0.1111413318827363, "grad_norm": 2.0625, "learning_rate": 4.983009587726988e-06, "loss": 0.42020349502563475, "num_tokens": 10179185083.0, "step": 83360 }, { "epoch": 0.1111679972694644, "grad_norm": 1.875, "learning_rate": 4.982928240695321e-06, "loss": 0.42452049255371094, "num_tokens": 10181613541.0, "step": 83380 }, { "epoch": 0.1111946626561925, "grad_norm": 2.015625, "learning_rate": 4.982846700058155e-06, "loss": 0.4206217288970947, "num_tokens": 10184149070.0, "step": 83400 }, { "epoch": 0.1112213280429206, "grad_norm": 2.0625, "learning_rate": 4.982764965821848e-06, "loss": 0.41902685165405273, "num_tokens": 10186741064.0, "step": 83420 }, { "epoch": 0.11124799342964871, "grad_norm": 2.015625, "learning_rate": 4.9826830379927725e-06, "loss": 0.43401174545288085, "num_tokens": 10189144844.0, "step": 83440 }, { "epoch": 0.11127465881637681, "grad_norm": 1.671875, "learning_rate": 4.982600916577317e-06, "loss": 0.43895320892333983, "num_tokens": 10191772530.0, "step": 83460 }, { "epoch": 0.11130132420310491, "grad_norm": 1.8359375, "learning_rate": 4.982518601581886e-06, "loss": 0.4359461307525635, "num_tokens": 10194310131.0, "step": 83480 }, { "epoch": 0.11132798958983302, "grad_norm": 2.171875, "learning_rate": 4.982436093012896e-06, "loss": 0.4163771629333496, "num_tokens": 10196857211.0, "step": 83500 }, { "epoch": 0.11135465497656112, "grad_norm": 1.671875, "learning_rate": 4.982353390876784e-06, "loss": 0.43395843505859377, "num_tokens": 10199329080.0, "step": 83520 }, { "epoch": 0.11138132036328922, "grad_norm": 1.9609375, "learning_rate": 4.9822704951799944e-06, "loss": 0.4378338813781738, "num_tokens": 10201731218.0, "step": 83540 }, { "epoch": 0.11140798575001733, "grad_norm": 2.390625, "learning_rate": 4.982187405928993e-06, "loss": 0.4429368495941162, "num_tokens": 10204350557.0, "step": 83560 }, { "epoch": 0.11143465113674543, "grad_norm": 2.0625, "learning_rate": 4.982104123130259e-06, "loss": 0.4247306823730469, "num_tokens": 10206911458.0, "step": 83580 }, { "epoch": 0.11146131652347353, "grad_norm": 2.125, "learning_rate": 4.982020646790286e-06, "loss": 0.4383580684661865, "num_tokens": 10209365153.0, "step": 83600 }, { "epoch": 0.11148798191020165, "grad_norm": 2.4375, "learning_rate": 4.9819369769155825e-06, "loss": 0.4436457633972168, "num_tokens": 10211720298.0, "step": 83620 }, { "epoch": 0.11151464729692975, "grad_norm": 2.171875, "learning_rate": 4.981853113512674e-06, "loss": 0.4394256591796875, "num_tokens": 10214084152.0, "step": 83640 }, { "epoch": 0.11154131268365786, "grad_norm": 1.890625, "learning_rate": 4.981769056588098e-06, "loss": 0.4286806583404541, "num_tokens": 10216707545.0, "step": 83660 }, { "epoch": 0.11156797807038596, "grad_norm": 1.921875, "learning_rate": 4.981684806148411e-06, "loss": 0.45055160522460935, "num_tokens": 10219032335.0, "step": 83680 }, { "epoch": 0.11159464345711406, "grad_norm": 1.8984375, "learning_rate": 4.9816003622001794e-06, "loss": 0.4262382030487061, "num_tokens": 10221512039.0, "step": 83700 }, { "epoch": 0.11162130884384217, "grad_norm": 1.7421875, "learning_rate": 4.98151572474999e-06, "loss": 0.42869205474853517, "num_tokens": 10223875795.0, "step": 83720 }, { "epoch": 0.11164797423057027, "grad_norm": 1.625, "learning_rate": 4.981430893804441e-06, "loss": 0.42718048095703126, "num_tokens": 10226208890.0, "step": 83740 }, { "epoch": 0.11167463961729837, "grad_norm": 2.21875, "learning_rate": 4.981345869370149e-06, "loss": 0.4382824420928955, "num_tokens": 10228866109.0, "step": 83760 }, { "epoch": 0.11170130500402647, "grad_norm": 2.453125, "learning_rate": 4.981260651453742e-06, "loss": 0.4288005352020264, "num_tokens": 10231522154.0, "step": 83780 }, { "epoch": 0.11172797039075458, "grad_norm": 1.953125, "learning_rate": 4.981175240061865e-06, "loss": 0.41601028442382815, "num_tokens": 10234107827.0, "step": 83800 }, { "epoch": 0.11175463577748268, "grad_norm": 1.6953125, "learning_rate": 4.981089635201178e-06, "loss": 0.4224959373474121, "num_tokens": 10236479523.0, "step": 83820 }, { "epoch": 0.11178130116421078, "grad_norm": 2.15625, "learning_rate": 4.981003836878357e-06, "loss": 0.43302345275878906, "num_tokens": 10238895944.0, "step": 83840 }, { "epoch": 0.11180796655093889, "grad_norm": 1.9375, "learning_rate": 4.980917845100092e-06, "loss": 0.4211467742919922, "num_tokens": 10241117383.0, "step": 83860 }, { "epoch": 0.11183463193766699, "grad_norm": 1.828125, "learning_rate": 4.980831659873086e-06, "loss": 0.4313316345214844, "num_tokens": 10243720796.0, "step": 83880 }, { "epoch": 0.11186129732439509, "grad_norm": 1.7109375, "learning_rate": 4.980745281204063e-06, "loss": 0.4187187194824219, "num_tokens": 10246349707.0, "step": 83900 }, { "epoch": 0.1118879627111232, "grad_norm": 2.015625, "learning_rate": 4.980658709099754e-06, "loss": 0.4242356300354004, "num_tokens": 10248807166.0, "step": 83920 }, { "epoch": 0.1119146280978513, "grad_norm": 1.828125, "learning_rate": 4.980571943566912e-06, "loss": 0.4369328498840332, "num_tokens": 10251385813.0, "step": 83940 }, { "epoch": 0.1119412934845794, "grad_norm": 2.265625, "learning_rate": 4.980484984612303e-06, "loss": 0.4249164581298828, "num_tokens": 10253859473.0, "step": 83960 }, { "epoch": 0.1119679588713075, "grad_norm": 1.8359375, "learning_rate": 4.980397832242706e-06, "loss": 0.43448648452758787, "num_tokens": 10256329900.0, "step": 83980 }, { "epoch": 0.11199462425803561, "grad_norm": 2.015625, "learning_rate": 4.9803104864649174e-06, "loss": 0.43955278396606445, "num_tokens": 10258916122.0, "step": 84000 }, { "epoch": 0.11202128964476372, "grad_norm": 1.890625, "learning_rate": 4.980222947285749e-06, "loss": 0.4304195404052734, "num_tokens": 10261355830.0, "step": 84020 }, { "epoch": 0.11204795503149183, "grad_norm": 2.1875, "learning_rate": 4.980135214712024e-06, "loss": 0.42192726135253905, "num_tokens": 10263597918.0, "step": 84040 }, { "epoch": 0.11207462041821993, "grad_norm": 1.625, "learning_rate": 4.980047288750587e-06, "loss": 0.42151288986206054, "num_tokens": 10266058937.0, "step": 84060 }, { "epoch": 0.11210128580494803, "grad_norm": 2.03125, "learning_rate": 4.97995916940829e-06, "loss": 0.4381307601928711, "num_tokens": 10268434655.0, "step": 84080 }, { "epoch": 0.11212795119167614, "grad_norm": 2.328125, "learning_rate": 4.979870856692007e-06, "loss": 0.4374251365661621, "num_tokens": 10270993172.0, "step": 84100 }, { "epoch": 0.11215461657840424, "grad_norm": 1.796875, "learning_rate": 4.979782350608623e-06, "loss": 0.4309051036834717, "num_tokens": 10273461663.0, "step": 84120 }, { "epoch": 0.11218128196513234, "grad_norm": 2.0, "learning_rate": 4.979693651165039e-06, "loss": 0.4214798927307129, "num_tokens": 10275901762.0, "step": 84140 }, { "epoch": 0.11220794735186045, "grad_norm": 2.296875, "learning_rate": 4.979604758368172e-06, "loss": 0.4323294639587402, "num_tokens": 10278388307.0, "step": 84160 }, { "epoch": 0.11223461273858855, "grad_norm": 1.8359375, "learning_rate": 4.979515672224953e-06, "loss": 0.42983303070068357, "num_tokens": 10280779992.0, "step": 84180 }, { "epoch": 0.11226127812531665, "grad_norm": 1.7421875, "learning_rate": 4.979426392742328e-06, "loss": 0.42115211486816406, "num_tokens": 10283480449.0, "step": 84200 }, { "epoch": 0.11228794351204475, "grad_norm": 2.078125, "learning_rate": 4.97933691992726e-06, "loss": 0.4265629768371582, "num_tokens": 10286071891.0, "step": 84220 }, { "epoch": 0.11231460889877286, "grad_norm": 2.421875, "learning_rate": 4.979247253786724e-06, "loss": 0.4286788463592529, "num_tokens": 10288671438.0, "step": 84240 }, { "epoch": 0.11234127428550096, "grad_norm": 2.21875, "learning_rate": 4.979157394327713e-06, "loss": 0.44152212142944336, "num_tokens": 10291080777.0, "step": 84260 }, { "epoch": 0.11236793967222906, "grad_norm": 1.5, "learning_rate": 4.979067341557233e-06, "loss": 0.43099288940429686, "num_tokens": 10293668816.0, "step": 84280 }, { "epoch": 0.11239460505895717, "grad_norm": 2.0625, "learning_rate": 4.978977095482305e-06, "loss": 0.4146939754486084, "num_tokens": 10296020027.0, "step": 84300 }, { "epoch": 0.11242127044568527, "grad_norm": 2.140625, "learning_rate": 4.978886656109968e-06, "loss": 0.4267248630523682, "num_tokens": 10298813894.0, "step": 84320 }, { "epoch": 0.11244793583241337, "grad_norm": 2.59375, "learning_rate": 4.978796023447272e-06, "loss": 0.4313878059387207, "num_tokens": 10301358784.0, "step": 84340 }, { "epoch": 0.11247460121914148, "grad_norm": 1.890625, "learning_rate": 4.9787051975012855e-06, "loss": 0.4162418365478516, "num_tokens": 10303626866.0, "step": 84360 }, { "epoch": 0.11250126660586958, "grad_norm": 1.625, "learning_rate": 4.97861417827909e-06, "loss": 0.41147470474243164, "num_tokens": 10306130799.0, "step": 84380 }, { "epoch": 0.11252793199259768, "grad_norm": 2.21875, "learning_rate": 4.978522965787783e-06, "loss": 0.4354723930358887, "num_tokens": 10308599615.0, "step": 84400 }, { "epoch": 0.1125545973793258, "grad_norm": 2.046875, "learning_rate": 4.978431560034476e-06, "loss": 0.42713623046875, "num_tokens": 10310905726.0, "step": 84420 }, { "epoch": 0.1125812627660539, "grad_norm": 1.921875, "learning_rate": 4.978339961026297e-06, "loss": 0.4282214164733887, "num_tokens": 10313591613.0, "step": 84440 }, { "epoch": 0.112607928152782, "grad_norm": 1.6640625, "learning_rate": 4.978248168770388e-06, "loss": 0.43461952209472654, "num_tokens": 10316050549.0, "step": 84460 }, { "epoch": 0.11263459353951011, "grad_norm": 1.734375, "learning_rate": 4.978156183273907e-06, "loss": 0.42691736221313475, "num_tokens": 10318435127.0, "step": 84480 }, { "epoch": 0.11266125892623821, "grad_norm": 1.65625, "learning_rate": 4.978064004544026e-06, "loss": 0.4430365562438965, "num_tokens": 10320922728.0, "step": 84500 }, { "epoch": 0.11268792431296631, "grad_norm": 2.140625, "learning_rate": 4.977971632587933e-06, "loss": 0.42011270523071287, "num_tokens": 10323549943.0, "step": 84520 }, { "epoch": 0.11271458969969442, "grad_norm": 1.6328125, "learning_rate": 4.97787906741283e-06, "loss": 0.41806859970092775, "num_tokens": 10325915229.0, "step": 84540 }, { "epoch": 0.11274125508642252, "grad_norm": 1.765625, "learning_rate": 4.977786309025936e-06, "loss": 0.43221006393432615, "num_tokens": 10328569555.0, "step": 84560 }, { "epoch": 0.11276792047315062, "grad_norm": 2.046875, "learning_rate": 4.977693357434482e-06, "loss": 0.4167820453643799, "num_tokens": 10331107048.0, "step": 84580 }, { "epoch": 0.11279458585987873, "grad_norm": 1.640625, "learning_rate": 4.9776002126457185e-06, "loss": 0.4204882621765137, "num_tokens": 10333467255.0, "step": 84600 }, { "epoch": 0.11282125124660683, "grad_norm": 2.25, "learning_rate": 4.977506874666906e-06, "loss": 0.43112549781799314, "num_tokens": 10335782189.0, "step": 84620 }, { "epoch": 0.11284791663333493, "grad_norm": 1.921875, "learning_rate": 4.977413343505323e-06, "loss": 0.41504626274108886, "num_tokens": 10338132447.0, "step": 84640 }, { "epoch": 0.11287458202006304, "grad_norm": 1.7109375, "learning_rate": 4.977319619168264e-06, "loss": 0.41952266693115237, "num_tokens": 10340529017.0, "step": 84660 }, { "epoch": 0.11290124740679114, "grad_norm": 1.859375, "learning_rate": 4.977225701663035e-06, "loss": 0.4140133857727051, "num_tokens": 10342985660.0, "step": 84680 }, { "epoch": 0.11292791279351924, "grad_norm": 1.6015625, "learning_rate": 4.97713159099696e-06, "loss": 0.4203899383544922, "num_tokens": 10345325337.0, "step": 84700 }, { "epoch": 0.11295457818024734, "grad_norm": 1.796875, "learning_rate": 4.977037287177378e-06, "loss": 0.43224287033081055, "num_tokens": 10347601895.0, "step": 84720 }, { "epoch": 0.11298124356697545, "grad_norm": 2.453125, "learning_rate": 4.9769427902116415e-06, "loss": 0.4290959358215332, "num_tokens": 10350078978.0, "step": 84740 }, { "epoch": 0.11300790895370355, "grad_norm": 2.15625, "learning_rate": 4.97684810010712e-06, "loss": 0.4425028324127197, "num_tokens": 10352567872.0, "step": 84760 }, { "epoch": 0.11303457434043165, "grad_norm": 2.28125, "learning_rate": 4.9767532168711955e-06, "loss": 0.44083619117736816, "num_tokens": 10354990567.0, "step": 84780 }, { "epoch": 0.11306123972715976, "grad_norm": 1.71875, "learning_rate": 4.976658140511267e-06, "loss": 0.4235576629638672, "num_tokens": 10357642119.0, "step": 84800 }, { "epoch": 0.11308790511388786, "grad_norm": 1.9765625, "learning_rate": 4.976562871034748e-06, "loss": 0.4270334720611572, "num_tokens": 10359872979.0, "step": 84820 }, { "epoch": 0.11311457050061598, "grad_norm": 2.0625, "learning_rate": 4.976467408449067e-06, "loss": 0.431402587890625, "num_tokens": 10362303653.0, "step": 84840 }, { "epoch": 0.11314123588734408, "grad_norm": 2.0, "learning_rate": 4.976371752761669e-06, "loss": 0.43509416580200194, "num_tokens": 10364929934.0, "step": 84860 }, { "epoch": 0.11316790127407218, "grad_norm": 2.046875, "learning_rate": 4.976275903980011e-06, "loss": 0.4388636589050293, "num_tokens": 10367529768.0, "step": 84880 }, { "epoch": 0.11319456666080029, "grad_norm": 2.296875, "learning_rate": 4.976179862111567e-06, "loss": 0.4413510799407959, "num_tokens": 10369982656.0, "step": 84900 }, { "epoch": 0.11322123204752839, "grad_norm": 2.203125, "learning_rate": 4.976083627163827e-06, "loss": 0.4332249164581299, "num_tokens": 10372314964.0, "step": 84920 }, { "epoch": 0.11324789743425649, "grad_norm": 2.40625, "learning_rate": 4.975987199144293e-06, "loss": 0.42712979316711425, "num_tokens": 10374718878.0, "step": 84940 }, { "epoch": 0.1132745628209846, "grad_norm": 2.15625, "learning_rate": 4.975890578060486e-06, "loss": 0.4319766521453857, "num_tokens": 10377317622.0, "step": 84960 }, { "epoch": 0.1133012282077127, "grad_norm": 2.265625, "learning_rate": 4.975793763919938e-06, "loss": 0.42775979042053225, "num_tokens": 10379838034.0, "step": 84980 }, { "epoch": 0.1133278935944408, "grad_norm": 2.0, "learning_rate": 4.9756967567302e-06, "loss": 0.4168521881103516, "num_tokens": 10382150278.0, "step": 85000 }, { "epoch": 0.1133545589811689, "grad_norm": 2.03125, "learning_rate": 4.9755995564988346e-06, "loss": 0.42656826972961426, "num_tokens": 10384367030.0, "step": 85020 }, { "epoch": 0.11338122436789701, "grad_norm": 1.9609375, "learning_rate": 4.975502163233422e-06, "loss": 0.4290909767150879, "num_tokens": 10386771441.0, "step": 85040 }, { "epoch": 0.11340788975462511, "grad_norm": 2.0625, "learning_rate": 4.975404576941556e-06, "loss": 0.4375750541687012, "num_tokens": 10389086300.0, "step": 85060 }, { "epoch": 0.11343455514135321, "grad_norm": 1.6875, "learning_rate": 4.975306797630845e-06, "loss": 0.414255428314209, "num_tokens": 10391497997.0, "step": 85080 }, { "epoch": 0.11346122052808132, "grad_norm": 1.7421875, "learning_rate": 4.975208825308916e-06, "loss": 0.43006410598754885, "num_tokens": 10393999963.0, "step": 85100 }, { "epoch": 0.11348788591480942, "grad_norm": 2.125, "learning_rate": 4.975110659983405e-06, "loss": 0.4234933853149414, "num_tokens": 10396479777.0, "step": 85120 }, { "epoch": 0.11351455130153752, "grad_norm": 2.21875, "learning_rate": 4.9750123016619675e-06, "loss": 0.4273242950439453, "num_tokens": 10398830255.0, "step": 85140 }, { "epoch": 0.11354121668826563, "grad_norm": 1.75, "learning_rate": 4.974913750352275e-06, "loss": 0.4284353256225586, "num_tokens": 10401309595.0, "step": 85160 }, { "epoch": 0.11356788207499373, "grad_norm": 1.8984375, "learning_rate": 4.9748150060620085e-06, "loss": 0.42486023902893066, "num_tokens": 10403606561.0, "step": 85180 }, { "epoch": 0.11359454746172183, "grad_norm": 2.0, "learning_rate": 4.97471606879887e-06, "loss": 0.4102333068847656, "num_tokens": 10406016360.0, "step": 85200 }, { "epoch": 0.11362121284844993, "grad_norm": 2.546875, "learning_rate": 4.974616938570574e-06, "loss": 0.4138115406036377, "num_tokens": 10408300668.0, "step": 85220 }, { "epoch": 0.11364787823517805, "grad_norm": 1.828125, "learning_rate": 4.97451761538485e-06, "loss": 0.4056685924530029, "num_tokens": 10410828084.0, "step": 85240 }, { "epoch": 0.11367454362190615, "grad_norm": 2.140625, "learning_rate": 4.974418099249442e-06, "loss": 0.4026822566986084, "num_tokens": 10413107010.0, "step": 85260 }, { "epoch": 0.11370120900863426, "grad_norm": 2.03125, "learning_rate": 4.9743183901721094e-06, "loss": 0.4157055377960205, "num_tokens": 10415485873.0, "step": 85280 }, { "epoch": 0.11372787439536236, "grad_norm": 1.9765625, "learning_rate": 4.974218488160628e-06, "loss": 0.40959606170654295, "num_tokens": 10418146706.0, "step": 85300 }, { "epoch": 0.11375453978209046, "grad_norm": 2.1875, "learning_rate": 4.974118393222788e-06, "loss": 0.4290811538696289, "num_tokens": 10420556416.0, "step": 85320 }, { "epoch": 0.11378120516881857, "grad_norm": 2.09375, "learning_rate": 4.974018105366393e-06, "loss": 0.4293185234069824, "num_tokens": 10422841857.0, "step": 85340 }, { "epoch": 0.11380787055554667, "grad_norm": 2.078125, "learning_rate": 4.973917624599263e-06, "loss": 0.41332244873046875, "num_tokens": 10425284872.0, "step": 85360 }, { "epoch": 0.11383453594227477, "grad_norm": 2.40625, "learning_rate": 4.973816950929234e-06, "loss": 0.4308753967285156, "num_tokens": 10427597149.0, "step": 85380 }, { "epoch": 0.11386120132900288, "grad_norm": 1.8203125, "learning_rate": 4.973716084364155e-06, "loss": 0.4254601001739502, "num_tokens": 10430072797.0, "step": 85400 }, { "epoch": 0.11388786671573098, "grad_norm": 1.8203125, "learning_rate": 4.973615024911892e-06, "loss": 0.4212039947509766, "num_tokens": 10432495690.0, "step": 85420 }, { "epoch": 0.11391453210245908, "grad_norm": 1.953125, "learning_rate": 4.973513772580324e-06, "loss": 0.44051408767700195, "num_tokens": 10434789379.0, "step": 85440 }, { "epoch": 0.11394119748918718, "grad_norm": 1.875, "learning_rate": 4.973412327377347e-06, "loss": 0.4386460781097412, "num_tokens": 10437234922.0, "step": 85460 }, { "epoch": 0.11396786287591529, "grad_norm": 2.0, "learning_rate": 4.973310689310871e-06, "loss": 0.4212640762329102, "num_tokens": 10439770190.0, "step": 85480 }, { "epoch": 0.11399452826264339, "grad_norm": 2.0, "learning_rate": 4.97320885838882e-06, "loss": 0.43253068923950194, "num_tokens": 10442344208.0, "step": 85500 }, { "epoch": 0.1140211936493715, "grad_norm": 1.7109375, "learning_rate": 4.973106834619136e-06, "loss": 0.4260526657104492, "num_tokens": 10444665311.0, "step": 85520 }, { "epoch": 0.1140478590360996, "grad_norm": 1.890625, "learning_rate": 4.973004618009773e-06, "loss": 0.4314149856567383, "num_tokens": 10447122464.0, "step": 85540 }, { "epoch": 0.1140745244228277, "grad_norm": 2.25, "learning_rate": 4.972902208568703e-06, "loss": 0.416064453125, "num_tokens": 10449484087.0, "step": 85560 }, { "epoch": 0.1141011898095558, "grad_norm": 2.0625, "learning_rate": 4.97279960630391e-06, "loss": 0.4232898235321045, "num_tokens": 10451562750.0, "step": 85580 }, { "epoch": 0.1141278551962839, "grad_norm": 1.9375, "learning_rate": 4.972696811223393e-06, "loss": 0.4222073554992676, "num_tokens": 10454038897.0, "step": 85600 }, { "epoch": 0.11415452058301201, "grad_norm": 1.5078125, "learning_rate": 4.972593823335171e-06, "loss": 0.4203986644744873, "num_tokens": 10456549545.0, "step": 85620 }, { "epoch": 0.11418118596974013, "grad_norm": 1.8671875, "learning_rate": 4.97249064264727e-06, "loss": 0.4246562957763672, "num_tokens": 10458984117.0, "step": 85640 }, { "epoch": 0.11420785135646823, "grad_norm": 2.109375, "learning_rate": 4.97238726916774e-06, "loss": 0.4334193229675293, "num_tokens": 10461392474.0, "step": 85660 }, { "epoch": 0.11423451674319633, "grad_norm": 2.046875, "learning_rate": 4.972283702904638e-06, "loss": 0.43846492767333983, "num_tokens": 10463533872.0, "step": 85680 }, { "epoch": 0.11426118212992444, "grad_norm": 1.8984375, "learning_rate": 4.972179943866042e-06, "loss": 0.41452951431274415, "num_tokens": 10465956651.0, "step": 85700 }, { "epoch": 0.11428784751665254, "grad_norm": 2.484375, "learning_rate": 4.972075992060041e-06, "loss": 0.4201393127441406, "num_tokens": 10468380849.0, "step": 85720 }, { "epoch": 0.11431451290338064, "grad_norm": 1.9609375, "learning_rate": 4.971971847494741e-06, "loss": 0.42014002799987793, "num_tokens": 10470822811.0, "step": 85740 }, { "epoch": 0.11434117829010874, "grad_norm": 1.9140625, "learning_rate": 4.971867510178263e-06, "loss": 0.4337310791015625, "num_tokens": 10473138378.0, "step": 85760 }, { "epoch": 0.11436784367683685, "grad_norm": 1.8984375, "learning_rate": 4.971762980118742e-06, "loss": 0.41964082717895507, "num_tokens": 10475851202.0, "step": 85780 }, { "epoch": 0.11439450906356495, "grad_norm": 2.28125, "learning_rate": 4.97165825732433e-06, "loss": 0.4335034370422363, "num_tokens": 10478191481.0, "step": 85800 }, { "epoch": 0.11442117445029305, "grad_norm": 2.234375, "learning_rate": 4.9715533418031905e-06, "loss": 0.4049217700958252, "num_tokens": 10480620938.0, "step": 85820 }, { "epoch": 0.11444783983702116, "grad_norm": 1.7421875, "learning_rate": 4.971448233563506e-06, "loss": 0.4217867851257324, "num_tokens": 10483153038.0, "step": 85840 }, { "epoch": 0.11447450522374926, "grad_norm": 2.375, "learning_rate": 4.971342932613472e-06, "loss": 0.41942558288574217, "num_tokens": 10485534675.0, "step": 85860 }, { "epoch": 0.11450117061047736, "grad_norm": 1.6875, "learning_rate": 4.9712374389613e-06, "loss": 0.43121824264526365, "num_tokens": 10487997723.0, "step": 85880 }, { "epoch": 0.11452783599720547, "grad_norm": 1.765625, "learning_rate": 4.971131752615213e-06, "loss": 0.428707218170166, "num_tokens": 10490413461.0, "step": 85900 }, { "epoch": 0.11455450138393357, "grad_norm": 2.109375, "learning_rate": 4.971025873583456e-06, "loss": 0.42905559539794924, "num_tokens": 10492959290.0, "step": 85920 }, { "epoch": 0.11458116677066167, "grad_norm": 2.265625, "learning_rate": 4.970919801874282e-06, "loss": 0.4223925590515137, "num_tokens": 10495540393.0, "step": 85940 }, { "epoch": 0.11460783215738977, "grad_norm": 2.15625, "learning_rate": 4.9708135374959626e-06, "loss": 0.41083879470825196, "num_tokens": 10497960954.0, "step": 85960 }, { "epoch": 0.11463449754411788, "grad_norm": 1.671875, "learning_rate": 4.970707080456784e-06, "loss": 0.4350688934326172, "num_tokens": 10500679728.0, "step": 85980 }, { "epoch": 0.11466116293084598, "grad_norm": 2.203125, "learning_rate": 4.9706004307650465e-06, "loss": 0.41985502243041994, "num_tokens": 10503144525.0, "step": 86000 }, { "epoch": 0.11468782831757408, "grad_norm": 1.7578125, "learning_rate": 4.970493588429066e-06, "loss": 0.42697782516479493, "num_tokens": 10505703532.0, "step": 86020 }, { "epoch": 0.11471449370430219, "grad_norm": 2.640625, "learning_rate": 4.970386553457175e-06, "loss": 0.412900447845459, "num_tokens": 10508149646.0, "step": 86040 }, { "epoch": 0.1147411590910303, "grad_norm": 2.5, "learning_rate": 4.970279325857719e-06, "loss": 0.4319173812866211, "num_tokens": 10510494154.0, "step": 86060 }, { "epoch": 0.1147678244777584, "grad_norm": 2.390625, "learning_rate": 4.970171905639057e-06, "loss": 0.4300703048706055, "num_tokens": 10512907101.0, "step": 86080 }, { "epoch": 0.11479448986448651, "grad_norm": 1.9140625, "learning_rate": 4.9700642928095675e-06, "loss": 0.43769073486328125, "num_tokens": 10515331096.0, "step": 86100 }, { "epoch": 0.11482115525121461, "grad_norm": 2.015625, "learning_rate": 4.96995648737764e-06, "loss": 0.4109529972076416, "num_tokens": 10517943988.0, "step": 86120 }, { "epoch": 0.11484782063794272, "grad_norm": 1.8125, "learning_rate": 4.9698484893516824e-06, "loss": 0.42455339431762695, "num_tokens": 10520416348.0, "step": 86140 }, { "epoch": 0.11487448602467082, "grad_norm": 1.9765625, "learning_rate": 4.969740298740114e-06, "loss": 0.4189697265625, "num_tokens": 10522892140.0, "step": 86160 }, { "epoch": 0.11490115141139892, "grad_norm": 2.109375, "learning_rate": 4.9696319155513715e-06, "loss": 0.43535943031311036, "num_tokens": 10525551615.0, "step": 86180 }, { "epoch": 0.11492781679812702, "grad_norm": 2.28125, "learning_rate": 4.969523339793907e-06, "loss": 0.42255301475524903, "num_tokens": 10527935345.0, "step": 86200 }, { "epoch": 0.11495448218485513, "grad_norm": 2.328125, "learning_rate": 4.969414571476185e-06, "loss": 0.42502851486206056, "num_tokens": 10530354246.0, "step": 86220 }, { "epoch": 0.11498114757158323, "grad_norm": 2.0, "learning_rate": 4.969305610606687e-06, "loss": 0.4276304244995117, "num_tokens": 10532669336.0, "step": 86240 }, { "epoch": 0.11500781295831133, "grad_norm": 1.9140625, "learning_rate": 4.969196457193911e-06, "loss": 0.43032197952270507, "num_tokens": 10535123053.0, "step": 86260 }, { "epoch": 0.11503447834503944, "grad_norm": 1.7109375, "learning_rate": 4.969087111246365e-06, "loss": 0.41785659790039065, "num_tokens": 10537376024.0, "step": 86280 }, { "epoch": 0.11506114373176754, "grad_norm": 2.484375, "learning_rate": 4.968977572772579e-06, "loss": 0.4159992218017578, "num_tokens": 10539573080.0, "step": 86300 }, { "epoch": 0.11508780911849564, "grad_norm": 2.1875, "learning_rate": 4.968867841781092e-06, "loss": 0.42231087684631347, "num_tokens": 10542158571.0, "step": 86320 }, { "epoch": 0.11511447450522375, "grad_norm": 1.703125, "learning_rate": 4.96875791828046e-06, "loss": 0.41980457305908203, "num_tokens": 10544689185.0, "step": 86340 }, { "epoch": 0.11514113989195185, "grad_norm": 1.734375, "learning_rate": 4.968647802279255e-06, "loss": 0.44727363586425783, "num_tokens": 10547011280.0, "step": 86360 }, { "epoch": 0.11516780527867995, "grad_norm": 1.8515625, "learning_rate": 4.968537493786063e-06, "loss": 0.43217129707336427, "num_tokens": 10549398820.0, "step": 86380 }, { "epoch": 0.11519447066540806, "grad_norm": 2.09375, "learning_rate": 4.968426992809485e-06, "loss": 0.4258612632751465, "num_tokens": 10551832934.0, "step": 86400 }, { "epoch": 0.11522113605213616, "grad_norm": 2.21875, "learning_rate": 4.9683162993581385e-06, "loss": 0.42867794036865237, "num_tokens": 10554106556.0, "step": 86420 }, { "epoch": 0.11524780143886426, "grad_norm": 2.09375, "learning_rate": 4.968205413440653e-06, "loss": 0.4202416896820068, "num_tokens": 10556281849.0, "step": 86440 }, { "epoch": 0.11527446682559238, "grad_norm": 2.234375, "learning_rate": 4.968094335065677e-06, "loss": 0.4084596633911133, "num_tokens": 10558721755.0, "step": 86460 }, { "epoch": 0.11530113221232048, "grad_norm": 2.109375, "learning_rate": 4.9679830642418695e-06, "loss": 0.41956286430358886, "num_tokens": 10561218306.0, "step": 86480 }, { "epoch": 0.11532779759904858, "grad_norm": 2.171875, "learning_rate": 4.967871600977907e-06, "loss": 0.4343003273010254, "num_tokens": 10563717384.0, "step": 86500 }, { "epoch": 0.11535446298577669, "grad_norm": 1.6953125, "learning_rate": 4.967759945282483e-06, "loss": 0.41179823875427246, "num_tokens": 10566325633.0, "step": 86520 }, { "epoch": 0.11538112837250479, "grad_norm": 1.765625, "learning_rate": 4.9676480971643015e-06, "loss": 0.42277774810791013, "num_tokens": 10568789398.0, "step": 86540 }, { "epoch": 0.1154077937592329, "grad_norm": 2.296875, "learning_rate": 4.9675360566320854e-06, "loss": 0.42786345481872556, "num_tokens": 10571169457.0, "step": 86560 }, { "epoch": 0.115434459145961, "grad_norm": 2.078125, "learning_rate": 4.967423823694569e-06, "loss": 0.4341557025909424, "num_tokens": 10573834207.0, "step": 86580 }, { "epoch": 0.1154611245326891, "grad_norm": 2.078125, "learning_rate": 4.9673113983605065e-06, "loss": 0.4084153175354004, "num_tokens": 10576282806.0, "step": 86600 }, { "epoch": 0.1154877899194172, "grad_norm": 1.8359375, "learning_rate": 4.967198780638663e-06, "loss": 0.43336944580078124, "num_tokens": 10578630260.0, "step": 86620 }, { "epoch": 0.1155144553061453, "grad_norm": 1.7734375, "learning_rate": 4.9670859705378185e-06, "loss": 0.4328789710998535, "num_tokens": 10580982686.0, "step": 86640 }, { "epoch": 0.11554112069287341, "grad_norm": 1.71875, "learning_rate": 4.96697296806677e-06, "loss": 0.41362934112548827, "num_tokens": 10583449006.0, "step": 86660 }, { "epoch": 0.11556778607960151, "grad_norm": 1.671875, "learning_rate": 4.9668597732343295e-06, "loss": 0.43865094184875486, "num_tokens": 10585912914.0, "step": 86680 }, { "epoch": 0.11559445146632961, "grad_norm": 1.828125, "learning_rate": 4.966746386049323e-06, "loss": 0.41318845748901367, "num_tokens": 10588318883.0, "step": 86700 }, { "epoch": 0.11562111685305772, "grad_norm": 2.515625, "learning_rate": 4.966632806520593e-06, "loss": 0.42551460266113283, "num_tokens": 10590808640.0, "step": 86720 }, { "epoch": 0.11564778223978582, "grad_norm": 1.90625, "learning_rate": 4.9665190346569935e-06, "loss": 0.4150669574737549, "num_tokens": 10593251551.0, "step": 86740 }, { "epoch": 0.11567444762651392, "grad_norm": 1.734375, "learning_rate": 4.966405070467397e-06, "loss": 0.42637929916381834, "num_tokens": 10595710450.0, "step": 86760 }, { "epoch": 0.11570111301324203, "grad_norm": 2.28125, "learning_rate": 4.966290913960691e-06, "loss": 0.41818857192993164, "num_tokens": 10598006247.0, "step": 86780 }, { "epoch": 0.11572777839997013, "grad_norm": 2.34375, "learning_rate": 4.966176565145775e-06, "loss": 0.4268838882446289, "num_tokens": 10600359147.0, "step": 86800 }, { "epoch": 0.11575444378669823, "grad_norm": 2.21875, "learning_rate": 4.966062024031565e-06, "loss": 0.444088077545166, "num_tokens": 10602987046.0, "step": 86820 }, { "epoch": 0.11578110917342634, "grad_norm": 2.109375, "learning_rate": 4.9659472906269945e-06, "loss": 0.41409592628479003, "num_tokens": 10605720325.0, "step": 86840 }, { "epoch": 0.11580777456015445, "grad_norm": 1.984375, "learning_rate": 4.965832364941008e-06, "loss": 0.4125401973724365, "num_tokens": 10608202882.0, "step": 86860 }, { "epoch": 0.11583443994688256, "grad_norm": 2.203125, "learning_rate": 4.965717246982567e-06, "loss": 0.4309269905090332, "num_tokens": 10610517095.0, "step": 86880 }, { "epoch": 0.11586110533361066, "grad_norm": 1.8203125, "learning_rate": 4.965601936760649e-06, "loss": 0.43869881629943847, "num_tokens": 10613024858.0, "step": 86900 }, { "epoch": 0.11588777072033876, "grad_norm": 2.078125, "learning_rate": 4.965486434284244e-06, "loss": 0.4054402351379395, "num_tokens": 10615507217.0, "step": 86920 }, { "epoch": 0.11591443610706686, "grad_norm": 2.140625, "learning_rate": 4.9653707395623585e-06, "loss": 0.4233902931213379, "num_tokens": 10617884812.0, "step": 86940 }, { "epoch": 0.11594110149379497, "grad_norm": 1.734375, "learning_rate": 4.965254852604013e-06, "loss": 0.43230018615722654, "num_tokens": 10620513242.0, "step": 86960 }, { "epoch": 0.11596776688052307, "grad_norm": 1.765625, "learning_rate": 4.965138773418246e-06, "loss": 0.43125147819519044, "num_tokens": 10622875490.0, "step": 86980 }, { "epoch": 0.11599443226725117, "grad_norm": 2.171875, "learning_rate": 4.965022502014108e-06, "loss": 0.42845911979675294, "num_tokens": 10625420563.0, "step": 87000 }, { "epoch": 0.11602109765397928, "grad_norm": 2.265625, "learning_rate": 4.964906038400663e-06, "loss": 0.42713184356689454, "num_tokens": 10627863194.0, "step": 87020 }, { "epoch": 0.11604776304070738, "grad_norm": 2.28125, "learning_rate": 4.964789382586996e-06, "loss": 0.42472271919250487, "num_tokens": 10630619752.0, "step": 87040 }, { "epoch": 0.11607442842743548, "grad_norm": 1.8984375, "learning_rate": 4.9646725345821985e-06, "loss": 0.41933860778808596, "num_tokens": 10632870355.0, "step": 87060 }, { "epoch": 0.11610109381416359, "grad_norm": 1.8828125, "learning_rate": 4.964555494395385e-06, "loss": 0.41190624237060547, "num_tokens": 10635373610.0, "step": 87080 }, { "epoch": 0.11612775920089169, "grad_norm": 1.671875, "learning_rate": 4.964438262035682e-06, "loss": 0.42145528793334963, "num_tokens": 10637707769.0, "step": 87100 }, { "epoch": 0.11615442458761979, "grad_norm": 2.234375, "learning_rate": 4.964320837512229e-06, "loss": 0.4280824661254883, "num_tokens": 10640226684.0, "step": 87120 }, { "epoch": 0.1161810899743479, "grad_norm": 2.21875, "learning_rate": 4.964203220834182e-06, "loss": 0.42693305015563965, "num_tokens": 10642714885.0, "step": 87140 }, { "epoch": 0.116207755361076, "grad_norm": 2.015625, "learning_rate": 4.964085412010714e-06, "loss": 0.42570905685424804, "num_tokens": 10645489478.0, "step": 87160 }, { "epoch": 0.1162344207478041, "grad_norm": 1.65625, "learning_rate": 4.963967411051008e-06, "loss": 0.43485174179077146, "num_tokens": 10648061776.0, "step": 87180 }, { "epoch": 0.1162610861345322, "grad_norm": 1.8359375, "learning_rate": 4.963849217964269e-06, "loss": 0.43869795799255373, "num_tokens": 10650672811.0, "step": 87200 }, { "epoch": 0.11628775152126031, "grad_norm": 2.09375, "learning_rate": 4.96373083275971e-06, "loss": 0.42334756851196287, "num_tokens": 10653287250.0, "step": 87220 }, { "epoch": 0.11631441690798841, "grad_norm": 2.0625, "learning_rate": 4.963612255446564e-06, "loss": 0.41795997619628905, "num_tokens": 10655596730.0, "step": 87240 }, { "epoch": 0.11634108229471651, "grad_norm": 2.15625, "learning_rate": 4.963493486034076e-06, "loss": 0.43275146484375, "num_tokens": 10658025619.0, "step": 87260 }, { "epoch": 0.11636774768144463, "grad_norm": 1.6484375, "learning_rate": 4.963374524531506e-06, "loss": 0.4129335403442383, "num_tokens": 10660652736.0, "step": 87280 }, { "epoch": 0.11639441306817273, "grad_norm": 1.8203125, "learning_rate": 4.9632553709481315e-06, "loss": 0.41641759872436523, "num_tokens": 10662829754.0, "step": 87300 }, { "epoch": 0.11642107845490084, "grad_norm": 2.328125, "learning_rate": 4.963136025293243e-06, "loss": 0.4110379695892334, "num_tokens": 10665247755.0, "step": 87320 }, { "epoch": 0.11644774384162894, "grad_norm": 2.09375, "learning_rate": 4.9630164875761475e-06, "loss": 0.44703073501586915, "num_tokens": 10667765110.0, "step": 87340 }, { "epoch": 0.11647440922835704, "grad_norm": 2.078125, "learning_rate": 4.962896757806165e-06, "loss": 0.4398829460144043, "num_tokens": 10670073935.0, "step": 87360 }, { "epoch": 0.11650107461508515, "grad_norm": 2.296875, "learning_rate": 4.9627768359926295e-06, "loss": 0.41952314376831057, "num_tokens": 10672483951.0, "step": 87380 }, { "epoch": 0.11652774000181325, "grad_norm": 1.7578125, "learning_rate": 4.9626567221448954e-06, "loss": 0.43453178405761717, "num_tokens": 10674738492.0, "step": 87400 }, { "epoch": 0.11655440538854135, "grad_norm": 2.375, "learning_rate": 4.9625364162723265e-06, "loss": 0.418101167678833, "num_tokens": 10676948126.0, "step": 87420 }, { "epoch": 0.11658107077526945, "grad_norm": 2.234375, "learning_rate": 4.9624159183843036e-06, "loss": 0.41953105926513673, "num_tokens": 10679355533.0, "step": 87440 }, { "epoch": 0.11660773616199756, "grad_norm": 2.265625, "learning_rate": 4.962295228490223e-06, "loss": 0.4367386341094971, "num_tokens": 10681761742.0, "step": 87460 }, { "epoch": 0.11663440154872566, "grad_norm": 2.109375, "learning_rate": 4.962174346599496e-06, "loss": 0.4126895904541016, "num_tokens": 10684084505.0, "step": 87480 }, { "epoch": 0.11666106693545376, "grad_norm": 2.25, "learning_rate": 4.962053272721546e-06, "loss": 0.4074286937713623, "num_tokens": 10686647813.0, "step": 87500 }, { "epoch": 0.11668773232218187, "grad_norm": 2.078125, "learning_rate": 4.961932006865817e-06, "loss": 0.4055479049682617, "num_tokens": 10689259508.0, "step": 87520 }, { "epoch": 0.11671439770890997, "grad_norm": 1.9921875, "learning_rate": 4.961810549041762e-06, "loss": 0.41475443840026854, "num_tokens": 10691645407.0, "step": 87540 }, { "epoch": 0.11674106309563807, "grad_norm": 2.03125, "learning_rate": 4.961688899258852e-06, "loss": 0.4223884105682373, "num_tokens": 10694071398.0, "step": 87560 }, { "epoch": 0.11676772848236618, "grad_norm": 1.78125, "learning_rate": 4.9615670575265744e-06, "loss": 0.4239207744598389, "num_tokens": 10696648320.0, "step": 87580 }, { "epoch": 0.11679439386909428, "grad_norm": 1.875, "learning_rate": 4.961445023854428e-06, "loss": 0.41633243560791017, "num_tokens": 10699278320.0, "step": 87600 }, { "epoch": 0.11682105925582238, "grad_norm": 2.0625, "learning_rate": 4.961322798251929e-06, "loss": 0.43928937911987304, "num_tokens": 10701716718.0, "step": 87620 }, { "epoch": 0.11684772464255049, "grad_norm": 2.6875, "learning_rate": 4.961200380728608e-06, "loss": 0.41866655349731446, "num_tokens": 10704014345.0, "step": 87640 }, { "epoch": 0.11687439002927859, "grad_norm": 1.78125, "learning_rate": 4.96107777129401e-06, "loss": 0.4157554626464844, "num_tokens": 10706171214.0, "step": 87660 }, { "epoch": 0.1169010554160067, "grad_norm": 1.859375, "learning_rate": 4.960954969957695e-06, "loss": 0.4137465476989746, "num_tokens": 10708729008.0, "step": 87680 }, { "epoch": 0.11692772080273481, "grad_norm": 2.78125, "learning_rate": 4.96083197672924e-06, "loss": 0.4264528274536133, "num_tokens": 10711104123.0, "step": 87700 }, { "epoch": 0.11695438618946291, "grad_norm": 1.65625, "learning_rate": 4.960708791618235e-06, "loss": 0.4126791477203369, "num_tokens": 10713442948.0, "step": 87720 }, { "epoch": 0.11698105157619101, "grad_norm": 1.9921875, "learning_rate": 4.960585414634284e-06, "loss": 0.43145132064819336, "num_tokens": 10715735879.0, "step": 87740 }, { "epoch": 0.11700771696291912, "grad_norm": 1.9296875, "learning_rate": 4.960461845787008e-06, "loss": 0.43723068237304685, "num_tokens": 10718258070.0, "step": 87760 }, { "epoch": 0.11703438234964722, "grad_norm": 1.875, "learning_rate": 4.960338085086042e-06, "loss": 0.3982025146484375, "num_tokens": 10720836502.0, "step": 87780 }, { "epoch": 0.11706104773637532, "grad_norm": 1.9140625, "learning_rate": 4.960214132541037e-06, "loss": 0.4175272464752197, "num_tokens": 10723455173.0, "step": 87800 }, { "epoch": 0.11708771312310343, "grad_norm": 2.328125, "learning_rate": 4.960089988161658e-06, "loss": 0.4087533950805664, "num_tokens": 10725898445.0, "step": 87820 }, { "epoch": 0.11711437850983153, "grad_norm": 2.421875, "learning_rate": 4.959965651957584e-06, "loss": 0.41177215576171877, "num_tokens": 10728179539.0, "step": 87840 }, { "epoch": 0.11714104389655963, "grad_norm": 2.546875, "learning_rate": 4.9598411239385105e-06, "loss": 0.4207442283630371, "num_tokens": 10730432363.0, "step": 87860 }, { "epoch": 0.11716770928328774, "grad_norm": 1.7578125, "learning_rate": 4.959716404114149e-06, "loss": 0.4073984622955322, "num_tokens": 10732920069.0, "step": 87880 }, { "epoch": 0.11719437467001584, "grad_norm": 2.609375, "learning_rate": 4.9595914924942225e-06, "loss": 0.44229917526245116, "num_tokens": 10735385742.0, "step": 87900 }, { "epoch": 0.11722104005674394, "grad_norm": 2.390625, "learning_rate": 4.959466389088472e-06, "loss": 0.4295417308807373, "num_tokens": 10737810757.0, "step": 87920 }, { "epoch": 0.11724770544347204, "grad_norm": 2.171875, "learning_rate": 4.959341093906652e-06, "loss": 0.42212510108947754, "num_tokens": 10740218050.0, "step": 87940 }, { "epoch": 0.11727437083020015, "grad_norm": 1.7265625, "learning_rate": 4.959215606958533e-06, "loss": 0.4282970428466797, "num_tokens": 10742636240.0, "step": 87960 }, { "epoch": 0.11730103621692825, "grad_norm": 2.046875, "learning_rate": 4.959089928253899e-06, "loss": 0.4128757953643799, "num_tokens": 10745126834.0, "step": 87980 }, { "epoch": 0.11732770160365635, "grad_norm": 1.7890625, "learning_rate": 4.958964057802551e-06, "loss": 0.4556132316589355, "num_tokens": 10747709377.0, "step": 88000 }, { "epoch": 0.11735436699038446, "grad_norm": 1.9765625, "learning_rate": 4.958837995614302e-06, "loss": 0.42821393013000486, "num_tokens": 10750303997.0, "step": 88020 }, { "epoch": 0.11738103237711256, "grad_norm": 2.34375, "learning_rate": 4.9587117416989815e-06, "loss": 0.41671004295349123, "num_tokens": 10752852769.0, "step": 88040 }, { "epoch": 0.11740769776384066, "grad_norm": 2.140625, "learning_rate": 4.958585296066436e-06, "loss": 0.43038620948791506, "num_tokens": 10755429195.0, "step": 88060 }, { "epoch": 0.11743436315056878, "grad_norm": 1.859375, "learning_rate": 4.9584586587265245e-06, "loss": 0.40627493858337405, "num_tokens": 10758130677.0, "step": 88080 }, { "epoch": 0.11746102853729688, "grad_norm": 2.203125, "learning_rate": 4.958331829689121e-06, "loss": 0.43012475967407227, "num_tokens": 10760491669.0, "step": 88100 }, { "epoch": 0.11748769392402499, "grad_norm": 2.0625, "learning_rate": 4.958204808964114e-06, "loss": 0.42222113609313966, "num_tokens": 10762800155.0, "step": 88120 }, { "epoch": 0.11751435931075309, "grad_norm": 2.4375, "learning_rate": 4.9580775965614105e-06, "loss": 0.42680673599243163, "num_tokens": 10765080070.0, "step": 88140 }, { "epoch": 0.11754102469748119, "grad_norm": 1.78125, "learning_rate": 4.957950192490927e-06, "loss": 0.4276179313659668, "num_tokens": 10767569153.0, "step": 88160 }, { "epoch": 0.1175676900842093, "grad_norm": 2.15625, "learning_rate": 4.9578225967626e-06, "loss": 0.44031729698181155, "num_tokens": 10769872507.0, "step": 88180 }, { "epoch": 0.1175943554709374, "grad_norm": 2.09375, "learning_rate": 4.9576948093863776e-06, "loss": 0.4261771202087402, "num_tokens": 10772081885.0, "step": 88200 }, { "epoch": 0.1176210208576655, "grad_norm": 2.265625, "learning_rate": 4.957566830372224e-06, "loss": 0.4142916679382324, "num_tokens": 10774549676.0, "step": 88220 }, { "epoch": 0.1176476862443936, "grad_norm": 2.265625, "learning_rate": 4.957438659730119e-06, "loss": 0.4230339050292969, "num_tokens": 10776890492.0, "step": 88240 }, { "epoch": 0.1176743516311217, "grad_norm": 1.8203125, "learning_rate": 4.957310297470056e-06, "loss": 0.4296834945678711, "num_tokens": 10779228198.0, "step": 88260 }, { "epoch": 0.11770101701784981, "grad_norm": 1.8671875, "learning_rate": 4.957181743602044e-06, "loss": 0.419095516204834, "num_tokens": 10781562479.0, "step": 88280 }, { "epoch": 0.11772768240457791, "grad_norm": 2.265625, "learning_rate": 4.957052998136107e-06, "loss": 0.43209104537963866, "num_tokens": 10784046239.0, "step": 88300 }, { "epoch": 0.11775434779130602, "grad_norm": 1.1640625, "learning_rate": 4.956924061082284e-06, "loss": 0.4175095081329346, "num_tokens": 10786532657.0, "step": 88320 }, { "epoch": 0.11778101317803412, "grad_norm": 1.7734375, "learning_rate": 4.9567949324506295e-06, "loss": 0.4216745853424072, "num_tokens": 10789035613.0, "step": 88340 }, { "epoch": 0.11780767856476222, "grad_norm": 1.7734375, "learning_rate": 4.956665612251211e-06, "loss": 0.43213415145874023, "num_tokens": 10791839473.0, "step": 88360 }, { "epoch": 0.11783434395149033, "grad_norm": 1.6640625, "learning_rate": 4.956536100494114e-06, "loss": 0.43983612060546873, "num_tokens": 10794154712.0, "step": 88380 }, { "epoch": 0.11786100933821843, "grad_norm": 2.265625, "learning_rate": 4.956406397189435e-06, "loss": 0.4401700496673584, "num_tokens": 10796597458.0, "step": 88400 }, { "epoch": 0.11788767472494653, "grad_norm": 2.28125, "learning_rate": 4.956276502347288e-06, "loss": 0.4149295806884766, "num_tokens": 10798855864.0, "step": 88420 }, { "epoch": 0.11791434011167463, "grad_norm": 1.515625, "learning_rate": 4.956146415977803e-06, "loss": 0.4174777030944824, "num_tokens": 10801639566.0, "step": 88440 }, { "epoch": 0.11794100549840274, "grad_norm": 1.6640625, "learning_rate": 4.956016138091122e-06, "loss": 0.41597728729248046, "num_tokens": 10804202137.0, "step": 88460 }, { "epoch": 0.11796767088513084, "grad_norm": 2.046875, "learning_rate": 4.955885668697404e-06, "loss": 0.45215559005737305, "num_tokens": 10806780783.0, "step": 88480 }, { "epoch": 0.11799433627185896, "grad_norm": 2.046875, "learning_rate": 4.955755007806822e-06, "loss": 0.4155181884765625, "num_tokens": 10809115584.0, "step": 88500 }, { "epoch": 0.11802100165858706, "grad_norm": 2.046875, "learning_rate": 4.955624155429564e-06, "loss": 0.41077961921691897, "num_tokens": 10811584315.0, "step": 88520 }, { "epoch": 0.11804766704531516, "grad_norm": 2.03125, "learning_rate": 4.9554931115758345e-06, "loss": 0.4052751541137695, "num_tokens": 10814182001.0, "step": 88540 }, { "epoch": 0.11807433243204327, "grad_norm": 2.34375, "learning_rate": 4.95536187625585e-06, "loss": 0.42882471084594725, "num_tokens": 10816635398.0, "step": 88560 }, { "epoch": 0.11810099781877137, "grad_norm": 2.265625, "learning_rate": 4.955230449479844e-06, "loss": 0.4376533985137939, "num_tokens": 10819187566.0, "step": 88580 }, { "epoch": 0.11812766320549947, "grad_norm": 1.9921875, "learning_rate": 4.955098831258065e-06, "loss": 0.4312902450561523, "num_tokens": 10821617127.0, "step": 88600 }, { "epoch": 0.11815432859222758, "grad_norm": 2.390625, "learning_rate": 4.9549670216007764e-06, "loss": 0.41811213493347166, "num_tokens": 10824174382.0, "step": 88620 }, { "epoch": 0.11818099397895568, "grad_norm": 1.671875, "learning_rate": 4.954835020518254e-06, "loss": 0.40468320846557615, "num_tokens": 10826830395.0, "step": 88640 }, { "epoch": 0.11820765936568378, "grad_norm": 1.671875, "learning_rate": 4.954702828020792e-06, "loss": 0.4191885471343994, "num_tokens": 10829216634.0, "step": 88660 }, { "epoch": 0.11823432475241188, "grad_norm": 1.8359375, "learning_rate": 4.954570444118698e-06, "loss": 0.42986268997192384, "num_tokens": 10831820801.0, "step": 88680 }, { "epoch": 0.11826099013913999, "grad_norm": 1.84375, "learning_rate": 4.954437868822295e-06, "loss": 0.4296908378601074, "num_tokens": 10834343620.0, "step": 88700 }, { "epoch": 0.11828765552586809, "grad_norm": 2.0625, "learning_rate": 4.954305102141919e-06, "loss": 0.4253106117248535, "num_tokens": 10836698986.0, "step": 88720 }, { "epoch": 0.1183143209125962, "grad_norm": 1.8046875, "learning_rate": 4.954172144087923e-06, "loss": 0.4216305732727051, "num_tokens": 10839286469.0, "step": 88740 }, { "epoch": 0.1183409862993243, "grad_norm": 2.21875, "learning_rate": 4.954038994670675e-06, "loss": 0.42993783950805664, "num_tokens": 10842029455.0, "step": 88760 }, { "epoch": 0.1183676516860524, "grad_norm": 1.75, "learning_rate": 4.953905653900557e-06, "loss": 0.4286780834197998, "num_tokens": 10844369887.0, "step": 88780 }, { "epoch": 0.1183943170727805, "grad_norm": 2.609375, "learning_rate": 4.953772121787966e-06, "loss": 0.4092047691345215, "num_tokens": 10846768822.0, "step": 88800 }, { "epoch": 0.1184209824595086, "grad_norm": 2.203125, "learning_rate": 4.953638398343314e-06, "loss": 0.4341732025146484, "num_tokens": 10849195783.0, "step": 88820 }, { "epoch": 0.11844764784623671, "grad_norm": 2.03125, "learning_rate": 4.953504483577029e-06, "loss": 0.4267268180847168, "num_tokens": 10851515307.0, "step": 88840 }, { "epoch": 0.11847431323296481, "grad_norm": 1.9453125, "learning_rate": 4.953370377499552e-06, "loss": 0.43160834312438967, "num_tokens": 10853913721.0, "step": 88860 }, { "epoch": 0.11850097861969291, "grad_norm": 1.9453125, "learning_rate": 4.95323608012134e-06, "loss": 0.41365461349487304, "num_tokens": 10856325767.0, "step": 88880 }, { "epoch": 0.11852764400642103, "grad_norm": 1.8046875, "learning_rate": 4.953101591452865e-06, "loss": 0.4315023422241211, "num_tokens": 10858843487.0, "step": 88900 }, { "epoch": 0.11855430939314913, "grad_norm": 2.015625, "learning_rate": 4.9529669115046146e-06, "loss": 0.4147578239440918, "num_tokens": 10861046589.0, "step": 88920 }, { "epoch": 0.11858097477987724, "grad_norm": 2.125, "learning_rate": 4.952832040287088e-06, "loss": 0.4103302001953125, "num_tokens": 10863416652.0, "step": 88940 }, { "epoch": 0.11860764016660534, "grad_norm": 1.4921875, "learning_rate": 4.952696977810803e-06, "loss": 0.4396174430847168, "num_tokens": 10865744366.0, "step": 88960 }, { "epoch": 0.11863430555333344, "grad_norm": 2.078125, "learning_rate": 4.9525617240862935e-06, "loss": 0.43988771438598634, "num_tokens": 10868198143.0, "step": 88980 }, { "epoch": 0.11866097094006155, "grad_norm": 1.8671875, "learning_rate": 4.952426279124102e-06, "loss": 0.43432817459106443, "num_tokens": 10870638412.0, "step": 89000 }, { "epoch": 0.11868763632678965, "grad_norm": 2.03125, "learning_rate": 4.952290642934792e-06, "loss": 0.4205946922302246, "num_tokens": 10873064990.0, "step": 89020 }, { "epoch": 0.11871430171351775, "grad_norm": 1.640625, "learning_rate": 4.952154815528938e-06, "loss": 0.4263114929199219, "num_tokens": 10875530551.0, "step": 89040 }, { "epoch": 0.11874096710024586, "grad_norm": 2.046875, "learning_rate": 4.952018796917133e-06, "loss": 0.4094329833984375, "num_tokens": 10877914308.0, "step": 89060 }, { "epoch": 0.11876763248697396, "grad_norm": 1.8671875, "learning_rate": 4.951882587109982e-06, "loss": 0.413545036315918, "num_tokens": 10880078694.0, "step": 89080 }, { "epoch": 0.11879429787370206, "grad_norm": 2.078125, "learning_rate": 4.951746186118106e-06, "loss": 0.4201992988586426, "num_tokens": 10882352417.0, "step": 89100 }, { "epoch": 0.11882096326043017, "grad_norm": 2.25, "learning_rate": 4.951609593952141e-06, "loss": 0.428102970123291, "num_tokens": 10884779475.0, "step": 89120 }, { "epoch": 0.11884762864715827, "grad_norm": 2.078125, "learning_rate": 4.951472810622738e-06, "loss": 0.418184232711792, "num_tokens": 10887212818.0, "step": 89140 }, { "epoch": 0.11887429403388637, "grad_norm": 1.9609375, "learning_rate": 4.951335836140562e-06, "loss": 0.4244973659515381, "num_tokens": 10889750372.0, "step": 89160 }, { "epoch": 0.11890095942061447, "grad_norm": 1.9453125, "learning_rate": 4.951198670516294e-06, "loss": 0.4219141960144043, "num_tokens": 10892168110.0, "step": 89180 }, { "epoch": 0.11892762480734258, "grad_norm": 1.8359375, "learning_rate": 4.951061313760629e-06, "loss": 0.41761436462402346, "num_tokens": 10894606595.0, "step": 89200 }, { "epoch": 0.11895429019407068, "grad_norm": 2.125, "learning_rate": 4.950923765884278e-06, "loss": 0.42861123085021974, "num_tokens": 10897136925.0, "step": 89220 }, { "epoch": 0.11898095558079878, "grad_norm": 2.09375, "learning_rate": 4.950786026897966e-06, "loss": 0.42173118591308595, "num_tokens": 10899613868.0, "step": 89240 }, { "epoch": 0.11900762096752689, "grad_norm": 2.03125, "learning_rate": 4.950648096812433e-06, "loss": 0.42010135650634767, "num_tokens": 10902010553.0, "step": 89260 }, { "epoch": 0.11903428635425499, "grad_norm": 2.03125, "learning_rate": 4.950509975638433e-06, "loss": 0.4183784484863281, "num_tokens": 10904462730.0, "step": 89280 }, { "epoch": 0.1190609517409831, "grad_norm": 2.453125, "learning_rate": 4.950371663386739e-06, "loss": 0.42471466064453123, "num_tokens": 10906618759.0, "step": 89300 }, { "epoch": 0.11908761712771121, "grad_norm": 2.09375, "learning_rate": 4.950233160068132e-06, "loss": 0.41305365562438967, "num_tokens": 10909130080.0, "step": 89320 }, { "epoch": 0.11911428251443931, "grad_norm": 2.046875, "learning_rate": 4.950094465693413e-06, "loss": 0.42038259506225584, "num_tokens": 10911460767.0, "step": 89340 }, { "epoch": 0.11914094790116742, "grad_norm": 2.15625, "learning_rate": 4.9499555802734e-06, "loss": 0.43126835823059084, "num_tokens": 10913895935.0, "step": 89360 }, { "epoch": 0.11916761328789552, "grad_norm": 2.46875, "learning_rate": 4.949816503818918e-06, "loss": 0.40686564445495604, "num_tokens": 10916128795.0, "step": 89380 }, { "epoch": 0.11919427867462362, "grad_norm": 1.75, "learning_rate": 4.949677236340813e-06, "loss": 0.4207571029663086, "num_tokens": 10918584858.0, "step": 89400 }, { "epoch": 0.11922094406135172, "grad_norm": 1.8046875, "learning_rate": 4.949537777849945e-06, "loss": 0.41176233291625974, "num_tokens": 10921183786.0, "step": 89420 }, { "epoch": 0.11924760944807983, "grad_norm": 1.9140625, "learning_rate": 4.949398128357189e-06, "loss": 0.4265762805938721, "num_tokens": 10923862685.0, "step": 89440 }, { "epoch": 0.11927427483480793, "grad_norm": 2.125, "learning_rate": 4.949258287873432e-06, "loss": 0.4226893424987793, "num_tokens": 10926502038.0, "step": 89460 }, { "epoch": 0.11930094022153603, "grad_norm": 1.5078125, "learning_rate": 4.9491182564095785e-06, "loss": 0.42752251625061033, "num_tokens": 10928880198.0, "step": 89480 }, { "epoch": 0.11932760560826414, "grad_norm": 1.6015625, "learning_rate": 4.948978033976548e-06, "loss": 0.4247902393341064, "num_tokens": 10931132985.0, "step": 89500 }, { "epoch": 0.11935427099499224, "grad_norm": 1.6953125, "learning_rate": 4.948837620585275e-06, "loss": 0.42109055519104005, "num_tokens": 10933748903.0, "step": 89520 }, { "epoch": 0.11938093638172034, "grad_norm": 2.328125, "learning_rate": 4.948697016246707e-06, "loss": 0.40883784294128417, "num_tokens": 10936476336.0, "step": 89540 }, { "epoch": 0.11940760176844845, "grad_norm": 2.171875, "learning_rate": 4.948556220971807e-06, "loss": 0.420766019821167, "num_tokens": 10938985175.0, "step": 89560 }, { "epoch": 0.11943426715517655, "grad_norm": 2.1875, "learning_rate": 4.9484152347715554e-06, "loss": 0.42612619400024415, "num_tokens": 10941683157.0, "step": 89580 }, { "epoch": 0.11946093254190465, "grad_norm": 2.109375, "learning_rate": 4.948274057656945e-06, "loss": 0.42470130920410154, "num_tokens": 10944312472.0, "step": 89600 }, { "epoch": 0.11948759792863275, "grad_norm": 1.890625, "learning_rate": 4.948132689638983e-06, "loss": 0.43294510841369627, "num_tokens": 10946775334.0, "step": 89620 }, { "epoch": 0.11951426331536086, "grad_norm": 1.625, "learning_rate": 4.947991130728693e-06, "loss": 0.4293182373046875, "num_tokens": 10949297685.0, "step": 89640 }, { "epoch": 0.11954092870208896, "grad_norm": 1.765625, "learning_rate": 4.9478493809371135e-06, "loss": 0.42641487121582033, "num_tokens": 10951708548.0, "step": 89660 }, { "epoch": 0.11956759408881706, "grad_norm": 2.15625, "learning_rate": 4.947707440275296e-06, "loss": 0.41490678787231444, "num_tokens": 10953862247.0, "step": 89680 }, { "epoch": 0.11959425947554517, "grad_norm": 1.828125, "learning_rate": 4.94756530875431e-06, "loss": 0.439973258972168, "num_tokens": 10956397773.0, "step": 89700 }, { "epoch": 0.11962092486227328, "grad_norm": 1.8671875, "learning_rate": 4.947422986385238e-06, "loss": 0.4282082080841064, "num_tokens": 10958923449.0, "step": 89720 }, { "epoch": 0.11964759024900139, "grad_norm": 2.328125, "learning_rate": 4.947280473179176e-06, "loss": 0.42008271217346194, "num_tokens": 10961189741.0, "step": 89740 }, { "epoch": 0.11967425563572949, "grad_norm": 2.03125, "learning_rate": 4.947137769147238e-06, "loss": 0.434444522857666, "num_tokens": 10963606127.0, "step": 89760 }, { "epoch": 0.1197009210224576, "grad_norm": 1.9921875, "learning_rate": 4.94699487430055e-06, "loss": 0.4236310958862305, "num_tokens": 10965926086.0, "step": 89780 }, { "epoch": 0.1197275864091857, "grad_norm": 1.53125, "learning_rate": 4.946851788650255e-06, "loss": 0.411912727355957, "num_tokens": 10968378658.0, "step": 89800 }, { "epoch": 0.1197542517959138, "grad_norm": 2.234375, "learning_rate": 4.946708512207511e-06, "loss": 0.4266082286834717, "num_tokens": 10970907513.0, "step": 89820 }, { "epoch": 0.1197809171826419, "grad_norm": 2.21875, "learning_rate": 4.946565044983488e-06, "loss": 0.40895786285400393, "num_tokens": 10973300839.0, "step": 89840 }, { "epoch": 0.11980758256937, "grad_norm": 1.9765625, "learning_rate": 4.9464213869893735e-06, "loss": 0.41567330360412597, "num_tokens": 10975613300.0, "step": 89860 }, { "epoch": 0.11983424795609811, "grad_norm": 1.5546875, "learning_rate": 4.94627753823637e-06, "loss": 0.4195283889770508, "num_tokens": 10977752501.0, "step": 89880 }, { "epoch": 0.11986091334282621, "grad_norm": 2.203125, "learning_rate": 4.946133498735692e-06, "loss": 0.4237341403961182, "num_tokens": 10980081855.0, "step": 89900 }, { "epoch": 0.11988757872955431, "grad_norm": 2.09375, "learning_rate": 4.945989268498575e-06, "loss": 0.43078269958496096, "num_tokens": 10982650169.0, "step": 89920 }, { "epoch": 0.11991424411628242, "grad_norm": 1.7890625, "learning_rate": 4.945844847536261e-06, "loss": 0.421937370300293, "num_tokens": 10985060285.0, "step": 89940 }, { "epoch": 0.11994090950301052, "grad_norm": 1.8359375, "learning_rate": 4.945700235860013e-06, "loss": 0.44106578826904297, "num_tokens": 10987457748.0, "step": 89960 }, { "epoch": 0.11996757488973862, "grad_norm": 1.8984375, "learning_rate": 4.945555433481106e-06, "loss": 0.4232639789581299, "num_tokens": 10989966794.0, "step": 89980 }, { "epoch": 0.11999424027646673, "grad_norm": 1.4765625, "learning_rate": 4.945410440410833e-06, "loss": 0.4033712387084961, "num_tokens": 10992482821.0, "step": 90000 }, { "epoch": 0.12002090566319483, "grad_norm": 1.5703125, "learning_rate": 4.945265256660498e-06, "loss": 0.41958794593811033, "num_tokens": 10994840329.0, "step": 90020 }, { "epoch": 0.12004757104992293, "grad_norm": 1.6015625, "learning_rate": 4.945119882241422e-06, "loss": 0.41642208099365235, "num_tokens": 10997292967.0, "step": 90040 }, { "epoch": 0.12007423643665104, "grad_norm": 1.6796875, "learning_rate": 4.944974317164941e-06, "loss": 0.4187636852264404, "num_tokens": 10999875897.0, "step": 90060 }, { "epoch": 0.12010090182337914, "grad_norm": 2.390625, "learning_rate": 4.9448285614424055e-06, "loss": 0.4219470977783203, "num_tokens": 11002187815.0, "step": 90080 }, { "epoch": 0.12012756721010724, "grad_norm": 1.6953125, "learning_rate": 4.94468261508518e-06, "loss": 0.42717952728271485, "num_tokens": 11004680165.0, "step": 90100 }, { "epoch": 0.12015423259683536, "grad_norm": 1.8203125, "learning_rate": 4.944536478104646e-06, "loss": 0.41521472930908204, "num_tokens": 11007169360.0, "step": 90120 }, { "epoch": 0.12018089798356346, "grad_norm": 1.8984375, "learning_rate": 4.944390150512196e-06, "loss": 0.40299310684204104, "num_tokens": 11009693225.0, "step": 90140 }, { "epoch": 0.12020756337029156, "grad_norm": 2.0625, "learning_rate": 4.944243632319242e-06, "loss": 0.4216022491455078, "num_tokens": 11012213423.0, "step": 90160 }, { "epoch": 0.12023422875701967, "grad_norm": 2.203125, "learning_rate": 4.944096923537208e-06, "loss": 0.4269230365753174, "num_tokens": 11014852553.0, "step": 90180 }, { "epoch": 0.12026089414374777, "grad_norm": 1.9453125, "learning_rate": 4.943950024177534e-06, "loss": 0.42448816299438474, "num_tokens": 11017171924.0, "step": 90200 }, { "epoch": 0.12028755953047587, "grad_norm": 1.7890625, "learning_rate": 4.9438029342516745e-06, "loss": 0.4277020454406738, "num_tokens": 11019688126.0, "step": 90220 }, { "epoch": 0.12031422491720398, "grad_norm": 2.109375, "learning_rate": 4.943655653771097e-06, "loss": 0.42741570472717283, "num_tokens": 11021955513.0, "step": 90240 }, { "epoch": 0.12034089030393208, "grad_norm": 2.359375, "learning_rate": 4.943508182747288e-06, "loss": 0.41202716827392577, "num_tokens": 11024495581.0, "step": 90260 }, { "epoch": 0.12036755569066018, "grad_norm": 2.046875, "learning_rate": 4.943360521191745e-06, "loss": 0.4203312873840332, "num_tokens": 11027021181.0, "step": 90280 }, { "epoch": 0.12039422107738829, "grad_norm": 2.21875, "learning_rate": 4.943212669115982e-06, "loss": 0.413228702545166, "num_tokens": 11029349961.0, "step": 90300 }, { "epoch": 0.12042088646411639, "grad_norm": 2.078125, "learning_rate": 4.943064626531529e-06, "loss": 0.4196938514709473, "num_tokens": 11031781936.0, "step": 90320 }, { "epoch": 0.12044755185084449, "grad_norm": 2.03125, "learning_rate": 4.942916393449928e-06, "loss": 0.4109355926513672, "num_tokens": 11034260354.0, "step": 90340 }, { "epoch": 0.1204742172375726, "grad_norm": 2.21875, "learning_rate": 4.942767969882738e-06, "loss": 0.4070257663726807, "num_tokens": 11036768580.0, "step": 90360 }, { "epoch": 0.1205008826243007, "grad_norm": 2.234375, "learning_rate": 4.942619355841532e-06, "loss": 0.4278407096862793, "num_tokens": 11039253445.0, "step": 90380 }, { "epoch": 0.1205275480110288, "grad_norm": 1.7578125, "learning_rate": 4.9424705513379e-06, "loss": 0.4071512699127197, "num_tokens": 11041740340.0, "step": 90400 }, { "epoch": 0.1205542133977569, "grad_norm": 2.390625, "learning_rate": 4.9423215563834424e-06, "loss": 0.4245782375335693, "num_tokens": 11044132023.0, "step": 90420 }, { "epoch": 0.12058087878448501, "grad_norm": 1.9296875, "learning_rate": 4.942172370989778e-06, "loss": 0.42073640823364256, "num_tokens": 11046621901.0, "step": 90440 }, { "epoch": 0.12060754417121311, "grad_norm": 1.875, "learning_rate": 4.942022995168539e-06, "loss": 0.4025142192840576, "num_tokens": 11048941929.0, "step": 90460 }, { "epoch": 0.12063420955794121, "grad_norm": 2.03125, "learning_rate": 4.941873428931374e-06, "loss": 0.42669248580932617, "num_tokens": 11051254054.0, "step": 90480 }, { "epoch": 0.12066087494466932, "grad_norm": 1.734375, "learning_rate": 4.941723672289946e-06, "loss": 0.4143815994262695, "num_tokens": 11053512570.0, "step": 90500 }, { "epoch": 0.12068754033139742, "grad_norm": 2.046875, "learning_rate": 4.941573725255931e-06, "loss": 0.42757220268249513, "num_tokens": 11056006197.0, "step": 90520 }, { "epoch": 0.12071420571812554, "grad_norm": 2.203125, "learning_rate": 4.94142358784102e-06, "loss": 0.42905402183532715, "num_tokens": 11058672723.0, "step": 90540 }, { "epoch": 0.12074087110485364, "grad_norm": 1.7890625, "learning_rate": 4.941273260056921e-06, "loss": 0.4117724418640137, "num_tokens": 11061254682.0, "step": 90560 }, { "epoch": 0.12076753649158174, "grad_norm": 2.421875, "learning_rate": 4.9411227419153566e-06, "loss": 0.41432862281799315, "num_tokens": 11063604791.0, "step": 90580 }, { "epoch": 0.12079420187830985, "grad_norm": 2.109375, "learning_rate": 4.940972033428062e-06, "loss": 0.43412766456604, "num_tokens": 11066117919.0, "step": 90600 }, { "epoch": 0.12082086726503795, "grad_norm": 2.09375, "learning_rate": 4.940821134606789e-06, "loss": 0.41640801429748536, "num_tokens": 11068483179.0, "step": 90620 }, { "epoch": 0.12084753265176605, "grad_norm": 1.5, "learning_rate": 4.940670045463305e-06, "loss": 0.43112797737121583, "num_tokens": 11070700765.0, "step": 90640 }, { "epoch": 0.12087419803849415, "grad_norm": 1.875, "learning_rate": 4.94051876600939e-06, "loss": 0.42610926628112794, "num_tokens": 11073185181.0, "step": 90660 }, { "epoch": 0.12090086342522226, "grad_norm": 2.09375, "learning_rate": 4.94036729625684e-06, "loss": 0.42583308219909666, "num_tokens": 11075532149.0, "step": 90680 }, { "epoch": 0.12092752881195036, "grad_norm": 1.9375, "learning_rate": 4.9402156362174665e-06, "loss": 0.41720871925354003, "num_tokens": 11078060489.0, "step": 90700 }, { "epoch": 0.12095419419867846, "grad_norm": 2.671875, "learning_rate": 4.940063785903094e-06, "loss": 0.4136996269226074, "num_tokens": 11080371911.0, "step": 90720 }, { "epoch": 0.12098085958540657, "grad_norm": 2.125, "learning_rate": 4.939911745325564e-06, "loss": 0.40971622467041013, "num_tokens": 11082938692.0, "step": 90740 }, { "epoch": 0.12100752497213467, "grad_norm": 1.953125, "learning_rate": 4.939759514496732e-06, "loss": 0.4333306312561035, "num_tokens": 11085379456.0, "step": 90760 }, { "epoch": 0.12103419035886277, "grad_norm": 1.7109375, "learning_rate": 4.939607093428468e-06, "loss": 0.41133790016174315, "num_tokens": 11087900656.0, "step": 90780 }, { "epoch": 0.12106085574559088, "grad_norm": 2.125, "learning_rate": 4.939454482132655e-06, "loss": 0.4293020248413086, "num_tokens": 11090221386.0, "step": 90800 }, { "epoch": 0.12108752113231898, "grad_norm": 1.6328125, "learning_rate": 4.939301680621196e-06, "loss": 0.4265739440917969, "num_tokens": 11092593531.0, "step": 90820 }, { "epoch": 0.12111418651904708, "grad_norm": 1.6953125, "learning_rate": 4.939148688906004e-06, "loss": 0.428294038772583, "num_tokens": 11095021710.0, "step": 90840 }, { "epoch": 0.12114085190577518, "grad_norm": 2.296875, "learning_rate": 4.938995506999008e-06, "loss": 0.43173770904541015, "num_tokens": 11097582842.0, "step": 90860 }, { "epoch": 0.12116751729250329, "grad_norm": 1.7109375, "learning_rate": 4.938842134912153e-06, "loss": 0.41994595527648926, "num_tokens": 11100033367.0, "step": 90880 }, { "epoch": 0.12119418267923139, "grad_norm": 1.6171875, "learning_rate": 4.938688572657398e-06, "loss": 0.42894926071166994, "num_tokens": 11102602439.0, "step": 90900 }, { "epoch": 0.1212208480659595, "grad_norm": 2.03125, "learning_rate": 4.938534820246717e-06, "loss": 0.42838873863220217, "num_tokens": 11105019431.0, "step": 90920 }, { "epoch": 0.12124751345268761, "grad_norm": 1.9921875, "learning_rate": 4.938380877692098e-06, "loss": 0.4285735607147217, "num_tokens": 11107437803.0, "step": 90940 }, { "epoch": 0.12127417883941571, "grad_norm": 1.96875, "learning_rate": 4.938226745005545e-06, "loss": 0.42154865264892577, "num_tokens": 11109613819.0, "step": 90960 }, { "epoch": 0.12130084422614382, "grad_norm": 2.203125, "learning_rate": 4.9380724221990785e-06, "loss": 0.42786664962768556, "num_tokens": 11112171916.0, "step": 90980 }, { "epoch": 0.12132750961287192, "grad_norm": 2.3125, "learning_rate": 4.9379179092847295e-06, "loss": 0.402647066116333, "num_tokens": 11114573789.0, "step": 91000 }, { "epoch": 0.12135417499960002, "grad_norm": 1.6875, "learning_rate": 4.937763206274546e-06, "loss": 0.4196488380432129, "num_tokens": 11117065744.0, "step": 91020 }, { "epoch": 0.12138084038632813, "grad_norm": 2.25, "learning_rate": 4.937608313180593e-06, "loss": 0.4201647758483887, "num_tokens": 11119587930.0, "step": 91040 }, { "epoch": 0.12140750577305623, "grad_norm": 2.125, "learning_rate": 4.937453230014946e-06, "loss": 0.4229715347290039, "num_tokens": 11122043991.0, "step": 91060 }, { "epoch": 0.12143417115978433, "grad_norm": 1.9375, "learning_rate": 4.937297956789698e-06, "loss": 0.4196038722991943, "num_tokens": 11124562321.0, "step": 91080 }, { "epoch": 0.12146083654651244, "grad_norm": 2.5, "learning_rate": 4.937142493516957e-06, "loss": 0.426116418838501, "num_tokens": 11126891538.0, "step": 91100 }, { "epoch": 0.12148750193324054, "grad_norm": 2.203125, "learning_rate": 4.936986840208846e-06, "loss": 0.4231372833251953, "num_tokens": 11129182135.0, "step": 91120 }, { "epoch": 0.12151416731996864, "grad_norm": 1.90625, "learning_rate": 4.9368309968775e-06, "loss": 0.40950284004211424, "num_tokens": 11131617981.0, "step": 91140 }, { "epoch": 0.12154083270669674, "grad_norm": 2.203125, "learning_rate": 4.936674963535073e-06, "loss": 0.42029056549072263, "num_tokens": 11134070514.0, "step": 91160 }, { "epoch": 0.12156749809342485, "grad_norm": 2.640625, "learning_rate": 4.936518740193729e-06, "loss": 0.42035446166992185, "num_tokens": 11136477776.0, "step": 91180 }, { "epoch": 0.12159416348015295, "grad_norm": 1.890625, "learning_rate": 4.936362326865651e-06, "loss": 0.4131038665771484, "num_tokens": 11139199767.0, "step": 91200 }, { "epoch": 0.12162082886688105, "grad_norm": 1.9453125, "learning_rate": 4.9362057235630355e-06, "loss": 0.4194509983062744, "num_tokens": 11141534610.0, "step": 91220 }, { "epoch": 0.12164749425360916, "grad_norm": 1.8671875, "learning_rate": 4.936048930298094e-06, "loss": 0.4185638427734375, "num_tokens": 11144002626.0, "step": 91240 }, { "epoch": 0.12167415964033726, "grad_norm": 2.03125, "learning_rate": 4.935891947083051e-06, "loss": 0.4297060966491699, "num_tokens": 11146301858.0, "step": 91260 }, { "epoch": 0.12170082502706536, "grad_norm": 1.7421875, "learning_rate": 4.935734773930148e-06, "loss": 0.41374835968017576, "num_tokens": 11149000135.0, "step": 91280 }, { "epoch": 0.12172749041379347, "grad_norm": 2.09375, "learning_rate": 4.935577410851641e-06, "loss": 0.4029687881469727, "num_tokens": 11151429294.0, "step": 91300 }, { "epoch": 0.12175415580052157, "grad_norm": 2.328125, "learning_rate": 4.935419857859799e-06, "loss": 0.4329542636871338, "num_tokens": 11153765968.0, "step": 91320 }, { "epoch": 0.12178082118724969, "grad_norm": 2.0625, "learning_rate": 4.935262114966908e-06, "loss": 0.41037578582763673, "num_tokens": 11156028531.0, "step": 91340 }, { "epoch": 0.12180748657397779, "grad_norm": 2.140625, "learning_rate": 4.935104182185269e-06, "loss": 0.4193791389465332, "num_tokens": 11158540328.0, "step": 91360 }, { "epoch": 0.12183415196070589, "grad_norm": 2.140625, "learning_rate": 4.934946059527194e-06, "loss": 0.3975255489349365, "num_tokens": 11160827874.0, "step": 91380 }, { "epoch": 0.121860817347434, "grad_norm": 1.9765625, "learning_rate": 4.934787747005015e-06, "loss": 0.4226082801818848, "num_tokens": 11163295986.0, "step": 91400 }, { "epoch": 0.1218874827341621, "grad_norm": 1.9140625, "learning_rate": 4.934629244631075e-06, "loss": 0.4041788101196289, "num_tokens": 11165605459.0, "step": 91420 }, { "epoch": 0.1219141481208902, "grad_norm": 2.375, "learning_rate": 4.9344705524177345e-06, "loss": 0.4175728797912598, "num_tokens": 11168154437.0, "step": 91440 }, { "epoch": 0.1219408135076183, "grad_norm": 1.9296875, "learning_rate": 4.934311670377366e-06, "loss": 0.40473241806030275, "num_tokens": 11170516607.0, "step": 91460 }, { "epoch": 0.1219674788943464, "grad_norm": 1.8046875, "learning_rate": 4.93415259852236e-06, "loss": 0.4405405521392822, "num_tokens": 11172954009.0, "step": 91480 }, { "epoch": 0.12199414428107451, "grad_norm": 1.9921875, "learning_rate": 4.933993336865118e-06, "loss": 0.4152658462524414, "num_tokens": 11175471317.0, "step": 91500 }, { "epoch": 0.12202080966780261, "grad_norm": 1.96875, "learning_rate": 4.933833885418059e-06, "loss": 0.42614045143127444, "num_tokens": 11177996966.0, "step": 91520 }, { "epoch": 0.12204747505453072, "grad_norm": 1.9453125, "learning_rate": 4.933674244193618e-06, "loss": 0.41780815124511717, "num_tokens": 11180406730.0, "step": 91540 }, { "epoch": 0.12207414044125882, "grad_norm": 2.375, "learning_rate": 4.933514413204241e-06, "loss": 0.40740118026733396, "num_tokens": 11182864590.0, "step": 91560 }, { "epoch": 0.12210080582798692, "grad_norm": 1.9296875, "learning_rate": 4.933354392462391e-06, "loss": 0.4228048801422119, "num_tokens": 11185342005.0, "step": 91580 }, { "epoch": 0.12212747121471502, "grad_norm": 1.75, "learning_rate": 4.933194181980546e-06, "loss": 0.40746192932128905, "num_tokens": 11187808623.0, "step": 91600 }, { "epoch": 0.12215413660144313, "grad_norm": 1.765625, "learning_rate": 4.933033781771199e-06, "loss": 0.4302877902984619, "num_tokens": 11190326046.0, "step": 91620 }, { "epoch": 0.12218080198817123, "grad_norm": 1.9296875, "learning_rate": 4.9328731918468556e-06, "loss": 0.4238020896911621, "num_tokens": 11192803847.0, "step": 91640 }, { "epoch": 0.12220746737489933, "grad_norm": 2.234375, "learning_rate": 4.932712412220039e-06, "loss": 0.4329239368438721, "num_tokens": 11195313818.0, "step": 91660 }, { "epoch": 0.12223413276162744, "grad_norm": 2.203125, "learning_rate": 4.932551442903284e-06, "loss": 0.42036995887756345, "num_tokens": 11197680176.0, "step": 91680 }, { "epoch": 0.12226079814835554, "grad_norm": 2.609375, "learning_rate": 4.932390283909145e-06, "loss": 0.42600092887878416, "num_tokens": 11200142316.0, "step": 91700 }, { "epoch": 0.12228746353508364, "grad_norm": 1.828125, "learning_rate": 4.932228935250188e-06, "loss": 0.41387386322021485, "num_tokens": 11202572413.0, "step": 91720 }, { "epoch": 0.12231412892181175, "grad_norm": 2.515625, "learning_rate": 4.932067396938991e-06, "loss": 0.421063232421875, "num_tokens": 11205037875.0, "step": 91740 }, { "epoch": 0.12234079430853986, "grad_norm": 2.4375, "learning_rate": 4.931905668988153e-06, "loss": 0.43311848640441897, "num_tokens": 11207366031.0, "step": 91760 }, { "epoch": 0.12236745969526797, "grad_norm": 1.9609375, "learning_rate": 4.931743751410283e-06, "loss": 0.40772624015808107, "num_tokens": 11209818309.0, "step": 91780 }, { "epoch": 0.12239412508199607, "grad_norm": 1.7890625, "learning_rate": 4.931581644218008e-06, "loss": 0.41852359771728515, "num_tokens": 11212365238.0, "step": 91800 }, { "epoch": 0.12242079046872417, "grad_norm": 1.7421875, "learning_rate": 4.931419347423966e-06, "loss": 0.4176474094390869, "num_tokens": 11214658269.0, "step": 91820 }, { "epoch": 0.12244745585545228, "grad_norm": 2.1875, "learning_rate": 4.931256861040814e-06, "loss": 0.4259049415588379, "num_tokens": 11216959294.0, "step": 91840 }, { "epoch": 0.12247412124218038, "grad_norm": 2.25, "learning_rate": 4.931094185081221e-06, "loss": 0.3977967739105225, "num_tokens": 11219532168.0, "step": 91860 }, { "epoch": 0.12250078662890848, "grad_norm": 2.296875, "learning_rate": 4.930931319557872e-06, "loss": 0.41057748794555665, "num_tokens": 11221933103.0, "step": 91880 }, { "epoch": 0.12252745201563658, "grad_norm": 2.25, "learning_rate": 4.9307682644834664e-06, "loss": 0.42615280151367185, "num_tokens": 11224373728.0, "step": 91900 }, { "epoch": 0.12255411740236469, "grad_norm": 2.0625, "learning_rate": 4.930605019870718e-06, "loss": 0.442934513092041, "num_tokens": 11226606933.0, "step": 91920 }, { "epoch": 0.12258078278909279, "grad_norm": 1.859375, "learning_rate": 4.930441585732356e-06, "loss": 0.4282900333404541, "num_tokens": 11229109154.0, "step": 91940 }, { "epoch": 0.1226074481758209, "grad_norm": 1.9765625, "learning_rate": 4.930277962081124e-06, "loss": 0.4055063724517822, "num_tokens": 11231461154.0, "step": 91960 }, { "epoch": 0.122634113562549, "grad_norm": 1.703125, "learning_rate": 4.930114148929781e-06, "loss": 0.4196310043334961, "num_tokens": 11233693370.0, "step": 91980 }, { "epoch": 0.1226607789492771, "grad_norm": 1.9921875, "learning_rate": 4.929950146291099e-06, "loss": 0.4207160472869873, "num_tokens": 11236056054.0, "step": 92000 }, { "epoch": 0.1226874443360052, "grad_norm": 2.359375, "learning_rate": 4.929785954177867e-06, "loss": 0.4193730354309082, "num_tokens": 11238578182.0, "step": 92020 }, { "epoch": 0.1227141097227333, "grad_norm": 1.9609375, "learning_rate": 4.929621572602888e-06, "loss": 0.412954044342041, "num_tokens": 11240846689.0, "step": 92040 }, { "epoch": 0.12274077510946141, "grad_norm": 1.75, "learning_rate": 4.9294570015789785e-06, "loss": 0.4141650676727295, "num_tokens": 11243178066.0, "step": 92060 }, { "epoch": 0.12276744049618951, "grad_norm": 2.234375, "learning_rate": 4.929292241118972e-06, "loss": 0.4248797416687012, "num_tokens": 11245425675.0, "step": 92080 }, { "epoch": 0.12279410588291761, "grad_norm": 2.390625, "learning_rate": 4.9291272912357165e-06, "loss": 0.41546106338500977, "num_tokens": 11247972216.0, "step": 92100 }, { "epoch": 0.12282077126964572, "grad_norm": 2.015625, "learning_rate": 4.928962151942072e-06, "loss": 0.4134716033935547, "num_tokens": 11250533290.0, "step": 92120 }, { "epoch": 0.12284743665637382, "grad_norm": 1.53125, "learning_rate": 4.928796823250915e-06, "loss": 0.4293325901031494, "num_tokens": 11252893370.0, "step": 92140 }, { "epoch": 0.12287410204310194, "grad_norm": 2.046875, "learning_rate": 4.9286313051751385e-06, "loss": 0.40799508094787595, "num_tokens": 11255310015.0, "step": 92160 }, { "epoch": 0.12290076742983004, "grad_norm": 1.8046875, "learning_rate": 4.928465597727648e-06, "loss": 0.41986966133117676, "num_tokens": 11257832757.0, "step": 92180 }, { "epoch": 0.12292743281655814, "grad_norm": 1.953125, "learning_rate": 4.928299700921364e-06, "loss": 0.4198763847351074, "num_tokens": 11260238609.0, "step": 92200 }, { "epoch": 0.12295409820328625, "grad_norm": 1.84375, "learning_rate": 4.928133614769224e-06, "loss": 0.41014895439147947, "num_tokens": 11262668970.0, "step": 92220 }, { "epoch": 0.12298076359001435, "grad_norm": 1.9140625, "learning_rate": 4.927967339284176e-06, "loss": 0.4255671501159668, "num_tokens": 11265197549.0, "step": 92240 }, { "epoch": 0.12300742897674245, "grad_norm": 1.953125, "learning_rate": 4.927800874479187e-06, "loss": 0.40932111740112304, "num_tokens": 11267588419.0, "step": 92260 }, { "epoch": 0.12303409436347056, "grad_norm": 2.03125, "learning_rate": 4.927634220367236e-06, "loss": 0.43062477111816405, "num_tokens": 11269980430.0, "step": 92280 }, { "epoch": 0.12306075975019866, "grad_norm": 2.015625, "learning_rate": 4.92746737696132e-06, "loss": 0.4011233329772949, "num_tokens": 11272536180.0, "step": 92300 }, { "epoch": 0.12308742513692676, "grad_norm": 1.796875, "learning_rate": 4.927300344274446e-06, "loss": 0.4307356834411621, "num_tokens": 11275187645.0, "step": 92320 }, { "epoch": 0.12311409052365486, "grad_norm": 2.0625, "learning_rate": 4.927133122319639e-06, "loss": 0.421918249130249, "num_tokens": 11277491993.0, "step": 92340 }, { "epoch": 0.12314075591038297, "grad_norm": 2.03125, "learning_rate": 4.926965711109939e-06, "loss": 0.44045333862304686, "num_tokens": 11280134287.0, "step": 92360 }, { "epoch": 0.12316742129711107, "grad_norm": 1.5546875, "learning_rate": 4.926798110658399e-06, "loss": 0.43523440361022947, "num_tokens": 11282456988.0, "step": 92380 }, { "epoch": 0.12319408668383917, "grad_norm": 2.328125, "learning_rate": 4.926630320978088e-06, "loss": 0.41832809448242186, "num_tokens": 11284951908.0, "step": 92400 }, { "epoch": 0.12322075207056728, "grad_norm": 2.125, "learning_rate": 4.926462342082088e-06, "loss": 0.41649765968322755, "num_tokens": 11287491132.0, "step": 92420 }, { "epoch": 0.12324741745729538, "grad_norm": 2.21875, "learning_rate": 4.926294173983499e-06, "loss": 0.42793989181518555, "num_tokens": 11289620500.0, "step": 92440 }, { "epoch": 0.12327408284402348, "grad_norm": 2.25, "learning_rate": 4.9261258166954325e-06, "loss": 0.41611061096191404, "num_tokens": 11292236084.0, "step": 92460 }, { "epoch": 0.12330074823075159, "grad_norm": 2.109375, "learning_rate": 4.925957270231017e-06, "loss": 0.4128139019012451, "num_tokens": 11294715993.0, "step": 92480 }, { "epoch": 0.12332741361747969, "grad_norm": 1.7265625, "learning_rate": 4.925788534603395e-06, "loss": 0.4298295021057129, "num_tokens": 11297290966.0, "step": 92500 }, { "epoch": 0.12335407900420779, "grad_norm": 1.9296875, "learning_rate": 4.925619609825723e-06, "loss": 0.41194806098937986, "num_tokens": 11299546560.0, "step": 92520 }, { "epoch": 0.1233807443909359, "grad_norm": 1.734375, "learning_rate": 4.925450495911173e-06, "loss": 0.41744556427001955, "num_tokens": 11301987194.0, "step": 92540 }, { "epoch": 0.12340740977766401, "grad_norm": 2.203125, "learning_rate": 4.925281192872931e-06, "loss": 0.4201087474822998, "num_tokens": 11304441458.0, "step": 92560 }, { "epoch": 0.12343407516439212, "grad_norm": 1.96875, "learning_rate": 4.925111700724199e-06, "loss": 0.4183689594268799, "num_tokens": 11306790589.0, "step": 92580 }, { "epoch": 0.12346074055112022, "grad_norm": 2.3125, "learning_rate": 4.924942019478193e-06, "loss": 0.4118154525756836, "num_tokens": 11309174796.0, "step": 92600 }, { "epoch": 0.12348740593784832, "grad_norm": 2.515625, "learning_rate": 4.924772149148145e-06, "loss": 0.4179586887359619, "num_tokens": 11311719956.0, "step": 92620 }, { "epoch": 0.12351407132457642, "grad_norm": 1.953125, "learning_rate": 4.924602089747298e-06, "loss": 0.42972216606140134, "num_tokens": 11314253315.0, "step": 92640 }, { "epoch": 0.12354073671130453, "grad_norm": 1.7890625, "learning_rate": 4.924431841288915e-06, "loss": 0.42961864471435546, "num_tokens": 11316739879.0, "step": 92660 }, { "epoch": 0.12356740209803263, "grad_norm": 1.78125, "learning_rate": 4.924261403786269e-06, "loss": 0.41601009368896485, "num_tokens": 11319333607.0, "step": 92680 }, { "epoch": 0.12359406748476073, "grad_norm": 2.015625, "learning_rate": 4.92409077725265e-06, "loss": 0.40993528366088866, "num_tokens": 11321621116.0, "step": 92700 }, { "epoch": 0.12362073287148884, "grad_norm": 1.984375, "learning_rate": 4.923919961701365e-06, "loss": 0.44623756408691406, "num_tokens": 11324076657.0, "step": 92720 }, { "epoch": 0.12364739825821694, "grad_norm": 2.359375, "learning_rate": 4.92374895714573e-06, "loss": 0.4236490249633789, "num_tokens": 11326626820.0, "step": 92740 }, { "epoch": 0.12367406364494504, "grad_norm": 2.21875, "learning_rate": 4.9235777635990815e-06, "loss": 0.42952756881713866, "num_tokens": 11328907691.0, "step": 92760 }, { "epoch": 0.12370072903167315, "grad_norm": 1.84375, "learning_rate": 4.923406381074766e-06, "loss": 0.41708083152770997, "num_tokens": 11331456709.0, "step": 92780 }, { "epoch": 0.12372739441840125, "grad_norm": 2.734375, "learning_rate": 4.9232348095861494e-06, "loss": 0.4218256950378418, "num_tokens": 11333864636.0, "step": 92800 }, { "epoch": 0.12375405980512935, "grad_norm": 2.328125, "learning_rate": 4.923063049146608e-06, "loss": 0.4258464813232422, "num_tokens": 11336194661.0, "step": 92820 }, { "epoch": 0.12378072519185745, "grad_norm": 2.28125, "learning_rate": 4.922891099769535e-06, "loss": 0.41028485298156736, "num_tokens": 11338667645.0, "step": 92840 }, { "epoch": 0.12380739057858556, "grad_norm": 1.421875, "learning_rate": 4.9227189614683406e-06, "loss": 0.4202843189239502, "num_tokens": 11340980531.0, "step": 92860 }, { "epoch": 0.12383405596531366, "grad_norm": 1.8515625, "learning_rate": 4.922546634256445e-06, "loss": 0.4173093795776367, "num_tokens": 11343504355.0, "step": 92880 }, { "epoch": 0.12386072135204176, "grad_norm": 2.1875, "learning_rate": 4.922374118147284e-06, "loss": 0.4263772487640381, "num_tokens": 11345980737.0, "step": 92900 }, { "epoch": 0.12388738673876987, "grad_norm": 1.5078125, "learning_rate": 4.922201413154312e-06, "loss": 0.42498111724853516, "num_tokens": 11348257647.0, "step": 92920 }, { "epoch": 0.12391405212549797, "grad_norm": 2.265625, "learning_rate": 4.922028519290994e-06, "loss": 0.3990570306777954, "num_tokens": 11350710296.0, "step": 92940 }, { "epoch": 0.12394071751222607, "grad_norm": 2.40625, "learning_rate": 4.921855436570813e-06, "loss": 0.42560925483703616, "num_tokens": 11353017624.0, "step": 92960 }, { "epoch": 0.12396738289895419, "grad_norm": 2.15625, "learning_rate": 4.921682165007264e-06, "loss": 0.415449333190918, "num_tokens": 11355272797.0, "step": 92980 }, { "epoch": 0.12399404828568229, "grad_norm": 2.0, "learning_rate": 4.921508704613858e-06, "loss": 0.4225395679473877, "num_tokens": 11357477567.0, "step": 93000 }, { "epoch": 0.1240207136724104, "grad_norm": 1.8984375, "learning_rate": 4.92133505540412e-06, "loss": 0.4232766628265381, "num_tokens": 11359796587.0, "step": 93020 }, { "epoch": 0.1240473790591385, "grad_norm": 1.984375, "learning_rate": 4.921161217391592e-06, "loss": 0.4168076515197754, "num_tokens": 11362180612.0, "step": 93040 }, { "epoch": 0.1240740444458666, "grad_norm": 1.703125, "learning_rate": 4.920987190589826e-06, "loss": 0.4104365348815918, "num_tokens": 11364557628.0, "step": 93060 }, { "epoch": 0.1241007098325947, "grad_norm": 2.078125, "learning_rate": 4.9208129750123935e-06, "loss": 0.4311855316162109, "num_tokens": 11367016325.0, "step": 93080 }, { "epoch": 0.12412737521932281, "grad_norm": 1.8828125, "learning_rate": 4.920638570672879e-06, "loss": 0.4102329254150391, "num_tokens": 11369352170.0, "step": 93100 }, { "epoch": 0.12415404060605091, "grad_norm": 1.890625, "learning_rate": 4.920463977584882e-06, "loss": 0.43561515808105467, "num_tokens": 11371672823.0, "step": 93120 }, { "epoch": 0.12418070599277901, "grad_norm": 1.78125, "learning_rate": 4.9202891957620146e-06, "loss": 0.41611738204956056, "num_tokens": 11374216120.0, "step": 93140 }, { "epoch": 0.12420737137950712, "grad_norm": 2.125, "learning_rate": 4.920114225217907e-06, "loss": 0.422023344039917, "num_tokens": 11376838825.0, "step": 93160 }, { "epoch": 0.12423403676623522, "grad_norm": 1.9453125, "learning_rate": 4.919939065966202e-06, "loss": 0.40610380172729493, "num_tokens": 11379372294.0, "step": 93180 }, { "epoch": 0.12426070215296332, "grad_norm": 1.8046875, "learning_rate": 4.919763718020557e-06, "loss": 0.42329559326171873, "num_tokens": 11381903632.0, "step": 93200 }, { "epoch": 0.12428736753969143, "grad_norm": 2.046875, "learning_rate": 4.919588181394646e-06, "loss": 0.40582685470581054, "num_tokens": 11384437153.0, "step": 93220 }, { "epoch": 0.12431403292641953, "grad_norm": 2.546875, "learning_rate": 4.919412456102155e-06, "loss": 0.41792993545532225, "num_tokens": 11386684486.0, "step": 93240 }, { "epoch": 0.12434069831314763, "grad_norm": 2.09375, "learning_rate": 4.919236542156786e-06, "loss": 0.4095919609069824, "num_tokens": 11389137346.0, "step": 93260 }, { "epoch": 0.12436736369987574, "grad_norm": 1.859375, "learning_rate": 4.919060439572257e-06, "loss": 0.41117172241210936, "num_tokens": 11391550835.0, "step": 93280 }, { "epoch": 0.12439402908660384, "grad_norm": 1.2734375, "learning_rate": 4.9188841483623e-06, "loss": 0.40430307388305664, "num_tokens": 11394009734.0, "step": 93300 }, { "epoch": 0.12442069447333194, "grad_norm": 2.703125, "learning_rate": 4.91870766854066e-06, "loss": 0.4239158630371094, "num_tokens": 11396622729.0, "step": 93320 }, { "epoch": 0.12444735986006004, "grad_norm": 2.03125, "learning_rate": 4.918531000121098e-06, "loss": 0.4205334186553955, "num_tokens": 11399151719.0, "step": 93340 }, { "epoch": 0.12447402524678815, "grad_norm": 1.6171875, "learning_rate": 4.91835414311739e-06, "loss": 0.4072411060333252, "num_tokens": 11401852382.0, "step": 93360 }, { "epoch": 0.12450069063351626, "grad_norm": 2.375, "learning_rate": 4.918177097543326e-06, "loss": 0.42095065116882324, "num_tokens": 11404133742.0, "step": 93380 }, { "epoch": 0.12452735602024437, "grad_norm": 1.796875, "learning_rate": 4.917999863412712e-06, "loss": 0.4232132911682129, "num_tokens": 11406460379.0, "step": 93400 }, { "epoch": 0.12455402140697247, "grad_norm": 2.171875, "learning_rate": 4.917822440739367e-06, "loss": 0.4230476379394531, "num_tokens": 11408869988.0, "step": 93420 }, { "epoch": 0.12458068679370057, "grad_norm": 2.015625, "learning_rate": 4.917644829537125e-06, "loss": 0.4311038017272949, "num_tokens": 11411438824.0, "step": 93440 }, { "epoch": 0.12460735218042868, "grad_norm": 2.265625, "learning_rate": 4.917467029819837e-06, "loss": 0.4349165916442871, "num_tokens": 11413881506.0, "step": 93460 }, { "epoch": 0.12463401756715678, "grad_norm": 2.21875, "learning_rate": 4.9172890416013645e-06, "loss": 0.4071141242980957, "num_tokens": 11416149302.0, "step": 93480 }, { "epoch": 0.12466068295388488, "grad_norm": 2.421875, "learning_rate": 4.917110864895588e-06, "loss": 0.42736148834228516, "num_tokens": 11418664241.0, "step": 93500 }, { "epoch": 0.12468734834061299, "grad_norm": 1.9375, "learning_rate": 4.916932499716399e-06, "loss": 0.42762064933776855, "num_tokens": 11421343297.0, "step": 93520 }, { "epoch": 0.12471401372734109, "grad_norm": 2.015625, "learning_rate": 4.916753946077708e-06, "loss": 0.43187689781188965, "num_tokens": 11423886990.0, "step": 93540 }, { "epoch": 0.12474067911406919, "grad_norm": 2.203125, "learning_rate": 4.916575203993435e-06, "loss": 0.4228648662567139, "num_tokens": 11426221351.0, "step": 93560 }, { "epoch": 0.1247673445007973, "grad_norm": 1.90625, "learning_rate": 4.916396273477519e-06, "loss": 0.4153772830963135, "num_tokens": 11428692022.0, "step": 93580 }, { "epoch": 0.1247940098875254, "grad_norm": 2.21875, "learning_rate": 4.916217154543911e-06, "loss": 0.418172025680542, "num_tokens": 11431072871.0, "step": 93600 }, { "epoch": 0.1248206752742535, "grad_norm": 1.890625, "learning_rate": 4.916037847206579e-06, "loss": 0.4144841194152832, "num_tokens": 11433416471.0, "step": 93620 }, { "epoch": 0.1248473406609816, "grad_norm": 1.8046875, "learning_rate": 4.915858351479504e-06, "loss": 0.4292431354522705, "num_tokens": 11435629738.0, "step": 93640 }, { "epoch": 0.1248740060477097, "grad_norm": 2.109375, "learning_rate": 4.915678667376681e-06, "loss": 0.41910552978515625, "num_tokens": 11437967448.0, "step": 93660 }, { "epoch": 0.12490067143443781, "grad_norm": 2.640625, "learning_rate": 4.915498794912121e-06, "loss": 0.4160336494445801, "num_tokens": 11440409267.0, "step": 93680 }, { "epoch": 0.12492733682116591, "grad_norm": 2.265625, "learning_rate": 4.915318734099851e-06, "loss": 0.42154669761657715, "num_tokens": 11443042574.0, "step": 93700 }, { "epoch": 0.12495400220789402, "grad_norm": 2.15625, "learning_rate": 4.915138484953911e-06, "loss": 0.42467050552368163, "num_tokens": 11445472705.0, "step": 93720 }, { "epoch": 0.12498066759462212, "grad_norm": 2.140625, "learning_rate": 4.914958047488355e-06, "loss": 0.4311154842376709, "num_tokens": 11447882862.0, "step": 93740 }, { "epoch": 0.12500733298135022, "grad_norm": 1.921875, "learning_rate": 4.9147774217172515e-06, "loss": 0.4188109874725342, "num_tokens": 11450291023.0, "step": 93760 }, { "epoch": 0.12503399836807833, "grad_norm": 2.5, "learning_rate": 4.914596607654687e-06, "loss": 0.418316650390625, "num_tokens": 11452620674.0, "step": 93780 }, { "epoch": 0.12506066375480643, "grad_norm": 2.0625, "learning_rate": 4.914415605314759e-06, "loss": 0.4069984436035156, "num_tokens": 11455282977.0, "step": 93800 }, { "epoch": 0.12508732914153453, "grad_norm": 1.890625, "learning_rate": 4.914234414711581e-06, "loss": 0.42507228851318357, "num_tokens": 11457761987.0, "step": 93820 }, { "epoch": 0.12511399452826263, "grad_norm": 1.828125, "learning_rate": 4.914053035859283e-06, "loss": 0.41488847732543943, "num_tokens": 11460216807.0, "step": 93840 }, { "epoch": 0.12514065991499074, "grad_norm": 1.9921875, "learning_rate": 4.913871468772006e-06, "loss": 0.4335461616516113, "num_tokens": 11462632796.0, "step": 93860 }, { "epoch": 0.12516732530171884, "grad_norm": 1.53125, "learning_rate": 4.9136897134639084e-06, "loss": 0.435030460357666, "num_tokens": 11465113185.0, "step": 93880 }, { "epoch": 0.12519399068844694, "grad_norm": 2.0625, "learning_rate": 4.913507769949163e-06, "loss": 0.4353752136230469, "num_tokens": 11467615788.0, "step": 93900 }, { "epoch": 0.12522065607517505, "grad_norm": 2.5625, "learning_rate": 4.913325638241956e-06, "loss": 0.4154343605041504, "num_tokens": 11470133602.0, "step": 93920 }, { "epoch": 0.12524732146190315, "grad_norm": 1.5625, "learning_rate": 4.9131433183564885e-06, "loss": 0.4341731071472168, "num_tokens": 11472477890.0, "step": 93940 }, { "epoch": 0.12527398684863125, "grad_norm": 2.078125, "learning_rate": 4.9129608103069784e-06, "loss": 0.4057758331298828, "num_tokens": 11474912919.0, "step": 93960 }, { "epoch": 0.12530065223535938, "grad_norm": 2.1875, "learning_rate": 4.912778114107656e-06, "loss": 0.42659845352172854, "num_tokens": 11477194380.0, "step": 93980 }, { "epoch": 0.1253273176220875, "grad_norm": 1.9765625, "learning_rate": 4.9125952297727665e-06, "loss": 0.40619587898254395, "num_tokens": 11479733837.0, "step": 94000 }, { "epoch": 0.1253539830088156, "grad_norm": 2.1875, "learning_rate": 4.912412157316571e-06, "loss": 0.43012638092041017, "num_tokens": 11482070347.0, "step": 94020 }, { "epoch": 0.1253806483955437, "grad_norm": 1.8515625, "learning_rate": 4.9122288967533434e-06, "loss": 0.4167935848236084, "num_tokens": 11484503860.0, "step": 94040 }, { "epoch": 0.1254073137822718, "grad_norm": 1.7890625, "learning_rate": 4.912045448097374e-06, "loss": 0.4136970520019531, "num_tokens": 11487077826.0, "step": 94060 }, { "epoch": 0.1254339791689999, "grad_norm": 1.90625, "learning_rate": 4.911861811362969e-06, "loss": 0.42223563194274905, "num_tokens": 11489648521.0, "step": 94080 }, { "epoch": 0.125460644555728, "grad_norm": 2.015625, "learning_rate": 4.9116779865644444e-06, "loss": 0.41427226066589357, "num_tokens": 11491993898.0, "step": 94100 }, { "epoch": 0.1254873099424561, "grad_norm": 1.984375, "learning_rate": 4.911493973716135e-06, "loss": 0.42790589332580564, "num_tokens": 11494506467.0, "step": 94120 }, { "epoch": 0.1255139753291842, "grad_norm": 2.15625, "learning_rate": 4.91130977283239e-06, "loss": 0.4133650302886963, "num_tokens": 11496741104.0, "step": 94140 }, { "epoch": 0.1255406407159123, "grad_norm": 1.7578125, "learning_rate": 4.911125383927572e-06, "loss": 0.4232541561126709, "num_tokens": 11499199177.0, "step": 94160 }, { "epoch": 0.1255673061026404, "grad_norm": 1.859375, "learning_rate": 4.910940807016058e-06, "loss": 0.41518464088439944, "num_tokens": 11501778140.0, "step": 94180 }, { "epoch": 0.12559397148936852, "grad_norm": 2.0, "learning_rate": 4.91075604211224e-06, "loss": 0.41580495834350584, "num_tokens": 11504205542.0, "step": 94200 }, { "epoch": 0.12562063687609662, "grad_norm": 1.84375, "learning_rate": 4.910571089230527e-06, "loss": 0.4189313888549805, "num_tokens": 11506807827.0, "step": 94220 }, { "epoch": 0.12564730226282472, "grad_norm": 1.9609375, "learning_rate": 4.910385948385339e-06, "loss": 0.4429161071777344, "num_tokens": 11509275306.0, "step": 94240 }, { "epoch": 0.12567396764955283, "grad_norm": 2.0, "learning_rate": 4.910200619591112e-06, "loss": 0.41135492324829104, "num_tokens": 11511660938.0, "step": 94260 }, { "epoch": 0.12570063303628093, "grad_norm": 2.515625, "learning_rate": 4.910015102862298e-06, "loss": 0.42867450714111327, "num_tokens": 11513928167.0, "step": 94280 }, { "epoch": 0.12572729842300903, "grad_norm": 2.390625, "learning_rate": 4.909829398213362e-06, "loss": 0.4088355541229248, "num_tokens": 11516281618.0, "step": 94300 }, { "epoch": 0.12575396380973713, "grad_norm": 2.0, "learning_rate": 4.909643505658785e-06, "loss": 0.40633411407470704, "num_tokens": 11518492305.0, "step": 94320 }, { "epoch": 0.12578062919646524, "grad_norm": 2.03125, "learning_rate": 4.909457425213061e-06, "loss": 0.4072376251220703, "num_tokens": 11520988893.0, "step": 94340 }, { "epoch": 0.12580729458319334, "grad_norm": 2.03125, "learning_rate": 4.9092711568907e-06, "loss": 0.4197546005249023, "num_tokens": 11523397193.0, "step": 94360 }, { "epoch": 0.12583395996992144, "grad_norm": 2.078125, "learning_rate": 4.909084700706226e-06, "loss": 0.42322473526000975, "num_tokens": 11525924475.0, "step": 94380 }, { "epoch": 0.12586062535664955, "grad_norm": 1.71875, "learning_rate": 4.9088980566741775e-06, "loss": 0.41797332763671874, "num_tokens": 11528526832.0, "step": 94400 }, { "epoch": 0.12588729074337765, "grad_norm": 2.265625, "learning_rate": 4.908711224809109e-06, "loss": 0.4226435661315918, "num_tokens": 11531135895.0, "step": 94420 }, { "epoch": 0.12591395613010575, "grad_norm": 1.9140625, "learning_rate": 4.908524205125588e-06, "loss": 0.4173455238342285, "num_tokens": 11533559602.0, "step": 94440 }, { "epoch": 0.12594062151683386, "grad_norm": 2.375, "learning_rate": 4.908336997638198e-06, "loss": 0.41252927780151366, "num_tokens": 11536009844.0, "step": 94460 }, { "epoch": 0.12596728690356196, "grad_norm": 2.328125, "learning_rate": 4.908149602361535e-06, "loss": 0.42101593017578126, "num_tokens": 11538536765.0, "step": 94480 }, { "epoch": 0.12599395229029006, "grad_norm": 1.96875, "learning_rate": 4.907962019310213e-06, "loss": 0.4171909809112549, "num_tokens": 11540969911.0, "step": 94500 }, { "epoch": 0.12602061767701817, "grad_norm": 1.5390625, "learning_rate": 4.907774248498856e-06, "loss": 0.40855822563171384, "num_tokens": 11543570305.0, "step": 94520 }, { "epoch": 0.12604728306374627, "grad_norm": 1.9609375, "learning_rate": 4.907586289942109e-06, "loss": 0.4075871467590332, "num_tokens": 11546141689.0, "step": 94540 }, { "epoch": 0.12607394845047437, "grad_norm": 1.8671875, "learning_rate": 4.9073981436546256e-06, "loss": 0.4038442611694336, "num_tokens": 11548536911.0, "step": 94560 }, { "epoch": 0.12610061383720247, "grad_norm": 2.25, "learning_rate": 4.907209809651076e-06, "loss": 0.4191481590270996, "num_tokens": 11551029825.0, "step": 94580 }, { "epoch": 0.12612727922393058, "grad_norm": 2.109375, "learning_rate": 4.907021287946147e-06, "loss": 0.40731110572814944, "num_tokens": 11553560142.0, "step": 94600 }, { "epoch": 0.12615394461065868, "grad_norm": 2.328125, "learning_rate": 4.906832578554538e-06, "loss": 0.4180413246154785, "num_tokens": 11556086942.0, "step": 94620 }, { "epoch": 0.12618060999738678, "grad_norm": 1.96875, "learning_rate": 4.906643681490963e-06, "loss": 0.422426700592041, "num_tokens": 11558445876.0, "step": 94640 }, { "epoch": 0.1262072753841149, "grad_norm": 2.5625, "learning_rate": 4.906454596770152e-06, "loss": 0.42813692092895506, "num_tokens": 11560630740.0, "step": 94660 }, { "epoch": 0.126233940770843, "grad_norm": 2.234375, "learning_rate": 4.9062653244068485e-06, "loss": 0.4254547119140625, "num_tokens": 11563075560.0, "step": 94680 }, { "epoch": 0.1262606061575711, "grad_norm": 2.296875, "learning_rate": 4.906075864415811e-06, "loss": 0.4059422492980957, "num_tokens": 11565363236.0, "step": 94700 }, { "epoch": 0.1262872715442992, "grad_norm": 1.78125, "learning_rate": 4.905886216811812e-06, "loss": 0.4024971008300781, "num_tokens": 11567855500.0, "step": 94720 }, { "epoch": 0.1263139369310273, "grad_norm": 2.359375, "learning_rate": 4.905696381609641e-06, "loss": 0.4279679298400879, "num_tokens": 11570323255.0, "step": 94740 }, { "epoch": 0.1263406023177554, "grad_norm": 2.546875, "learning_rate": 4.905506358824097e-06, "loss": 0.4236229419708252, "num_tokens": 11572740455.0, "step": 94760 }, { "epoch": 0.1263672677044835, "grad_norm": 2.078125, "learning_rate": 4.905316148470001e-06, "loss": 0.4118049621582031, "num_tokens": 11575230062.0, "step": 94780 }, { "epoch": 0.12639393309121164, "grad_norm": 1.9296875, "learning_rate": 4.905125750562181e-06, "loss": 0.42466182708740235, "num_tokens": 11577729008.0, "step": 94800 }, { "epoch": 0.12642059847793974, "grad_norm": 1.890625, "learning_rate": 4.904935165115486e-06, "loss": 0.41954803466796875, "num_tokens": 11580095182.0, "step": 94820 }, { "epoch": 0.12644726386466784, "grad_norm": 2.234375, "learning_rate": 4.904744392144775e-06, "loss": 0.42616987228393555, "num_tokens": 11582432667.0, "step": 94840 }, { "epoch": 0.12647392925139594, "grad_norm": 2.1875, "learning_rate": 4.904553431664924e-06, "loss": 0.44089226722717284, "num_tokens": 11584638947.0, "step": 94860 }, { "epoch": 0.12650059463812405, "grad_norm": 1.71875, "learning_rate": 4.904362283690824e-06, "loss": 0.4145754337310791, "num_tokens": 11587330855.0, "step": 94880 }, { "epoch": 0.12652726002485215, "grad_norm": 1.671875, "learning_rate": 4.904170948237378e-06, "loss": 0.44078426361083983, "num_tokens": 11589671501.0, "step": 94900 }, { "epoch": 0.12655392541158025, "grad_norm": 1.734375, "learning_rate": 4.903979425319506e-06, "loss": 0.41237940788269045, "num_tokens": 11592062930.0, "step": 94920 }, { "epoch": 0.12658059079830836, "grad_norm": 2.25, "learning_rate": 4.903787714952143e-06, "loss": 0.4159232139587402, "num_tokens": 11594756206.0, "step": 94940 }, { "epoch": 0.12660725618503646, "grad_norm": 2.046875, "learning_rate": 4.903595817150236e-06, "loss": 0.4077202796936035, "num_tokens": 11597302196.0, "step": 94960 }, { "epoch": 0.12663392157176456, "grad_norm": 2.28125, "learning_rate": 4.903403731928748e-06, "loss": 0.41786508560180663, "num_tokens": 11599595557.0, "step": 94980 }, { "epoch": 0.12666058695849267, "grad_norm": 2.109375, "learning_rate": 4.903211459302659e-06, "loss": 0.439129638671875, "num_tokens": 11602088807.0, "step": 95000 }, { "epoch": 0.12668725234522077, "grad_norm": 1.9609375, "learning_rate": 4.903018999286961e-06, "loss": 0.42761950492858886, "num_tokens": 11604393923.0, "step": 95020 }, { "epoch": 0.12671391773194887, "grad_norm": 2.859375, "learning_rate": 4.902826351896658e-06, "loss": 0.43549528121948244, "num_tokens": 11606730734.0, "step": 95040 }, { "epoch": 0.12674058311867697, "grad_norm": 2.25, "learning_rate": 4.9026335171467745e-06, "loss": 0.4125277042388916, "num_tokens": 11609236301.0, "step": 95060 }, { "epoch": 0.12676724850540508, "grad_norm": 2.03125, "learning_rate": 4.9024404950523465e-06, "loss": 0.42319426536560056, "num_tokens": 11611783280.0, "step": 95080 }, { "epoch": 0.12679391389213318, "grad_norm": 2.34375, "learning_rate": 4.902247285628423e-06, "loss": 0.40594167709350587, "num_tokens": 11613996271.0, "step": 95100 }, { "epoch": 0.12682057927886128, "grad_norm": 1.8203125, "learning_rate": 4.902053888890071e-06, "loss": 0.4402346611022949, "num_tokens": 11616139037.0, "step": 95120 }, { "epoch": 0.1268472446655894, "grad_norm": 2.21875, "learning_rate": 4.9018603048523706e-06, "loss": 0.42108640670776365, "num_tokens": 11618545963.0, "step": 95140 }, { "epoch": 0.1268739100523175, "grad_norm": 2.046875, "learning_rate": 4.901666533530416e-06, "loss": 0.4235508918762207, "num_tokens": 11621128803.0, "step": 95160 }, { "epoch": 0.1269005754390456, "grad_norm": 2.359375, "learning_rate": 4.901472574939316e-06, "loss": 0.42702546119689944, "num_tokens": 11623630202.0, "step": 95180 }, { "epoch": 0.1269272408257737, "grad_norm": 2.140625, "learning_rate": 4.901278429094195e-06, "loss": 0.4129924297332764, "num_tokens": 11626160245.0, "step": 95200 }, { "epoch": 0.1269539062125018, "grad_norm": 1.78125, "learning_rate": 4.901084096010192e-06, "loss": 0.42946834564208985, "num_tokens": 11628591340.0, "step": 95220 }, { "epoch": 0.1269805715992299, "grad_norm": 2.59375, "learning_rate": 4.900889575702459e-06, "loss": 0.41573290824890136, "num_tokens": 11631107187.0, "step": 95240 }, { "epoch": 0.127007236985958, "grad_norm": 1.9921875, "learning_rate": 4.900694868186164e-06, "loss": 0.4258620262145996, "num_tokens": 11633620981.0, "step": 95260 }, { "epoch": 0.1270339023726861, "grad_norm": 2.203125, "learning_rate": 4.90049997347649e-06, "loss": 0.4304286003112793, "num_tokens": 11636004101.0, "step": 95280 }, { "epoch": 0.1270605677594142, "grad_norm": 2.078125, "learning_rate": 4.900304891588632e-06, "loss": 0.43085827827453616, "num_tokens": 11638408471.0, "step": 95300 }, { "epoch": 0.12708723314614231, "grad_norm": 2.453125, "learning_rate": 4.900109622537804e-06, "loss": 0.4183961391448975, "num_tokens": 11641072867.0, "step": 95320 }, { "epoch": 0.12711389853287042, "grad_norm": 2.28125, "learning_rate": 4.89991416633923e-06, "loss": 0.4101395606994629, "num_tokens": 11643383504.0, "step": 95340 }, { "epoch": 0.12714056391959852, "grad_norm": 2.4375, "learning_rate": 4.899718523008151e-06, "loss": 0.4189114570617676, "num_tokens": 11645780422.0, "step": 95360 }, { "epoch": 0.12716722930632662, "grad_norm": 2.328125, "learning_rate": 4.8995226925598236e-06, "loss": 0.41054749488830566, "num_tokens": 11648276374.0, "step": 95380 }, { "epoch": 0.12719389469305473, "grad_norm": 1.8515625, "learning_rate": 4.899326675009516e-06, "loss": 0.4372817039489746, "num_tokens": 11650877545.0, "step": 95400 }, { "epoch": 0.12722056007978283, "grad_norm": 1.671875, "learning_rate": 4.899130470372513e-06, "loss": 0.4226943016052246, "num_tokens": 11653396226.0, "step": 95420 }, { "epoch": 0.12724722546651093, "grad_norm": 2.140625, "learning_rate": 4.8989340786641145e-06, "loss": 0.4111181735992432, "num_tokens": 11655856609.0, "step": 95440 }, { "epoch": 0.12727389085323904, "grad_norm": 2.34375, "learning_rate": 4.898737499899633e-06, "loss": 0.41503238677978516, "num_tokens": 11658472644.0, "step": 95460 }, { "epoch": 0.12730055623996714, "grad_norm": 1.890625, "learning_rate": 4.898540734094397e-06, "loss": 0.4301905632019043, "num_tokens": 11660654490.0, "step": 95480 }, { "epoch": 0.12732722162669524, "grad_norm": 1.9453125, "learning_rate": 4.898343781263749e-06, "loss": 0.42339329719543456, "num_tokens": 11663011913.0, "step": 95500 }, { "epoch": 0.12735388701342334, "grad_norm": 1.890625, "learning_rate": 4.898146641423046e-06, "loss": 0.41578240394592286, "num_tokens": 11665331119.0, "step": 95520 }, { "epoch": 0.12738055240015145, "grad_norm": 2.125, "learning_rate": 4.897949314587661e-06, "loss": 0.42194466590881347, "num_tokens": 11667798994.0, "step": 95540 }, { "epoch": 0.12740721778687955, "grad_norm": 1.953125, "learning_rate": 4.897751800772981e-06, "loss": 0.4036696910858154, "num_tokens": 11670142843.0, "step": 95560 }, { "epoch": 0.12743388317360765, "grad_norm": 2.65625, "learning_rate": 4.897554099994405e-06, "loss": 0.38942561149597166, "num_tokens": 11672679851.0, "step": 95580 }, { "epoch": 0.12746054856033576, "grad_norm": 2.0625, "learning_rate": 4.89735621226735e-06, "loss": 0.4196597099304199, "num_tokens": 11675288923.0, "step": 95600 }, { "epoch": 0.1274872139470639, "grad_norm": 2.453125, "learning_rate": 4.897158137607246e-06, "loss": 0.397886061668396, "num_tokens": 11677718896.0, "step": 95620 }, { "epoch": 0.127513879333792, "grad_norm": 2.25, "learning_rate": 4.896959876029538e-06, "loss": 0.4241002082824707, "num_tokens": 11680279408.0, "step": 95640 }, { "epoch": 0.1275405447205201, "grad_norm": 1.84375, "learning_rate": 4.896761427549685e-06, "loss": 0.4274155616760254, "num_tokens": 11682862294.0, "step": 95660 }, { "epoch": 0.1275672101072482, "grad_norm": 1.7734375, "learning_rate": 4.89656279218316e-06, "loss": 0.4037888526916504, "num_tokens": 11685303356.0, "step": 95680 }, { "epoch": 0.1275938754939763, "grad_norm": 1.5234375, "learning_rate": 4.896363969945454e-06, "loss": 0.4236144065856934, "num_tokens": 11687836940.0, "step": 95700 }, { "epoch": 0.1276205408807044, "grad_norm": 1.8359375, "learning_rate": 4.896164960852068e-06, "loss": 0.4112241268157959, "num_tokens": 11690248102.0, "step": 95720 }, { "epoch": 0.1276472062674325, "grad_norm": 2.0, "learning_rate": 4.895965764918521e-06, "loss": 0.44577913284301757, "num_tokens": 11692684273.0, "step": 95740 }, { "epoch": 0.1276738716541606, "grad_norm": 1.4921875, "learning_rate": 4.895766382160344e-06, "loss": 0.4259422779083252, "num_tokens": 11695087214.0, "step": 95760 }, { "epoch": 0.1277005370408887, "grad_norm": 2.03125, "learning_rate": 4.895566812593085e-06, "loss": 0.4204461097717285, "num_tokens": 11697502494.0, "step": 95780 }, { "epoch": 0.12772720242761681, "grad_norm": 1.671875, "learning_rate": 4.895367056232304e-06, "loss": 0.40483713150024414, "num_tokens": 11699838197.0, "step": 95800 }, { "epoch": 0.12775386781434492, "grad_norm": 2.09375, "learning_rate": 4.895167113093578e-06, "loss": 0.4217376708984375, "num_tokens": 11702430540.0, "step": 95820 }, { "epoch": 0.12778053320107302, "grad_norm": 1.953125, "learning_rate": 4.894966983192498e-06, "loss": 0.4078237533569336, "num_tokens": 11704942798.0, "step": 95840 }, { "epoch": 0.12780719858780112, "grad_norm": 2.515625, "learning_rate": 4.894766666544668e-06, "loss": 0.4269510269165039, "num_tokens": 11707336506.0, "step": 95860 }, { "epoch": 0.12783386397452923, "grad_norm": 2.0, "learning_rate": 4.8945661631657075e-06, "loss": 0.4135298728942871, "num_tokens": 11709926641.0, "step": 95880 }, { "epoch": 0.12786052936125733, "grad_norm": 1.921875, "learning_rate": 4.894365473071251e-06, "loss": 0.41724367141723634, "num_tokens": 11712396060.0, "step": 95900 }, { "epoch": 0.12788719474798543, "grad_norm": 2.0, "learning_rate": 4.894164596276948e-06, "loss": 0.430104923248291, "num_tokens": 11715062822.0, "step": 95920 }, { "epoch": 0.12791386013471354, "grad_norm": 1.9140625, "learning_rate": 4.893963532798461e-06, "loss": 0.4177581310272217, "num_tokens": 11717602316.0, "step": 95940 }, { "epoch": 0.12794052552144164, "grad_norm": 1.875, "learning_rate": 4.893762282651467e-06, "loss": 0.4069610595703125, "num_tokens": 11719958493.0, "step": 95960 }, { "epoch": 0.12796719090816974, "grad_norm": 2.0625, "learning_rate": 4.8935608458516605e-06, "loss": 0.4436145782470703, "num_tokens": 11722442653.0, "step": 95980 }, { "epoch": 0.12799385629489785, "grad_norm": 2.5, "learning_rate": 4.893359222414747e-06, "loss": 0.43626060485839846, "num_tokens": 11724948389.0, "step": 96000 }, { "epoch": 0.12802052168162595, "grad_norm": 2.40625, "learning_rate": 4.8931574123564485e-06, "loss": 0.41501588821411134, "num_tokens": 11727210562.0, "step": 96020 }, { "epoch": 0.12804718706835405, "grad_norm": 1.7578125, "learning_rate": 4.8929554156925e-06, "loss": 0.4303913593292236, "num_tokens": 11729466174.0, "step": 96040 }, { "epoch": 0.12807385245508215, "grad_norm": 2.421875, "learning_rate": 4.892753232438655e-06, "loss": 0.4262913703918457, "num_tokens": 11732098509.0, "step": 96060 }, { "epoch": 0.12810051784181026, "grad_norm": 2.09375, "learning_rate": 4.892550862610676e-06, "loss": 0.4018250942230225, "num_tokens": 11734527223.0, "step": 96080 }, { "epoch": 0.12812718322853836, "grad_norm": 1.6484375, "learning_rate": 4.892348306224343e-06, "loss": 0.42054061889648436, "num_tokens": 11737063924.0, "step": 96100 }, { "epoch": 0.12815384861526646, "grad_norm": 1.9453125, "learning_rate": 4.892145563295451e-06, "loss": 0.4161373138427734, "num_tokens": 11739385184.0, "step": 96120 }, { "epoch": 0.12818051400199457, "grad_norm": 2.359375, "learning_rate": 4.891942633839809e-06, "loss": 0.42412776947021485, "num_tokens": 11741847411.0, "step": 96140 }, { "epoch": 0.12820717938872267, "grad_norm": 2.703125, "learning_rate": 4.8917395178732395e-06, "loss": 0.42473554611206055, "num_tokens": 11744201047.0, "step": 96160 }, { "epoch": 0.12823384477545077, "grad_norm": 2.046875, "learning_rate": 4.891536215411581e-06, "loss": 0.39932947158813475, "num_tokens": 11746707606.0, "step": 96180 }, { "epoch": 0.12826051016217888, "grad_norm": 1.890625, "learning_rate": 4.891332726470685e-06, "loss": 0.4211740493774414, "num_tokens": 11749015796.0, "step": 96200 }, { "epoch": 0.12828717554890698, "grad_norm": 2.0, "learning_rate": 4.89112905106642e-06, "loss": 0.4157404899597168, "num_tokens": 11751448539.0, "step": 96220 }, { "epoch": 0.12831384093563508, "grad_norm": 1.8046875, "learning_rate": 4.890925189214667e-06, "loss": 0.4254452228546143, "num_tokens": 11754009753.0, "step": 96240 }, { "epoch": 0.12834050632236318, "grad_norm": 1.921875, "learning_rate": 4.890721140931321e-06, "loss": 0.41184468269348146, "num_tokens": 11756449156.0, "step": 96260 }, { "epoch": 0.1283671717090913, "grad_norm": 2.1875, "learning_rate": 4.890516906232294e-06, "loss": 0.4073024749755859, "num_tokens": 11758986336.0, "step": 96280 }, { "epoch": 0.1283938370958194, "grad_norm": 2.421875, "learning_rate": 4.89031248513351e-06, "loss": 0.4029869079589844, "num_tokens": 11761363301.0, "step": 96300 }, { "epoch": 0.1284205024825475, "grad_norm": 1.8984375, "learning_rate": 4.89010787765091e-06, "loss": 0.4255364418029785, "num_tokens": 11763960811.0, "step": 96320 }, { "epoch": 0.1284471678692756, "grad_norm": 1.9765625, "learning_rate": 4.889903083800447e-06, "loss": 0.4308012008666992, "num_tokens": 11766280364.0, "step": 96340 }, { "epoch": 0.1284738332560037, "grad_norm": 2.421875, "learning_rate": 4.88969810359809e-06, "loss": 0.4241732120513916, "num_tokens": 11768877333.0, "step": 96360 }, { "epoch": 0.1285004986427318, "grad_norm": 2.34375, "learning_rate": 4.889492937059823e-06, "loss": 0.40591068267822267, "num_tokens": 11771502014.0, "step": 96380 }, { "epoch": 0.1285271640294599, "grad_norm": 2.015625, "learning_rate": 4.889287584201643e-06, "loss": 0.4100807189941406, "num_tokens": 11774002020.0, "step": 96400 }, { "epoch": 0.12855382941618804, "grad_norm": 2.109375, "learning_rate": 4.889082045039562e-06, "loss": 0.4141964912414551, "num_tokens": 11776429446.0, "step": 96420 }, { "epoch": 0.12858049480291614, "grad_norm": 2.5, "learning_rate": 4.888876319589607e-06, "loss": 0.41534423828125, "num_tokens": 11778738348.0, "step": 96440 }, { "epoch": 0.12860716018964424, "grad_norm": 1.8046875, "learning_rate": 4.88867040786782e-06, "loss": 0.41167407035827636, "num_tokens": 11781171739.0, "step": 96460 }, { "epoch": 0.12863382557637235, "grad_norm": 1.890625, "learning_rate": 4.888464309890258e-06, "loss": 0.4202865123748779, "num_tokens": 11783568273.0, "step": 96480 }, { "epoch": 0.12866049096310045, "grad_norm": 1.40625, "learning_rate": 4.888258025672989e-06, "loss": 0.41140294075012207, "num_tokens": 11786094268.0, "step": 96500 }, { "epoch": 0.12868715634982855, "grad_norm": 2.328125, "learning_rate": 4.888051555232099e-06, "loss": 0.4148708820343018, "num_tokens": 11788783793.0, "step": 96520 }, { "epoch": 0.12871382173655666, "grad_norm": 2.21875, "learning_rate": 4.887844898583688e-06, "loss": 0.41544189453125, "num_tokens": 11791200676.0, "step": 96540 }, { "epoch": 0.12874048712328476, "grad_norm": 2.234375, "learning_rate": 4.887638055743869e-06, "loss": 0.4132885456085205, "num_tokens": 11793490799.0, "step": 96560 }, { "epoch": 0.12876715251001286, "grad_norm": 2.46875, "learning_rate": 4.887431026728771e-06, "loss": 0.42061028480529783, "num_tokens": 11795910511.0, "step": 96580 }, { "epoch": 0.12879381789674096, "grad_norm": 1.890625, "learning_rate": 4.887223811554537e-06, "loss": 0.40189599990844727, "num_tokens": 11798349870.0, "step": 96600 }, { "epoch": 0.12882048328346907, "grad_norm": 2.21875, "learning_rate": 4.887016410237325e-06, "loss": 0.41219520568847656, "num_tokens": 11800726867.0, "step": 96620 }, { "epoch": 0.12884714867019717, "grad_norm": 2.140625, "learning_rate": 4.886808822793306e-06, "loss": 0.42731080055236814, "num_tokens": 11803192498.0, "step": 96640 }, { "epoch": 0.12887381405692527, "grad_norm": 2.203125, "learning_rate": 4.8866010492386675e-06, "loss": 0.397274374961853, "num_tokens": 11805895549.0, "step": 96660 }, { "epoch": 0.12890047944365338, "grad_norm": 2.3125, "learning_rate": 4.88639308958961e-06, "loss": 0.4285558223724365, "num_tokens": 11808574099.0, "step": 96680 }, { "epoch": 0.12892714483038148, "grad_norm": 1.765625, "learning_rate": 4.886184943862349e-06, "loss": 0.41120595932006837, "num_tokens": 11810840363.0, "step": 96700 }, { "epoch": 0.12895381021710958, "grad_norm": 2.125, "learning_rate": 4.885976612073115e-06, "loss": 0.4360494136810303, "num_tokens": 11813378582.0, "step": 96720 }, { "epoch": 0.12898047560383769, "grad_norm": 1.890625, "learning_rate": 4.885768094238152e-06, "loss": 0.4194209575653076, "num_tokens": 11815911114.0, "step": 96740 }, { "epoch": 0.1290071409905658, "grad_norm": 2.296875, "learning_rate": 4.88555939037372e-06, "loss": 0.4145377159118652, "num_tokens": 11818457199.0, "step": 96760 }, { "epoch": 0.1290338063772939, "grad_norm": 2.125, "learning_rate": 4.885350500496093e-06, "loss": 0.4235528945922852, "num_tokens": 11820984866.0, "step": 96780 }, { "epoch": 0.129060471764022, "grad_norm": 1.8125, "learning_rate": 4.885141424621557e-06, "loss": 0.4211289405822754, "num_tokens": 11823393161.0, "step": 96800 }, { "epoch": 0.1290871371507501, "grad_norm": 2.359375, "learning_rate": 4.884932162766417e-06, "loss": 0.4184123039245605, "num_tokens": 11825761121.0, "step": 96820 }, { "epoch": 0.1291138025374782, "grad_norm": 2.359375, "learning_rate": 4.884722714946988e-06, "loss": 0.4099109172821045, "num_tokens": 11828238394.0, "step": 96840 }, { "epoch": 0.1291404679242063, "grad_norm": 1.78125, "learning_rate": 4.884513081179604e-06, "loss": 0.42537741661071776, "num_tokens": 11830508727.0, "step": 96860 }, { "epoch": 0.1291671333109344, "grad_norm": 2.234375, "learning_rate": 4.884303261480609e-06, "loss": 0.40559959411621094, "num_tokens": 11832995318.0, "step": 96880 }, { "epoch": 0.1291937986976625, "grad_norm": 1.8828125, "learning_rate": 4.884093255866364e-06, "loss": 0.4121556758880615, "num_tokens": 11835552730.0, "step": 96900 }, { "epoch": 0.1292204640843906, "grad_norm": 1.8359375, "learning_rate": 4.883883064353245e-06, "loss": 0.41662302017211916, "num_tokens": 11837923068.0, "step": 96920 }, { "epoch": 0.12924712947111872, "grad_norm": 2.234375, "learning_rate": 4.883672686957642e-06, "loss": 0.4301882743835449, "num_tokens": 11840127148.0, "step": 96940 }, { "epoch": 0.12927379485784682, "grad_norm": 2.171875, "learning_rate": 4.883462123695958e-06, "loss": 0.4292599678039551, "num_tokens": 11842471162.0, "step": 96960 }, { "epoch": 0.12930046024457492, "grad_norm": 1.8828125, "learning_rate": 4.8832513745846115e-06, "loss": 0.41882848739624023, "num_tokens": 11844962847.0, "step": 96980 }, { "epoch": 0.12932712563130302, "grad_norm": 2.3125, "learning_rate": 4.8830404396400345e-06, "loss": 0.4322833061218262, "num_tokens": 11847241110.0, "step": 97000 }, { "epoch": 0.12935379101803113, "grad_norm": 1.6328125, "learning_rate": 4.882829318878678e-06, "loss": 0.41112527847290037, "num_tokens": 11849770341.0, "step": 97020 }, { "epoch": 0.12938045640475923, "grad_norm": 2.0625, "learning_rate": 4.8826180123170006e-06, "loss": 0.42421345710754393, "num_tokens": 11852098649.0, "step": 97040 }, { "epoch": 0.12940712179148733, "grad_norm": 2.140625, "learning_rate": 4.882406519971482e-06, "loss": 0.4089818954467773, "num_tokens": 11854679284.0, "step": 97060 }, { "epoch": 0.12943378717821544, "grad_norm": 2.140625, "learning_rate": 4.882194841858611e-06, "loss": 0.4289573669433594, "num_tokens": 11857128948.0, "step": 97080 }, { "epoch": 0.12946045256494354, "grad_norm": 1.8203125, "learning_rate": 4.881982977994893e-06, "loss": 0.42499465942382814, "num_tokens": 11859457005.0, "step": 97100 }, { "epoch": 0.12948711795167164, "grad_norm": 2.296875, "learning_rate": 4.881770928396849e-06, "loss": 0.406103515625, "num_tokens": 11861908188.0, "step": 97120 }, { "epoch": 0.12951378333839975, "grad_norm": 1.7890625, "learning_rate": 4.8815586930810144e-06, "loss": 0.412288761138916, "num_tokens": 11864362040.0, "step": 97140 }, { "epoch": 0.12954044872512785, "grad_norm": 2.125, "learning_rate": 4.881346272063936e-06, "loss": 0.39702744483947755, "num_tokens": 11867059080.0, "step": 97160 }, { "epoch": 0.12956711411185595, "grad_norm": 2.25, "learning_rate": 4.881133665362179e-06, "loss": 0.42565693855285647, "num_tokens": 11869575711.0, "step": 97180 }, { "epoch": 0.12959377949858406, "grad_norm": 1.71875, "learning_rate": 4.88092087299232e-06, "loss": 0.4257246971130371, "num_tokens": 11871851177.0, "step": 97200 }, { "epoch": 0.12962044488531216, "grad_norm": 2.359375, "learning_rate": 4.880707894970952e-06, "loss": 0.41251673698425295, "num_tokens": 11874271180.0, "step": 97220 }, { "epoch": 0.1296471102720403, "grad_norm": 1.7734375, "learning_rate": 4.880494731314682e-06, "loss": 0.4163493633270264, "num_tokens": 11876811007.0, "step": 97240 }, { "epoch": 0.1296737756587684, "grad_norm": 2.609375, "learning_rate": 4.880281382040131e-06, "loss": 0.4193865299224854, "num_tokens": 11879329625.0, "step": 97260 }, { "epoch": 0.1297004410454965, "grad_norm": 1.8984375, "learning_rate": 4.880067847163936e-06, "loss": 0.4270466804504395, "num_tokens": 11881721617.0, "step": 97280 }, { "epoch": 0.1297271064322246, "grad_norm": 2.109375, "learning_rate": 4.8798541267027454e-06, "loss": 0.41778173446655276, "num_tokens": 11884186949.0, "step": 97300 }, { "epoch": 0.1297537718189527, "grad_norm": 2.171875, "learning_rate": 4.8796402206732265e-06, "loss": 0.41495299339294434, "num_tokens": 11886560195.0, "step": 97320 }, { "epoch": 0.1297804372056808, "grad_norm": 2.4375, "learning_rate": 4.879426129092055e-06, "loss": 0.4119099617004395, "num_tokens": 11889128901.0, "step": 97340 }, { "epoch": 0.1298071025924089, "grad_norm": 2.328125, "learning_rate": 4.879211851975928e-06, "loss": 0.40906896591186526, "num_tokens": 11891570374.0, "step": 97360 }, { "epoch": 0.129833767979137, "grad_norm": 2.296875, "learning_rate": 4.8789973893415524e-06, "loss": 0.4192193984985352, "num_tokens": 11894205325.0, "step": 97380 }, { "epoch": 0.1298604333658651, "grad_norm": 1.4921875, "learning_rate": 4.878782741205651e-06, "loss": 0.42215967178344727, "num_tokens": 11896813165.0, "step": 97400 }, { "epoch": 0.12988709875259322, "grad_norm": 1.84375, "learning_rate": 4.87856790758496e-06, "loss": 0.42499432563781736, "num_tokens": 11899186389.0, "step": 97420 }, { "epoch": 0.12991376413932132, "grad_norm": 2.234375, "learning_rate": 4.8783528884962325e-06, "loss": 0.41121606826782225, "num_tokens": 11901793198.0, "step": 97440 }, { "epoch": 0.12994042952604942, "grad_norm": 2.5, "learning_rate": 4.878137683956234e-06, "loss": 0.4178154945373535, "num_tokens": 11903998912.0, "step": 97460 }, { "epoch": 0.12996709491277753, "grad_norm": 2.125, "learning_rate": 4.877922293981743e-06, "loss": 0.42901926040649413, "num_tokens": 11906443429.0, "step": 97480 }, { "epoch": 0.12999376029950563, "grad_norm": 1.765625, "learning_rate": 4.877706718589558e-06, "loss": 0.4224871635437012, "num_tokens": 11908993466.0, "step": 97500 }, { "epoch": 0.13002042568623373, "grad_norm": 1.9921875, "learning_rate": 4.877490957796486e-06, "loss": 0.4010354995727539, "num_tokens": 11911453006.0, "step": 97520 }, { "epoch": 0.13004709107296183, "grad_norm": 2.828125, "learning_rate": 4.877275011619351e-06, "loss": 0.42271924018859863, "num_tokens": 11913798842.0, "step": 97540 }, { "epoch": 0.13007375645968994, "grad_norm": 1.84375, "learning_rate": 4.877058880074993e-06, "loss": 0.4189880847930908, "num_tokens": 11916322011.0, "step": 97560 }, { "epoch": 0.13010042184641804, "grad_norm": 1.4453125, "learning_rate": 4.876842563180263e-06, "loss": 0.4001796722412109, "num_tokens": 11918880065.0, "step": 97580 }, { "epoch": 0.13012708723314614, "grad_norm": 1.9453125, "learning_rate": 4.876626060952029e-06, "loss": 0.4105845928192139, "num_tokens": 11921437046.0, "step": 97600 }, { "epoch": 0.13015375261987425, "grad_norm": 1.8203125, "learning_rate": 4.876409373407173e-06, "loss": 0.4119204044342041, "num_tokens": 11924059041.0, "step": 97620 }, { "epoch": 0.13018041800660235, "grad_norm": 1.859375, "learning_rate": 4.8761925005625894e-06, "loss": 0.399788498878479, "num_tokens": 11926435278.0, "step": 97640 }, { "epoch": 0.13020708339333045, "grad_norm": 2.65625, "learning_rate": 4.875975442435191e-06, "loss": 0.40698986053466796, "num_tokens": 11928800615.0, "step": 97660 }, { "epoch": 0.13023374878005856, "grad_norm": 1.984375, "learning_rate": 4.875758199041902e-06, "loss": 0.4089666366577148, "num_tokens": 11931160550.0, "step": 97680 }, { "epoch": 0.13026041416678666, "grad_norm": 2.28125, "learning_rate": 4.875540770399662e-06, "loss": 0.405104923248291, "num_tokens": 11933625310.0, "step": 97700 }, { "epoch": 0.13028707955351476, "grad_norm": 2.453125, "learning_rate": 4.875323156525424e-06, "loss": 0.4244892120361328, "num_tokens": 11936217114.0, "step": 97720 }, { "epoch": 0.13031374494024286, "grad_norm": 2.1875, "learning_rate": 4.8751053574361576e-06, "loss": 0.41580610275268554, "num_tokens": 11938684214.0, "step": 97740 }, { "epoch": 0.13034041032697097, "grad_norm": 1.8828125, "learning_rate": 4.874887373148845e-06, "loss": 0.4373189926147461, "num_tokens": 11941061295.0, "step": 97760 }, { "epoch": 0.13036707571369907, "grad_norm": 1.859375, "learning_rate": 4.874669203680484e-06, "loss": 0.41552295684814455, "num_tokens": 11943410412.0, "step": 97780 }, { "epoch": 0.13039374110042717, "grad_norm": 2.1875, "learning_rate": 4.874450849048085e-06, "loss": 0.4158200263977051, "num_tokens": 11945631080.0, "step": 97800 }, { "epoch": 0.13042040648715528, "grad_norm": 2.140625, "learning_rate": 4.874232309268676e-06, "loss": 0.4122608661651611, "num_tokens": 11947803007.0, "step": 97820 }, { "epoch": 0.13044707187388338, "grad_norm": 2.4375, "learning_rate": 4.8740135843592965e-06, "loss": 0.4076885223388672, "num_tokens": 11950359474.0, "step": 97840 }, { "epoch": 0.13047373726061148, "grad_norm": 2.09375, "learning_rate": 4.873794674337e-06, "loss": 0.4267259120941162, "num_tokens": 11953000030.0, "step": 97860 }, { "epoch": 0.1305004026473396, "grad_norm": 2.140625, "learning_rate": 4.873575579218859e-06, "loss": 0.4246214866638184, "num_tokens": 11955298753.0, "step": 97880 }, { "epoch": 0.1305270680340677, "grad_norm": 2.015625, "learning_rate": 4.873356299021955e-06, "loss": 0.4307753562927246, "num_tokens": 11957697974.0, "step": 97900 }, { "epoch": 0.1305537334207958, "grad_norm": 2.03125, "learning_rate": 4.873136833763388e-06, "loss": 0.4113675594329834, "num_tokens": 11960338864.0, "step": 97920 }, { "epoch": 0.1305803988075239, "grad_norm": 1.9296875, "learning_rate": 4.87291718346027e-06, "loss": 0.4222546577453613, "num_tokens": 11962574961.0, "step": 97940 }, { "epoch": 0.130607064194252, "grad_norm": 1.640625, "learning_rate": 4.8726973481297266e-06, "loss": 0.4335780143737793, "num_tokens": 11965009841.0, "step": 97960 }, { "epoch": 0.1306337295809801, "grad_norm": 1.6796875, "learning_rate": 4.872477327788903e-06, "loss": 0.4016726493835449, "num_tokens": 11967454040.0, "step": 97980 }, { "epoch": 0.1306603949677082, "grad_norm": 2.25, "learning_rate": 4.872257122454951e-06, "loss": 0.4136859893798828, "num_tokens": 11969804444.0, "step": 98000 }, { "epoch": 0.1306870603544363, "grad_norm": 2.109375, "learning_rate": 4.872036732145044e-06, "loss": 0.42205238342285156, "num_tokens": 11972387968.0, "step": 98020 }, { "epoch": 0.1307137257411644, "grad_norm": 1.984375, "learning_rate": 4.871816156876366e-06, "loss": 0.4139139652252197, "num_tokens": 11974636488.0, "step": 98040 }, { "epoch": 0.13074039112789254, "grad_norm": 2.171875, "learning_rate": 4.871595396666116e-06, "loss": 0.42847623825073244, "num_tokens": 11977043403.0, "step": 98060 }, { "epoch": 0.13076705651462064, "grad_norm": 2.015625, "learning_rate": 4.871374451531508e-06, "loss": 0.4327415466308594, "num_tokens": 11979270151.0, "step": 98080 }, { "epoch": 0.13079372190134875, "grad_norm": 1.90625, "learning_rate": 4.87115332148977e-06, "loss": 0.4145489692687988, "num_tokens": 11981475751.0, "step": 98100 }, { "epoch": 0.13082038728807685, "grad_norm": 1.8359375, "learning_rate": 4.870932006558144e-06, "loss": 0.40686855316162107, "num_tokens": 11983862566.0, "step": 98120 }, { "epoch": 0.13084705267480495, "grad_norm": 2.09375, "learning_rate": 4.870710506753889e-06, "loss": 0.42131986618041994, "num_tokens": 11986208354.0, "step": 98140 }, { "epoch": 0.13087371806153306, "grad_norm": 2.5, "learning_rate": 4.8704888220942745e-06, "loss": 0.42144207954406737, "num_tokens": 11988643770.0, "step": 98160 }, { "epoch": 0.13090038344826116, "grad_norm": 2.171875, "learning_rate": 4.870266952596586e-06, "loss": 0.4179418087005615, "num_tokens": 11991258403.0, "step": 98180 }, { "epoch": 0.13092704883498926, "grad_norm": 1.9375, "learning_rate": 4.870044898278125e-06, "loss": 0.42363934516906737, "num_tokens": 11993818862.0, "step": 98200 }, { "epoch": 0.13095371422171737, "grad_norm": 2.59375, "learning_rate": 4.869822659156205e-06, "loss": 0.40580320358276367, "num_tokens": 11996391104.0, "step": 98220 }, { "epoch": 0.13098037960844547, "grad_norm": 1.8046875, "learning_rate": 4.869600235248156e-06, "loss": 0.4170230865478516, "num_tokens": 11998838181.0, "step": 98240 }, { "epoch": 0.13100704499517357, "grad_norm": 2.5625, "learning_rate": 4.8693776265713215e-06, "loss": 0.4038799285888672, "num_tokens": 12001226950.0, "step": 98260 }, { "epoch": 0.13103371038190167, "grad_norm": 2.90625, "learning_rate": 4.869154833143058e-06, "loss": 0.4094487190246582, "num_tokens": 12003538376.0, "step": 98280 }, { "epoch": 0.13106037576862978, "grad_norm": 2.375, "learning_rate": 4.868931854980739e-06, "loss": 0.4197197914123535, "num_tokens": 12005737457.0, "step": 98300 }, { "epoch": 0.13108704115535788, "grad_norm": 2.1875, "learning_rate": 4.86870869210175e-06, "loss": 0.42368078231811523, "num_tokens": 12008296933.0, "step": 98320 }, { "epoch": 0.13111370654208598, "grad_norm": 2.0625, "learning_rate": 4.868485344523494e-06, "loss": 0.41985673904418946, "num_tokens": 12010823641.0, "step": 98340 }, { "epoch": 0.1311403719288141, "grad_norm": 1.9375, "learning_rate": 4.868261812263384e-06, "loss": 0.4157248020172119, "num_tokens": 12013282511.0, "step": 98360 }, { "epoch": 0.1311670373155422, "grad_norm": 2.09375, "learning_rate": 4.868038095338851e-06, "loss": 0.43752498626708985, "num_tokens": 12015788792.0, "step": 98380 }, { "epoch": 0.1311937027022703, "grad_norm": 1.8046875, "learning_rate": 4.867814193767339e-06, "loss": 0.42703399658203123, "num_tokens": 12018550150.0, "step": 98400 }, { "epoch": 0.1312203680889984, "grad_norm": 1.9140625, "learning_rate": 4.867590107566309e-06, "loss": 0.4073958396911621, "num_tokens": 12020815777.0, "step": 98420 }, { "epoch": 0.1312470334757265, "grad_norm": 2.03125, "learning_rate": 4.86736583675323e-06, "loss": 0.426556921005249, "num_tokens": 12023130568.0, "step": 98440 }, { "epoch": 0.1312736988624546, "grad_norm": 1.8359375, "learning_rate": 4.8671413813455924e-06, "loss": 0.41077098846435545, "num_tokens": 12025381659.0, "step": 98460 }, { "epoch": 0.1313003642491827, "grad_norm": 1.90625, "learning_rate": 4.8669167413608974e-06, "loss": 0.4144741535186768, "num_tokens": 12027749482.0, "step": 98480 }, { "epoch": 0.1313270296359108, "grad_norm": 2.375, "learning_rate": 4.86669191681666e-06, "loss": 0.4067834377288818, "num_tokens": 12030288407.0, "step": 98500 }, { "epoch": 0.1313536950226389, "grad_norm": 2.1875, "learning_rate": 4.866466907730413e-06, "loss": 0.4072750091552734, "num_tokens": 12032821449.0, "step": 98520 }, { "epoch": 0.13138036040936701, "grad_norm": 2.40625, "learning_rate": 4.866241714119699e-06, "loss": 0.41739253997802733, "num_tokens": 12035369403.0, "step": 98540 }, { "epoch": 0.13140702579609512, "grad_norm": 2.1875, "learning_rate": 4.866016336002078e-06, "loss": 0.42054290771484376, "num_tokens": 12037709518.0, "step": 98560 }, { "epoch": 0.13143369118282322, "grad_norm": 1.953125, "learning_rate": 4.865790773395126e-06, "loss": 0.40694427490234375, "num_tokens": 12040150130.0, "step": 98580 }, { "epoch": 0.13146035656955132, "grad_norm": 2.109375, "learning_rate": 4.865565026316429e-06, "loss": 0.4083712577819824, "num_tokens": 12042604673.0, "step": 98600 }, { "epoch": 0.13148702195627943, "grad_norm": 2.0625, "learning_rate": 4.86533909478359e-06, "loss": 0.4135434150695801, "num_tokens": 12045231002.0, "step": 98620 }, { "epoch": 0.13151368734300753, "grad_norm": 1.9921875, "learning_rate": 4.8651129788142245e-06, "loss": 0.42743310928344724, "num_tokens": 12047597787.0, "step": 98640 }, { "epoch": 0.13154035272973563, "grad_norm": 2.40625, "learning_rate": 4.864886678425968e-06, "loss": 0.4262815475463867, "num_tokens": 12050126646.0, "step": 98660 }, { "epoch": 0.13156701811646374, "grad_norm": 2.328125, "learning_rate": 4.8646601936364615e-06, "loss": 0.42142257690429685, "num_tokens": 12052616312.0, "step": 98680 }, { "epoch": 0.13159368350319184, "grad_norm": 2.34375, "learning_rate": 4.864433524463368e-06, "loss": 0.40795073509216306, "num_tokens": 12054903217.0, "step": 98700 }, { "epoch": 0.13162034888991994, "grad_norm": 2.078125, "learning_rate": 4.86420667092436e-06, "loss": 0.4300246238708496, "num_tokens": 12057374791.0, "step": 98720 }, { "epoch": 0.13164701427664804, "grad_norm": 2.09375, "learning_rate": 4.8639796330371285e-06, "loss": 0.4132275104522705, "num_tokens": 12059797090.0, "step": 98740 }, { "epoch": 0.13167367966337615, "grad_norm": 2.1875, "learning_rate": 4.863752410819375e-06, "loss": 0.4179199695587158, "num_tokens": 12062425039.0, "step": 98760 }, { "epoch": 0.13170034505010425, "grad_norm": 1.9453125, "learning_rate": 4.863525004288817e-06, "loss": 0.4191697120666504, "num_tokens": 12064840289.0, "step": 98780 }, { "epoch": 0.13172701043683235, "grad_norm": 1.9765625, "learning_rate": 4.8632974134631885e-06, "loss": 0.41951212882995603, "num_tokens": 12067135504.0, "step": 98800 }, { "epoch": 0.13175367582356046, "grad_norm": 2.15625, "learning_rate": 4.863069638360233e-06, "loss": 0.41702632904052733, "num_tokens": 12069479119.0, "step": 98820 }, { "epoch": 0.13178034121028856, "grad_norm": 2.5, "learning_rate": 4.862841678997713e-06, "loss": 0.42221670150756835, "num_tokens": 12071998408.0, "step": 98840 }, { "epoch": 0.13180700659701666, "grad_norm": 2.546875, "learning_rate": 4.862613535393403e-06, "loss": 0.4148579120635986, "num_tokens": 12074503049.0, "step": 98860 }, { "epoch": 0.1318336719837448, "grad_norm": 2.0625, "learning_rate": 4.862385207565093e-06, "loss": 0.44127912521362306, "num_tokens": 12076978604.0, "step": 98880 }, { "epoch": 0.1318603373704729, "grad_norm": 2.078125, "learning_rate": 4.862156695530587e-06, "loss": 0.39929924011230467, "num_tokens": 12079715525.0, "step": 98900 }, { "epoch": 0.131887002757201, "grad_norm": 2.046875, "learning_rate": 4.861927999307702e-06, "loss": 0.41280341148376465, "num_tokens": 12082150043.0, "step": 98920 }, { "epoch": 0.1319136681439291, "grad_norm": 1.890625, "learning_rate": 4.86169911891427e-06, "loss": 0.42044773101806643, "num_tokens": 12084583422.0, "step": 98940 }, { "epoch": 0.1319403335306572, "grad_norm": 1.796875, "learning_rate": 4.86147005436814e-06, "loss": 0.42759170532226565, "num_tokens": 12086923818.0, "step": 98960 }, { "epoch": 0.1319669989173853, "grad_norm": 1.7265625, "learning_rate": 4.861240805687172e-06, "loss": 0.40349879264831545, "num_tokens": 12089456610.0, "step": 98980 }, { "epoch": 0.1319936643041134, "grad_norm": 2.09375, "learning_rate": 4.861011372889241e-06, "loss": 0.4021602630615234, "num_tokens": 12092052520.0, "step": 99000 }, { "epoch": 0.13202032969084151, "grad_norm": 2.296875, "learning_rate": 4.860781755992239e-06, "loss": 0.4029257774353027, "num_tokens": 12094338989.0, "step": 99020 }, { "epoch": 0.13204699507756962, "grad_norm": 2.40625, "learning_rate": 4.8605519550140675e-06, "loss": 0.41687984466552735, "num_tokens": 12096939828.0, "step": 99040 }, { "epoch": 0.13207366046429772, "grad_norm": 2.1875, "learning_rate": 4.860321969972648e-06, "loss": 0.4225743770599365, "num_tokens": 12099267515.0, "step": 99060 }, { "epoch": 0.13210032585102582, "grad_norm": 1.8828125, "learning_rate": 4.860091800885911e-06, "loss": 0.4053358554840088, "num_tokens": 12101957506.0, "step": 99080 }, { "epoch": 0.13212699123775393, "grad_norm": 1.734375, "learning_rate": 4.859861447771806e-06, "loss": 0.4301333427429199, "num_tokens": 12104382797.0, "step": 99100 }, { "epoch": 0.13215365662448203, "grad_norm": 2.203125, "learning_rate": 4.8596309106482925e-06, "loss": 0.4070392608642578, "num_tokens": 12106805292.0, "step": 99120 }, { "epoch": 0.13218032201121013, "grad_norm": 1.90625, "learning_rate": 4.859400189533348e-06, "loss": 0.4150418758392334, "num_tokens": 12109155699.0, "step": 99140 }, { "epoch": 0.13220698739793824, "grad_norm": 2.015625, "learning_rate": 4.859169284444963e-06, "loss": 0.4144908905029297, "num_tokens": 12111693190.0, "step": 99160 }, { "epoch": 0.13223365278466634, "grad_norm": 2.375, "learning_rate": 4.858938195401142e-06, "loss": 0.4143192291259766, "num_tokens": 12114382523.0, "step": 99180 }, { "epoch": 0.13226031817139444, "grad_norm": 1.9453125, "learning_rate": 4.858706922419903e-06, "loss": 0.4086674690246582, "num_tokens": 12117020324.0, "step": 99200 }, { "epoch": 0.13228698355812255, "grad_norm": 1.75, "learning_rate": 4.858475465519282e-06, "loss": 0.43029117584228516, "num_tokens": 12119519399.0, "step": 99220 }, { "epoch": 0.13231364894485065, "grad_norm": 1.6484375, "learning_rate": 4.8582438247173235e-06, "loss": 0.40854792594909667, "num_tokens": 12121923637.0, "step": 99240 }, { "epoch": 0.13234031433157875, "grad_norm": 1.9375, "learning_rate": 4.858012000032093e-06, "loss": 0.40677080154418943, "num_tokens": 12124339056.0, "step": 99260 }, { "epoch": 0.13236697971830685, "grad_norm": 2.28125, "learning_rate": 4.857779991481664e-06, "loss": 0.419779634475708, "num_tokens": 12126677666.0, "step": 99280 }, { "epoch": 0.13239364510503496, "grad_norm": 2.03125, "learning_rate": 4.857547799084129e-06, "loss": 0.4292020797729492, "num_tokens": 12129020591.0, "step": 99300 }, { "epoch": 0.13242031049176306, "grad_norm": 1.6640625, "learning_rate": 4.857315422857592e-06, "loss": 0.4067702293395996, "num_tokens": 12131650280.0, "step": 99320 }, { "epoch": 0.13244697587849116, "grad_norm": 2.3125, "learning_rate": 4.857082862820174e-06, "loss": 0.422212028503418, "num_tokens": 12134036846.0, "step": 99340 }, { "epoch": 0.13247364126521927, "grad_norm": 1.5859375, "learning_rate": 4.856850118990008e-06, "loss": 0.4147987365722656, "num_tokens": 12136454511.0, "step": 99360 }, { "epoch": 0.13250030665194737, "grad_norm": 2.390625, "learning_rate": 4.856617191385241e-06, "loss": 0.408188533782959, "num_tokens": 12139094704.0, "step": 99380 }, { "epoch": 0.13252697203867547, "grad_norm": 2.046875, "learning_rate": 4.856384080024037e-06, "loss": 0.42609734535217286, "num_tokens": 12141290828.0, "step": 99400 }, { "epoch": 0.13255363742540358, "grad_norm": 1.796875, "learning_rate": 4.8561507849245724e-06, "loss": 0.4055694580078125, "num_tokens": 12143603443.0, "step": 99420 }, { "epoch": 0.13258030281213168, "grad_norm": 2.09375, "learning_rate": 4.855917306105038e-06, "loss": 0.4173409461975098, "num_tokens": 12146194446.0, "step": 99440 }, { "epoch": 0.13260696819885978, "grad_norm": 1.8515625, "learning_rate": 4.85568364358364e-06, "loss": 0.4076047897338867, "num_tokens": 12148710387.0, "step": 99460 }, { "epoch": 0.13263363358558788, "grad_norm": 2.03125, "learning_rate": 4.855449797378597e-06, "loss": 0.4198403835296631, "num_tokens": 12151248564.0, "step": 99480 }, { "epoch": 0.132660298972316, "grad_norm": 2.015625, "learning_rate": 4.855215767508143e-06, "loss": 0.40613231658935545, "num_tokens": 12153554017.0, "step": 99500 }, { "epoch": 0.1326869643590441, "grad_norm": 1.859375, "learning_rate": 4.854981553990528e-06, "loss": 0.4219648361206055, "num_tokens": 12156021647.0, "step": 99520 }, { "epoch": 0.1327136297457722, "grad_norm": 1.765625, "learning_rate": 4.854747156844014e-06, "loss": 0.42844815254211427, "num_tokens": 12158441653.0, "step": 99540 }, { "epoch": 0.1327402951325003, "grad_norm": 2.0625, "learning_rate": 4.854512576086877e-06, "loss": 0.40863285064697263, "num_tokens": 12160978726.0, "step": 99560 }, { "epoch": 0.1327669605192284, "grad_norm": 2.09375, "learning_rate": 4.8542778117374094e-06, "loss": 0.4138603687286377, "num_tokens": 12163369105.0, "step": 99580 }, { "epoch": 0.1327936259059565, "grad_norm": 2.375, "learning_rate": 4.854042863813917e-06, "loss": 0.41240711212158204, "num_tokens": 12165916891.0, "step": 99600 }, { "epoch": 0.1328202912926846, "grad_norm": 1.828125, "learning_rate": 4.853807732334719e-06, "loss": 0.41517128944396975, "num_tokens": 12168402022.0, "step": 99620 }, { "epoch": 0.1328469566794127, "grad_norm": 1.9453125, "learning_rate": 4.8535724173181504e-06, "loss": 0.4287548542022705, "num_tokens": 12170804748.0, "step": 99640 }, { "epoch": 0.1328736220661408, "grad_norm": 2.4375, "learning_rate": 4.853336918782559e-06, "loss": 0.4343167781829834, "num_tokens": 12173197949.0, "step": 99660 }, { "epoch": 0.13290028745286894, "grad_norm": 2.015625, "learning_rate": 4.853101236746309e-06, "loss": 0.4165091037750244, "num_tokens": 12175808487.0, "step": 99680 }, { "epoch": 0.13292695283959705, "grad_norm": 2.1875, "learning_rate": 4.852865371227776e-06, "loss": 0.4103870391845703, "num_tokens": 12178305486.0, "step": 99700 }, { "epoch": 0.13295361822632515, "grad_norm": 1.8671875, "learning_rate": 4.8526293222453535e-06, "loss": 0.41694889068603513, "num_tokens": 12180707220.0, "step": 99720 }, { "epoch": 0.13298028361305325, "grad_norm": 1.859375, "learning_rate": 4.852393089817445e-06, "loss": 0.40298070907592776, "num_tokens": 12183086882.0, "step": 99740 }, { "epoch": 0.13300694899978135, "grad_norm": 1.890625, "learning_rate": 4.852156673962472e-06, "loss": 0.4301877021789551, "num_tokens": 12185676359.0, "step": 99760 }, { "epoch": 0.13303361438650946, "grad_norm": 2.484375, "learning_rate": 4.851920074698871e-06, "loss": 0.4183084487915039, "num_tokens": 12188044422.0, "step": 99780 }, { "epoch": 0.13306027977323756, "grad_norm": 2.546875, "learning_rate": 4.851683292045086e-06, "loss": 0.4110909938812256, "num_tokens": 12190269892.0, "step": 99800 }, { "epoch": 0.13308694515996566, "grad_norm": 1.8359375, "learning_rate": 4.851446326019584e-06, "loss": 0.42997078895568847, "num_tokens": 12192820395.0, "step": 99820 }, { "epoch": 0.13311361054669377, "grad_norm": 2.0625, "learning_rate": 4.85120917664084e-06, "loss": 0.41693115234375, "num_tokens": 12195252658.0, "step": 99840 }, { "epoch": 0.13314027593342187, "grad_norm": 1.9765625, "learning_rate": 4.850971843927348e-06, "loss": 0.422833251953125, "num_tokens": 12197509185.0, "step": 99860 }, { "epoch": 0.13316694132014997, "grad_norm": 2.234375, "learning_rate": 4.8507343278976115e-06, "loss": 0.4212183952331543, "num_tokens": 12199955726.0, "step": 99880 }, { "epoch": 0.13319360670687808, "grad_norm": 2.125, "learning_rate": 4.850496628570152e-06, "loss": 0.41153154373168943, "num_tokens": 12202453525.0, "step": 99900 }, { "epoch": 0.13322027209360618, "grad_norm": 2.265625, "learning_rate": 4.850258745963504e-06, "loss": 0.40053625106811525, "num_tokens": 12204999991.0, "step": 99920 }, { "epoch": 0.13324693748033428, "grad_norm": 2.234375, "learning_rate": 4.850020680096216e-06, "loss": 0.4015016555786133, "num_tokens": 12207447898.0, "step": 99940 }, { "epoch": 0.13327360286706239, "grad_norm": 1.8125, "learning_rate": 4.8497824309868515e-06, "loss": 0.4245878219604492, "num_tokens": 12209971514.0, "step": 99960 }, { "epoch": 0.1333002682537905, "grad_norm": 2.0, "learning_rate": 4.849543998653987e-06, "loss": 0.39402782917022705, "num_tokens": 12212603356.0, "step": 99980 }, { "epoch": 0.1333269336405186, "grad_norm": 2.078125, "learning_rate": 4.8493053831162155e-06, "loss": 0.4048344612121582, "num_tokens": 12214926834.0, "step": 100000 }, { "epoch": 0.1333535990272467, "grad_norm": 2.046875, "learning_rate": 4.849066584392141e-06, "loss": 0.42863025665283205, "num_tokens": 12217625034.0, "step": 100020 }, { "epoch": 0.1333802644139748, "grad_norm": 1.8359375, "learning_rate": 4.8488276025003864e-06, "loss": 0.3948798656463623, "num_tokens": 12220158475.0, "step": 100040 }, { "epoch": 0.1334069298007029, "grad_norm": 2.125, "learning_rate": 4.848588437459585e-06, "loss": 0.4152480125427246, "num_tokens": 12222797623.0, "step": 100060 }, { "epoch": 0.133433595187431, "grad_norm": 2.015625, "learning_rate": 4.848349089288385e-06, "loss": 0.4124251365661621, "num_tokens": 12225473781.0, "step": 100080 }, { "epoch": 0.1334602605741591, "grad_norm": 2.140625, "learning_rate": 4.84810955800545e-06, "loss": 0.4257214546203613, "num_tokens": 12227783094.0, "step": 100100 }, { "epoch": 0.1334869259608872, "grad_norm": 2.359375, "learning_rate": 4.847869843629457e-06, "loss": 0.4175868988037109, "num_tokens": 12230179513.0, "step": 100120 }, { "epoch": 0.1335135913476153, "grad_norm": 2.03125, "learning_rate": 4.847629946179099e-06, "loss": 0.41181130409240724, "num_tokens": 12232713057.0, "step": 100140 }, { "epoch": 0.13354025673434342, "grad_norm": 1.8125, "learning_rate": 4.84738986567308e-06, "loss": 0.4183619499206543, "num_tokens": 12235019466.0, "step": 100160 }, { "epoch": 0.13356692212107152, "grad_norm": 2.25, "learning_rate": 4.847149602130121e-06, "loss": 0.4208847522735596, "num_tokens": 12237387839.0, "step": 100180 }, { "epoch": 0.13359358750779962, "grad_norm": 1.96875, "learning_rate": 4.8469091555689575e-06, "loss": 0.4201387405395508, "num_tokens": 12239813424.0, "step": 100200 }, { "epoch": 0.13362025289452772, "grad_norm": 2.140625, "learning_rate": 4.846668526008337e-06, "loss": 0.4240265846252441, "num_tokens": 12242300993.0, "step": 100220 }, { "epoch": 0.13364691828125583, "grad_norm": 2.046875, "learning_rate": 4.8464277134670226e-06, "loss": 0.4153632164001465, "num_tokens": 12244844656.0, "step": 100240 }, { "epoch": 0.13367358366798393, "grad_norm": 2.28125, "learning_rate": 4.846186717963791e-06, "loss": 0.41555447578430177, "num_tokens": 12247540758.0, "step": 100260 }, { "epoch": 0.13370024905471203, "grad_norm": 1.921875, "learning_rate": 4.845945539517436e-06, "loss": 0.4145786762237549, "num_tokens": 12249904055.0, "step": 100280 }, { "epoch": 0.13372691444144014, "grad_norm": 2.40625, "learning_rate": 4.845704178146761e-06, "loss": 0.4132869720458984, "num_tokens": 12252005717.0, "step": 100300 }, { "epoch": 0.13375357982816824, "grad_norm": 2.078125, "learning_rate": 4.845462633870588e-06, "loss": 0.41455650329589844, "num_tokens": 12254512953.0, "step": 100320 }, { "epoch": 0.13378024521489634, "grad_norm": 2.125, "learning_rate": 4.845220906707749e-06, "loss": 0.4065802574157715, "num_tokens": 12256966519.0, "step": 100340 }, { "epoch": 0.13380691060162445, "grad_norm": 1.8203125, "learning_rate": 4.844978996677095e-06, "loss": 0.42110681533813477, "num_tokens": 12259413310.0, "step": 100360 }, { "epoch": 0.13383357598835255, "grad_norm": 1.8671875, "learning_rate": 4.8447369037974876e-06, "loss": 0.42719593048095705, "num_tokens": 12261845452.0, "step": 100380 }, { "epoch": 0.13386024137508065, "grad_norm": 1.703125, "learning_rate": 4.844494628087805e-06, "loss": 0.4153417110443115, "num_tokens": 12264394239.0, "step": 100400 }, { "epoch": 0.13388690676180875, "grad_norm": 2.0, "learning_rate": 4.844252169566936e-06, "loss": 0.4231986999511719, "num_tokens": 12266692864.0, "step": 100420 }, { "epoch": 0.13391357214853686, "grad_norm": 2.546875, "learning_rate": 4.844009528253789e-06, "loss": 0.4090106964111328, "num_tokens": 12269098006.0, "step": 100440 }, { "epoch": 0.13394023753526496, "grad_norm": 2.078125, "learning_rate": 4.8437667041672835e-06, "loss": 0.4019512176513672, "num_tokens": 12271697417.0, "step": 100460 }, { "epoch": 0.13396690292199306, "grad_norm": 2.421875, "learning_rate": 4.843523697326351e-06, "loss": 0.41661558151245115, "num_tokens": 12274210336.0, "step": 100480 }, { "epoch": 0.1339935683087212, "grad_norm": 2.203125, "learning_rate": 4.843280507749943e-06, "loss": 0.4126897811889648, "num_tokens": 12276550331.0, "step": 100500 }, { "epoch": 0.1340202336954493, "grad_norm": 2.109375, "learning_rate": 4.843037135457021e-06, "loss": 0.41408867835998536, "num_tokens": 12278764255.0, "step": 100520 }, { "epoch": 0.1340468990821774, "grad_norm": 2.03125, "learning_rate": 4.8427935804665615e-06, "loss": 0.4101902961730957, "num_tokens": 12281492970.0, "step": 100540 }, { "epoch": 0.1340735644689055, "grad_norm": 2.078125, "learning_rate": 4.842549842797556e-06, "loss": 0.41430325508117677, "num_tokens": 12283927529.0, "step": 100560 }, { "epoch": 0.1341002298556336, "grad_norm": 2.0, "learning_rate": 4.842305922469009e-06, "loss": 0.4225127696990967, "num_tokens": 12286464488.0, "step": 100580 }, { "epoch": 0.1341268952423617, "grad_norm": 2.140625, "learning_rate": 4.842061819499943e-06, "loss": 0.4258782386779785, "num_tokens": 12288775681.0, "step": 100600 }, { "epoch": 0.1341535606290898, "grad_norm": 2.1875, "learning_rate": 4.841817533909387e-06, "loss": 0.41999425888061526, "num_tokens": 12291301989.0, "step": 100620 }, { "epoch": 0.13418022601581792, "grad_norm": 2.390625, "learning_rate": 4.841573065716394e-06, "loss": 0.420927906036377, "num_tokens": 12293900093.0, "step": 100640 }, { "epoch": 0.13420689140254602, "grad_norm": 2.140625, "learning_rate": 4.841328414940023e-06, "loss": 0.42269191741943357, "num_tokens": 12296440680.0, "step": 100660 }, { "epoch": 0.13423355678927412, "grad_norm": 2.109375, "learning_rate": 4.841083581599353e-06, "loss": 0.417179012298584, "num_tokens": 12298748598.0, "step": 100680 }, { "epoch": 0.13426022217600223, "grad_norm": 2.140625, "learning_rate": 4.8408385657134714e-06, "loss": 0.43846893310546875, "num_tokens": 12301231929.0, "step": 100700 }, { "epoch": 0.13428688756273033, "grad_norm": 2.6875, "learning_rate": 4.840593367301487e-06, "loss": 0.4136952877044678, "num_tokens": 12303724629.0, "step": 100720 }, { "epoch": 0.13431355294945843, "grad_norm": 2.015625, "learning_rate": 4.840347986382517e-06, "loss": 0.42191247940063475, "num_tokens": 12306231234.0, "step": 100740 }, { "epoch": 0.13434021833618653, "grad_norm": 1.9296875, "learning_rate": 4.840102422975696e-06, "loss": 0.42037482261657716, "num_tokens": 12308844827.0, "step": 100760 }, { "epoch": 0.13436688372291464, "grad_norm": 1.7734375, "learning_rate": 4.83985667710017e-06, "loss": 0.41750321388244627, "num_tokens": 12311289802.0, "step": 100780 }, { "epoch": 0.13439354910964274, "grad_norm": 2.375, "learning_rate": 4.839610748775102e-06, "loss": 0.3993007898330688, "num_tokens": 12313614168.0, "step": 100800 }, { "epoch": 0.13442021449637084, "grad_norm": 2.328125, "learning_rate": 4.839364638019668e-06, "loss": 0.4187471389770508, "num_tokens": 12315942715.0, "step": 100820 }, { "epoch": 0.13444687988309895, "grad_norm": 2.140625, "learning_rate": 4.839118344853059e-06, "loss": 0.4092456340789795, "num_tokens": 12318260496.0, "step": 100840 }, { "epoch": 0.13447354526982705, "grad_norm": 2.015625, "learning_rate": 4.838871869294479e-06, "loss": 0.4186856269836426, "num_tokens": 12320944274.0, "step": 100860 }, { "epoch": 0.13450021065655515, "grad_norm": 2.0, "learning_rate": 4.838625211363147e-06, "loss": 0.4056728363037109, "num_tokens": 12323468999.0, "step": 100880 }, { "epoch": 0.13452687604328326, "grad_norm": 2.4375, "learning_rate": 4.838378371078297e-06, "loss": 0.39596436023712156, "num_tokens": 12326002805.0, "step": 100900 }, { "epoch": 0.13455354143001136, "grad_norm": 2.15625, "learning_rate": 4.838131348459175e-06, "loss": 0.4088862419128418, "num_tokens": 12328686268.0, "step": 100920 }, { "epoch": 0.13458020681673946, "grad_norm": 2.046875, "learning_rate": 4.837884143525042e-06, "loss": 0.3988314151763916, "num_tokens": 12330956738.0, "step": 100940 }, { "epoch": 0.13460687220346756, "grad_norm": 1.8515625, "learning_rate": 4.837636756295176e-06, "loss": 0.4059715270996094, "num_tokens": 12333510317.0, "step": 100960 }, { "epoch": 0.13463353759019567, "grad_norm": 2.15625, "learning_rate": 4.837389186788865e-06, "loss": 0.4101998329162598, "num_tokens": 12335894033.0, "step": 100980 }, { "epoch": 0.13466020297692377, "grad_norm": 2.234375, "learning_rate": 4.837141435025413e-06, "loss": 0.4141660690307617, "num_tokens": 12338318921.0, "step": 101000 }, { "epoch": 0.13468686836365187, "grad_norm": 2.09375, "learning_rate": 4.83689350102414e-06, "loss": 0.3955241680145264, "num_tokens": 12340895684.0, "step": 101020 }, { "epoch": 0.13471353375037998, "grad_norm": 2.375, "learning_rate": 4.836645384804377e-06, "loss": 0.41152663230895997, "num_tokens": 12343386110.0, "step": 101040 }, { "epoch": 0.13474019913710808, "grad_norm": 2.125, "learning_rate": 4.836397086385472e-06, "loss": 0.40648679733276366, "num_tokens": 12345929607.0, "step": 101060 }, { "epoch": 0.13476686452383618, "grad_norm": 2.078125, "learning_rate": 4.8361486057867845e-06, "loss": 0.41398773193359373, "num_tokens": 12348458759.0, "step": 101080 }, { "epoch": 0.13479352991056429, "grad_norm": 2.390625, "learning_rate": 4.835899943027691e-06, "loss": 0.38906064033508303, "num_tokens": 12351013277.0, "step": 101100 }, { "epoch": 0.1348201952972924, "grad_norm": 1.78125, "learning_rate": 4.835651098127581e-06, "loss": 0.4129283905029297, "num_tokens": 12353403528.0, "step": 101120 }, { "epoch": 0.1348468606840205, "grad_norm": 1.90625, "learning_rate": 4.835402071105858e-06, "loss": 0.41254310607910155, "num_tokens": 12355754405.0, "step": 101140 }, { "epoch": 0.1348735260707486, "grad_norm": 2.171875, "learning_rate": 4.835152861981938e-06, "loss": 0.38617820739746095, "num_tokens": 12358394162.0, "step": 101160 }, { "epoch": 0.1349001914574767, "grad_norm": 2.203125, "learning_rate": 4.8349034707752555e-06, "loss": 0.39502968788146975, "num_tokens": 12360716949.0, "step": 101180 }, { "epoch": 0.1349268568442048, "grad_norm": 2.34375, "learning_rate": 4.834653897505255e-06, "loss": 0.4173295021057129, "num_tokens": 12363023271.0, "step": 101200 }, { "epoch": 0.1349535222309329, "grad_norm": 2.453125, "learning_rate": 4.834404142191397e-06, "loss": 0.40883827209472656, "num_tokens": 12365460571.0, "step": 101220 }, { "epoch": 0.134980187617661, "grad_norm": 2.390625, "learning_rate": 4.8341542048531575e-06, "loss": 0.41669321060180664, "num_tokens": 12367690586.0, "step": 101240 }, { "epoch": 0.1350068530043891, "grad_norm": 1.875, "learning_rate": 4.833904085510024e-06, "loss": 0.4303248405456543, "num_tokens": 12370184499.0, "step": 101260 }, { "epoch": 0.1350335183911172, "grad_norm": 1.984375, "learning_rate": 4.8336537841815e-06, "loss": 0.4271990299224854, "num_tokens": 12372519821.0, "step": 101280 }, { "epoch": 0.13506018377784532, "grad_norm": 1.8984375, "learning_rate": 4.8334033008871026e-06, "loss": 0.4093885898590088, "num_tokens": 12375072275.0, "step": 101300 }, { "epoch": 0.13508684916457345, "grad_norm": 1.625, "learning_rate": 4.833152635646362e-06, "loss": 0.4308038234710693, "num_tokens": 12377259383.0, "step": 101320 }, { "epoch": 0.13511351455130155, "grad_norm": 2.109375, "learning_rate": 4.832901788478827e-06, "loss": 0.4127608299255371, "num_tokens": 12379548284.0, "step": 101340 }, { "epoch": 0.13514017993802965, "grad_norm": 2.0625, "learning_rate": 4.832650759404053e-06, "loss": 0.42824320793151854, "num_tokens": 12382204343.0, "step": 101360 }, { "epoch": 0.13516684532475776, "grad_norm": 2.125, "learning_rate": 4.832399548441616e-06, "loss": 0.3972799301147461, "num_tokens": 12384560994.0, "step": 101380 }, { "epoch": 0.13519351071148586, "grad_norm": 2.015625, "learning_rate": 4.832148155611105e-06, "loss": 0.4236501693725586, "num_tokens": 12387042402.0, "step": 101400 }, { "epoch": 0.13522017609821396, "grad_norm": 1.90625, "learning_rate": 4.831896580932122e-06, "loss": 0.4057014942169189, "num_tokens": 12389489694.0, "step": 101420 }, { "epoch": 0.13524684148494207, "grad_norm": 2.28125, "learning_rate": 4.831644824424282e-06, "loss": 0.4193000316619873, "num_tokens": 12391769744.0, "step": 101440 }, { "epoch": 0.13527350687167017, "grad_norm": 2.078125, "learning_rate": 4.831392886107216e-06, "loss": 0.43131675720214846, "num_tokens": 12394047372.0, "step": 101460 }, { "epoch": 0.13530017225839827, "grad_norm": 1.8984375, "learning_rate": 4.831140766000569e-06, "loss": 0.3965909004211426, "num_tokens": 12396564956.0, "step": 101480 }, { "epoch": 0.13532683764512637, "grad_norm": 2.125, "learning_rate": 4.830888464124001e-06, "loss": 0.4023242473602295, "num_tokens": 12399099696.0, "step": 101500 }, { "epoch": 0.13535350303185448, "grad_norm": 2.40625, "learning_rate": 4.830635980497185e-06, "loss": 0.431441593170166, "num_tokens": 12401416535.0, "step": 101520 }, { "epoch": 0.13538016841858258, "grad_norm": 1.984375, "learning_rate": 4.830383315139806e-06, "loss": 0.4122481822967529, "num_tokens": 12403846447.0, "step": 101540 }, { "epoch": 0.13540683380531068, "grad_norm": 1.9296875, "learning_rate": 4.830130468071569e-06, "loss": 0.39866230487823484, "num_tokens": 12406506121.0, "step": 101560 }, { "epoch": 0.1354334991920388, "grad_norm": 1.8125, "learning_rate": 4.829877439312187e-06, "loss": 0.4069073677062988, "num_tokens": 12408953339.0, "step": 101580 }, { "epoch": 0.1354601645787669, "grad_norm": 1.6484375, "learning_rate": 4.829624228881391e-06, "loss": 0.4144248962402344, "num_tokens": 12411282117.0, "step": 101600 }, { "epoch": 0.135486829965495, "grad_norm": 2.078125, "learning_rate": 4.8293708367989236e-06, "loss": 0.42060484886169436, "num_tokens": 12413710680.0, "step": 101620 }, { "epoch": 0.1355134953522231, "grad_norm": 1.953125, "learning_rate": 4.8291172630845454e-06, "loss": 0.4137110710144043, "num_tokens": 12416000320.0, "step": 101640 }, { "epoch": 0.1355401607389512, "grad_norm": 2.984375, "learning_rate": 4.828863507758027e-06, "loss": 0.4170125961303711, "num_tokens": 12418379822.0, "step": 101660 }, { "epoch": 0.1355668261256793, "grad_norm": 2.046875, "learning_rate": 4.828609570839156e-06, "loss": 0.4240856170654297, "num_tokens": 12420569831.0, "step": 101680 }, { "epoch": 0.1355934915124074, "grad_norm": 1.7734375, "learning_rate": 4.828355452347731e-06, "loss": 0.4041567802429199, "num_tokens": 12422871052.0, "step": 101700 }, { "epoch": 0.1356201568991355, "grad_norm": 2.203125, "learning_rate": 4.8281011523035685e-06, "loss": 0.41077847480773927, "num_tokens": 12425101559.0, "step": 101720 }, { "epoch": 0.1356468222858636, "grad_norm": 2.421875, "learning_rate": 4.827846670726497e-06, "loss": 0.40573906898498535, "num_tokens": 12427586556.0, "step": 101740 }, { "epoch": 0.1356734876725917, "grad_norm": 2.09375, "learning_rate": 4.82759200763636e-06, "loss": 0.41724309921264646, "num_tokens": 12430236230.0, "step": 101760 }, { "epoch": 0.13570015305931982, "grad_norm": 2.140625, "learning_rate": 4.8273371630530155e-06, "loss": 0.4065562725067139, "num_tokens": 12432608847.0, "step": 101780 }, { "epoch": 0.13572681844604792, "grad_norm": 2.125, "learning_rate": 4.827082136996332e-06, "loss": 0.4089604377746582, "num_tokens": 12435013811.0, "step": 101800 }, { "epoch": 0.13575348383277602, "grad_norm": 2.546875, "learning_rate": 4.826826929486198e-06, "loss": 0.42690048217773435, "num_tokens": 12437538888.0, "step": 101820 }, { "epoch": 0.13578014921950413, "grad_norm": 2.25, "learning_rate": 4.826571540542512e-06, "loss": 0.41738123893737794, "num_tokens": 12439885937.0, "step": 101840 }, { "epoch": 0.13580681460623223, "grad_norm": 1.8125, "learning_rate": 4.826315970185188e-06, "loss": 0.41871051788330077, "num_tokens": 12442416404.0, "step": 101860 }, { "epoch": 0.13583347999296033, "grad_norm": 1.8515625, "learning_rate": 4.826060218434154e-06, "loss": 0.4087044715881348, "num_tokens": 12444713028.0, "step": 101880 }, { "epoch": 0.13586014537968844, "grad_norm": 1.828125, "learning_rate": 4.825804285309353e-06, "loss": 0.42153053283691405, "num_tokens": 12447094762.0, "step": 101900 }, { "epoch": 0.13588681076641654, "grad_norm": 2.421875, "learning_rate": 4.82554817083074e-06, "loss": 0.42246313095092775, "num_tokens": 12449474679.0, "step": 101920 }, { "epoch": 0.13591347615314464, "grad_norm": 1.9453125, "learning_rate": 4.825291875018286e-06, "loss": 0.39859790802001954, "num_tokens": 12452150144.0, "step": 101940 }, { "epoch": 0.13594014153987274, "grad_norm": 1.734375, "learning_rate": 4.825035397891976e-06, "loss": 0.4075889587402344, "num_tokens": 12454701205.0, "step": 101960 }, { "epoch": 0.13596680692660085, "grad_norm": 1.53125, "learning_rate": 4.824778739471807e-06, "loss": 0.4145027160644531, "num_tokens": 12457274437.0, "step": 101980 }, { "epoch": 0.13599347231332895, "grad_norm": 2.515625, "learning_rate": 4.824521899777794e-06, "loss": 0.4148248672485352, "num_tokens": 12459646335.0, "step": 102000 }, { "epoch": 0.13602013770005705, "grad_norm": 2.203125, "learning_rate": 4.824264878829963e-06, "loss": 0.41959781646728517, "num_tokens": 12461950244.0, "step": 102020 }, { "epoch": 0.13604680308678516, "grad_norm": 1.8203125, "learning_rate": 4.824007676648356e-06, "loss": 0.4143557548522949, "num_tokens": 12464550337.0, "step": 102040 }, { "epoch": 0.13607346847351326, "grad_norm": 2.046875, "learning_rate": 4.823750293253027e-06, "loss": 0.40812883377075193, "num_tokens": 12467106783.0, "step": 102060 }, { "epoch": 0.13610013386024136, "grad_norm": 1.8828125, "learning_rate": 4.8234927286640446e-06, "loss": 0.41670947074890136, "num_tokens": 12469551685.0, "step": 102080 }, { "epoch": 0.13612679924696947, "grad_norm": 2.09375, "learning_rate": 4.8232349829014945e-06, "loss": 0.42171225547790525, "num_tokens": 12472033223.0, "step": 102100 }, { "epoch": 0.1361534646336976, "grad_norm": 2.015625, "learning_rate": 4.822977055985474e-06, "loss": 0.42354764938354494, "num_tokens": 12474456806.0, "step": 102120 }, { "epoch": 0.1361801300204257, "grad_norm": 2.015625, "learning_rate": 4.822718947936094e-06, "loss": 0.4277472496032715, "num_tokens": 12476652264.0, "step": 102140 }, { "epoch": 0.1362067954071538, "grad_norm": 2.65625, "learning_rate": 4.82246065877348e-06, "loss": 0.4066883087158203, "num_tokens": 12479133938.0, "step": 102160 }, { "epoch": 0.1362334607938819, "grad_norm": 2.140625, "learning_rate": 4.8222021885177735e-06, "loss": 0.41388883590698244, "num_tokens": 12481503320.0, "step": 102180 }, { "epoch": 0.13626012618061, "grad_norm": 2.421875, "learning_rate": 4.8219435371891274e-06, "loss": 0.412310791015625, "num_tokens": 12483999792.0, "step": 102200 }, { "epoch": 0.1362867915673381, "grad_norm": 2.078125, "learning_rate": 4.821684704807711e-06, "loss": 0.4045081615447998, "num_tokens": 12486331458.0, "step": 102220 }, { "epoch": 0.13631345695406621, "grad_norm": 2.03125, "learning_rate": 4.821425691393705e-06, "loss": 0.3877739906311035, "num_tokens": 12488665157.0, "step": 102240 }, { "epoch": 0.13634012234079432, "grad_norm": 2.390625, "learning_rate": 4.8211664969673075e-06, "loss": 0.4236276626586914, "num_tokens": 12490842543.0, "step": 102260 }, { "epoch": 0.13636678772752242, "grad_norm": 2.28125, "learning_rate": 4.820907121548729e-06, "loss": 0.4141592025756836, "num_tokens": 12493080176.0, "step": 102280 }, { "epoch": 0.13639345311425052, "grad_norm": 1.734375, "learning_rate": 4.820647565158193e-06, "loss": 0.4265768051147461, "num_tokens": 12495517484.0, "step": 102300 }, { "epoch": 0.13642011850097863, "grad_norm": 1.796875, "learning_rate": 4.82038782781594e-06, "loss": 0.41482019424438477, "num_tokens": 12497851530.0, "step": 102320 }, { "epoch": 0.13644678388770673, "grad_norm": 2.28125, "learning_rate": 4.820127909542221e-06, "loss": 0.42755870819091796, "num_tokens": 12500103728.0, "step": 102340 }, { "epoch": 0.13647344927443483, "grad_norm": 1.984375, "learning_rate": 4.819867810357304e-06, "loss": 0.41036081314086914, "num_tokens": 12502580198.0, "step": 102360 }, { "epoch": 0.13650011466116294, "grad_norm": 2.265625, "learning_rate": 4.81960753028147e-06, "loss": 0.42551488876342775, "num_tokens": 12505053591.0, "step": 102380 }, { "epoch": 0.13652678004789104, "grad_norm": 2.1875, "learning_rate": 4.819347069335015e-06, "loss": 0.39421887397766114, "num_tokens": 12507567516.0, "step": 102400 }, { "epoch": 0.13655344543461914, "grad_norm": 2.140625, "learning_rate": 4.819086427538249e-06, "loss": 0.4309804916381836, "num_tokens": 12510194739.0, "step": 102420 }, { "epoch": 0.13658011082134724, "grad_norm": 1.796875, "learning_rate": 4.818825604911493e-06, "loss": 0.42683563232421873, "num_tokens": 12512738938.0, "step": 102440 }, { "epoch": 0.13660677620807535, "grad_norm": 1.703125, "learning_rate": 4.818564601475085e-06, "loss": 0.42370176315307617, "num_tokens": 12515264525.0, "step": 102460 }, { "epoch": 0.13663344159480345, "grad_norm": 2.34375, "learning_rate": 4.8183034172493785e-06, "loss": 0.41574344635009763, "num_tokens": 12517803026.0, "step": 102480 }, { "epoch": 0.13666010698153155, "grad_norm": 2.703125, "learning_rate": 4.8180420522547375e-06, "loss": 0.4169267177581787, "num_tokens": 12520331170.0, "step": 102500 }, { "epoch": 0.13668677236825966, "grad_norm": 1.9765625, "learning_rate": 4.817780506511544e-06, "loss": 0.4187145709991455, "num_tokens": 12522763972.0, "step": 102520 }, { "epoch": 0.13671343775498776, "grad_norm": 1.6640625, "learning_rate": 4.817518780040189e-06, "loss": 0.39821372032165525, "num_tokens": 12525087581.0, "step": 102540 }, { "epoch": 0.13674010314171586, "grad_norm": 2.0, "learning_rate": 4.8172568728610825e-06, "loss": 0.43526325225830076, "num_tokens": 12527694348.0, "step": 102560 }, { "epoch": 0.13676676852844397, "grad_norm": 1.890625, "learning_rate": 4.8169947849946466e-06, "loss": 0.4199979305267334, "num_tokens": 12530151991.0, "step": 102580 }, { "epoch": 0.13679343391517207, "grad_norm": 2.40625, "learning_rate": 4.816732516461317e-06, "loss": 0.41554460525512693, "num_tokens": 12532588344.0, "step": 102600 }, { "epoch": 0.13682009930190017, "grad_norm": 2.015625, "learning_rate": 4.816470067281543e-06, "loss": 0.4151052474975586, "num_tokens": 12535043844.0, "step": 102620 }, { "epoch": 0.13684676468862828, "grad_norm": 1.7265625, "learning_rate": 4.816207437475791e-06, "loss": 0.41232762336730955, "num_tokens": 12537507080.0, "step": 102640 }, { "epoch": 0.13687343007535638, "grad_norm": 1.890625, "learning_rate": 4.815944627064538e-06, "loss": 0.4204962730407715, "num_tokens": 12539848674.0, "step": 102660 }, { "epoch": 0.13690009546208448, "grad_norm": 2.21875, "learning_rate": 4.815681636068278e-06, "loss": 0.4031712055206299, "num_tokens": 12542247026.0, "step": 102680 }, { "epoch": 0.13692676084881258, "grad_norm": 1.546875, "learning_rate": 4.815418464507516e-06, "loss": 0.3855227708816528, "num_tokens": 12544715150.0, "step": 102700 }, { "epoch": 0.1369534262355407, "grad_norm": 1.78125, "learning_rate": 4.815155112402773e-06, "loss": 0.40979480743408203, "num_tokens": 12547234838.0, "step": 102720 }, { "epoch": 0.1369800916222688, "grad_norm": 1.9453125, "learning_rate": 4.814891579774586e-06, "loss": 0.39682018756866455, "num_tokens": 12549737657.0, "step": 102740 }, { "epoch": 0.1370067570089969, "grad_norm": 1.578125, "learning_rate": 4.8146278666435e-06, "loss": 0.40997753143310545, "num_tokens": 12552228978.0, "step": 102760 }, { "epoch": 0.137033422395725, "grad_norm": 2.0625, "learning_rate": 4.814363973030081e-06, "loss": 0.4192503929138184, "num_tokens": 12554819004.0, "step": 102780 }, { "epoch": 0.1370600877824531, "grad_norm": 2.21875, "learning_rate": 4.814099898954906e-06, "loss": 0.42448768615722654, "num_tokens": 12557267307.0, "step": 102800 }, { "epoch": 0.1370867531691812, "grad_norm": 1.921875, "learning_rate": 4.813835644438564e-06, "loss": 0.4003718376159668, "num_tokens": 12559642671.0, "step": 102820 }, { "epoch": 0.1371134185559093, "grad_norm": 1.8828125, "learning_rate": 4.81357120950166e-06, "loss": 0.42191853523254397, "num_tokens": 12561983296.0, "step": 102840 }, { "epoch": 0.1371400839426374, "grad_norm": 3.0, "learning_rate": 4.8133065941648175e-06, "loss": 0.422728157043457, "num_tokens": 12564462693.0, "step": 102860 }, { "epoch": 0.1371667493293655, "grad_norm": 1.9921875, "learning_rate": 4.813041798448665e-06, "loss": 0.4220258712768555, "num_tokens": 12566847403.0, "step": 102880 }, { "epoch": 0.13719341471609361, "grad_norm": 2.234375, "learning_rate": 4.812776822373852e-06, "loss": 0.41176481246948243, "num_tokens": 12569050052.0, "step": 102900 }, { "epoch": 0.13722008010282172, "grad_norm": 1.8828125, "learning_rate": 4.81251166596104e-06, "loss": 0.43185248374938967, "num_tokens": 12571558613.0, "step": 102920 }, { "epoch": 0.13724674548954985, "grad_norm": 1.9296875, "learning_rate": 4.812246329230903e-06, "loss": 0.4229252338409424, "num_tokens": 12574123867.0, "step": 102940 }, { "epoch": 0.13727341087627795, "grad_norm": 1.921875, "learning_rate": 4.8119808122041325e-06, "loss": 0.4239853858947754, "num_tokens": 12576543320.0, "step": 102960 }, { "epoch": 0.13730007626300605, "grad_norm": 2.15625, "learning_rate": 4.811715114901431e-06, "loss": 0.4242441177368164, "num_tokens": 12578883237.0, "step": 102980 }, { "epoch": 0.13732674164973416, "grad_norm": 1.921875, "learning_rate": 4.811449237343516e-06, "loss": 0.41584954261779783, "num_tokens": 12581241146.0, "step": 103000 }, { "epoch": 0.13735340703646226, "grad_norm": 2.640625, "learning_rate": 4.81118317955112e-06, "loss": 0.4191902160644531, "num_tokens": 12583737259.0, "step": 103020 }, { "epoch": 0.13738007242319036, "grad_norm": 2.453125, "learning_rate": 4.810916941544989e-06, "loss": 0.4266096591949463, "num_tokens": 12586102497.0, "step": 103040 }, { "epoch": 0.13740673780991847, "grad_norm": 2.125, "learning_rate": 4.810650523345881e-06, "loss": 0.4003407955169678, "num_tokens": 12588478412.0, "step": 103060 }, { "epoch": 0.13743340319664657, "grad_norm": 1.78125, "learning_rate": 4.810383924974572e-06, "loss": 0.4066908359527588, "num_tokens": 12591115259.0, "step": 103080 }, { "epoch": 0.13746006858337467, "grad_norm": 2.078125, "learning_rate": 4.810117146451849e-06, "loss": 0.4208277702331543, "num_tokens": 12593387187.0, "step": 103100 }, { "epoch": 0.13748673397010278, "grad_norm": 2.46875, "learning_rate": 4.809850187798513e-06, "loss": 0.39924499988555906, "num_tokens": 12595931289.0, "step": 103120 }, { "epoch": 0.13751339935683088, "grad_norm": 2.15625, "learning_rate": 4.809583049035382e-06, "loss": 0.42396063804626466, "num_tokens": 12598688615.0, "step": 103140 }, { "epoch": 0.13754006474355898, "grad_norm": 2.421875, "learning_rate": 4.809315730183284e-06, "loss": 0.40681066513061526, "num_tokens": 12601216672.0, "step": 103160 }, { "epoch": 0.13756673013028708, "grad_norm": 2.25, "learning_rate": 4.809048231263065e-06, "loss": 0.4109518051147461, "num_tokens": 12603630248.0, "step": 103180 }, { "epoch": 0.1375933955170152, "grad_norm": 1.8984375, "learning_rate": 4.808780552295582e-06, "loss": 0.40320711135864257, "num_tokens": 12606378254.0, "step": 103200 }, { "epoch": 0.1376200609037433, "grad_norm": 2.125, "learning_rate": 4.808512693301707e-06, "loss": 0.4086951732635498, "num_tokens": 12608770501.0, "step": 103220 }, { "epoch": 0.1376467262904714, "grad_norm": 2.078125, "learning_rate": 4.808244654302327e-06, "loss": 0.42497825622558594, "num_tokens": 12611085627.0, "step": 103240 }, { "epoch": 0.1376733916771995, "grad_norm": 1.984375, "learning_rate": 4.807976435318342e-06, "loss": 0.41464762687683104, "num_tokens": 12613589015.0, "step": 103260 }, { "epoch": 0.1377000570639276, "grad_norm": 2.03125, "learning_rate": 4.8077080363706655e-06, "loss": 0.41730265617370604, "num_tokens": 12616106928.0, "step": 103280 }, { "epoch": 0.1377267224506557, "grad_norm": 1.953125, "learning_rate": 4.807439457480227e-06, "loss": 0.4171881198883057, "num_tokens": 12618606362.0, "step": 103300 }, { "epoch": 0.1377533878373838, "grad_norm": 1.828125, "learning_rate": 4.807170698667968e-06, "loss": 0.4246262550354004, "num_tokens": 12621090561.0, "step": 103320 }, { "epoch": 0.1377800532241119, "grad_norm": 2.265625, "learning_rate": 4.8069017599548465e-06, "loss": 0.41262025833129884, "num_tokens": 12623363784.0, "step": 103340 }, { "epoch": 0.13780671861084, "grad_norm": 1.8671875, "learning_rate": 4.8066326413618304e-06, "loss": 0.41236085891723634, "num_tokens": 12625801150.0, "step": 103360 }, { "epoch": 0.13783338399756812, "grad_norm": 1.8515625, "learning_rate": 4.806363342909905e-06, "loss": 0.41811285018920896, "num_tokens": 12628421520.0, "step": 103380 }, { "epoch": 0.13786004938429622, "grad_norm": 2.578125, "learning_rate": 4.80609386462007e-06, "loss": 0.4235952377319336, "num_tokens": 12631068818.0, "step": 103400 }, { "epoch": 0.13788671477102432, "grad_norm": 2.109375, "learning_rate": 4.805824206513336e-06, "loss": 0.439359188079834, "num_tokens": 12633673926.0, "step": 103420 }, { "epoch": 0.13791338015775242, "grad_norm": 2.140625, "learning_rate": 4.805554368610732e-06, "loss": 0.42761826515197754, "num_tokens": 12636083171.0, "step": 103440 }, { "epoch": 0.13794004554448053, "grad_norm": 2.265625, "learning_rate": 4.805284350933296e-06, "loss": 0.407973051071167, "num_tokens": 12638668948.0, "step": 103460 }, { "epoch": 0.13796671093120863, "grad_norm": 1.9375, "learning_rate": 4.805014153502084e-06, "loss": 0.4147041797637939, "num_tokens": 12641152185.0, "step": 103480 }, { "epoch": 0.13799337631793673, "grad_norm": 1.7109375, "learning_rate": 4.804743776338164e-06, "loss": 0.3997480392456055, "num_tokens": 12643608589.0, "step": 103500 }, { "epoch": 0.13802004170466484, "grad_norm": 2.265625, "learning_rate": 4.804473219462619e-06, "loss": 0.4172191619873047, "num_tokens": 12646086049.0, "step": 103520 }, { "epoch": 0.13804670709139294, "grad_norm": 2.03125, "learning_rate": 4.804202482896544e-06, "loss": 0.4311846733093262, "num_tokens": 12648642912.0, "step": 103540 }, { "epoch": 0.13807337247812104, "grad_norm": 1.609375, "learning_rate": 4.803931566661052e-06, "loss": 0.4218919277191162, "num_tokens": 12651225956.0, "step": 103560 }, { "epoch": 0.13810003786484915, "grad_norm": 1.734375, "learning_rate": 4.8036604707772665e-06, "loss": 0.4187325477600098, "num_tokens": 12653839504.0, "step": 103580 }, { "epoch": 0.13812670325157725, "grad_norm": 1.7578125, "learning_rate": 4.803389195266325e-06, "loss": 0.42412528991699217, "num_tokens": 12656497172.0, "step": 103600 }, { "epoch": 0.13815336863830535, "grad_norm": 2.109375, "learning_rate": 4.803117740149382e-06, "loss": 0.4230160713195801, "num_tokens": 12658688642.0, "step": 103620 }, { "epoch": 0.13818003402503345, "grad_norm": 1.953125, "learning_rate": 4.802846105447603e-06, "loss": 0.3951749086380005, "num_tokens": 12661326176.0, "step": 103640 }, { "epoch": 0.13820669941176156, "grad_norm": 2.1875, "learning_rate": 4.802574291182169e-06, "loss": 0.4229219913482666, "num_tokens": 12663813034.0, "step": 103660 }, { "epoch": 0.13823336479848966, "grad_norm": 2.265625, "learning_rate": 4.802302297374275e-06, "loss": 0.4239556312561035, "num_tokens": 12666355577.0, "step": 103680 }, { "epoch": 0.13826003018521776, "grad_norm": 2.28125, "learning_rate": 4.802030124045129e-06, "loss": 0.4119633674621582, "num_tokens": 12668480461.0, "step": 103700 }, { "epoch": 0.13828669557194587, "grad_norm": 1.609375, "learning_rate": 4.801757771215953e-06, "loss": 0.41971192359924314, "num_tokens": 12671092495.0, "step": 103720 }, { "epoch": 0.13831336095867397, "grad_norm": 2.40625, "learning_rate": 4.801485238907986e-06, "loss": 0.4036909580230713, "num_tokens": 12673467905.0, "step": 103740 }, { "epoch": 0.1383400263454021, "grad_norm": 1.9375, "learning_rate": 4.801212527142476e-06, "loss": 0.4243324756622314, "num_tokens": 12675769975.0, "step": 103760 }, { "epoch": 0.1383666917321302, "grad_norm": 2.078125, "learning_rate": 4.800939635940689e-06, "loss": 0.402773380279541, "num_tokens": 12678261809.0, "step": 103780 }, { "epoch": 0.1383933571188583, "grad_norm": 1.828125, "learning_rate": 4.800666565323902e-06, "loss": 0.43303623199462893, "num_tokens": 12680634007.0, "step": 103800 }, { "epoch": 0.1384200225055864, "grad_norm": 2.125, "learning_rate": 4.800393315313411e-06, "loss": 0.4151029109954834, "num_tokens": 12682931535.0, "step": 103820 }, { "epoch": 0.1384466878923145, "grad_norm": 2.015625, "learning_rate": 4.800119885930519e-06, "loss": 0.42781496047973633, "num_tokens": 12685362925.0, "step": 103840 }, { "epoch": 0.13847335327904262, "grad_norm": 2.234375, "learning_rate": 4.799846277196548e-06, "loss": 0.42875065803527834, "num_tokens": 12687761007.0, "step": 103860 }, { "epoch": 0.13850001866577072, "grad_norm": 2.515625, "learning_rate": 4.799572489132834e-06, "loss": 0.4201984405517578, "num_tokens": 12690163279.0, "step": 103880 }, { "epoch": 0.13852668405249882, "grad_norm": 2.125, "learning_rate": 4.799298521760722e-06, "loss": 0.40597686767578123, "num_tokens": 12692700791.0, "step": 103900 }, { "epoch": 0.13855334943922692, "grad_norm": 1.953125, "learning_rate": 4.799024375101577e-06, "loss": 0.4040210247039795, "num_tokens": 12694957379.0, "step": 103920 }, { "epoch": 0.13858001482595503, "grad_norm": 2.0, "learning_rate": 4.798750049176776e-06, "loss": 0.413485050201416, "num_tokens": 12697492585.0, "step": 103940 }, { "epoch": 0.13860668021268313, "grad_norm": 2.0625, "learning_rate": 4.798475544007708e-06, "loss": 0.4098672866821289, "num_tokens": 12699960749.0, "step": 103960 }, { "epoch": 0.13863334559941123, "grad_norm": 1.90625, "learning_rate": 4.7982008596157785e-06, "loss": 0.40880551338195803, "num_tokens": 12702596716.0, "step": 103980 }, { "epoch": 0.13866001098613934, "grad_norm": 2.140625, "learning_rate": 4.797925996022404e-06, "loss": 0.4256914138793945, "num_tokens": 12704884705.0, "step": 104000 }, { "epoch": 0.13868667637286744, "grad_norm": 2.46875, "learning_rate": 4.79765095324902e-06, "loss": 0.4074747085571289, "num_tokens": 12707591054.0, "step": 104020 }, { "epoch": 0.13871334175959554, "grad_norm": 1.828125, "learning_rate": 4.797375731317071e-06, "loss": 0.42824397087097166, "num_tokens": 12710002804.0, "step": 104040 }, { "epoch": 0.13874000714632365, "grad_norm": 2.109375, "learning_rate": 4.797100330248017e-06, "loss": 0.425047779083252, "num_tokens": 12712600969.0, "step": 104060 }, { "epoch": 0.13876667253305175, "grad_norm": 2.28125, "learning_rate": 4.796824750063332e-06, "loss": 0.4207934379577637, "num_tokens": 12714966658.0, "step": 104080 }, { "epoch": 0.13879333791977985, "grad_norm": 1.7734375, "learning_rate": 4.7965489907845064e-06, "loss": 0.40247287750244143, "num_tokens": 12717418121.0, "step": 104100 }, { "epoch": 0.13882000330650796, "grad_norm": 2.234375, "learning_rate": 4.796273052433041e-06, "loss": 0.41400465965270994, "num_tokens": 12719677486.0, "step": 104120 }, { "epoch": 0.13884666869323606, "grad_norm": 2.203125, "learning_rate": 4.795996935030452e-06, "loss": 0.40343647003173827, "num_tokens": 12722306097.0, "step": 104140 }, { "epoch": 0.13887333407996416, "grad_norm": 2.171875, "learning_rate": 4.795720638598269e-06, "loss": 0.39872241020202637, "num_tokens": 12724543478.0, "step": 104160 }, { "epoch": 0.13889999946669226, "grad_norm": 2.15625, "learning_rate": 4.795444163158037e-06, "loss": 0.40845212936401365, "num_tokens": 12726999336.0, "step": 104180 }, { "epoch": 0.13892666485342037, "grad_norm": 2.09375, "learning_rate": 4.795167508731315e-06, "loss": 0.39572672843933104, "num_tokens": 12729509551.0, "step": 104200 }, { "epoch": 0.13895333024014847, "grad_norm": 1.9765625, "learning_rate": 4.794890675339673e-06, "loss": 0.40180211067199706, "num_tokens": 12732054646.0, "step": 104220 }, { "epoch": 0.13897999562687657, "grad_norm": 1.9296875, "learning_rate": 4.794613663004698e-06, "loss": 0.41297435760498047, "num_tokens": 12734524808.0, "step": 104240 }, { "epoch": 0.13900666101360468, "grad_norm": 1.921875, "learning_rate": 4.794336471747991e-06, "loss": 0.4309636116027832, "num_tokens": 12736943350.0, "step": 104260 }, { "epoch": 0.13903332640033278, "grad_norm": 1.578125, "learning_rate": 4.794059101591163e-06, "loss": 0.4192152976989746, "num_tokens": 12739313697.0, "step": 104280 }, { "epoch": 0.13905999178706088, "grad_norm": 1.8203125, "learning_rate": 4.793781552555845e-06, "loss": 0.4346455574035645, "num_tokens": 12741758362.0, "step": 104300 }, { "epoch": 0.13908665717378899, "grad_norm": 2.359375, "learning_rate": 4.793503824663677e-06, "loss": 0.441768741607666, "num_tokens": 12744191399.0, "step": 104320 }, { "epoch": 0.1391133225605171, "grad_norm": 1.859375, "learning_rate": 4.793225917936315e-06, "loss": 0.41848196983337405, "num_tokens": 12746595167.0, "step": 104340 }, { "epoch": 0.1391399879472452, "grad_norm": 1.9375, "learning_rate": 4.792947832395428e-06, "loss": 0.41516618728637694, "num_tokens": 12749085906.0, "step": 104360 }, { "epoch": 0.1391666533339733, "grad_norm": 2.453125, "learning_rate": 4.792669568062702e-06, "loss": 0.39664440155029296, "num_tokens": 12751644515.0, "step": 104380 }, { "epoch": 0.1391933187207014, "grad_norm": 2.171875, "learning_rate": 4.792391124959832e-06, "loss": 0.41017951965332033, "num_tokens": 12754002389.0, "step": 104400 }, { "epoch": 0.1392199841074295, "grad_norm": 2.0625, "learning_rate": 4.7921125031085316e-06, "loss": 0.42777481079101565, "num_tokens": 12756420240.0, "step": 104420 }, { "epoch": 0.1392466494941576, "grad_norm": 2.1875, "learning_rate": 4.791833702530524e-06, "loss": 0.4307730674743652, "num_tokens": 12758850310.0, "step": 104440 }, { "epoch": 0.1392733148808857, "grad_norm": 2.046875, "learning_rate": 4.79155472324755e-06, "loss": 0.410584831237793, "num_tokens": 12761453220.0, "step": 104460 }, { "epoch": 0.1392999802676138, "grad_norm": 2.1875, "learning_rate": 4.791275565281363e-06, "loss": 0.4247551918029785, "num_tokens": 12763947147.0, "step": 104480 }, { "epoch": 0.1393266456543419, "grad_norm": 1.8515625, "learning_rate": 4.79099622865373e-06, "loss": 0.40651898384094237, "num_tokens": 12766506611.0, "step": 104500 }, { "epoch": 0.13935331104107002, "grad_norm": 1.9921875, "learning_rate": 4.790716713386432e-06, "loss": 0.41361351013183595, "num_tokens": 12768969830.0, "step": 104520 }, { "epoch": 0.13937997642779812, "grad_norm": 1.96875, "learning_rate": 4.7904370195012636e-06, "loss": 0.42705788612365725, "num_tokens": 12771596546.0, "step": 104540 }, { "epoch": 0.13940664181452625, "grad_norm": 1.953125, "learning_rate": 4.790157147020036e-06, "loss": 0.40571203231811526, "num_tokens": 12774086452.0, "step": 104560 }, { "epoch": 0.13943330720125435, "grad_norm": 1.7265625, "learning_rate": 4.789877095964569e-06, "loss": 0.4263628005981445, "num_tokens": 12776627569.0, "step": 104580 }, { "epoch": 0.13945997258798246, "grad_norm": 2.109375, "learning_rate": 4.789596866356702e-06, "loss": 0.4103725433349609, "num_tokens": 12778911728.0, "step": 104600 }, { "epoch": 0.13948663797471056, "grad_norm": 2.453125, "learning_rate": 4.789316458218285e-06, "loss": 0.40983309745788576, "num_tokens": 12781430948.0, "step": 104620 }, { "epoch": 0.13951330336143866, "grad_norm": 2.09375, "learning_rate": 4.789035871571182e-06, "loss": 0.4206965923309326, "num_tokens": 12783616540.0, "step": 104640 }, { "epoch": 0.13953996874816676, "grad_norm": 2.59375, "learning_rate": 4.7887551064372725e-06, "loss": 0.4030604839324951, "num_tokens": 12786232042.0, "step": 104660 }, { "epoch": 0.13956663413489487, "grad_norm": 1.7578125, "learning_rate": 4.788474162838449e-06, "loss": 0.4108163833618164, "num_tokens": 12788620894.0, "step": 104680 }, { "epoch": 0.13959329952162297, "grad_norm": 3.15625, "learning_rate": 4.788193040796618e-06, "loss": 0.4242118835449219, "num_tokens": 12791091794.0, "step": 104700 }, { "epoch": 0.13961996490835107, "grad_norm": 2.03125, "learning_rate": 4.7879117403337e-06, "loss": 0.41250410079956057, "num_tokens": 12793259454.0, "step": 104720 }, { "epoch": 0.13964663029507918, "grad_norm": 2.03125, "learning_rate": 4.787630261471628e-06, "loss": 0.429170036315918, "num_tokens": 12795962795.0, "step": 104740 }, { "epoch": 0.13967329568180728, "grad_norm": 2.0625, "learning_rate": 4.787348604232352e-06, "loss": 0.4147909164428711, "num_tokens": 12798422200.0, "step": 104760 }, { "epoch": 0.13969996106853538, "grad_norm": 2.46875, "learning_rate": 4.787066768637834e-06, "loss": 0.41417737007141114, "num_tokens": 12800851573.0, "step": 104780 }, { "epoch": 0.1397266264552635, "grad_norm": 1.8828125, "learning_rate": 4.786784754710047e-06, "loss": 0.41141414642333984, "num_tokens": 12803419972.0, "step": 104800 }, { "epoch": 0.1397532918419916, "grad_norm": 1.703125, "learning_rate": 4.786502562470986e-06, "loss": 0.4102043151855469, "num_tokens": 12806069825.0, "step": 104820 }, { "epoch": 0.1397799572287197, "grad_norm": 2.390625, "learning_rate": 4.786220191942651e-06, "loss": 0.3976992130279541, "num_tokens": 12808616993.0, "step": 104840 }, { "epoch": 0.1398066226154478, "grad_norm": 1.4375, "learning_rate": 4.7859376431470615e-06, "loss": 0.42309112548828126, "num_tokens": 12811222097.0, "step": 104860 }, { "epoch": 0.1398332880021759, "grad_norm": 1.765625, "learning_rate": 4.785654916106249e-06, "loss": 0.4174467086791992, "num_tokens": 12813529954.0, "step": 104880 }, { "epoch": 0.139859953388904, "grad_norm": 2.0, "learning_rate": 4.785372010842258e-06, "loss": 0.4131648063659668, "num_tokens": 12815954621.0, "step": 104900 }, { "epoch": 0.1398866187756321, "grad_norm": 2.359375, "learning_rate": 4.7850889273771485e-06, "loss": 0.420849609375, "num_tokens": 12818573910.0, "step": 104920 }, { "epoch": 0.1399132841623602, "grad_norm": 1.640625, "learning_rate": 4.784805665732994e-06, "loss": 0.41426777839660645, "num_tokens": 12820812045.0, "step": 104940 }, { "epoch": 0.1399399495490883, "grad_norm": 2.1875, "learning_rate": 4.784522225931882e-06, "loss": 0.4182430267333984, "num_tokens": 12823453523.0, "step": 104960 }, { "epoch": 0.1399666149358164, "grad_norm": 1.640625, "learning_rate": 4.784238607995913e-06, "loss": 0.4132344722747803, "num_tokens": 12826062402.0, "step": 104980 }, { "epoch": 0.13999328032254452, "grad_norm": 2.21875, "learning_rate": 4.783954811947204e-06, "loss": 0.419340991973877, "num_tokens": 12828531230.0, "step": 105000 }, { "epoch": 0.14001994570927262, "grad_norm": 1.9296875, "learning_rate": 4.78367083780788e-06, "loss": 0.41350340843200684, "num_tokens": 12831083840.0, "step": 105020 }, { "epoch": 0.14004661109600072, "grad_norm": 1.7578125, "learning_rate": 4.783386685600087e-06, "loss": 0.4128582954406738, "num_tokens": 12833632787.0, "step": 105040 }, { "epoch": 0.14007327648272883, "grad_norm": 1.8671875, "learning_rate": 4.78310235534598e-06, "loss": 0.4096362113952637, "num_tokens": 12836189347.0, "step": 105060 }, { "epoch": 0.14009994186945693, "grad_norm": 2.0, "learning_rate": 4.7828178470677315e-06, "loss": 0.4298983573913574, "num_tokens": 12838786236.0, "step": 105080 }, { "epoch": 0.14012660725618503, "grad_norm": 2.015625, "learning_rate": 4.782533160787523e-06, "loss": 0.4113620281219482, "num_tokens": 12841392036.0, "step": 105100 }, { "epoch": 0.14015327264291313, "grad_norm": 2.203125, "learning_rate": 4.782248296527555e-06, "loss": 0.4097775459289551, "num_tokens": 12843795060.0, "step": 105120 }, { "epoch": 0.14017993802964124, "grad_norm": 2.328125, "learning_rate": 4.78196325431004e-06, "loss": 0.42851104736328127, "num_tokens": 12846067906.0, "step": 105140 }, { "epoch": 0.14020660341636934, "grad_norm": 2.015625, "learning_rate": 4.7816780341572034e-06, "loss": 0.4108184814453125, "num_tokens": 12848584277.0, "step": 105160 }, { "epoch": 0.14023326880309744, "grad_norm": 2.015625, "learning_rate": 4.781392636091284e-06, "loss": 0.4226734161376953, "num_tokens": 12851268169.0, "step": 105180 }, { "epoch": 0.14025993418982555, "grad_norm": 1.9140625, "learning_rate": 4.781107060134537e-06, "loss": 0.422957706451416, "num_tokens": 12853634227.0, "step": 105200 }, { "epoch": 0.14028659957655365, "grad_norm": 1.84375, "learning_rate": 4.78082130630923e-06, "loss": 0.4256889820098877, "num_tokens": 12856253753.0, "step": 105220 }, { "epoch": 0.14031326496328175, "grad_norm": 1.9140625, "learning_rate": 4.780535374637644e-06, "loss": 0.40740213394165037, "num_tokens": 12858790207.0, "step": 105240 }, { "epoch": 0.14033993035000986, "grad_norm": 1.953125, "learning_rate": 4.780249265142075e-06, "loss": 0.42974557876586916, "num_tokens": 12861201647.0, "step": 105260 }, { "epoch": 0.14036659573673796, "grad_norm": 2.203125, "learning_rate": 4.779962977844832e-06, "loss": 0.4250382423400879, "num_tokens": 12863561754.0, "step": 105280 }, { "epoch": 0.14039326112346606, "grad_norm": 2.59375, "learning_rate": 4.779676512768237e-06, "loss": 0.42079987525939944, "num_tokens": 12866161500.0, "step": 105300 }, { "epoch": 0.14041992651019417, "grad_norm": 2.3125, "learning_rate": 4.77938986993463e-06, "loss": 0.40578155517578124, "num_tokens": 12868640071.0, "step": 105320 }, { "epoch": 0.14044659189692227, "grad_norm": 2.21875, "learning_rate": 4.77910304936636e-06, "loss": 0.4298905849456787, "num_tokens": 12870945119.0, "step": 105340 }, { "epoch": 0.14047325728365037, "grad_norm": 1.9296875, "learning_rate": 4.77881605108579e-06, "loss": 0.42165117263793944, "num_tokens": 12873378965.0, "step": 105360 }, { "epoch": 0.1404999226703785, "grad_norm": 2.21875, "learning_rate": 4.778528875115301e-06, "loss": 0.40346269607543944, "num_tokens": 12875835078.0, "step": 105380 }, { "epoch": 0.1405265880571066, "grad_norm": 2.140625, "learning_rate": 4.778241521477286e-06, "loss": 0.41901345252990724, "num_tokens": 12878319791.0, "step": 105400 }, { "epoch": 0.1405532534438347, "grad_norm": 2.015625, "learning_rate": 4.777953990194148e-06, "loss": 0.4355898857116699, "num_tokens": 12880795183.0, "step": 105420 }, { "epoch": 0.1405799188305628, "grad_norm": 1.9296875, "learning_rate": 4.777666281288311e-06, "loss": 0.41726298332214357, "num_tokens": 12883276358.0, "step": 105440 }, { "epoch": 0.14060658421729091, "grad_norm": 2.375, "learning_rate": 4.777378394782207e-06, "loss": 0.4196028232574463, "num_tokens": 12885568746.0, "step": 105460 }, { "epoch": 0.14063324960401902, "grad_norm": 1.765625, "learning_rate": 4.7770903306982845e-06, "loss": 0.42050886154174805, "num_tokens": 12887915412.0, "step": 105480 }, { "epoch": 0.14065991499074712, "grad_norm": 2.0, "learning_rate": 4.776802089059004e-06, "loss": 0.4196082592010498, "num_tokens": 12890312840.0, "step": 105500 }, { "epoch": 0.14068658037747522, "grad_norm": 1.9375, "learning_rate": 4.776513669886843e-06, "loss": 0.41403512954711913, "num_tokens": 12892783749.0, "step": 105520 }, { "epoch": 0.14071324576420333, "grad_norm": 1.828125, "learning_rate": 4.776225073204289e-06, "loss": 0.42092366218566896, "num_tokens": 12895381474.0, "step": 105540 }, { "epoch": 0.14073991115093143, "grad_norm": 2.171875, "learning_rate": 4.775936299033846e-06, "loss": 0.4159036636352539, "num_tokens": 12897990205.0, "step": 105560 }, { "epoch": 0.14076657653765953, "grad_norm": 1.8203125, "learning_rate": 4.775647347398031e-06, "loss": 0.4138209342956543, "num_tokens": 12900525302.0, "step": 105580 }, { "epoch": 0.14079324192438764, "grad_norm": 2.609375, "learning_rate": 4.775358218319375e-06, "loss": 0.41895580291748047, "num_tokens": 12902901887.0, "step": 105600 }, { "epoch": 0.14081990731111574, "grad_norm": 1.9765625, "learning_rate": 4.775068911820423e-06, "loss": 0.41580824851989745, "num_tokens": 12905396103.0, "step": 105620 }, { "epoch": 0.14084657269784384, "grad_norm": 2.015625, "learning_rate": 4.774779427923734e-06, "loss": 0.4081686496734619, "num_tokens": 12907886205.0, "step": 105640 }, { "epoch": 0.14087323808457194, "grad_norm": 2.25, "learning_rate": 4.774489766651878e-06, "loss": 0.42206401824951173, "num_tokens": 12910319301.0, "step": 105660 }, { "epoch": 0.14089990347130005, "grad_norm": 2.09375, "learning_rate": 4.774199928027444e-06, "loss": 0.4003964900970459, "num_tokens": 12912911652.0, "step": 105680 }, { "epoch": 0.14092656885802815, "grad_norm": 3.171875, "learning_rate": 4.773909912073032e-06, "loss": 0.4184438228607178, "num_tokens": 12915267362.0, "step": 105700 }, { "epoch": 0.14095323424475625, "grad_norm": 2.390625, "learning_rate": 4.7736197188112535e-06, "loss": 0.41603384017944334, "num_tokens": 12917659686.0, "step": 105720 }, { "epoch": 0.14097989963148436, "grad_norm": 1.8671875, "learning_rate": 4.773329348264738e-06, "loss": 0.4027125358581543, "num_tokens": 12920220878.0, "step": 105740 }, { "epoch": 0.14100656501821246, "grad_norm": 2.484375, "learning_rate": 4.773038800456126e-06, "loss": 0.39888691902160645, "num_tokens": 12922639578.0, "step": 105760 }, { "epoch": 0.14103323040494056, "grad_norm": 1.6484375, "learning_rate": 4.772748075408075e-06, "loss": 0.40930023193359377, "num_tokens": 12924954320.0, "step": 105780 }, { "epoch": 0.14105989579166867, "grad_norm": 2.0625, "learning_rate": 4.772457173143252e-06, "loss": 0.40752830505371096, "num_tokens": 12927377703.0, "step": 105800 }, { "epoch": 0.14108656117839677, "grad_norm": 2.234375, "learning_rate": 4.772166093684341e-06, "loss": 0.41315011978149413, "num_tokens": 12929748349.0, "step": 105820 }, { "epoch": 0.14111322656512487, "grad_norm": 2.234375, "learning_rate": 4.771874837054038e-06, "loss": 0.4099287509918213, "num_tokens": 12932133881.0, "step": 105840 }, { "epoch": 0.14113989195185297, "grad_norm": 1.8828125, "learning_rate": 4.7715834032750555e-06, "loss": 0.4146083354949951, "num_tokens": 12934603177.0, "step": 105860 }, { "epoch": 0.14116655733858108, "grad_norm": 2.25, "learning_rate": 4.7712917923701155e-06, "loss": 0.40900368690490724, "num_tokens": 12936838599.0, "step": 105880 }, { "epoch": 0.14119322272530918, "grad_norm": 2.03125, "learning_rate": 4.771000004361958e-06, "loss": 0.40457758903503416, "num_tokens": 12939218220.0, "step": 105900 }, { "epoch": 0.14121988811203728, "grad_norm": 2.125, "learning_rate": 4.770708039273334e-06, "loss": 0.4167340278625488, "num_tokens": 12941669700.0, "step": 105920 }, { "epoch": 0.1412465534987654, "grad_norm": 1.859375, "learning_rate": 4.7704158971270105e-06, "loss": 0.40445871353149415, "num_tokens": 12944047073.0, "step": 105940 }, { "epoch": 0.1412732188854935, "grad_norm": 2.0625, "learning_rate": 4.770123577945766e-06, "loss": 0.4263150691986084, "num_tokens": 12946440797.0, "step": 105960 }, { "epoch": 0.1412998842722216, "grad_norm": 1.9375, "learning_rate": 4.769831081752395e-06, "loss": 0.416521692276001, "num_tokens": 12948838931.0, "step": 105980 }, { "epoch": 0.1413265496589497, "grad_norm": 2.59375, "learning_rate": 4.7695384085697044e-06, "loss": 0.4272762775421143, "num_tokens": 12951293589.0, "step": 106000 }, { "epoch": 0.1413532150456778, "grad_norm": 2.28125, "learning_rate": 4.769245558420515e-06, "loss": 0.4102319717407227, "num_tokens": 12953772614.0, "step": 106020 }, { "epoch": 0.1413798804324059, "grad_norm": 1.7734375, "learning_rate": 4.768952531327662e-06, "loss": 0.4130971908569336, "num_tokens": 12956243583.0, "step": 106040 }, { "epoch": 0.141406545819134, "grad_norm": 2.109375, "learning_rate": 4.768659327313994e-06, "loss": 0.4130832672119141, "num_tokens": 12958818298.0, "step": 106060 }, { "epoch": 0.1414332112058621, "grad_norm": 1.6484375, "learning_rate": 4.7683659464023736e-06, "loss": 0.43009223937988283, "num_tokens": 12961213115.0, "step": 106080 }, { "epoch": 0.1414598765925902, "grad_norm": 1.765625, "learning_rate": 4.768072388615677e-06, "loss": 0.4061887264251709, "num_tokens": 12963858211.0, "step": 106100 }, { "epoch": 0.14148654197931831, "grad_norm": 2.15625, "learning_rate": 4.767778653976795e-06, "loss": 0.40494556427001954, "num_tokens": 12966356756.0, "step": 106120 }, { "epoch": 0.14151320736604642, "grad_norm": 1.859375, "learning_rate": 4.767484742508629e-06, "loss": 0.41962614059448244, "num_tokens": 12968806423.0, "step": 106140 }, { "epoch": 0.14153987275277452, "grad_norm": 2.3125, "learning_rate": 4.7671906542341e-06, "loss": 0.41263751983642577, "num_tokens": 12971324162.0, "step": 106160 }, { "epoch": 0.14156653813950262, "grad_norm": 2.1875, "learning_rate": 4.7668963891761375e-06, "loss": 0.3931756496429443, "num_tokens": 12973712448.0, "step": 106180 }, { "epoch": 0.14159320352623075, "grad_norm": 2.1875, "learning_rate": 4.7666019473576866e-06, "loss": 0.4017148971557617, "num_tokens": 12976152130.0, "step": 106200 }, { "epoch": 0.14161986891295886, "grad_norm": 1.484375, "learning_rate": 4.766307328801707e-06, "loss": 0.4208024024963379, "num_tokens": 12978619939.0, "step": 106220 }, { "epoch": 0.14164653429968696, "grad_norm": 1.8671875, "learning_rate": 4.76601253353117e-06, "loss": 0.41085062026977537, "num_tokens": 12981066083.0, "step": 106240 }, { "epoch": 0.14167319968641506, "grad_norm": 2.0, "learning_rate": 4.765717561569065e-06, "loss": 0.41096787452697753, "num_tokens": 12983507096.0, "step": 106260 }, { "epoch": 0.14169986507314317, "grad_norm": 1.875, "learning_rate": 4.765422412938391e-06, "loss": 0.41263694763183595, "num_tokens": 12985818086.0, "step": 106280 }, { "epoch": 0.14172653045987127, "grad_norm": 1.75, "learning_rate": 4.76512708766216e-06, "loss": 0.3895644426345825, "num_tokens": 12988437007.0, "step": 106300 }, { "epoch": 0.14175319584659937, "grad_norm": 1.9296875, "learning_rate": 4.764831585763403e-06, "loss": 0.41250972747802733, "num_tokens": 12991107151.0, "step": 106320 }, { "epoch": 0.14177986123332748, "grad_norm": 2.171875, "learning_rate": 4.764535907265161e-06, "loss": 0.4243796348571777, "num_tokens": 12993619888.0, "step": 106340 }, { "epoch": 0.14180652662005558, "grad_norm": 2.4375, "learning_rate": 4.764240052190488e-06, "loss": 0.4106342315673828, "num_tokens": 12995867581.0, "step": 106360 }, { "epoch": 0.14183319200678368, "grad_norm": 2.171875, "learning_rate": 4.763944020562454e-06, "loss": 0.41713967323303225, "num_tokens": 12998133148.0, "step": 106380 }, { "epoch": 0.14185985739351178, "grad_norm": 2.015625, "learning_rate": 4.763647812404142e-06, "loss": 0.43306446075439453, "num_tokens": 13000774207.0, "step": 106400 }, { "epoch": 0.1418865227802399, "grad_norm": 2.078125, "learning_rate": 4.763351427738648e-06, "loss": 0.4166522979736328, "num_tokens": 13003280649.0, "step": 106420 }, { "epoch": 0.141913188166968, "grad_norm": 1.9453125, "learning_rate": 4.763054866589084e-06, "loss": 0.41671905517578123, "num_tokens": 13005824872.0, "step": 106440 }, { "epoch": 0.1419398535536961, "grad_norm": 2.109375, "learning_rate": 4.762758128978574e-06, "loss": 0.4232757091522217, "num_tokens": 13008338865.0, "step": 106460 }, { "epoch": 0.1419665189404242, "grad_norm": 2.25, "learning_rate": 4.762461214930255e-06, "loss": 0.42041935920715334, "num_tokens": 13010748189.0, "step": 106480 }, { "epoch": 0.1419931843271523, "grad_norm": 1.8203125, "learning_rate": 4.762164124467279e-06, "loss": 0.41457457542419435, "num_tokens": 13013175650.0, "step": 106500 }, { "epoch": 0.1420198497138804, "grad_norm": 1.96875, "learning_rate": 4.761866857612812e-06, "loss": 0.42188072204589844, "num_tokens": 13015680406.0, "step": 106520 }, { "epoch": 0.1420465151006085, "grad_norm": 2.109375, "learning_rate": 4.761569414390033e-06, "loss": 0.40596394538879393, "num_tokens": 13018104500.0, "step": 106540 }, { "epoch": 0.1420731804873366, "grad_norm": 2.296875, "learning_rate": 4.761271794822136e-06, "loss": 0.41869368553161623, "num_tokens": 13020607410.0, "step": 106560 }, { "epoch": 0.1420998458740647, "grad_norm": 1.8359375, "learning_rate": 4.7609739989323255e-06, "loss": 0.4253547191619873, "num_tokens": 13023234422.0, "step": 106580 }, { "epoch": 0.14212651126079281, "grad_norm": 2.375, "learning_rate": 4.760676026743824e-06, "loss": 0.40512685775756835, "num_tokens": 13025665463.0, "step": 106600 }, { "epoch": 0.14215317664752092, "grad_norm": 1.84375, "learning_rate": 4.7603778782798645e-06, "loss": 0.4094704627990723, "num_tokens": 13028206520.0, "step": 106620 }, { "epoch": 0.14217984203424902, "grad_norm": 2.203125, "learning_rate": 4.760079553563696e-06, "loss": 0.4165328502655029, "num_tokens": 13030511428.0, "step": 106640 }, { "epoch": 0.14220650742097712, "grad_norm": 1.8515625, "learning_rate": 4.759781052618581e-06, "loss": 0.4138148307800293, "num_tokens": 13032996170.0, "step": 106660 }, { "epoch": 0.14223317280770523, "grad_norm": 2.015625, "learning_rate": 4.759482375467794e-06, "loss": 0.4072237014770508, "num_tokens": 13035476010.0, "step": 106680 }, { "epoch": 0.14225983819443333, "grad_norm": 1.59375, "learning_rate": 4.759183522134624e-06, "loss": 0.3931319236755371, "num_tokens": 13037842077.0, "step": 106700 }, { "epoch": 0.14228650358116143, "grad_norm": 1.859375, "learning_rate": 4.758884492642374e-06, "loss": 0.3995676517486572, "num_tokens": 13040226802.0, "step": 106720 }, { "epoch": 0.14231316896788954, "grad_norm": 1.390625, "learning_rate": 4.758585287014361e-06, "loss": 0.40448894500732424, "num_tokens": 13042712346.0, "step": 106740 }, { "epoch": 0.14233983435461764, "grad_norm": 1.984375, "learning_rate": 4.758285905273916e-06, "loss": 0.415323543548584, "num_tokens": 13045387345.0, "step": 106760 }, { "epoch": 0.14236649974134574, "grad_norm": 2.109375, "learning_rate": 4.757986347444382e-06, "loss": 0.4089911460876465, "num_tokens": 13047848822.0, "step": 106780 }, { "epoch": 0.14239316512807385, "grad_norm": 1.8515625, "learning_rate": 4.757686613549118e-06, "loss": 0.4096034049987793, "num_tokens": 13050435299.0, "step": 106800 }, { "epoch": 0.14241983051480195, "grad_norm": 2.234375, "learning_rate": 4.757386703611494e-06, "loss": 0.4112400054931641, "num_tokens": 13052826536.0, "step": 106820 }, { "epoch": 0.14244649590153005, "grad_norm": 1.609375, "learning_rate": 4.757086617654898e-06, "loss": 0.4323333740234375, "num_tokens": 13055017781.0, "step": 106840 }, { "epoch": 0.14247316128825815, "grad_norm": 1.6171875, "learning_rate": 4.756786355702727e-06, "loss": 0.41436233520507815, "num_tokens": 13057450071.0, "step": 106860 }, { "epoch": 0.14249982667498626, "grad_norm": 1.8203125, "learning_rate": 4.756485917778394e-06, "loss": 0.4191756248474121, "num_tokens": 13059742504.0, "step": 106880 }, { "epoch": 0.14252649206171436, "grad_norm": 2.703125, "learning_rate": 4.7561853039053275e-06, "loss": 0.41073856353759763, "num_tokens": 13062176560.0, "step": 106900 }, { "epoch": 0.14255315744844246, "grad_norm": 2.09375, "learning_rate": 4.755884514106965e-06, "loss": 0.41411423683166504, "num_tokens": 13064786822.0, "step": 106920 }, { "epoch": 0.14257982283517057, "grad_norm": 2.421875, "learning_rate": 4.755583548406763e-06, "loss": 0.4138370990753174, "num_tokens": 13067134045.0, "step": 106940 }, { "epoch": 0.14260648822189867, "grad_norm": 2.375, "learning_rate": 4.755282406828187e-06, "loss": 0.4079278945922852, "num_tokens": 13069665808.0, "step": 106960 }, { "epoch": 0.14263315360862677, "grad_norm": 1.578125, "learning_rate": 4.7549810893947204e-06, "loss": 0.4189624786376953, "num_tokens": 13072358074.0, "step": 106980 }, { "epoch": 0.1426598189953549, "grad_norm": 2.171875, "learning_rate": 4.754679596129857e-06, "loss": 0.4131801605224609, "num_tokens": 13074867882.0, "step": 107000 }, { "epoch": 0.142686484382083, "grad_norm": 2.1875, "learning_rate": 4.754377927057106e-06, "loss": 0.41086726188659667, "num_tokens": 13077610989.0, "step": 107020 }, { "epoch": 0.1427131497688111, "grad_norm": 2.03125, "learning_rate": 4.754076082199989e-06, "loss": 0.43174195289611816, "num_tokens": 13080006917.0, "step": 107040 }, { "epoch": 0.1427398151555392, "grad_norm": 2.453125, "learning_rate": 4.753774061582044e-06, "loss": 0.39854934215545657, "num_tokens": 13082404554.0, "step": 107060 }, { "epoch": 0.14276648054226732, "grad_norm": 1.9140625, "learning_rate": 4.7534718652268195e-06, "loss": 0.41727685928344727, "num_tokens": 13084902750.0, "step": 107080 }, { "epoch": 0.14279314592899542, "grad_norm": 1.84375, "learning_rate": 4.753169493157881e-06, "loss": 0.4106264114379883, "num_tokens": 13087474387.0, "step": 107100 }, { "epoch": 0.14281981131572352, "grad_norm": 2.21875, "learning_rate": 4.752866945398804e-06, "loss": 0.4181736946105957, "num_tokens": 13089844169.0, "step": 107120 }, { "epoch": 0.14284647670245162, "grad_norm": 2.40625, "learning_rate": 4.7525642219731805e-06, "loss": 0.4168991565704346, "num_tokens": 13092084698.0, "step": 107140 }, { "epoch": 0.14287314208917973, "grad_norm": 1.96875, "learning_rate": 4.7522613229046146e-06, "loss": 0.42043294906616213, "num_tokens": 13094557661.0, "step": 107160 }, { "epoch": 0.14289980747590783, "grad_norm": 1.8203125, "learning_rate": 4.751958248216725e-06, "loss": 0.41869020462036133, "num_tokens": 13097047905.0, "step": 107180 }, { "epoch": 0.14292647286263593, "grad_norm": 2.015625, "learning_rate": 4.751654997933145e-06, "loss": 0.4035043716430664, "num_tokens": 13099409300.0, "step": 107200 }, { "epoch": 0.14295313824936404, "grad_norm": 1.8046875, "learning_rate": 4.751351572077518e-06, "loss": 0.4176264762878418, "num_tokens": 13101964648.0, "step": 107220 }, { "epoch": 0.14297980363609214, "grad_norm": 2.125, "learning_rate": 4.751047970673506e-06, "loss": 0.4113138675689697, "num_tokens": 13104333694.0, "step": 107240 }, { "epoch": 0.14300646902282024, "grad_norm": 2.328125, "learning_rate": 4.750744193744781e-06, "loss": 0.404207181930542, "num_tokens": 13106792858.0, "step": 107260 }, { "epoch": 0.14303313440954835, "grad_norm": 1.625, "learning_rate": 4.750440241315029e-06, "loss": 0.40786066055297854, "num_tokens": 13109316585.0, "step": 107280 }, { "epoch": 0.14305979979627645, "grad_norm": 1.9375, "learning_rate": 4.750136113407953e-06, "loss": 0.4191445350646973, "num_tokens": 13111810374.0, "step": 107300 }, { "epoch": 0.14308646518300455, "grad_norm": 2.53125, "learning_rate": 4.749831810047265e-06, "loss": 0.3982229709625244, "num_tokens": 13114399100.0, "step": 107320 }, { "epoch": 0.14311313056973266, "grad_norm": 1.9140625, "learning_rate": 4.749527331256694e-06, "loss": 0.4149466514587402, "num_tokens": 13116909698.0, "step": 107340 }, { "epoch": 0.14313979595646076, "grad_norm": 2.484375, "learning_rate": 4.749222677059983e-06, "loss": 0.41939358711242675, "num_tokens": 13119484637.0, "step": 107360 }, { "epoch": 0.14316646134318886, "grad_norm": 2.453125, "learning_rate": 4.748917847480884e-06, "loss": 0.4132856845855713, "num_tokens": 13121789786.0, "step": 107380 }, { "epoch": 0.14319312672991696, "grad_norm": 1.9609375, "learning_rate": 4.748612842543169e-06, "loss": 0.39423279762268065, "num_tokens": 13124298486.0, "step": 107400 }, { "epoch": 0.14321979211664507, "grad_norm": 2.328125, "learning_rate": 4.7483076622706185e-06, "loss": 0.400831413269043, "num_tokens": 13126562610.0, "step": 107420 }, { "epoch": 0.14324645750337317, "grad_norm": 2.140625, "learning_rate": 4.748002306687031e-06, "loss": 0.4201512336730957, "num_tokens": 13128839896.0, "step": 107440 }, { "epoch": 0.14327312289010127, "grad_norm": 2.109375, "learning_rate": 4.747696775816214e-06, "loss": 0.40122671127319337, "num_tokens": 13131354661.0, "step": 107460 }, { "epoch": 0.14329978827682938, "grad_norm": 1.921875, "learning_rate": 4.747391069681993e-06, "loss": 0.41588382720947265, "num_tokens": 13133783367.0, "step": 107480 }, { "epoch": 0.14332645366355748, "grad_norm": 1.796875, "learning_rate": 4.747085188308205e-06, "loss": 0.4059924125671387, "num_tokens": 13136343834.0, "step": 107500 }, { "epoch": 0.14335311905028558, "grad_norm": 2.109375, "learning_rate": 4.7467791317187005e-06, "loss": 0.41930017471313474, "num_tokens": 13138692111.0, "step": 107520 }, { "epoch": 0.14337978443701369, "grad_norm": 1.7890625, "learning_rate": 4.746472899937345e-06, "loss": 0.4205915451049805, "num_tokens": 13141050865.0, "step": 107540 }, { "epoch": 0.1434064498237418, "grad_norm": 1.7890625, "learning_rate": 4.746166492988016e-06, "loss": 0.41285367012023927, "num_tokens": 13143366338.0, "step": 107560 }, { "epoch": 0.1434331152104699, "grad_norm": 1.8984375, "learning_rate": 4.745859910894605e-06, "loss": 0.4216008186340332, "num_tokens": 13145980200.0, "step": 107580 }, { "epoch": 0.143459780597198, "grad_norm": 2.0625, "learning_rate": 4.745553153681019e-06, "loss": 0.4126870632171631, "num_tokens": 13148357575.0, "step": 107600 }, { "epoch": 0.1434864459839261, "grad_norm": 2.453125, "learning_rate": 4.745246221371177e-06, "loss": 0.40270156860351564, "num_tokens": 13150990231.0, "step": 107620 }, { "epoch": 0.1435131113706542, "grad_norm": 1.9921875, "learning_rate": 4.744939113989011e-06, "loss": 0.41181411743164065, "num_tokens": 13153422524.0, "step": 107640 }, { "epoch": 0.1435397767573823, "grad_norm": 1.9921875, "learning_rate": 4.7446318315584685e-06, "loss": 0.4071136474609375, "num_tokens": 13155837805.0, "step": 107660 }, { "epoch": 0.1435664421441104, "grad_norm": 2.03125, "learning_rate": 4.74432437410351e-06, "loss": 0.43309807777404785, "num_tokens": 13158109459.0, "step": 107680 }, { "epoch": 0.1435931075308385, "grad_norm": 2.34375, "learning_rate": 4.744016741648108e-06, "loss": 0.42023115158081054, "num_tokens": 13160216074.0, "step": 107700 }, { "epoch": 0.1436197729175666, "grad_norm": 2.21875, "learning_rate": 4.743708934216252e-06, "loss": 0.40958437919616697, "num_tokens": 13162571503.0, "step": 107720 }, { "epoch": 0.14364643830429472, "grad_norm": 1.9453125, "learning_rate": 4.743400951831941e-06, "loss": 0.41739764213562014, "num_tokens": 13164979884.0, "step": 107740 }, { "epoch": 0.14367310369102282, "grad_norm": 2.28125, "learning_rate": 4.743092794519192e-06, "loss": 0.4041770935058594, "num_tokens": 13167356432.0, "step": 107760 }, { "epoch": 0.14369976907775092, "grad_norm": 2.34375, "learning_rate": 4.742784462302031e-06, "loss": 0.4210843086242676, "num_tokens": 13169864451.0, "step": 107780 }, { "epoch": 0.14372643446447902, "grad_norm": 2.21875, "learning_rate": 4.742475955204503e-06, "loss": 0.4057255744934082, "num_tokens": 13172306888.0, "step": 107800 }, { "epoch": 0.14375309985120716, "grad_norm": 1.9375, "learning_rate": 4.742167273250661e-06, "loss": 0.40578594207763674, "num_tokens": 13175035925.0, "step": 107820 }, { "epoch": 0.14377976523793526, "grad_norm": 2.03125, "learning_rate": 4.741858416464576e-06, "loss": 0.4022704601287842, "num_tokens": 13177383947.0, "step": 107840 }, { "epoch": 0.14380643062466336, "grad_norm": 1.890625, "learning_rate": 4.74154938487033e-06, "loss": 0.41487646102905273, "num_tokens": 13179854779.0, "step": 107860 }, { "epoch": 0.14383309601139146, "grad_norm": 2.0, "learning_rate": 4.741240178492021e-06, "loss": 0.42078337669372556, "num_tokens": 13182352465.0, "step": 107880 }, { "epoch": 0.14385976139811957, "grad_norm": 2.265625, "learning_rate": 4.740930797353759e-06, "loss": 0.424210786819458, "num_tokens": 13185050689.0, "step": 107900 }, { "epoch": 0.14388642678484767, "grad_norm": 2.015625, "learning_rate": 4.740621241479666e-06, "loss": 0.4187312126159668, "num_tokens": 13187483079.0, "step": 107920 }, { "epoch": 0.14391309217157577, "grad_norm": 1.8515625, "learning_rate": 4.740311510893881e-06, "loss": 0.40215702056884767, "num_tokens": 13189719193.0, "step": 107940 }, { "epoch": 0.14393975755830388, "grad_norm": 2.25, "learning_rate": 4.740001605620556e-06, "loss": 0.41954813003540037, "num_tokens": 13191956953.0, "step": 107960 }, { "epoch": 0.14396642294503198, "grad_norm": 1.9296875, "learning_rate": 4.739691525683855e-06, "loss": 0.4121901035308838, "num_tokens": 13194418155.0, "step": 107980 }, { "epoch": 0.14399308833176008, "grad_norm": 2.4375, "learning_rate": 4.739381271107955e-06, "loss": 0.42130022048950194, "num_tokens": 13196873594.0, "step": 108000 } ], "logging_steps": 20, "max_steps": 750036, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.214148733296695e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }