{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 8457, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011824524062906468, "grad_norm": 1.7525863647460938, "learning_rate": 0.0, "loss": 0.9062, "num_tokens": 628048.0, "step": 1 }, { "epoch": 0.00023649048125812936, "grad_norm": 1.6684751510620117, "learning_rate": 2.362204724409449e-07, "loss": 0.8713, "num_tokens": 1266689.0, "step": 2 }, { "epoch": 0.000354735721887194, "grad_norm": 1.684480905532837, "learning_rate": 4.724409448818898e-07, "loss": 0.875, "num_tokens": 1900338.0, "step": 3 }, { "epoch": 0.0004729809625162587, "grad_norm": 1.675436019897461, "learning_rate": 7.086614173228346e-07, "loss": 0.8711, "num_tokens": 2539095.0, "step": 4 }, { "epoch": 0.0005912262031453234, "grad_norm": 1.7783000469207764, "learning_rate": 9.448818897637796e-07, "loss": 0.8835, "num_tokens": 3172740.0, "step": 5 }, { "epoch": 0.000709471443774388, "grad_norm": 1.5693429708480835, "learning_rate": 1.1811023622047244e-06, "loss": 0.8503, "num_tokens": 3810090.0, "step": 6 }, { "epoch": 0.0008277166844034528, "grad_norm": 1.8225330114364624, "learning_rate": 1.4173228346456693e-06, "loss": 0.9399, "num_tokens": 4447913.0, "step": 7 }, { "epoch": 0.0009459619250325174, "grad_norm": 1.4727877378463745, "learning_rate": 1.6535433070866142e-06, "loss": 0.8423, "num_tokens": 5085253.0, "step": 8 }, { "epoch": 0.001064207165661582, "grad_norm": 1.5583330392837524, "learning_rate": 1.8897637795275591e-06, "loss": 0.905, "num_tokens": 5721930.0, "step": 9 }, { "epoch": 0.0011824524062906468, "grad_norm": 1.240740180015564, "learning_rate": 2.125984251968504e-06, "loss": 0.8987, "num_tokens": 6354428.0, "step": 10 }, { "epoch": 0.0013006976469197116, "grad_norm": 1.1561263799667358, "learning_rate": 2.3622047244094487e-06, "loss": 0.8831, "num_tokens": 6988898.0, "step": 11 }, { "epoch": 0.001418942887548776, "grad_norm": 1.2030363082885742, "learning_rate": 2.598425196850394e-06, "loss": 0.8804, "num_tokens": 7627058.0, "step": 12 }, { "epoch": 0.0015371881281778408, "grad_norm": 0.9616765975952148, "learning_rate": 2.8346456692913386e-06, "loss": 0.8467, "num_tokens": 8261730.0, "step": 13 }, { "epoch": 0.0016554333688069056, "grad_norm": 1.0608482360839844, "learning_rate": 3.0708661417322837e-06, "loss": 0.8523, "num_tokens": 8889249.0, "step": 14 }, { "epoch": 0.0017736786094359701, "grad_norm": 0.9416703581809998, "learning_rate": 3.3070866141732284e-06, "loss": 0.7888, "num_tokens": 9524710.0, "step": 15 }, { "epoch": 0.0018919238500650349, "grad_norm": 0.9269049763679504, "learning_rate": 3.543307086614173e-06, "loss": 0.813, "num_tokens": 10161723.0, "step": 16 }, { "epoch": 0.0020101690906940994, "grad_norm": 0.9301733374595642, "learning_rate": 3.7795275590551182e-06, "loss": 0.8276, "num_tokens": 10793622.0, "step": 17 }, { "epoch": 0.002128414331323164, "grad_norm": 0.8023355603218079, "learning_rate": 4.015748031496064e-06, "loss": 0.8047, "num_tokens": 11420435.0, "step": 18 }, { "epoch": 0.002246659571952229, "grad_norm": 1.0126051902770996, "learning_rate": 4.251968503937008e-06, "loss": 0.7874, "num_tokens": 12055174.0, "step": 19 }, { "epoch": 0.0023649048125812936, "grad_norm": 0.8739595413208008, "learning_rate": 4.488188976377953e-06, "loss": 0.7186, "num_tokens": 12687424.0, "step": 20 }, { "epoch": 0.0024831500532103584, "grad_norm": 0.9095150828361511, "learning_rate": 4.7244094488188975e-06, "loss": 0.7896, "num_tokens": 13318953.0, "step": 21 }, { "epoch": 0.002601395293839423, "grad_norm": 0.7513114809989929, "learning_rate": 4.960629921259843e-06, "loss": 0.7419, "num_tokens": 13937259.0, "step": 22 }, { "epoch": 0.0027196405344684874, "grad_norm": 0.6933557391166687, "learning_rate": 5.196850393700788e-06, "loss": 0.7802, "num_tokens": 14569053.0, "step": 23 }, { "epoch": 0.002837885775097552, "grad_norm": 0.6356580853462219, "learning_rate": 5.433070866141733e-06, "loss": 0.7855, "num_tokens": 15199901.0, "step": 24 }, { "epoch": 0.002956131015726617, "grad_norm": 0.6553327441215515, "learning_rate": 5.669291338582677e-06, "loss": 0.7372, "num_tokens": 15830276.0, "step": 25 }, { "epoch": 0.0030743762563556817, "grad_norm": 0.728373110294342, "learning_rate": 5.905511811023622e-06, "loss": 0.7451, "num_tokens": 16465223.0, "step": 26 }, { "epoch": 0.0031926214969847464, "grad_norm": 0.7462941408157349, "learning_rate": 6.141732283464567e-06, "loss": 0.7275, "num_tokens": 17102309.0, "step": 27 }, { "epoch": 0.003310866737613811, "grad_norm": 0.6434164643287659, "learning_rate": 6.3779527559055125e-06, "loss": 0.7155, "num_tokens": 17738787.0, "step": 28 }, { "epoch": 0.003429111978242876, "grad_norm": 0.5134767889976501, "learning_rate": 6.614173228346457e-06, "loss": 0.651, "num_tokens": 18370503.0, "step": 29 }, { "epoch": 0.0035473572188719402, "grad_norm": 0.48275768756866455, "learning_rate": 6.850393700787402e-06, "loss": 0.696, "num_tokens": 19008700.0, "step": 30 }, { "epoch": 0.003665602459501005, "grad_norm": 0.5202338695526123, "learning_rate": 7.086614173228346e-06, "loss": 0.6968, "num_tokens": 19641076.0, "step": 31 }, { "epoch": 0.0037838477001300697, "grad_norm": 0.4813470244407654, "learning_rate": 7.322834645669292e-06, "loss": 0.6086, "num_tokens": 20268470.0, "step": 32 }, { "epoch": 0.0039020929407591345, "grad_norm": 0.4875348210334778, "learning_rate": 7.5590551181102365e-06, "loss": 0.7017, "num_tokens": 20903791.0, "step": 33 }, { "epoch": 0.004020338181388199, "grad_norm": 0.44005027413368225, "learning_rate": 7.79527559055118e-06, "loss": 0.6758, "num_tokens": 21537889.0, "step": 34 }, { "epoch": 0.0041385834220172635, "grad_norm": 0.3610026240348816, "learning_rate": 8.031496062992128e-06, "loss": 0.6041, "num_tokens": 22169163.0, "step": 35 }, { "epoch": 0.004256828662646328, "grad_norm": 0.44464096426963806, "learning_rate": 8.267716535433071e-06, "loss": 0.6573, "num_tokens": 22802993.0, "step": 36 }, { "epoch": 0.004375073903275393, "grad_norm": 0.43667611479759216, "learning_rate": 8.503937007874016e-06, "loss": 0.6365, "num_tokens": 23436910.0, "step": 37 }, { "epoch": 0.004493319143904458, "grad_norm": 0.4393109977245331, "learning_rate": 8.74015748031496e-06, "loss": 0.6741, "num_tokens": 24066237.0, "step": 38 }, { "epoch": 0.0046115643845335225, "grad_norm": 0.3843165338039398, "learning_rate": 8.976377952755906e-06, "loss": 0.6166, "num_tokens": 24702070.0, "step": 39 }, { "epoch": 0.004729809625162587, "grad_norm": 0.3514043688774109, "learning_rate": 9.212598425196852e-06, "loss": 0.603, "num_tokens": 25335554.0, "step": 40 }, { "epoch": 0.004848054865791652, "grad_norm": 0.34545862674713135, "learning_rate": 9.448818897637795e-06, "loss": 0.5557, "num_tokens": 25967347.0, "step": 41 }, { "epoch": 0.004966300106420717, "grad_norm": 0.41074663400650024, "learning_rate": 9.68503937007874e-06, "loss": 0.5686, "num_tokens": 26599914.0, "step": 42 }, { "epoch": 0.0050845453470497815, "grad_norm": 0.38910239934921265, "learning_rate": 9.921259842519685e-06, "loss": 0.6343, "num_tokens": 27231412.0, "step": 43 }, { "epoch": 0.005202790587678846, "grad_norm": 0.354407399892807, "learning_rate": 1.015748031496063e-05, "loss": 0.6381, "num_tokens": 27868949.0, "step": 44 }, { "epoch": 0.005321035828307911, "grad_norm": 0.3544872999191284, "learning_rate": 1.0393700787401575e-05, "loss": 0.6084, "num_tokens": 28505673.0, "step": 45 }, { "epoch": 0.005439281068936975, "grad_norm": 0.3607017695903778, "learning_rate": 1.0629921259842519e-05, "loss": 0.6228, "num_tokens": 29143956.0, "step": 46 }, { "epoch": 0.00555752630956604, "grad_norm": 0.32265704870224, "learning_rate": 1.0866141732283466e-05, "loss": 0.6071, "num_tokens": 29780377.0, "step": 47 }, { "epoch": 0.005675771550195104, "grad_norm": 0.3478977680206299, "learning_rate": 1.110236220472441e-05, "loss": 0.6184, "num_tokens": 30419988.0, "step": 48 }, { "epoch": 0.005794016790824169, "grad_norm": 0.3131955564022064, "learning_rate": 1.1338582677165354e-05, "loss": 0.6124, "num_tokens": 31058641.0, "step": 49 }, { "epoch": 0.005912262031453234, "grad_norm": 0.35298073291778564, "learning_rate": 1.15748031496063e-05, "loss": 0.6124, "num_tokens": 31696113.0, "step": 50 }, { "epoch": 0.006030507272082299, "grad_norm": 0.37374797463417053, "learning_rate": 1.1811023622047245e-05, "loss": 0.6466, "num_tokens": 32335026.0, "step": 51 }, { "epoch": 0.006148752512711363, "grad_norm": 0.3394772410392761, "learning_rate": 1.204724409448819e-05, "loss": 0.5643, "num_tokens": 32968147.0, "step": 52 }, { "epoch": 0.006266997753340428, "grad_norm": 0.32347291707992554, "learning_rate": 1.2283464566929135e-05, "loss": 0.5978, "num_tokens": 33599687.0, "step": 53 }, { "epoch": 0.006385242993969493, "grad_norm": 0.2970774471759796, "learning_rate": 1.2519685039370078e-05, "loss": 0.5693, "num_tokens": 34235952.0, "step": 54 }, { "epoch": 0.006503488234598558, "grad_norm": 0.31798115372657776, "learning_rate": 1.2755905511811025e-05, "loss": 0.5948, "num_tokens": 34855614.0, "step": 55 }, { "epoch": 0.006621733475227622, "grad_norm": 0.3185499310493469, "learning_rate": 1.2992125984251968e-05, "loss": 0.631, "num_tokens": 35490773.0, "step": 56 }, { "epoch": 0.006739978715856687, "grad_norm": 0.3407413065433502, "learning_rate": 1.3228346456692914e-05, "loss": 0.6251, "num_tokens": 36118336.0, "step": 57 }, { "epoch": 0.006858223956485752, "grad_norm": 0.3412087559700012, "learning_rate": 1.3464566929133859e-05, "loss": 0.5845, "num_tokens": 36751911.0, "step": 58 }, { "epoch": 0.0069764691971148166, "grad_norm": 0.33669596910476685, "learning_rate": 1.3700787401574804e-05, "loss": 0.5731, "num_tokens": 37375413.0, "step": 59 }, { "epoch": 0.0070947144377438804, "grad_norm": 0.35709717869758606, "learning_rate": 1.3937007874015749e-05, "loss": 0.5508, "num_tokens": 38006888.0, "step": 60 }, { "epoch": 0.007212959678372945, "grad_norm": 0.325895220041275, "learning_rate": 1.4173228346456692e-05, "loss": 0.5658, "num_tokens": 38640497.0, "step": 61 }, { "epoch": 0.00733120491900201, "grad_norm": 0.3232080042362213, "learning_rate": 1.4409448818897638e-05, "loss": 0.5519, "num_tokens": 39273313.0, "step": 62 }, { "epoch": 0.007449450159631075, "grad_norm": 0.35531625151634216, "learning_rate": 1.4645669291338584e-05, "loss": 0.6824, "num_tokens": 39910620.0, "step": 63 }, { "epoch": 0.007567695400260139, "grad_norm": 0.3284654915332794, "learning_rate": 1.4881889763779528e-05, "loss": 0.5607, "num_tokens": 40546868.0, "step": 64 }, { "epoch": 0.007685940640889204, "grad_norm": 0.3582625091075897, "learning_rate": 1.5118110236220473e-05, "loss": 0.5232, "num_tokens": 41182518.0, "step": 65 }, { "epoch": 0.007804185881518269, "grad_norm": 0.33363229036331177, "learning_rate": 1.5354330708661416e-05, "loss": 0.5681, "num_tokens": 41817083.0, "step": 66 }, { "epoch": 0.007922431122147333, "grad_norm": 0.3670189380645752, "learning_rate": 1.559055118110236e-05, "loss": 0.5659, "num_tokens": 42454047.0, "step": 67 }, { "epoch": 0.008040676362776398, "grad_norm": 0.3469410538673401, "learning_rate": 1.5826771653543307e-05, "loss": 0.5469, "num_tokens": 43066339.0, "step": 68 }, { "epoch": 0.008158921603405462, "grad_norm": 0.37117958068847656, "learning_rate": 1.6062992125984255e-05, "loss": 0.5909, "num_tokens": 43703254.0, "step": 69 }, { "epoch": 0.008277166844034527, "grad_norm": 0.3573012351989746, "learning_rate": 1.6299212598425197e-05, "loss": 0.5824, "num_tokens": 44339105.0, "step": 70 }, { "epoch": 0.008395412084663592, "grad_norm": 0.3082917332649231, "learning_rate": 1.6535433070866142e-05, "loss": 0.5155, "num_tokens": 44969251.0, "step": 71 }, { "epoch": 0.008513657325292657, "grad_norm": 0.3488093912601471, "learning_rate": 1.6771653543307087e-05, "loss": 0.5615, "num_tokens": 45605957.0, "step": 72 }, { "epoch": 0.008631902565921721, "grad_norm": 0.3370296359062195, "learning_rate": 1.7007874015748032e-05, "loss": 0.514, "num_tokens": 46242216.0, "step": 73 }, { "epoch": 0.008750147806550786, "grad_norm": 0.35032886266708374, "learning_rate": 1.7244094488188977e-05, "loss": 0.5615, "num_tokens": 46879736.0, "step": 74 }, { "epoch": 0.00886839304717985, "grad_norm": 0.37501266598701477, "learning_rate": 1.748031496062992e-05, "loss": 0.5232, "num_tokens": 47508876.0, "step": 75 }, { "epoch": 0.008986638287808916, "grad_norm": 0.3348589539527893, "learning_rate": 1.7716535433070864e-05, "loss": 0.5199, "num_tokens": 48148110.0, "step": 76 }, { "epoch": 0.00910488352843798, "grad_norm": 0.3240467607975006, "learning_rate": 1.7952755905511813e-05, "loss": 0.5011, "num_tokens": 48779111.0, "step": 77 }, { "epoch": 0.009223128769067045, "grad_norm": 0.3383651375770569, "learning_rate": 1.8188976377952758e-05, "loss": 0.5048, "num_tokens": 49414520.0, "step": 78 }, { "epoch": 0.00934137400969611, "grad_norm": 0.3405618965625763, "learning_rate": 1.8425196850393703e-05, "loss": 0.5269, "num_tokens": 50047364.0, "step": 79 }, { "epoch": 0.009459619250325174, "grad_norm": 0.3804134130477905, "learning_rate": 1.8661417322834645e-05, "loss": 0.4899, "num_tokens": 50679909.0, "step": 80 }, { "epoch": 0.00957786449095424, "grad_norm": 0.3529819846153259, "learning_rate": 1.889763779527559e-05, "loss": 0.4982, "num_tokens": 51313449.0, "step": 81 }, { "epoch": 0.009696109731583304, "grad_norm": 0.39031022787094116, "learning_rate": 1.9133858267716535e-05, "loss": 0.5131, "num_tokens": 51949695.0, "step": 82 }, { "epoch": 0.009814354972212369, "grad_norm": 0.33148136734962463, "learning_rate": 1.937007874015748e-05, "loss": 0.5223, "num_tokens": 52582348.0, "step": 83 }, { "epoch": 0.009932600212841433, "grad_norm": 0.37511128187179565, "learning_rate": 1.9606299212598425e-05, "loss": 0.5209, "num_tokens": 53211571.0, "step": 84 }, { "epoch": 0.010050845453470498, "grad_norm": 0.37741589546203613, "learning_rate": 1.984251968503937e-05, "loss": 0.5176, "num_tokens": 53846179.0, "step": 85 }, { "epoch": 0.010169090694099563, "grad_norm": 0.38481634855270386, "learning_rate": 2.0078740157480316e-05, "loss": 0.5051, "num_tokens": 54478244.0, "step": 86 }, { "epoch": 0.010287335934728628, "grad_norm": 0.3978002667427063, "learning_rate": 2.031496062992126e-05, "loss": 0.5221, "num_tokens": 55113336.0, "step": 87 }, { "epoch": 0.010405581175357692, "grad_norm": 0.3445618450641632, "learning_rate": 2.0551181102362206e-05, "loss": 0.4839, "num_tokens": 55750901.0, "step": 88 }, { "epoch": 0.010523826415986757, "grad_norm": 0.42504075169563293, "learning_rate": 2.078740157480315e-05, "loss": 0.5612, "num_tokens": 56387320.0, "step": 89 }, { "epoch": 0.010642071656615822, "grad_norm": 0.34741857647895813, "learning_rate": 2.1023622047244093e-05, "loss": 0.486, "num_tokens": 57023503.0, "step": 90 }, { "epoch": 0.010760316897244887, "grad_norm": 0.4058804512023926, "learning_rate": 2.1259842519685038e-05, "loss": 0.5463, "num_tokens": 57651196.0, "step": 91 }, { "epoch": 0.01087856213787395, "grad_norm": 0.3957502841949463, "learning_rate": 2.1496062992125983e-05, "loss": 0.5007, "num_tokens": 58290170.0, "step": 92 }, { "epoch": 0.010996807378503014, "grad_norm": 0.3495681881904602, "learning_rate": 2.173228346456693e-05, "loss": 0.4674, "num_tokens": 58929585.0, "step": 93 }, { "epoch": 0.01111505261913208, "grad_norm": 0.34980306029319763, "learning_rate": 2.1968503937007877e-05, "loss": 0.4671, "num_tokens": 59569034.0, "step": 94 }, { "epoch": 0.011233297859761144, "grad_norm": 0.3535868525505066, "learning_rate": 2.220472440944882e-05, "loss": 0.5009, "num_tokens": 60197509.0, "step": 95 }, { "epoch": 0.011351543100390209, "grad_norm": 0.3588345944881439, "learning_rate": 2.2440944881889763e-05, "loss": 0.4757, "num_tokens": 60830564.0, "step": 96 }, { "epoch": 0.011469788341019273, "grad_norm": 0.3760831654071808, "learning_rate": 2.267716535433071e-05, "loss": 0.5039, "num_tokens": 61466570.0, "step": 97 }, { "epoch": 0.011588033581648338, "grad_norm": 0.37170034646987915, "learning_rate": 2.2913385826771654e-05, "loss": 0.4788, "num_tokens": 62100797.0, "step": 98 }, { "epoch": 0.011706278822277403, "grad_norm": 0.35085833072662354, "learning_rate": 2.31496062992126e-05, "loss": 0.4858, "num_tokens": 62734274.0, "step": 99 }, { "epoch": 0.011824524062906468, "grad_norm": 0.4327932894229889, "learning_rate": 2.3385826771653544e-05, "loss": 0.5087, "num_tokens": 63350637.0, "step": 100 }, { "epoch": 0.011942769303535532, "grad_norm": 0.3400493264198303, "learning_rate": 2.362204724409449e-05, "loss": 0.4849, "num_tokens": 63989248.0, "step": 101 }, { "epoch": 0.012061014544164597, "grad_norm": 0.39549216628074646, "learning_rate": 2.3858267716535434e-05, "loss": 0.4939, "num_tokens": 64623402.0, "step": 102 }, { "epoch": 0.012179259784793662, "grad_norm": 0.3358125686645508, "learning_rate": 2.409448818897638e-05, "loss": 0.4456, "num_tokens": 65263084.0, "step": 103 }, { "epoch": 0.012297505025422727, "grad_norm": 0.40909451246261597, "learning_rate": 2.4330708661417324e-05, "loss": 0.5114, "num_tokens": 65895544.0, "step": 104 }, { "epoch": 0.012415750266051791, "grad_norm": 0.3722812235355377, "learning_rate": 2.456692913385827e-05, "loss": 0.4689, "num_tokens": 66526292.0, "step": 105 }, { "epoch": 0.012533995506680856, "grad_norm": 0.3573037087917328, "learning_rate": 2.480314960629921e-05, "loss": 0.4814, "num_tokens": 67163008.0, "step": 106 }, { "epoch": 0.012652240747309921, "grad_norm": 0.3549536466598511, "learning_rate": 2.5039370078740156e-05, "loss": 0.4751, "num_tokens": 67799173.0, "step": 107 }, { "epoch": 0.012770485987938986, "grad_norm": 0.3583659529685974, "learning_rate": 2.52755905511811e-05, "loss": 0.4604, "num_tokens": 68433626.0, "step": 108 }, { "epoch": 0.01288873122856805, "grad_norm": 0.3569433093070984, "learning_rate": 2.551181102362205e-05, "loss": 0.4884, "num_tokens": 69071597.0, "step": 109 }, { "epoch": 0.013006976469197115, "grad_norm": 0.4352950155735016, "learning_rate": 2.5748031496062995e-05, "loss": 0.512, "num_tokens": 69708272.0, "step": 110 }, { "epoch": 0.01312522170982618, "grad_norm": 0.4185881018638611, "learning_rate": 2.5984251968503937e-05, "loss": 0.4956, "num_tokens": 70309426.0, "step": 111 }, { "epoch": 0.013243466950455245, "grad_norm": 0.4366341233253479, "learning_rate": 2.6220472440944882e-05, "loss": 0.5166, "num_tokens": 70946347.0, "step": 112 }, { "epoch": 0.01336171219108431, "grad_norm": 0.4029170870780945, "learning_rate": 2.6456692913385827e-05, "loss": 0.4563, "num_tokens": 71578069.0, "step": 113 }, { "epoch": 0.013479957431713374, "grad_norm": 0.3876260221004486, "learning_rate": 2.6692913385826772e-05, "loss": 0.4563, "num_tokens": 72216733.0, "step": 114 }, { "epoch": 0.013598202672342439, "grad_norm": 0.4163820445537567, "learning_rate": 2.6929133858267717e-05, "loss": 0.4856, "num_tokens": 72844928.0, "step": 115 }, { "epoch": 0.013716447912971504, "grad_norm": 0.38664260506629944, "learning_rate": 2.716535433070866e-05, "loss": 0.4846, "num_tokens": 73482837.0, "step": 116 }, { "epoch": 0.013834693153600568, "grad_norm": 0.3737328350543976, "learning_rate": 2.7401574803149608e-05, "loss": 0.5084, "num_tokens": 74118196.0, "step": 117 }, { "epoch": 0.013952938394229633, "grad_norm": 0.4432400166988373, "learning_rate": 2.7637795275590553e-05, "loss": 0.4521, "num_tokens": 74756174.0, "step": 118 }, { "epoch": 0.014071183634858696, "grad_norm": 0.4472179114818573, "learning_rate": 2.7874015748031498e-05, "loss": 0.5137, "num_tokens": 75393367.0, "step": 119 }, { "epoch": 0.014189428875487761, "grad_norm": 0.38611850142478943, "learning_rate": 2.8110236220472443e-05, "loss": 0.4585, "num_tokens": 76025875.0, "step": 120 }, { "epoch": 0.014307674116116826, "grad_norm": 0.44323235750198364, "learning_rate": 2.8346456692913385e-05, "loss": 0.4393, "num_tokens": 76657710.0, "step": 121 }, { "epoch": 0.01442591935674589, "grad_norm": 0.42476171255111694, "learning_rate": 2.858267716535433e-05, "loss": 0.4803, "num_tokens": 77289318.0, "step": 122 }, { "epoch": 0.014544164597374955, "grad_norm": 0.43145832419395447, "learning_rate": 2.8818897637795275e-05, "loss": 0.4646, "num_tokens": 77923482.0, "step": 123 }, { "epoch": 0.01466240983800402, "grad_norm": 0.42145732045173645, "learning_rate": 2.905511811023622e-05, "loss": 0.4844, "num_tokens": 78551959.0, "step": 124 }, { "epoch": 0.014780655078633085, "grad_norm": 0.44548267126083374, "learning_rate": 2.929133858267717e-05, "loss": 0.4717, "num_tokens": 79181459.0, "step": 125 }, { "epoch": 0.01489890031926215, "grad_norm": 0.38891032338142395, "learning_rate": 2.952755905511811e-05, "loss": 0.4396, "num_tokens": 79816398.0, "step": 126 }, { "epoch": 0.015017145559891214, "grad_norm": 0.39216190576553345, "learning_rate": 2.9763779527559056e-05, "loss": 0.4748, "num_tokens": 80451198.0, "step": 127 }, { "epoch": 0.015135390800520279, "grad_norm": 0.44334641098976135, "learning_rate": 3e-05, "loss": 0.4819, "num_tokens": 81087693.0, "step": 128 }, { "epoch": 0.015253636041149344, "grad_norm": 0.34925416111946106, "learning_rate": 3.0236220472440946e-05, "loss": 0.4347, "num_tokens": 81722590.0, "step": 129 }, { "epoch": 0.015371881281778408, "grad_norm": 0.434041827917099, "learning_rate": 3.047244094488189e-05, "loss": 0.5387, "num_tokens": 82358511.0, "step": 130 }, { "epoch": 0.015490126522407473, "grad_norm": 0.41715648770332336, "learning_rate": 3.070866141732283e-05, "loss": 0.4552, "num_tokens": 82997292.0, "step": 131 }, { "epoch": 0.015608371763036538, "grad_norm": 0.3948170840740204, "learning_rate": 3.094488188976378e-05, "loss": 0.4464, "num_tokens": 83636002.0, "step": 132 }, { "epoch": 0.0157266170036656, "grad_norm": 0.40963542461395264, "learning_rate": 3.118110236220472e-05, "loss": 0.4704, "num_tokens": 84266256.0, "step": 133 }, { "epoch": 0.015844862244294666, "grad_norm": 0.3941146731376648, "learning_rate": 3.141732283464567e-05, "loss": 0.4869, "num_tokens": 84901903.0, "step": 134 }, { "epoch": 0.01596310748492373, "grad_norm": 0.3751824200153351, "learning_rate": 3.165354330708661e-05, "loss": 0.4583, "num_tokens": 85532297.0, "step": 135 }, { "epoch": 0.016081352725552795, "grad_norm": 0.40312668681144714, "learning_rate": 3.1889763779527565e-05, "loss": 0.4108, "num_tokens": 86159004.0, "step": 136 }, { "epoch": 0.01619959796618186, "grad_norm": 0.36173275113105774, "learning_rate": 3.212598425196851e-05, "loss": 0.4597, "num_tokens": 86791668.0, "step": 137 }, { "epoch": 0.016317843206810925, "grad_norm": 0.4394810199737549, "learning_rate": 3.2362204724409455e-05, "loss": 0.4723, "num_tokens": 87415500.0, "step": 138 }, { "epoch": 0.01643608844743999, "grad_norm": 0.36011460423469543, "learning_rate": 3.2598425196850394e-05, "loss": 0.4395, "num_tokens": 88054250.0, "step": 139 }, { "epoch": 0.016554333688069054, "grad_norm": 0.38201114535331726, "learning_rate": 3.283464566929134e-05, "loss": 0.4202, "num_tokens": 88683640.0, "step": 140 }, { "epoch": 0.01667257892869812, "grad_norm": 0.4858458936214447, "learning_rate": 3.3070866141732284e-05, "loss": 0.4227, "num_tokens": 89322994.0, "step": 141 }, { "epoch": 0.016790824169327184, "grad_norm": 0.3499271273612976, "learning_rate": 3.330708661417323e-05, "loss": 0.4683, "num_tokens": 89959870.0, "step": 142 }, { "epoch": 0.01690906940995625, "grad_norm": 0.4980737864971161, "learning_rate": 3.3543307086614174e-05, "loss": 0.4692, "num_tokens": 90595155.0, "step": 143 }, { "epoch": 0.017027314650585313, "grad_norm": 0.3963717222213745, "learning_rate": 3.377952755905512e-05, "loss": 0.474, "num_tokens": 91229434.0, "step": 144 }, { "epoch": 0.017145559891214378, "grad_norm": 0.3766595125198364, "learning_rate": 3.4015748031496065e-05, "loss": 0.442, "num_tokens": 91864729.0, "step": 145 }, { "epoch": 0.017263805131843443, "grad_norm": 0.3464885950088501, "learning_rate": 3.425196850393701e-05, "loss": 0.4238, "num_tokens": 92497696.0, "step": 146 }, { "epoch": 0.017382050372472507, "grad_norm": 0.4586445391178131, "learning_rate": 3.4488188976377955e-05, "loss": 0.4988, "num_tokens": 93136961.0, "step": 147 }, { "epoch": 0.017500295613101572, "grad_norm": 0.4089699685573578, "learning_rate": 3.47244094488189e-05, "loss": 0.4816, "num_tokens": 93763389.0, "step": 148 }, { "epoch": 0.017618540853730637, "grad_norm": 0.36357101798057556, "learning_rate": 3.496062992125984e-05, "loss": 0.4786, "num_tokens": 94396681.0, "step": 149 }, { "epoch": 0.0177367860943597, "grad_norm": 0.38783901929855347, "learning_rate": 3.5196850393700783e-05, "loss": 0.4818, "num_tokens": 95024975.0, "step": 150 }, { "epoch": 0.017855031334988766, "grad_norm": 0.43509477376937866, "learning_rate": 3.543307086614173e-05, "loss": 0.4878, "num_tokens": 95647926.0, "step": 151 }, { "epoch": 0.01797327657561783, "grad_norm": 0.46600663661956787, "learning_rate": 3.566929133858268e-05, "loss": 0.4556, "num_tokens": 96284873.0, "step": 152 }, { "epoch": 0.018091521816246896, "grad_norm": 0.4405108392238617, "learning_rate": 3.5905511811023626e-05, "loss": 0.468, "num_tokens": 96921347.0, "step": 153 }, { "epoch": 0.01820976705687596, "grad_norm": 0.39027589559555054, "learning_rate": 3.614173228346457e-05, "loss": 0.4194, "num_tokens": 97555490.0, "step": 154 }, { "epoch": 0.018328012297505025, "grad_norm": 0.35783228278160095, "learning_rate": 3.6377952755905516e-05, "loss": 0.4336, "num_tokens": 98193055.0, "step": 155 }, { "epoch": 0.01844625753813409, "grad_norm": 0.43742242455482483, "learning_rate": 3.661417322834646e-05, "loss": 0.4144, "num_tokens": 98826897.0, "step": 156 }, { "epoch": 0.018564502778763155, "grad_norm": 0.43864238262176514, "learning_rate": 3.6850393700787406e-05, "loss": 0.4882, "num_tokens": 99462013.0, "step": 157 }, { "epoch": 0.01868274801939222, "grad_norm": 0.45138660073280334, "learning_rate": 3.708661417322835e-05, "loss": 0.4966, "num_tokens": 100097628.0, "step": 158 }, { "epoch": 0.018800993260021284, "grad_norm": 0.399861216545105, "learning_rate": 3.732283464566929e-05, "loss": 0.4347, "num_tokens": 100734556.0, "step": 159 }, { "epoch": 0.01891923850065035, "grad_norm": 0.4453473687171936, "learning_rate": 3.7559055118110235e-05, "loss": 0.4806, "num_tokens": 101371427.0, "step": 160 }, { "epoch": 0.019037483741279414, "grad_norm": 0.3969942033290863, "learning_rate": 3.779527559055118e-05, "loss": 0.4834, "num_tokens": 102008014.0, "step": 161 }, { "epoch": 0.01915572898190848, "grad_norm": 0.39051344990730286, "learning_rate": 3.8031496062992125e-05, "loss": 0.4471, "num_tokens": 102647195.0, "step": 162 }, { "epoch": 0.019273974222537543, "grad_norm": 0.3848780691623688, "learning_rate": 3.826771653543307e-05, "loss": 0.4603, "num_tokens": 103272117.0, "step": 163 }, { "epoch": 0.019392219463166608, "grad_norm": 0.36500951647758484, "learning_rate": 3.8503937007874015e-05, "loss": 0.4647, "num_tokens": 103911617.0, "step": 164 }, { "epoch": 0.019510464703795673, "grad_norm": 0.39290666580200195, "learning_rate": 3.874015748031496e-05, "loss": 0.475, "num_tokens": 104546025.0, "step": 165 }, { "epoch": 0.019628709944424737, "grad_norm": 0.3751648962497711, "learning_rate": 3.8976377952755905e-05, "loss": 0.4369, "num_tokens": 105181492.0, "step": 166 }, { "epoch": 0.019746955185053802, "grad_norm": 0.39470213651657104, "learning_rate": 3.921259842519685e-05, "loss": 0.4346, "num_tokens": 105818707.0, "step": 167 }, { "epoch": 0.019865200425682867, "grad_norm": 0.31658095121383667, "learning_rate": 3.94488188976378e-05, "loss": 0.4091, "num_tokens": 106447672.0, "step": 168 }, { "epoch": 0.01998344566631193, "grad_norm": 0.3789699971675873, "learning_rate": 3.968503937007874e-05, "loss": 0.4485, "num_tokens": 107079280.0, "step": 169 }, { "epoch": 0.020101690906940996, "grad_norm": 0.39270561933517456, "learning_rate": 3.9921259842519686e-05, "loss": 0.4508, "num_tokens": 107716692.0, "step": 170 }, { "epoch": 0.02021993614757006, "grad_norm": 0.43275970220565796, "learning_rate": 4.015748031496063e-05, "loss": 0.4484, "num_tokens": 108346190.0, "step": 171 }, { "epoch": 0.020338181388199126, "grad_norm": 0.4102923572063446, "learning_rate": 4.0393700787401576e-05, "loss": 0.4379, "num_tokens": 108972655.0, "step": 172 }, { "epoch": 0.02045642662882819, "grad_norm": 0.488037645816803, "learning_rate": 4.062992125984252e-05, "loss": 0.4636, "num_tokens": 109606533.0, "step": 173 }, { "epoch": 0.020574671869457255, "grad_norm": 0.4020083248615265, "learning_rate": 4.0866141732283466e-05, "loss": 0.4489, "num_tokens": 110242667.0, "step": 174 }, { "epoch": 0.02069291711008632, "grad_norm": 0.48326483368873596, "learning_rate": 4.110236220472441e-05, "loss": 0.5015, "num_tokens": 110879342.0, "step": 175 }, { "epoch": 0.020811162350715385, "grad_norm": 0.4717266857624054, "learning_rate": 4.133858267716536e-05, "loss": 0.4552, "num_tokens": 111515578.0, "step": 176 }, { "epoch": 0.02092940759134445, "grad_norm": 0.45702269673347473, "learning_rate": 4.15748031496063e-05, "loss": 0.4119, "num_tokens": 112148074.0, "step": 177 }, { "epoch": 0.021047652831973514, "grad_norm": 0.4984659254550934, "learning_rate": 4.181102362204725e-05, "loss": 0.5018, "num_tokens": 112784825.0, "step": 178 }, { "epoch": 0.02116589807260258, "grad_norm": 0.5247891545295715, "learning_rate": 4.2047244094488185e-05, "loss": 0.4761, "num_tokens": 113415511.0, "step": 179 }, { "epoch": 0.021284143313231644, "grad_norm": 0.4674440026283264, "learning_rate": 4.228346456692913e-05, "loss": 0.4763, "num_tokens": 114014233.0, "step": 180 }, { "epoch": 0.02140238855386071, "grad_norm": 0.44470831751823425, "learning_rate": 4.2519685039370076e-05, "loss": 0.4565, "num_tokens": 114646091.0, "step": 181 }, { "epoch": 0.021520633794489773, "grad_norm": 0.49073681235313416, "learning_rate": 4.275590551181102e-05, "loss": 0.4319, "num_tokens": 115248849.0, "step": 182 }, { "epoch": 0.021638879035118838, "grad_norm": 0.424702912569046, "learning_rate": 4.2992125984251966e-05, "loss": 0.4247, "num_tokens": 115882858.0, "step": 183 }, { "epoch": 0.0217571242757479, "grad_norm": 0.4730565845966339, "learning_rate": 4.322834645669292e-05, "loss": 0.4617, "num_tokens": 116516321.0, "step": 184 }, { "epoch": 0.021875369516376964, "grad_norm": 0.4084065556526184, "learning_rate": 4.346456692913386e-05, "loss": 0.4365, "num_tokens": 117149980.0, "step": 185 }, { "epoch": 0.02199361475700603, "grad_norm": 0.4059871733188629, "learning_rate": 4.370078740157481e-05, "loss": 0.421, "num_tokens": 117787055.0, "step": 186 }, { "epoch": 0.022111859997635094, "grad_norm": 0.4854094088077545, "learning_rate": 4.393700787401575e-05, "loss": 0.4451, "num_tokens": 118414231.0, "step": 187 }, { "epoch": 0.02223010523826416, "grad_norm": 0.41769087314605713, "learning_rate": 4.41732283464567e-05, "loss": 0.4602, "num_tokens": 119042723.0, "step": 188 }, { "epoch": 0.022348350478893223, "grad_norm": 0.41133975982666016, "learning_rate": 4.440944881889764e-05, "loss": 0.4897, "num_tokens": 119680244.0, "step": 189 }, { "epoch": 0.022466595719522288, "grad_norm": 0.5297573804855347, "learning_rate": 4.464566929133858e-05, "loss": 0.4438, "num_tokens": 120319137.0, "step": 190 }, { "epoch": 0.022584840960151353, "grad_norm": 0.5020959973335266, "learning_rate": 4.488188976377953e-05, "loss": 0.4547, "num_tokens": 120954282.0, "step": 191 }, { "epoch": 0.022703086200780417, "grad_norm": 0.5808700323104858, "learning_rate": 4.511811023622047e-05, "loss": 0.4541, "num_tokens": 121589180.0, "step": 192 }, { "epoch": 0.022821331441409482, "grad_norm": 0.5031986832618713, "learning_rate": 4.535433070866142e-05, "loss": 0.4882, "num_tokens": 122226045.0, "step": 193 }, { "epoch": 0.022939576682038547, "grad_norm": 0.5109791159629822, "learning_rate": 4.559055118110236e-05, "loss": 0.428, "num_tokens": 122863565.0, "step": 194 }, { "epoch": 0.02305782192266761, "grad_norm": 0.47593966126441956, "learning_rate": 4.582677165354331e-05, "loss": 0.4235, "num_tokens": 123498631.0, "step": 195 }, { "epoch": 0.023176067163296676, "grad_norm": 0.4609023332595825, "learning_rate": 4.606299212598425e-05, "loss": 0.4536, "num_tokens": 124134670.0, "step": 196 }, { "epoch": 0.02329431240392574, "grad_norm": 0.3966323435306549, "learning_rate": 4.62992125984252e-05, "loss": 0.4302, "num_tokens": 124770499.0, "step": 197 }, { "epoch": 0.023412557644554806, "grad_norm": 0.4776875078678131, "learning_rate": 4.653543307086614e-05, "loss": 0.484, "num_tokens": 125409234.0, "step": 198 }, { "epoch": 0.02353080288518387, "grad_norm": 0.45847830176353455, "learning_rate": 4.677165354330709e-05, "loss": 0.4543, "num_tokens": 126041885.0, "step": 199 }, { "epoch": 0.023649048125812935, "grad_norm": 0.44253844022750854, "learning_rate": 4.700787401574803e-05, "loss": 0.4725, "num_tokens": 126679673.0, "step": 200 }, { "epoch": 0.023767293366442, "grad_norm": 0.36983805894851685, "learning_rate": 4.724409448818898e-05, "loss": 0.4644, "num_tokens": 127318553.0, "step": 201 }, { "epoch": 0.023885538607071065, "grad_norm": 0.4198189973831177, "learning_rate": 4.748031496062992e-05, "loss": 0.4362, "num_tokens": 127957565.0, "step": 202 }, { "epoch": 0.02400378384770013, "grad_norm": 0.47528529167175293, "learning_rate": 4.771653543307087e-05, "loss": 0.5145, "num_tokens": 128588826.0, "step": 203 }, { "epoch": 0.024122029088329194, "grad_norm": 0.39212366938591003, "learning_rate": 4.7952755905511814e-05, "loss": 0.4608, "num_tokens": 129222173.0, "step": 204 }, { "epoch": 0.02424027432895826, "grad_norm": 0.40318480134010315, "learning_rate": 4.818897637795276e-05, "loss": 0.4773, "num_tokens": 129857385.0, "step": 205 }, { "epoch": 0.024358519569587324, "grad_norm": 0.38395074009895325, "learning_rate": 4.8425196850393704e-05, "loss": 0.3997, "num_tokens": 130486516.0, "step": 206 }, { "epoch": 0.02447676481021639, "grad_norm": 0.3886719048023224, "learning_rate": 4.866141732283465e-05, "loss": 0.4726, "num_tokens": 131117306.0, "step": 207 }, { "epoch": 0.024595010050845453, "grad_norm": 0.4131399095058441, "learning_rate": 4.8897637795275594e-05, "loss": 0.4928, "num_tokens": 131749433.0, "step": 208 }, { "epoch": 0.024713255291474518, "grad_norm": 0.36377304792404175, "learning_rate": 4.913385826771654e-05, "loss": 0.442, "num_tokens": 132382579.0, "step": 209 }, { "epoch": 0.024831500532103583, "grad_norm": 0.4461774230003357, "learning_rate": 4.937007874015748e-05, "loss": 0.458, "num_tokens": 133017061.0, "step": 210 }, { "epoch": 0.024949745772732648, "grad_norm": 0.36661165952682495, "learning_rate": 4.960629921259842e-05, "loss": 0.4475, "num_tokens": 133645920.0, "step": 211 }, { "epoch": 0.025067991013361712, "grad_norm": 0.40536758303642273, "learning_rate": 4.984251968503937e-05, "loss": 0.4873, "num_tokens": 134277976.0, "step": 212 }, { "epoch": 0.025186236253990777, "grad_norm": 0.375731885433197, "learning_rate": 5.007874015748031e-05, "loss": 0.4649, "num_tokens": 134913386.0, "step": 213 }, { "epoch": 0.025304481494619842, "grad_norm": 0.3638976514339447, "learning_rate": 5.031496062992126e-05, "loss": 0.4506, "num_tokens": 135544399.0, "step": 214 }, { "epoch": 0.025422726735248907, "grad_norm": 0.3698061406612396, "learning_rate": 5.05511811023622e-05, "loss": 0.459, "num_tokens": 136182707.0, "step": 215 }, { "epoch": 0.02554097197587797, "grad_norm": 0.34665971994400024, "learning_rate": 5.0787401574803155e-05, "loss": 0.4549, "num_tokens": 136819793.0, "step": 216 }, { "epoch": 0.025659217216507036, "grad_norm": 0.43249067664146423, "learning_rate": 5.10236220472441e-05, "loss": 0.4606, "num_tokens": 137459203.0, "step": 217 }, { "epoch": 0.0257774624571361, "grad_norm": 0.3585987985134125, "learning_rate": 5.1259842519685045e-05, "loss": 0.4534, "num_tokens": 138086108.0, "step": 218 }, { "epoch": 0.025895707697765166, "grad_norm": 0.33417120575904846, "learning_rate": 5.149606299212599e-05, "loss": 0.4417, "num_tokens": 138717361.0, "step": 219 }, { "epoch": 0.02601395293839423, "grad_norm": 0.43817681074142456, "learning_rate": 5.173228346456693e-05, "loss": 0.4269, "num_tokens": 139353768.0, "step": 220 }, { "epoch": 0.026132198179023295, "grad_norm": 0.4601864814758301, "learning_rate": 5.1968503937007874e-05, "loss": 0.4911, "num_tokens": 139986012.0, "step": 221 }, { "epoch": 0.02625044341965236, "grad_norm": 0.4016098082065582, "learning_rate": 5.220472440944882e-05, "loss": 0.4243, "num_tokens": 140618557.0, "step": 222 }, { "epoch": 0.026368688660281425, "grad_norm": 0.4208032786846161, "learning_rate": 5.2440944881889764e-05, "loss": 0.4166, "num_tokens": 141253328.0, "step": 223 }, { "epoch": 0.02648693390091049, "grad_norm": 0.39847874641418457, "learning_rate": 5.267716535433071e-05, "loss": 0.4537, "num_tokens": 141885327.0, "step": 224 }, { "epoch": 0.026605179141539554, "grad_norm": 0.5055342316627502, "learning_rate": 5.2913385826771654e-05, "loss": 0.4694, "num_tokens": 142518760.0, "step": 225 }, { "epoch": 0.02672342438216862, "grad_norm": 0.4474887549877167, "learning_rate": 5.31496062992126e-05, "loss": 0.4742, "num_tokens": 143154500.0, "step": 226 }, { "epoch": 0.026841669622797684, "grad_norm": 0.3544766306877136, "learning_rate": 5.3385826771653545e-05, "loss": 0.4586, "num_tokens": 143792062.0, "step": 227 }, { "epoch": 0.026959914863426748, "grad_norm": 0.4308817386627197, "learning_rate": 5.362204724409449e-05, "loss": 0.4089, "num_tokens": 144424810.0, "step": 228 }, { "epoch": 0.027078160104055813, "grad_norm": 0.38940486311912537, "learning_rate": 5.3858267716535435e-05, "loss": 0.433, "num_tokens": 145021445.0, "step": 229 }, { "epoch": 0.027196405344684878, "grad_norm": 0.3999221622943878, "learning_rate": 5.409448818897637e-05, "loss": 0.4856, "num_tokens": 145656556.0, "step": 230 }, { "epoch": 0.027314650585313942, "grad_norm": 0.41209641098976135, "learning_rate": 5.433070866141732e-05, "loss": 0.4321, "num_tokens": 146293020.0, "step": 231 }, { "epoch": 0.027432895825943007, "grad_norm": 0.4652935266494751, "learning_rate": 5.456692913385827e-05, "loss": 0.453, "num_tokens": 146922200.0, "step": 232 }, { "epoch": 0.027551141066572072, "grad_norm": 0.3938509225845337, "learning_rate": 5.4803149606299215e-05, "loss": 0.4984, "num_tokens": 147560483.0, "step": 233 }, { "epoch": 0.027669386307201137, "grad_norm": 0.46515530347824097, "learning_rate": 5.503937007874016e-05, "loss": 0.4387, "num_tokens": 148198475.0, "step": 234 }, { "epoch": 0.0277876315478302, "grad_norm": 0.40671953558921814, "learning_rate": 5.5275590551181106e-05, "loss": 0.4567, "num_tokens": 148811827.0, "step": 235 }, { "epoch": 0.027905876788459266, "grad_norm": 0.3534509837627411, "learning_rate": 5.551181102362205e-05, "loss": 0.4208, "num_tokens": 149446588.0, "step": 236 }, { "epoch": 0.028024122029088328, "grad_norm": 0.4207174479961395, "learning_rate": 5.5748031496062996e-05, "loss": 0.407, "num_tokens": 150080880.0, "step": 237 }, { "epoch": 0.028142367269717392, "grad_norm": 0.43959930539131165, "learning_rate": 5.598425196850394e-05, "loss": 0.4752, "num_tokens": 150720517.0, "step": 238 }, { "epoch": 0.028260612510346457, "grad_norm": 0.44256553053855896, "learning_rate": 5.6220472440944886e-05, "loss": 0.4429, "num_tokens": 151354865.0, "step": 239 }, { "epoch": 0.028378857750975522, "grad_norm": 0.36267372965812683, "learning_rate": 5.6456692913385825e-05, "loss": 0.4086, "num_tokens": 151989302.0, "step": 240 }, { "epoch": 0.028497102991604586, "grad_norm": 0.43711671233177185, "learning_rate": 5.669291338582677e-05, "loss": 0.4434, "num_tokens": 152622644.0, "step": 241 }, { "epoch": 0.02861534823223365, "grad_norm": 0.39039915800094604, "learning_rate": 5.6929133858267715e-05, "loss": 0.4811, "num_tokens": 153254978.0, "step": 242 }, { "epoch": 0.028733593472862716, "grad_norm": 0.3540744185447693, "learning_rate": 5.716535433070866e-05, "loss": 0.4188, "num_tokens": 153888439.0, "step": 243 }, { "epoch": 0.02885183871349178, "grad_norm": 0.3390049338340759, "learning_rate": 5.7401574803149605e-05, "loss": 0.3945, "num_tokens": 154521234.0, "step": 244 }, { "epoch": 0.028970083954120845, "grad_norm": 0.35965636372566223, "learning_rate": 5.763779527559055e-05, "loss": 0.4446, "num_tokens": 155156546.0, "step": 245 }, { "epoch": 0.02908832919474991, "grad_norm": 0.379519522190094, "learning_rate": 5.7874015748031495e-05, "loss": 0.446, "num_tokens": 155791974.0, "step": 246 }, { "epoch": 0.029206574435378975, "grad_norm": 0.34325993061065674, "learning_rate": 5.811023622047244e-05, "loss": 0.4048, "num_tokens": 156417737.0, "step": 247 }, { "epoch": 0.02932481967600804, "grad_norm": 0.3720738887786865, "learning_rate": 5.834645669291339e-05, "loss": 0.4749, "num_tokens": 157054324.0, "step": 248 }, { "epoch": 0.029443064916637104, "grad_norm": 0.3790745437145233, "learning_rate": 5.858267716535434e-05, "loss": 0.4619, "num_tokens": 157686875.0, "step": 249 }, { "epoch": 0.02956131015726617, "grad_norm": 0.39887726306915283, "learning_rate": 5.8818897637795276e-05, "loss": 0.4561, "num_tokens": 158318798.0, "step": 250 }, { "epoch": 0.029679555397895234, "grad_norm": 0.3997748792171478, "learning_rate": 5.905511811023622e-05, "loss": 0.4715, "num_tokens": 158956587.0, "step": 251 }, { "epoch": 0.0297978006385243, "grad_norm": 0.36501893401145935, "learning_rate": 5.9291338582677166e-05, "loss": 0.3901, "num_tokens": 159591859.0, "step": 252 }, { "epoch": 0.029916045879153363, "grad_norm": 0.3696199357509613, "learning_rate": 5.952755905511811e-05, "loss": 0.4286, "num_tokens": 160224798.0, "step": 253 }, { "epoch": 0.030034291119782428, "grad_norm": 0.3716777265071869, "learning_rate": 5.9763779527559056e-05, "loss": 0.4338, "num_tokens": 160855622.0, "step": 254 }, { "epoch": 0.030152536360411493, "grad_norm": 0.3752037286758423, "learning_rate": 6e-05, "loss": 0.4016, "num_tokens": 161489536.0, "step": 255 }, { "epoch": 0.030270781601040558, "grad_norm": 0.40871843695640564, "learning_rate": 5.9999998019895614e-05, "loss": 0.4907, "num_tokens": 162123541.0, "step": 256 }, { "epoch": 0.030389026841669622, "grad_norm": 0.4072965979576111, "learning_rate": 5.999999207958275e-05, "loss": 0.447, "num_tokens": 162754194.0, "step": 257 }, { "epoch": 0.030507272082298687, "grad_norm": 0.38330650329589844, "learning_rate": 5.999998217906228e-05, "loss": 0.4318, "num_tokens": 163389806.0, "step": 258 }, { "epoch": 0.030625517322927752, "grad_norm": 0.3522624671459198, "learning_rate": 5.9999968318335657e-05, "loss": 0.4204, "num_tokens": 164022489.0, "step": 259 }, { "epoch": 0.030743762563556817, "grad_norm": 0.3561548888683319, "learning_rate": 5.9999950497404904e-05, "loss": 0.4293, "num_tokens": 164655264.0, "step": 260 }, { "epoch": 0.03086200780418588, "grad_norm": 0.3547787070274353, "learning_rate": 5.999992871627265e-05, "loss": 0.42, "num_tokens": 165293663.0, "step": 261 }, { "epoch": 0.030980253044814946, "grad_norm": 0.35948118567466736, "learning_rate": 5.9999902974942065e-05, "loss": 0.4435, "num_tokens": 165930344.0, "step": 262 }, { "epoch": 0.03109849828544401, "grad_norm": 0.32509511709213257, "learning_rate": 5.9999873273416964e-05, "loss": 0.4019, "num_tokens": 166565332.0, "step": 263 }, { "epoch": 0.031216743526073076, "grad_norm": 0.34538733959198, "learning_rate": 5.999983961170167e-05, "loss": 0.446, "num_tokens": 167200224.0, "step": 264 }, { "epoch": 0.03133498876670214, "grad_norm": 0.32964932918548584, "learning_rate": 5.999980198980114e-05, "loss": 0.4376, "num_tokens": 167832144.0, "step": 265 }, { "epoch": 0.0314532340073312, "grad_norm": 0.3241742253303528, "learning_rate": 5.999976040772087e-05, "loss": 0.4177, "num_tokens": 168469994.0, "step": 266 }, { "epoch": 0.03157147924796027, "grad_norm": 0.3408147096633911, "learning_rate": 5.999971486546698e-05, "loss": 0.4521, "num_tokens": 169106098.0, "step": 267 }, { "epoch": 0.03168972448858933, "grad_norm": 0.3564241826534271, "learning_rate": 5.999966536304614e-05, "loss": 0.393, "num_tokens": 169737993.0, "step": 268 }, { "epoch": 0.0318079697292184, "grad_norm": 0.29604148864746094, "learning_rate": 5.999961190046561e-05, "loss": 0.4026, "num_tokens": 170368812.0, "step": 269 }, { "epoch": 0.03192621496984746, "grad_norm": 0.357176274061203, "learning_rate": 5.999955447773325e-05, "loss": 0.4777, "num_tokens": 171006424.0, "step": 270 }, { "epoch": 0.03204446021047653, "grad_norm": 0.29581350088119507, "learning_rate": 5.9999493094857446e-05, "loss": 0.4314, "num_tokens": 171645721.0, "step": 271 }, { "epoch": 0.03216270545110559, "grad_norm": 0.3121999204158783, "learning_rate": 5.9999427751847234e-05, "loss": 0.4169, "num_tokens": 172281075.0, "step": 272 }, { "epoch": 0.03228095069173466, "grad_norm": 0.289729505777359, "learning_rate": 5.9999358448712186e-05, "loss": 0.381, "num_tokens": 172914063.0, "step": 273 }, { "epoch": 0.03239919593236372, "grad_norm": 0.34112486243247986, "learning_rate": 5.9999285185462464e-05, "loss": 0.4464, "num_tokens": 173548325.0, "step": 274 }, { "epoch": 0.03251744117299279, "grad_norm": 0.34492093324661255, "learning_rate": 5.999920796210883e-05, "loss": 0.4427, "num_tokens": 174183975.0, "step": 275 }, { "epoch": 0.03263568641362185, "grad_norm": 0.3401881158351898, "learning_rate": 5.9999126778662586e-05, "loss": 0.4174, "num_tokens": 174818549.0, "step": 276 }, { "epoch": 0.03275393165425092, "grad_norm": 0.31887444853782654, "learning_rate": 5.999904163513566e-05, "loss": 0.4032, "num_tokens": 175451113.0, "step": 277 }, { "epoch": 0.03287217689487998, "grad_norm": 0.358944833278656, "learning_rate": 5.9998952531540525e-05, "loss": 0.4514, "num_tokens": 176087116.0, "step": 278 }, { "epoch": 0.03299042213550905, "grad_norm": 0.3526931405067444, "learning_rate": 5.999885946789026e-05, "loss": 0.4692, "num_tokens": 176724483.0, "step": 279 }, { "epoch": 0.03310866737613811, "grad_norm": 0.38777267932891846, "learning_rate": 5.999876244419851e-05, "loss": 0.4448, "num_tokens": 177361164.0, "step": 280 }, { "epoch": 0.033226912616767176, "grad_norm": 0.3258650004863739, "learning_rate": 5.999866146047951e-05, "loss": 0.396, "num_tokens": 177999098.0, "step": 281 }, { "epoch": 0.03334515785739624, "grad_norm": 0.3798566162586212, "learning_rate": 5.9998556516748074e-05, "loss": 0.4415, "num_tokens": 178627820.0, "step": 282 }, { "epoch": 0.033463403098025306, "grad_norm": 0.291117399930954, "learning_rate": 5.9998447613019584e-05, "loss": 0.4272, "num_tokens": 179259176.0, "step": 283 }, { "epoch": 0.03358164833865437, "grad_norm": 0.3250221014022827, "learning_rate": 5.999833474931003e-05, "loss": 0.4478, "num_tokens": 179890905.0, "step": 284 }, { "epoch": 0.033699893579283435, "grad_norm": 0.3202228844165802, "learning_rate": 5.999821792563594e-05, "loss": 0.4348, "num_tokens": 180518445.0, "step": 285 }, { "epoch": 0.0338181388199125, "grad_norm": 0.3162289261817932, "learning_rate": 5.9998097142014475e-05, "loss": 0.4682, "num_tokens": 181154667.0, "step": 286 }, { "epoch": 0.033936384060541565, "grad_norm": 0.37152108550071716, "learning_rate": 5.999797239846334e-05, "loss": 0.4515, "num_tokens": 181789184.0, "step": 287 }, { "epoch": 0.034054629301170626, "grad_norm": 0.337274432182312, "learning_rate": 5.9997843695000834e-05, "loss": 0.4343, "num_tokens": 182421462.0, "step": 288 }, { "epoch": 0.034172874541799694, "grad_norm": 0.32214683294296265, "learning_rate": 5.999771103164583e-05, "loss": 0.4197, "num_tokens": 183056216.0, "step": 289 }, { "epoch": 0.034291119782428756, "grad_norm": 0.4172515869140625, "learning_rate": 5.99975744084178e-05, "loss": 0.4511, "num_tokens": 183695586.0, "step": 290 }, { "epoch": 0.034409365023057824, "grad_norm": 0.31545501947402954, "learning_rate": 5.9997433825336753e-05, "loss": 0.4197, "num_tokens": 184323170.0, "step": 291 }, { "epoch": 0.034527610263686885, "grad_norm": 0.5075629353523254, "learning_rate": 5.999728928242334e-05, "loss": 0.4971, "num_tokens": 184923402.0, "step": 292 }, { "epoch": 0.03464585550431595, "grad_norm": 0.3310132622718811, "learning_rate": 5.999714077969875e-05, "loss": 0.4192, "num_tokens": 185562179.0, "step": 293 }, { "epoch": 0.034764100744945015, "grad_norm": 0.3701557219028473, "learning_rate": 5.999698831718477e-05, "loss": 0.4296, "num_tokens": 186196488.0, "step": 294 }, { "epoch": 0.03488234598557408, "grad_norm": 0.33413970470428467, "learning_rate": 5.999683189490375e-05, "loss": 0.4645, "num_tokens": 186795333.0, "step": 295 }, { "epoch": 0.035000591226203144, "grad_norm": 0.3070925176143646, "learning_rate": 5.999667151287865e-05, "loss": 0.4039, "num_tokens": 187431540.0, "step": 296 }, { "epoch": 0.03511883646683221, "grad_norm": 0.3607010841369629, "learning_rate": 5.999650717113297e-05, "loss": 0.4267, "num_tokens": 188067882.0, "step": 297 }, { "epoch": 0.035237081707461274, "grad_norm": 0.33303070068359375, "learning_rate": 5.9996338869690846e-05, "loss": 0.4637, "num_tokens": 188704706.0, "step": 298 }, { "epoch": 0.03535532694809034, "grad_norm": 0.3175329864025116, "learning_rate": 5.999616660857693e-05, "loss": 0.4056, "num_tokens": 189339944.0, "step": 299 }, { "epoch": 0.0354735721887194, "grad_norm": 0.4077093303203583, "learning_rate": 5.999599038781652e-05, "loss": 0.4911, "num_tokens": 189977157.0, "step": 300 }, { "epoch": 0.03559181742934847, "grad_norm": 0.3319735527038574, "learning_rate": 5.9995810207435434e-05, "loss": 0.4347, "num_tokens": 190586411.0, "step": 301 }, { "epoch": 0.03571006266997753, "grad_norm": 0.38066333532333374, "learning_rate": 5.9995626067460126e-05, "loss": 0.4521, "num_tokens": 191219345.0, "step": 302 }, { "epoch": 0.0358283079106066, "grad_norm": 0.2792882025241852, "learning_rate": 5.9995437967917586e-05, "loss": 0.4417, "num_tokens": 191855183.0, "step": 303 }, { "epoch": 0.03594655315123566, "grad_norm": 0.31642311811447144, "learning_rate": 5.9995245908835406e-05, "loss": 0.4135, "num_tokens": 192485493.0, "step": 304 }, { "epoch": 0.03606479839186473, "grad_norm": 0.333473801612854, "learning_rate": 5.999504989024177e-05, "loss": 0.4091, "num_tokens": 193122797.0, "step": 305 }, { "epoch": 0.03618304363249379, "grad_norm": 0.3209477365016937, "learning_rate": 5.99948499121654e-05, "loss": 0.4349, "num_tokens": 193759752.0, "step": 306 }, { "epoch": 0.03630128887312286, "grad_norm": 0.32120513916015625, "learning_rate": 5.999464597463566e-05, "loss": 0.4252, "num_tokens": 194394288.0, "step": 307 }, { "epoch": 0.03641953411375192, "grad_norm": 0.3158802092075348, "learning_rate": 5.9994438077682455e-05, "loss": 0.4232, "num_tokens": 195027778.0, "step": 308 }, { "epoch": 0.03653777935438099, "grad_norm": 0.3119392395019531, "learning_rate": 5.9994226221336275e-05, "loss": 0.3546, "num_tokens": 195660013.0, "step": 309 }, { "epoch": 0.03665602459501005, "grad_norm": 0.3570234775543213, "learning_rate": 5.999401040562818e-05, "loss": 0.443, "num_tokens": 196290478.0, "step": 310 }, { "epoch": 0.03677426983563912, "grad_norm": 0.35759982466697693, "learning_rate": 5.999379063058983e-05, "loss": 0.4598, "num_tokens": 196927060.0, "step": 311 }, { "epoch": 0.03689251507626818, "grad_norm": 0.3498620092868805, "learning_rate": 5.9993566896253484e-05, "loss": 0.4574, "num_tokens": 197557085.0, "step": 312 }, { "epoch": 0.03701076031689725, "grad_norm": 0.3548009991645813, "learning_rate": 5.999333920265192e-05, "loss": 0.4432, "num_tokens": 198189535.0, "step": 313 }, { "epoch": 0.03712900555752631, "grad_norm": 0.33351144194602966, "learning_rate": 5.9993107549818565e-05, "loss": 0.4611, "num_tokens": 198820737.0, "step": 314 }, { "epoch": 0.03724725079815538, "grad_norm": 0.3283405005931854, "learning_rate": 5.999287193778739e-05, "loss": 0.4181, "num_tokens": 199451792.0, "step": 315 }, { "epoch": 0.03736549603878444, "grad_norm": 0.3382594585418701, "learning_rate": 5.9992632366592946e-05, "loss": 0.4227, "num_tokens": 200086012.0, "step": 316 }, { "epoch": 0.0374837412794135, "grad_norm": 0.3032221496105194, "learning_rate": 5.9992388836270376e-05, "loss": 0.4129, "num_tokens": 200723155.0, "step": 317 }, { "epoch": 0.03760198652004257, "grad_norm": 0.37313637137413025, "learning_rate": 5.9992141346855394e-05, "loss": 0.4376, "num_tokens": 201362056.0, "step": 318 }, { "epoch": 0.03772023176067163, "grad_norm": 0.29884782433509827, "learning_rate": 5.999188989838432e-05, "loss": 0.4191, "num_tokens": 201996006.0, "step": 319 }, { "epoch": 0.0378384770013007, "grad_norm": 0.319193571805954, "learning_rate": 5.999163449089401e-05, "loss": 0.3882, "num_tokens": 202591057.0, "step": 320 }, { "epoch": 0.03795672224192976, "grad_norm": 0.31342220306396484, "learning_rate": 5.999137512442194e-05, "loss": 0.4032, "num_tokens": 203228812.0, "step": 321 }, { "epoch": 0.03807496748255883, "grad_norm": 0.30908358097076416, "learning_rate": 5.999111179900615e-05, "loss": 0.4329, "num_tokens": 203868294.0, "step": 322 }, { "epoch": 0.03819321272318789, "grad_norm": 0.3021951913833618, "learning_rate": 5.9990844514685254e-05, "loss": 0.3983, "num_tokens": 204499481.0, "step": 323 }, { "epoch": 0.03831145796381696, "grad_norm": 0.2702217102050781, "learning_rate": 5.9990573271498475e-05, "loss": 0.4133, "num_tokens": 205133494.0, "step": 324 }, { "epoch": 0.03842970320444602, "grad_norm": 0.26718276739120483, "learning_rate": 5.999029806948558e-05, "loss": 0.4274, "num_tokens": 205767024.0, "step": 325 }, { "epoch": 0.038547948445075086, "grad_norm": 0.32187291979789734, "learning_rate": 5.999001890868693e-05, "loss": 0.4462, "num_tokens": 206400825.0, "step": 326 }, { "epoch": 0.03866619368570415, "grad_norm": 0.2840275466442108, "learning_rate": 5.99897357891435e-05, "loss": 0.4298, "num_tokens": 207040239.0, "step": 327 }, { "epoch": 0.038784438926333216, "grad_norm": 0.3215208649635315, "learning_rate": 5.998944871089679e-05, "loss": 0.4108, "num_tokens": 207676732.0, "step": 328 }, { "epoch": 0.03890268416696228, "grad_norm": 0.2716936767101288, "learning_rate": 5.998915767398891e-05, "loss": 0.4048, "num_tokens": 208313021.0, "step": 329 }, { "epoch": 0.039020929407591345, "grad_norm": 0.3153400421142578, "learning_rate": 5.998886267846256e-05, "loss": 0.4653, "num_tokens": 208946344.0, "step": 330 }, { "epoch": 0.03913917464822041, "grad_norm": 0.34911447763442993, "learning_rate": 5.9988563724360996e-05, "loss": 0.4867, "num_tokens": 209577542.0, "step": 331 }, { "epoch": 0.039257419888849475, "grad_norm": 0.2895367443561554, "learning_rate": 5.998826081172807e-05, "loss": 0.4775, "num_tokens": 210213046.0, "step": 332 }, { "epoch": 0.039375665129478536, "grad_norm": 0.30785229802131653, "learning_rate": 5.9987953940608215e-05, "loss": 0.4314, "num_tokens": 210848130.0, "step": 333 }, { "epoch": 0.039493910370107604, "grad_norm": 0.27349531650543213, "learning_rate": 5.998764311104644e-05, "loss": 0.4202, "num_tokens": 211482121.0, "step": 334 }, { "epoch": 0.039612155610736666, "grad_norm": 0.32428908348083496, "learning_rate": 5.9987328323088335e-05, "loss": 0.467, "num_tokens": 212120668.0, "step": 335 }, { "epoch": 0.039730400851365734, "grad_norm": 0.3082425892353058, "learning_rate": 5.998700957678007e-05, "loss": 0.4321, "num_tokens": 212756102.0, "step": 336 }, { "epoch": 0.039848646091994795, "grad_norm": 0.32667168974876404, "learning_rate": 5.99866868721684e-05, "loss": 0.46, "num_tokens": 213392130.0, "step": 337 }, { "epoch": 0.03996689133262386, "grad_norm": 0.2850625216960907, "learning_rate": 5.998636020930065e-05, "loss": 0.4091, "num_tokens": 214021365.0, "step": 338 }, { "epoch": 0.040085136573252925, "grad_norm": 0.26266756653785706, "learning_rate": 5.9986029588224745e-05, "loss": 0.4235, "num_tokens": 214651340.0, "step": 339 }, { "epoch": 0.04020338181388199, "grad_norm": 0.30058911442756653, "learning_rate": 5.998569500898917e-05, "loss": 0.4506, "num_tokens": 215284267.0, "step": 340 }, { "epoch": 0.040321627054511054, "grad_norm": 0.3373875617980957, "learning_rate": 5.9985356471643e-05, "loss": 0.4623, "num_tokens": 215920647.0, "step": 341 }, { "epoch": 0.04043987229514012, "grad_norm": 0.3120020031929016, "learning_rate": 5.9985013976235895e-05, "loss": 0.3953, "num_tokens": 216555518.0, "step": 342 }, { "epoch": 0.040558117535769184, "grad_norm": 0.2891152501106262, "learning_rate": 5.9984667522818074e-05, "loss": 0.3965, "num_tokens": 217191272.0, "step": 343 }, { "epoch": 0.04067636277639825, "grad_norm": 0.3276785612106323, "learning_rate": 5.998431711144038e-05, "loss": 0.4281, "num_tokens": 217824700.0, "step": 344 }, { "epoch": 0.04079460801702731, "grad_norm": 0.2784927785396576, "learning_rate": 5.998396274215419e-05, "loss": 0.3892, "num_tokens": 218455560.0, "step": 345 }, { "epoch": 0.04091285325765638, "grad_norm": 0.32685917615890503, "learning_rate": 5.998360441501149e-05, "loss": 0.4397, "num_tokens": 219091298.0, "step": 346 }, { "epoch": 0.04103109849828544, "grad_norm": 0.27822571992874146, "learning_rate": 5.998324213006483e-05, "loss": 0.4114, "num_tokens": 219727733.0, "step": 347 }, { "epoch": 0.04114934373891451, "grad_norm": 0.29395437240600586, "learning_rate": 5.9982875887367345e-05, "loss": 0.4658, "num_tokens": 220360695.0, "step": 348 }, { "epoch": 0.04126758897954357, "grad_norm": 0.30603286623954773, "learning_rate": 5.998250568697277e-05, "loss": 0.4545, "num_tokens": 220993405.0, "step": 349 }, { "epoch": 0.04138583422017264, "grad_norm": 0.2973026633262634, "learning_rate": 5.998213152893538e-05, "loss": 0.4482, "num_tokens": 221619384.0, "step": 350 }, { "epoch": 0.0415040794608017, "grad_norm": 0.3167627453804016, "learning_rate": 5.9981753413310086e-05, "loss": 0.4315, "num_tokens": 222250253.0, "step": 351 }, { "epoch": 0.04162232470143077, "grad_norm": 0.29249465465545654, "learning_rate": 5.998137134015232e-05, "loss": 0.4185, "num_tokens": 222884935.0, "step": 352 }, { "epoch": 0.04174056994205983, "grad_norm": 0.28415942192077637, "learning_rate": 5.998098530951812e-05, "loss": 0.4186, "num_tokens": 223516629.0, "step": 353 }, { "epoch": 0.0418588151826889, "grad_norm": 0.29491230845451355, "learning_rate": 5.9980595321464134e-05, "loss": 0.4283, "num_tokens": 224150031.0, "step": 354 }, { "epoch": 0.04197706042331796, "grad_norm": 0.2777010202407837, "learning_rate": 5.998020137604755e-05, "loss": 0.4128, "num_tokens": 224786071.0, "step": 355 }, { "epoch": 0.04209530566394703, "grad_norm": 0.30928027629852295, "learning_rate": 5.9979803473326144e-05, "loss": 0.4476, "num_tokens": 225421240.0, "step": 356 }, { "epoch": 0.04221355090457609, "grad_norm": 0.33089756965637207, "learning_rate": 5.9979401613358275e-05, "loss": 0.4141, "num_tokens": 226052174.0, "step": 357 }, { "epoch": 0.04233179614520516, "grad_norm": 0.3305080533027649, "learning_rate": 5.99789957962029e-05, "loss": 0.4534, "num_tokens": 226689398.0, "step": 358 }, { "epoch": 0.04245004138583422, "grad_norm": 0.2907538414001465, "learning_rate": 5.997858602191953e-05, "loss": 0.4188, "num_tokens": 227327616.0, "step": 359 }, { "epoch": 0.04256828662646329, "grad_norm": 0.33554351329803467, "learning_rate": 5.9978172290568274e-05, "loss": 0.4232, "num_tokens": 227952686.0, "step": 360 }, { "epoch": 0.04268653186709235, "grad_norm": 0.3194364309310913, "learning_rate": 5.997775460220982e-05, "loss": 0.4352, "num_tokens": 228587586.0, "step": 361 }, { "epoch": 0.04280477710772142, "grad_norm": 0.31662896275520325, "learning_rate": 5.997733295690542e-05, "loss": 0.4224, "num_tokens": 229185285.0, "step": 362 }, { "epoch": 0.04292302234835048, "grad_norm": 0.2837565839290619, "learning_rate": 5.997690735471692e-05, "loss": 0.4155, "num_tokens": 229815886.0, "step": 363 }, { "epoch": 0.04304126758897955, "grad_norm": 0.3400239050388336, "learning_rate": 5.9976477795706763e-05, "loss": 0.4277, "num_tokens": 230449875.0, "step": 364 }, { "epoch": 0.04315951282960861, "grad_norm": 0.2718769907951355, "learning_rate": 5.9976044279937935e-05, "loss": 0.429, "num_tokens": 231087769.0, "step": 365 }, { "epoch": 0.043277758070237676, "grad_norm": 0.33681654930114746, "learning_rate": 5.9975606807474034e-05, "loss": 0.4153, "num_tokens": 231720719.0, "step": 366 }, { "epoch": 0.04339600331086674, "grad_norm": 0.28005146980285645, "learning_rate": 5.997516537837922e-05, "loss": 0.4083, "num_tokens": 232354694.0, "step": 367 }, { "epoch": 0.0435142485514958, "grad_norm": 0.30170536041259766, "learning_rate": 5.997471999271823e-05, "loss": 0.4377, "num_tokens": 232986853.0, "step": 368 }, { "epoch": 0.04363249379212487, "grad_norm": 0.3472014367580414, "learning_rate": 5.997427065055641e-05, "loss": 0.4764, "num_tokens": 233626246.0, "step": 369 }, { "epoch": 0.04375073903275393, "grad_norm": 0.2982926666736603, "learning_rate": 5.997381735195965e-05, "loss": 0.3674, "num_tokens": 234256236.0, "step": 370 }, { "epoch": 0.043868984273383, "grad_norm": 0.33007562160491943, "learning_rate": 5.9973360096994456e-05, "loss": 0.4373, "num_tokens": 234894656.0, "step": 371 }, { "epoch": 0.04398722951401206, "grad_norm": 0.30718088150024414, "learning_rate": 5.9972898885727876e-05, "loss": 0.4194, "num_tokens": 235530175.0, "step": 372 }, { "epoch": 0.044105474754641126, "grad_norm": 0.302002876996994, "learning_rate": 5.9972433718227566e-05, "loss": 0.4062, "num_tokens": 236165286.0, "step": 373 }, { "epoch": 0.04422371999527019, "grad_norm": 0.3003771901130676, "learning_rate": 5.9971964594561756e-05, "loss": 0.4051, "num_tokens": 236799872.0, "step": 374 }, { "epoch": 0.044341965235899256, "grad_norm": 0.28838953375816345, "learning_rate": 5.997149151479925e-05, "loss": 0.38, "num_tokens": 237436361.0, "step": 375 }, { "epoch": 0.04446021047652832, "grad_norm": 0.2884643077850342, "learning_rate": 5.9971014479009435e-05, "loss": 0.3995, "num_tokens": 238072053.0, "step": 376 }, { "epoch": 0.044578455717157385, "grad_norm": 0.3635023236274719, "learning_rate": 5.99705334872623e-05, "loss": 0.4463, "num_tokens": 238707861.0, "step": 377 }, { "epoch": 0.044696700957786446, "grad_norm": 0.3112204968929291, "learning_rate": 5.997004853962837e-05, "loss": 0.4122, "num_tokens": 239340071.0, "step": 378 }, { "epoch": 0.044814946198415515, "grad_norm": 0.3291580080986023, "learning_rate": 5.9969559636178776e-05, "loss": 0.4365, "num_tokens": 239974165.0, "step": 379 }, { "epoch": 0.044933191439044576, "grad_norm": 0.30556878447532654, "learning_rate": 5.996906677698525e-05, "loss": 0.4045, "num_tokens": 240606077.0, "step": 380 }, { "epoch": 0.045051436679673644, "grad_norm": 0.32267481088638306, "learning_rate": 5.996856996212006e-05, "loss": 0.4191, "num_tokens": 241243195.0, "step": 381 }, { "epoch": 0.045169681920302705, "grad_norm": 0.31723731756210327, "learning_rate": 5.996806919165608e-05, "loss": 0.4637, "num_tokens": 241881692.0, "step": 382 }, { "epoch": 0.045287927160931774, "grad_norm": 0.3272183835506439, "learning_rate": 5.996756446566676e-05, "loss": 0.4353, "num_tokens": 242520949.0, "step": 383 }, { "epoch": 0.045406172401560835, "grad_norm": 0.26670175790786743, "learning_rate": 5.996705578422613e-05, "loss": 0.4146, "num_tokens": 243160575.0, "step": 384 }, { "epoch": 0.0455244176421899, "grad_norm": 0.3358438313007355, "learning_rate": 5.996654314740881e-05, "loss": 0.4302, "num_tokens": 243790943.0, "step": 385 }, { "epoch": 0.045642662882818964, "grad_norm": 0.3380783200263977, "learning_rate": 5.9966026555289985e-05, "loss": 0.4777, "num_tokens": 244423844.0, "step": 386 }, { "epoch": 0.04576090812344803, "grad_norm": 0.27845922112464905, "learning_rate": 5.996550600794543e-05, "loss": 0.4081, "num_tokens": 245054470.0, "step": 387 }, { "epoch": 0.045879153364077094, "grad_norm": 0.26127803325653076, "learning_rate": 5.996498150545148e-05, "loss": 0.3962, "num_tokens": 245688487.0, "step": 388 }, { "epoch": 0.04599739860470616, "grad_norm": 0.29639074206352234, "learning_rate": 5.996445304788508e-05, "loss": 0.4269, "num_tokens": 246325682.0, "step": 389 }, { "epoch": 0.04611564384533522, "grad_norm": 0.27596673369407654, "learning_rate": 5.996392063532374e-05, "loss": 0.447, "num_tokens": 246961506.0, "step": 390 }, { "epoch": 0.04623388908596429, "grad_norm": 0.26508697867393494, "learning_rate": 5.996338426784556e-05, "loss": 0.3958, "num_tokens": 247594032.0, "step": 391 }, { "epoch": 0.04635213432659335, "grad_norm": 0.3066747784614563, "learning_rate": 5.996284394552919e-05, "loss": 0.4438, "num_tokens": 248227593.0, "step": 392 }, { "epoch": 0.04647037956722242, "grad_norm": 0.2677036225795746, "learning_rate": 5.996229966845389e-05, "loss": 0.4237, "num_tokens": 248859376.0, "step": 393 }, { "epoch": 0.04658862480785148, "grad_norm": 0.30630677938461304, "learning_rate": 5.9961751436699496e-05, "loss": 0.4431, "num_tokens": 249493350.0, "step": 394 }, { "epoch": 0.04670687004848055, "grad_norm": 0.29309946298599243, "learning_rate": 5.9961199250346424e-05, "loss": 0.4016, "num_tokens": 250126507.0, "step": 395 }, { "epoch": 0.04682511528910961, "grad_norm": 0.2800038754940033, "learning_rate": 5.996064310947565e-05, "loss": 0.4405, "num_tokens": 250765825.0, "step": 396 }, { "epoch": 0.04694336052973868, "grad_norm": 0.28637009859085083, "learning_rate": 5.9960083014168756e-05, "loss": 0.4335, "num_tokens": 251403372.0, "step": 397 }, { "epoch": 0.04706160577036774, "grad_norm": 0.28876709938049316, "learning_rate": 5.99595189645079e-05, "loss": 0.441, "num_tokens": 252041762.0, "step": 398 }, { "epoch": 0.04717985101099681, "grad_norm": 0.3044595718383789, "learning_rate": 5.9958950960575806e-05, "loss": 0.4199, "num_tokens": 252673716.0, "step": 399 }, { "epoch": 0.04729809625162587, "grad_norm": 0.2870004177093506, "learning_rate": 5.995837900245578e-05, "loss": 0.4297, "num_tokens": 253310805.0, "step": 400 }, { "epoch": 0.04741634149225494, "grad_norm": 0.25463756918907166, "learning_rate": 5.995780309023172e-05, "loss": 0.3557, "num_tokens": 253944650.0, "step": 401 }, { "epoch": 0.047534586732884, "grad_norm": 0.25597402453422546, "learning_rate": 5.99572232239881e-05, "loss": 0.3969, "num_tokens": 254579224.0, "step": 402 }, { "epoch": 0.04765283197351307, "grad_norm": 0.25922319293022156, "learning_rate": 5.995663940380996e-05, "loss": 0.4138, "num_tokens": 255212393.0, "step": 403 }, { "epoch": 0.04777107721414213, "grad_norm": 0.27946922183036804, "learning_rate": 5.9956051629782955e-05, "loss": 0.4386, "num_tokens": 255846216.0, "step": 404 }, { "epoch": 0.0478893224547712, "grad_norm": 0.26831814646720886, "learning_rate": 5.995545990199327e-05, "loss": 0.4457, "num_tokens": 256484541.0, "step": 405 }, { "epoch": 0.04800756769540026, "grad_norm": 0.3052463233470917, "learning_rate": 5.995486422052771e-05, "loss": 0.4077, "num_tokens": 257114691.0, "step": 406 }, { "epoch": 0.04812581293602933, "grad_norm": 0.26136288046836853, "learning_rate": 5.995426458547364e-05, "loss": 0.422, "num_tokens": 257748044.0, "step": 407 }, { "epoch": 0.04824405817665839, "grad_norm": 0.24126434326171875, "learning_rate": 5.9953660996919024e-05, "loss": 0.3762, "num_tokens": 258386984.0, "step": 408 }, { "epoch": 0.04836230341728746, "grad_norm": 0.27943384647369385, "learning_rate": 5.995305345495238e-05, "loss": 0.4465, "num_tokens": 259023318.0, "step": 409 }, { "epoch": 0.04848054865791652, "grad_norm": 0.24933411180973053, "learning_rate": 5.9952441959662814e-05, "loss": 0.3989, "num_tokens": 259657204.0, "step": 410 }, { "epoch": 0.048598793898545586, "grad_norm": 0.26967093348503113, "learning_rate": 5.9951826511140035e-05, "loss": 0.3994, "num_tokens": 260290556.0, "step": 411 }, { "epoch": 0.04871703913917465, "grad_norm": 0.26504048705101013, "learning_rate": 5.99512071094743e-05, "loss": 0.4615, "num_tokens": 260928576.0, "step": 412 }, { "epoch": 0.048835284379803716, "grad_norm": 0.2649416923522949, "learning_rate": 5.995058375475647e-05, "loss": 0.4113, "num_tokens": 261563484.0, "step": 413 }, { "epoch": 0.04895352962043278, "grad_norm": 0.26874297857284546, "learning_rate": 5.994995644707796e-05, "loss": 0.38, "num_tokens": 262198058.0, "step": 414 }, { "epoch": 0.049071774861061845, "grad_norm": 0.28117337822914124, "learning_rate": 5.99493251865308e-05, "loss": 0.4402, "num_tokens": 262825300.0, "step": 415 }, { "epoch": 0.04919002010169091, "grad_norm": 0.2871474325656891, "learning_rate": 5.994868997320756e-05, "loss": 0.4554, "num_tokens": 263458362.0, "step": 416 }, { "epoch": 0.049308265342319975, "grad_norm": 0.2570261061191559, "learning_rate": 5.994805080720142e-05, "loss": 0.3955, "num_tokens": 264088367.0, "step": 417 }, { "epoch": 0.049426510582949036, "grad_norm": 0.3157186806201935, "learning_rate": 5.994740768860612e-05, "loss": 0.4108, "num_tokens": 264724592.0, "step": 418 }, { "epoch": 0.049544755823578104, "grad_norm": 0.295461505651474, "learning_rate": 5.994676061751601e-05, "loss": 0.3976, "num_tokens": 265360262.0, "step": 419 }, { "epoch": 0.049663001064207166, "grad_norm": 0.30874693393707275, "learning_rate": 5.994610959402597e-05, "loss": 0.4512, "num_tokens": 265997689.0, "step": 420 }, { "epoch": 0.04978124630483623, "grad_norm": 0.29907622933387756, "learning_rate": 5.994545461823151e-05, "loss": 0.4202, "num_tokens": 266631262.0, "step": 421 }, { "epoch": 0.049899491545465295, "grad_norm": 0.32576265931129456, "learning_rate": 5.994479569022869e-05, "loss": 0.4139, "num_tokens": 267268979.0, "step": 422 }, { "epoch": 0.050017736786094356, "grad_norm": 0.2726156413555145, "learning_rate": 5.994413281011417e-05, "loss": 0.4171, "num_tokens": 267904627.0, "step": 423 }, { "epoch": 0.050135982026723425, "grad_norm": 0.25041118264198303, "learning_rate": 5.994346597798516e-05, "loss": 0.3719, "num_tokens": 268543181.0, "step": 424 }, { "epoch": 0.050254227267352486, "grad_norm": 0.30094441771507263, "learning_rate": 5.9942795193939473e-05, "loss": 0.4197, "num_tokens": 269179996.0, "step": 425 }, { "epoch": 0.050372472507981554, "grad_norm": 0.24550040066242218, "learning_rate": 5.9942120458075504e-05, "loss": 0.3995, "num_tokens": 269811253.0, "step": 426 }, { "epoch": 0.050490717748610615, "grad_norm": 0.2693399488925934, "learning_rate": 5.99414417704922e-05, "loss": 0.4066, "num_tokens": 270441984.0, "step": 427 }, { "epoch": 0.050608962989239684, "grad_norm": 0.2829120457172394, "learning_rate": 5.9940759131289126e-05, "loss": 0.4171, "num_tokens": 271074224.0, "step": 428 }, { "epoch": 0.050727208229868745, "grad_norm": 0.3007882237434387, "learning_rate": 5.99400725405664e-05, "loss": 0.4637, "num_tokens": 271705707.0, "step": 429 }, { "epoch": 0.05084545347049781, "grad_norm": 0.2930167317390442, "learning_rate": 5.993938199842473e-05, "loss": 0.3958, "num_tokens": 272318335.0, "step": 430 }, { "epoch": 0.050963698711126874, "grad_norm": 0.32805997133255005, "learning_rate": 5.99386875049654e-05, "loss": 0.4319, "num_tokens": 272955472.0, "step": 431 }, { "epoch": 0.05108194395175594, "grad_norm": 0.30262523889541626, "learning_rate": 5.993798906029027e-05, "loss": 0.4221, "num_tokens": 273559901.0, "step": 432 }, { "epoch": 0.051200189192385004, "grad_norm": 0.2800939977169037, "learning_rate": 5.9937286664501794e-05, "loss": 0.4186, "num_tokens": 274192648.0, "step": 433 }, { "epoch": 0.05131843443301407, "grad_norm": 0.2910088300704956, "learning_rate": 5.993658031770298e-05, "loss": 0.3898, "num_tokens": 274831825.0, "step": 434 }, { "epoch": 0.05143667967364313, "grad_norm": 0.24513041973114014, "learning_rate": 5.9935870019997446e-05, "loss": 0.3754, "num_tokens": 275456510.0, "step": 435 }, { "epoch": 0.0515549249142722, "grad_norm": 0.29972150921821594, "learning_rate": 5.993515577148937e-05, "loss": 0.3788, "num_tokens": 276091876.0, "step": 436 }, { "epoch": 0.05167317015490126, "grad_norm": 0.267413467168808, "learning_rate": 5.9934437572283514e-05, "loss": 0.4265, "num_tokens": 276727335.0, "step": 437 }, { "epoch": 0.05179141539553033, "grad_norm": 0.352626234292984, "learning_rate": 5.9933715422485214e-05, "loss": 0.4534, "num_tokens": 277359081.0, "step": 438 }, { "epoch": 0.05190966063615939, "grad_norm": 0.2743447721004486, "learning_rate": 5.9932989322200396e-05, "loss": 0.3864, "num_tokens": 277990765.0, "step": 439 }, { "epoch": 0.05202790587678846, "grad_norm": 0.27246472239494324, "learning_rate": 5.9932259271535564e-05, "loss": 0.4135, "num_tokens": 278612675.0, "step": 440 }, { "epoch": 0.05214615111741752, "grad_norm": 0.2742938995361328, "learning_rate": 5.993152527059779e-05, "loss": 0.3904, "num_tokens": 279246447.0, "step": 441 }, { "epoch": 0.05226439635804659, "grad_norm": 0.2966417074203491, "learning_rate": 5.993078731949473e-05, "loss": 0.4609, "num_tokens": 279880451.0, "step": 442 }, { "epoch": 0.05238264159867565, "grad_norm": 0.293805330991745, "learning_rate": 5.9930045418334645e-05, "loss": 0.4509, "num_tokens": 280517149.0, "step": 443 }, { "epoch": 0.05250088683930472, "grad_norm": 0.2761572003364563, "learning_rate": 5.9929299567226326e-05, "loss": 0.3887, "num_tokens": 281144009.0, "step": 444 }, { "epoch": 0.05261913207993378, "grad_norm": 0.2697403132915497, "learning_rate": 5.992854976627918e-05, "loss": 0.4044, "num_tokens": 281781076.0, "step": 445 }, { "epoch": 0.05273737732056285, "grad_norm": 0.2626025974750519, "learning_rate": 5.992779601560318e-05, "loss": 0.4121, "num_tokens": 282418028.0, "step": 446 }, { "epoch": 0.05285562256119191, "grad_norm": 0.2892271876335144, "learning_rate": 5.99270383153089e-05, "loss": 0.4017, "num_tokens": 283052667.0, "step": 447 }, { "epoch": 0.05297386780182098, "grad_norm": 0.28252655267715454, "learning_rate": 5.992627666550745e-05, "loss": 0.4503, "num_tokens": 283689384.0, "step": 448 }, { "epoch": 0.05309211304245004, "grad_norm": 0.2242220938205719, "learning_rate": 5.992551106631056e-05, "loss": 0.376, "num_tokens": 284325980.0, "step": 449 }, { "epoch": 0.05321035828307911, "grad_norm": 0.2983514666557312, "learning_rate": 5.992474151783052e-05, "loss": 0.4331, "num_tokens": 284927410.0, "step": 450 }, { "epoch": 0.05332860352370817, "grad_norm": 0.2764870524406433, "learning_rate": 5.9923968020180204e-05, "loss": 0.4186, "num_tokens": 285561853.0, "step": 451 }, { "epoch": 0.05344684876433724, "grad_norm": 0.27077996730804443, "learning_rate": 5.992319057347307e-05, "loss": 0.4302, "num_tokens": 286200667.0, "step": 452 }, { "epoch": 0.0535650940049663, "grad_norm": 0.27675661444664, "learning_rate": 5.992240917782313e-05, "loss": 0.392, "num_tokens": 286838297.0, "step": 453 }, { "epoch": 0.05368333924559537, "grad_norm": 0.25199928879737854, "learning_rate": 5.992162383334502e-05, "loss": 0.396, "num_tokens": 287478013.0, "step": 454 }, { "epoch": 0.05380158448622443, "grad_norm": 0.2786621153354645, "learning_rate": 5.992083454015391e-05, "loss": 0.429, "num_tokens": 288113641.0, "step": 455 }, { "epoch": 0.053919829726853497, "grad_norm": 0.24246902763843536, "learning_rate": 5.9920041298365575e-05, "loss": 0.421, "num_tokens": 288748606.0, "step": 456 }, { "epoch": 0.05403807496748256, "grad_norm": 0.26285168528556824, "learning_rate": 5.9919244108096374e-05, "loss": 0.4001, "num_tokens": 289387862.0, "step": 457 }, { "epoch": 0.054156320208111626, "grad_norm": 0.2636911869049072, "learning_rate": 5.991844296946321e-05, "loss": 0.4017, "num_tokens": 290025351.0, "step": 458 }, { "epoch": 0.05427456544874069, "grad_norm": 0.2718662619590759, "learning_rate": 5.9917637882583616e-05, "loss": 0.3919, "num_tokens": 290660065.0, "step": 459 }, { "epoch": 0.054392810689369755, "grad_norm": 0.26348644495010376, "learning_rate": 5.991682884757567e-05, "loss": 0.3961, "num_tokens": 291293769.0, "step": 460 }, { "epoch": 0.05451105592999882, "grad_norm": 0.271066278219223, "learning_rate": 5.991601586455803e-05, "loss": 0.4033, "num_tokens": 291932970.0, "step": 461 }, { "epoch": 0.054629301170627885, "grad_norm": 0.2642495930194855, "learning_rate": 5.991519893364995e-05, "loss": 0.413, "num_tokens": 292570196.0, "step": 462 }, { "epoch": 0.054747546411256946, "grad_norm": 0.2661541700363159, "learning_rate": 5.9914378054971235e-05, "loss": 0.4453, "num_tokens": 293208953.0, "step": 463 }, { "epoch": 0.054865791651886014, "grad_norm": 0.26078978180885315, "learning_rate": 5.9913553228642294e-05, "loss": 0.4143, "num_tokens": 293841194.0, "step": 464 }, { "epoch": 0.054984036892515076, "grad_norm": 0.2549993693828583, "learning_rate": 5.9912724454784124e-05, "loss": 0.4194, "num_tokens": 294478601.0, "step": 465 }, { "epoch": 0.055102282133144144, "grad_norm": 0.2646372318267822, "learning_rate": 5.991189173351827e-05, "loss": 0.4149, "num_tokens": 295110727.0, "step": 466 }, { "epoch": 0.055220527373773205, "grad_norm": 0.24355708062648773, "learning_rate": 5.9911055064966865e-05, "loss": 0.3884, "num_tokens": 295744614.0, "step": 467 }, { "epoch": 0.05533877261440227, "grad_norm": 0.24830622971057892, "learning_rate": 5.991021444925265e-05, "loss": 0.403, "num_tokens": 296380861.0, "step": 468 }, { "epoch": 0.055457017855031335, "grad_norm": 0.249701589345932, "learning_rate": 5.99093698864989e-05, "loss": 0.4128, "num_tokens": 297007279.0, "step": 469 }, { "epoch": 0.0555752630956604, "grad_norm": 0.24881191551685333, "learning_rate": 5.9908521376829496e-05, "loss": 0.3928, "num_tokens": 297623126.0, "step": 470 }, { "epoch": 0.055693508336289464, "grad_norm": 0.29072409868240356, "learning_rate": 5.9907668920368885e-05, "loss": 0.4525, "num_tokens": 298255789.0, "step": 471 }, { "epoch": 0.05581175357691853, "grad_norm": 0.2584925591945648, "learning_rate": 5.990681251724212e-05, "loss": 0.4277, "num_tokens": 298885491.0, "step": 472 }, { "epoch": 0.055929998817547594, "grad_norm": 0.24288643896579742, "learning_rate": 5.9905952167574804e-05, "loss": 0.3956, "num_tokens": 299521680.0, "step": 473 }, { "epoch": 0.056048244058176655, "grad_norm": 0.26308974623680115, "learning_rate": 5.9905087871493125e-05, "loss": 0.4072, "num_tokens": 300161168.0, "step": 474 }, { "epoch": 0.05616648929880572, "grad_norm": 0.23737689852714539, "learning_rate": 5.990421962912387e-05, "loss": 0.3967, "num_tokens": 300793878.0, "step": 475 }, { "epoch": 0.056284734539434784, "grad_norm": 0.2462092489004135, "learning_rate": 5.990334744059436e-05, "loss": 0.4219, "num_tokens": 301427428.0, "step": 476 }, { "epoch": 0.05640297978006385, "grad_norm": 0.26951780915260315, "learning_rate": 5.990247130603254e-05, "loss": 0.4507, "num_tokens": 302063334.0, "step": 477 }, { "epoch": 0.056521225020692914, "grad_norm": 0.2312493771314621, "learning_rate": 5.9901591225566905e-05, "loss": 0.3923, "num_tokens": 302696034.0, "step": 478 }, { "epoch": 0.05663947026132198, "grad_norm": 0.2832271158695221, "learning_rate": 5.990070719932656e-05, "loss": 0.4183, "num_tokens": 303331655.0, "step": 479 }, { "epoch": 0.056757715501951043, "grad_norm": 0.27955010533332825, "learning_rate": 5.989981922744115e-05, "loss": 0.4908, "num_tokens": 303962802.0, "step": 480 }, { "epoch": 0.05687596074258011, "grad_norm": 0.26498380303382874, "learning_rate": 5.989892731004093e-05, "loss": 0.4087, "num_tokens": 304595703.0, "step": 481 }, { "epoch": 0.05699420598320917, "grad_norm": 0.2506261169910431, "learning_rate": 5.989803144725671e-05, "loss": 0.4111, "num_tokens": 305227732.0, "step": 482 }, { "epoch": 0.05711245122383824, "grad_norm": 0.269447386264801, "learning_rate": 5.98971316392199e-05, "loss": 0.4121, "num_tokens": 305861092.0, "step": 483 }, { "epoch": 0.0572306964644673, "grad_norm": 0.27055007219314575, "learning_rate": 5.989622788606248e-05, "loss": 0.4158, "num_tokens": 306497173.0, "step": 484 }, { "epoch": 0.05734894170509637, "grad_norm": 0.2409479320049286, "learning_rate": 5.989532018791701e-05, "loss": 0.401, "num_tokens": 307133773.0, "step": 485 }, { "epoch": 0.05746718694572543, "grad_norm": 0.2865072190761566, "learning_rate": 5.98944085449166e-05, "loss": 0.3962, "num_tokens": 307760142.0, "step": 486 }, { "epoch": 0.0575854321863545, "grad_norm": 0.22869904339313507, "learning_rate": 5.989349295719499e-05, "loss": 0.4254, "num_tokens": 308392658.0, "step": 487 }, { "epoch": 0.05770367742698356, "grad_norm": 0.2581104338169098, "learning_rate": 5.9892573424886474e-05, "loss": 0.3824, "num_tokens": 309023200.0, "step": 488 }, { "epoch": 0.05782192266761263, "grad_norm": 0.2575315833091736, "learning_rate": 5.989164994812591e-05, "loss": 0.4279, "num_tokens": 309657191.0, "step": 489 }, { "epoch": 0.05794016790824169, "grad_norm": 0.23082426190376282, "learning_rate": 5.989072252704877e-05, "loss": 0.4197, "num_tokens": 310290361.0, "step": 490 }, { "epoch": 0.05805841314887076, "grad_norm": 0.26561152935028076, "learning_rate": 5.9889791161791044e-05, "loss": 0.4385, "num_tokens": 310922697.0, "step": 491 }, { "epoch": 0.05817665838949982, "grad_norm": 0.2753608226776123, "learning_rate": 5.988885585248937e-05, "loss": 0.4297, "num_tokens": 311557453.0, "step": 492 }, { "epoch": 0.05829490363012889, "grad_norm": 0.28612983226776123, "learning_rate": 5.988791659928094e-05, "loss": 0.4313, "num_tokens": 312193193.0, "step": 493 }, { "epoch": 0.05841314887075795, "grad_norm": 0.24572238326072693, "learning_rate": 5.98869734023035e-05, "loss": 0.4292, "num_tokens": 312828473.0, "step": 494 }, { "epoch": 0.05853139411138702, "grad_norm": 0.244500994682312, "learning_rate": 5.9886026261695396e-05, "loss": 0.3995, "num_tokens": 313462344.0, "step": 495 }, { "epoch": 0.05864963935201608, "grad_norm": 0.2500053644180298, "learning_rate": 5.988507517759555e-05, "loss": 0.3989, "num_tokens": 314080640.0, "step": 496 }, { "epoch": 0.05876788459264515, "grad_norm": 0.2736838459968567, "learning_rate": 5.9884120150143474e-05, "loss": 0.4124, "num_tokens": 314717785.0, "step": 497 }, { "epoch": 0.05888612983327421, "grad_norm": 0.2550274133682251, "learning_rate": 5.9883161179479224e-05, "loss": 0.4681, "num_tokens": 315351322.0, "step": 498 }, { "epoch": 0.05900437507390328, "grad_norm": 0.26602238416671753, "learning_rate": 5.988219826574348e-05, "loss": 0.4216, "num_tokens": 315987690.0, "step": 499 }, { "epoch": 0.05912262031453234, "grad_norm": 182.5827178955078, "learning_rate": 5.988123140907745e-05, "loss": 4.6781, "num_tokens": 316588692.0, "step": 500 }, { "epoch": 0.05924086555516141, "grad_norm": 0.35426440834999084, "learning_rate": 5.9880260609622975e-05, "loss": 0.4045, "num_tokens": 317208507.0, "step": 501 }, { "epoch": 0.05935911079579047, "grad_norm": 0.2600221335887909, "learning_rate": 5.987928586752243e-05, "loss": 0.4508, "num_tokens": 317844277.0, "step": 502 }, { "epoch": 0.059477356036419536, "grad_norm": 0.30101799964904785, "learning_rate": 5.987830718291879e-05, "loss": 0.4375, "num_tokens": 318478032.0, "step": 503 }, { "epoch": 0.0595956012770486, "grad_norm": 0.2639267146587372, "learning_rate": 5.98773245559556e-05, "loss": 0.4657, "num_tokens": 319114663.0, "step": 504 }, { "epoch": 0.059713846517677666, "grad_norm": 0.28301772475242615, "learning_rate": 5.987633798677699e-05, "loss": 0.3621, "num_tokens": 319744940.0, "step": 505 }, { "epoch": 0.05983209175830673, "grad_norm": 0.24197930097579956, "learning_rate": 5.987534747552766e-05, "loss": 0.409, "num_tokens": 320377220.0, "step": 506 }, { "epoch": 0.059950336998935795, "grad_norm": 0.2843497693538666, "learning_rate": 5.987435302235289e-05, "loss": 0.4122, "num_tokens": 321010441.0, "step": 507 }, { "epoch": 0.060068582239564856, "grad_norm": 0.22696319222450256, "learning_rate": 5.987335462739855e-05, "loss": 0.3931, "num_tokens": 321643644.0, "step": 508 }, { "epoch": 0.060186827480193925, "grad_norm": 0.23439037799835205, "learning_rate": 5.987235229081107e-05, "loss": 0.4045, "num_tokens": 322275355.0, "step": 509 }, { "epoch": 0.060305072720822986, "grad_norm": 0.29763585329055786, "learning_rate": 5.9871346012737474e-05, "loss": 0.4299, "num_tokens": 322908591.0, "step": 510 }, { "epoch": 0.060423317961452054, "grad_norm": 0.25528407096862793, "learning_rate": 5.9870335793325364e-05, "loss": 0.4276, "num_tokens": 323542459.0, "step": 511 }, { "epoch": 0.060541563202081115, "grad_norm": 0.251544326543808, "learning_rate": 5.986932163272289e-05, "loss": 0.4075, "num_tokens": 324176254.0, "step": 512 }, { "epoch": 0.060659808442710184, "grad_norm": 0.2560863792896271, "learning_rate": 5.986830353107883e-05, "loss": 0.3828, "num_tokens": 324803884.0, "step": 513 }, { "epoch": 0.060778053683339245, "grad_norm": 0.25024428963661194, "learning_rate": 5.986728148854249e-05, "loss": 0.4395, "num_tokens": 325442394.0, "step": 514 }, { "epoch": 0.06089629892396831, "grad_norm": 0.2645378112792969, "learning_rate": 5.986625550526379e-05, "loss": 0.4293, "num_tokens": 326081004.0, "step": 515 }, { "epoch": 0.061014544164597374, "grad_norm": 0.23543602228164673, "learning_rate": 5.986522558139322e-05, "loss": 0.4099, "num_tokens": 326711646.0, "step": 516 }, { "epoch": 0.06113278940522644, "grad_norm": 0.240159273147583, "learning_rate": 5.9864191717081834e-05, "loss": 0.4036, "num_tokens": 327346340.0, "step": 517 }, { "epoch": 0.061251034645855504, "grad_norm": 0.24217161536216736, "learning_rate": 5.986315391248128e-05, "loss": 0.3834, "num_tokens": 327982364.0, "step": 518 }, { "epoch": 0.06136927988648457, "grad_norm": 0.2656776010990143, "learning_rate": 5.9862112167743766e-05, "loss": 0.4463, "num_tokens": 328613513.0, "step": 519 }, { "epoch": 0.06148752512711363, "grad_norm": 0.2532880902290344, "learning_rate": 5.9861066483022106e-05, "loss": 0.4225, "num_tokens": 329246797.0, "step": 520 }, { "epoch": 0.0616057703677427, "grad_norm": 0.22812607884407043, "learning_rate": 5.9860016858469664e-05, "loss": 0.432, "num_tokens": 329882051.0, "step": 521 }, { "epoch": 0.06172401560837176, "grad_norm": 0.2631569802761078, "learning_rate": 5.985896329424039e-05, "loss": 0.4055, "num_tokens": 330518781.0, "step": 522 }, { "epoch": 0.06184226084900083, "grad_norm": 0.2430521696805954, "learning_rate": 5.9857905790488824e-05, "loss": 0.4093, "num_tokens": 331148822.0, "step": 523 }, { "epoch": 0.06196050608962989, "grad_norm": 0.23621663451194763, "learning_rate": 5.9856844347370075e-05, "loss": 0.4285, "num_tokens": 331779530.0, "step": 524 }, { "epoch": 0.06207875133025896, "grad_norm": 0.2397841066122055, "learning_rate": 5.985577896503982e-05, "loss": 0.3691, "num_tokens": 332409781.0, "step": 525 }, { "epoch": 0.06219699657088802, "grad_norm": 0.2821633219718933, "learning_rate": 5.985470964365433e-05, "loss": 0.4567, "num_tokens": 333047880.0, "step": 526 }, { "epoch": 0.06231524181151708, "grad_norm": 0.2530534267425537, "learning_rate": 5.985363638337046e-05, "loss": 0.4191, "num_tokens": 333683586.0, "step": 527 }, { "epoch": 0.06243348705214615, "grad_norm": 0.2527203857898712, "learning_rate": 5.98525591843456e-05, "loss": 0.4198, "num_tokens": 334315506.0, "step": 528 }, { "epoch": 0.06255173229277522, "grad_norm": 0.23863491415977478, "learning_rate": 5.985147804673777e-05, "loss": 0.3901, "num_tokens": 334951291.0, "step": 529 }, { "epoch": 0.06266997753340428, "grad_norm": 0.23777925968170166, "learning_rate": 5.985039297070554e-05, "loss": 0.3996, "num_tokens": 335585867.0, "step": 530 }, { "epoch": 0.06278822277403334, "grad_norm": 0.27206823229789734, "learning_rate": 5.984930395640805e-05, "loss": 0.3895, "num_tokens": 336220052.0, "step": 531 }, { "epoch": 0.0629064680146624, "grad_norm": 0.2532435655593872, "learning_rate": 5.984821100400505e-05, "loss": 0.4403, "num_tokens": 336856340.0, "step": 532 }, { "epoch": 0.06302471325529148, "grad_norm": 0.2718495726585388, "learning_rate": 5.984711411365685e-05, "loss": 0.3934, "num_tokens": 337491980.0, "step": 533 }, { "epoch": 0.06314295849592054, "grad_norm": 0.230064257979393, "learning_rate": 5.9846013285524315e-05, "loss": 0.3973, "num_tokens": 338130649.0, "step": 534 }, { "epoch": 0.0632612037365496, "grad_norm": 0.34002846479415894, "learning_rate": 5.9844908519768916e-05, "loss": 0.4429, "num_tokens": 338765865.0, "step": 535 }, { "epoch": 0.06337944897717866, "grad_norm": 0.23137465119361877, "learning_rate": 5.9843799816552706e-05, "loss": 0.4134, "num_tokens": 339363366.0, "step": 536 }, { "epoch": 0.06349769421780774, "grad_norm": 0.2997404634952545, "learning_rate": 5.984268717603829e-05, "loss": 0.4592, "num_tokens": 339999650.0, "step": 537 }, { "epoch": 0.0636159394584368, "grad_norm": 0.24933534860610962, "learning_rate": 5.984157059838887e-05, "loss": 0.4176, "num_tokens": 340634297.0, "step": 538 }, { "epoch": 0.06373418469906586, "grad_norm": 0.25580599904060364, "learning_rate": 5.984045008376822e-05, "loss": 0.4421, "num_tokens": 341269246.0, "step": 539 }, { "epoch": 0.06385242993969492, "grad_norm": 0.2631864845752716, "learning_rate": 5.9839325632340686e-05, "loss": 0.4291, "num_tokens": 341900107.0, "step": 540 }, { "epoch": 0.063970675180324, "grad_norm": 0.26889941096305847, "learning_rate": 5.9838197244271194e-05, "loss": 0.4333, "num_tokens": 342539152.0, "step": 541 }, { "epoch": 0.06408892042095306, "grad_norm": 0.26322346925735474, "learning_rate": 5.983706491972526e-05, "loss": 0.432, "num_tokens": 343166271.0, "step": 542 }, { "epoch": 0.06420716566158212, "grad_norm": 0.23105262219905853, "learning_rate": 5.983592865886896e-05, "loss": 0.3905, "num_tokens": 343801657.0, "step": 543 }, { "epoch": 0.06432541090221118, "grad_norm": 0.22492574155330658, "learning_rate": 5.983478846186896e-05, "loss": 0.4049, "num_tokens": 344436926.0, "step": 544 }, { "epoch": 0.06444365614284026, "grad_norm": 0.2354458123445511, "learning_rate": 5.983364432889249e-05, "loss": 0.3899, "num_tokens": 345071661.0, "step": 545 }, { "epoch": 0.06456190138346932, "grad_norm": 0.2799477279186249, "learning_rate": 5.983249626010737e-05, "loss": 0.4738, "num_tokens": 345710262.0, "step": 546 }, { "epoch": 0.06468014662409838, "grad_norm": 0.2199067324399948, "learning_rate": 5.983134425568198e-05, "loss": 0.4136, "num_tokens": 346345760.0, "step": 547 }, { "epoch": 0.06479839186472744, "grad_norm": 0.21643853187561035, "learning_rate": 5.983018831578532e-05, "loss": 0.3905, "num_tokens": 346978472.0, "step": 548 }, { "epoch": 0.06491663710535651, "grad_norm": 0.26121851801872253, "learning_rate": 5.982902844058691e-05, "loss": 0.4054, "num_tokens": 347615067.0, "step": 549 }, { "epoch": 0.06503488234598558, "grad_norm": 0.23675411939620972, "learning_rate": 5.982786463025687e-05, "loss": 0.428, "num_tokens": 348251545.0, "step": 550 }, { "epoch": 0.06515312758661464, "grad_norm": 0.23048795759677887, "learning_rate": 5.982669688496592e-05, "loss": 0.4178, "num_tokens": 348888946.0, "step": 551 }, { "epoch": 0.0652713728272437, "grad_norm": 0.2456454485654831, "learning_rate": 5.982552520488533e-05, "loss": 0.4562, "num_tokens": 349519613.0, "step": 552 }, { "epoch": 0.06538961806787277, "grad_norm": 0.23844270408153534, "learning_rate": 5.982434959018697e-05, "loss": 0.4071, "num_tokens": 350152254.0, "step": 553 }, { "epoch": 0.06550786330850183, "grad_norm": 0.22582827508449554, "learning_rate": 5.982317004104324e-05, "loss": 0.4046, "num_tokens": 350786547.0, "step": 554 }, { "epoch": 0.0656261085491309, "grad_norm": 0.23753145337104797, "learning_rate": 5.982198655762717e-05, "loss": 0.4132, "num_tokens": 351422383.0, "step": 555 }, { "epoch": 0.06574435378975996, "grad_norm": 0.25780999660491943, "learning_rate": 5.9820799140112354e-05, "loss": 0.4246, "num_tokens": 352061087.0, "step": 556 }, { "epoch": 0.06586259903038903, "grad_norm": 0.22120600938796997, "learning_rate": 5.981960778867294e-05, "loss": 0.384, "num_tokens": 352697788.0, "step": 557 }, { "epoch": 0.0659808442710181, "grad_norm": 0.2410825490951538, "learning_rate": 5.9818412503483684e-05, "loss": 0.4059, "num_tokens": 353327836.0, "step": 558 }, { "epoch": 0.06609908951164715, "grad_norm": 0.2254493087530136, "learning_rate": 5.9817213284719896e-05, "loss": 0.3967, "num_tokens": 353963351.0, "step": 559 }, { "epoch": 0.06621733475227622, "grad_norm": 0.23941707611083984, "learning_rate": 5.9816010132557456e-05, "loss": 0.4305, "num_tokens": 354576403.0, "step": 560 }, { "epoch": 0.06633557999290529, "grad_norm": 0.243499293923378, "learning_rate": 5.981480304717287e-05, "loss": 0.435, "num_tokens": 355211385.0, "step": 561 }, { "epoch": 0.06645382523353435, "grad_norm": 0.2744150757789612, "learning_rate": 5.9813592028743146e-05, "loss": 0.4363, "num_tokens": 355842242.0, "step": 562 }, { "epoch": 0.06657207047416341, "grad_norm": 0.2424369752407074, "learning_rate": 5.9812377077445944e-05, "loss": 0.387, "num_tokens": 356473678.0, "step": 563 }, { "epoch": 0.06669031571479248, "grad_norm": 0.24096077680587769, "learning_rate": 5.981115819345944e-05, "loss": 0.3776, "num_tokens": 357112492.0, "step": 564 }, { "epoch": 0.06680856095542155, "grad_norm": 0.27210044860839844, "learning_rate": 5.980993537696243e-05, "loss": 0.4036, "num_tokens": 357748442.0, "step": 565 }, { "epoch": 0.06692680619605061, "grad_norm": 0.22120971977710724, "learning_rate": 5.980870862813427e-05, "loss": 0.3691, "num_tokens": 358387012.0, "step": 566 }, { "epoch": 0.06704505143667967, "grad_norm": 0.20881879329681396, "learning_rate": 5.9807477947154874e-05, "loss": 0.3845, "num_tokens": 359025550.0, "step": 567 }, { "epoch": 0.06716329667730873, "grad_norm": 0.29638388752937317, "learning_rate": 5.9806243334204785e-05, "loss": 0.4261, "num_tokens": 359661915.0, "step": 568 }, { "epoch": 0.06728154191793781, "grad_norm": 0.21381410956382751, "learning_rate": 5.9805004789465056e-05, "loss": 0.386, "num_tokens": 360295739.0, "step": 569 }, { "epoch": 0.06739978715856687, "grad_norm": 0.24475575983524323, "learning_rate": 5.980376231311736e-05, "loss": 0.4426, "num_tokens": 360929829.0, "step": 570 }, { "epoch": 0.06751803239919593, "grad_norm": 0.2416916936635971, "learning_rate": 5.980251590534394e-05, "loss": 0.3796, "num_tokens": 361568418.0, "step": 571 }, { "epoch": 0.067636277639825, "grad_norm": 0.21783798933029175, "learning_rate": 5.980126556632763e-05, "loss": 0.3679, "num_tokens": 362197303.0, "step": 572 }, { "epoch": 0.06775452288045407, "grad_norm": 0.2724454700946808, "learning_rate": 5.9800011296251786e-05, "loss": 0.4114, "num_tokens": 362825904.0, "step": 573 }, { "epoch": 0.06787276812108313, "grad_norm": 0.2509877383708954, "learning_rate": 5.979875309530041e-05, "loss": 0.4036, "num_tokens": 363465140.0, "step": 574 }, { "epoch": 0.06799101336171219, "grad_norm": 0.23424389958381653, "learning_rate": 5.979749096365801e-05, "loss": 0.4048, "num_tokens": 364097168.0, "step": 575 }, { "epoch": 0.06810925860234125, "grad_norm": 0.22140657901763916, "learning_rate": 5.979622490150974e-05, "loss": 0.3818, "num_tokens": 364728470.0, "step": 576 }, { "epoch": 0.06822750384297033, "grad_norm": 0.2484249472618103, "learning_rate": 5.9794954909041295e-05, "loss": 0.4463, "num_tokens": 365363497.0, "step": 577 }, { "epoch": 0.06834574908359939, "grad_norm": 0.2453734576702118, "learning_rate": 5.979368098643895e-05, "loss": 0.4637, "num_tokens": 366000699.0, "step": 578 }, { "epoch": 0.06846399432422845, "grad_norm": 0.19592195749282837, "learning_rate": 5.979240313388955e-05, "loss": 0.3696, "num_tokens": 366624851.0, "step": 579 }, { "epoch": 0.06858223956485751, "grad_norm": 0.24671649932861328, "learning_rate": 5.979112135158052e-05, "loss": 0.4022, "num_tokens": 367261099.0, "step": 580 }, { "epoch": 0.06870048480548657, "grad_norm": 0.22181187570095062, "learning_rate": 5.978983563969988e-05, "loss": 0.3722, "num_tokens": 367897458.0, "step": 581 }, { "epoch": 0.06881873004611565, "grad_norm": 0.28038522601127625, "learning_rate": 5.97885459984362e-05, "loss": 0.4424, "num_tokens": 368534927.0, "step": 582 }, { "epoch": 0.06893697528674471, "grad_norm": 0.25139427185058594, "learning_rate": 5.9787252427978634e-05, "loss": 0.4322, "num_tokens": 369169358.0, "step": 583 }, { "epoch": 0.06905522052737377, "grad_norm": 0.2521320581436157, "learning_rate": 5.978595492851692e-05, "loss": 0.4225, "num_tokens": 369806601.0, "step": 584 }, { "epoch": 0.06917346576800283, "grad_norm": 0.22590598464012146, "learning_rate": 5.978465350024137e-05, "loss": 0.4089, "num_tokens": 370442728.0, "step": 585 }, { "epoch": 0.0692917110086319, "grad_norm": 0.2574167251586914, "learning_rate": 5.978334814334288e-05, "loss": 0.4214, "num_tokens": 371079275.0, "step": 586 }, { "epoch": 0.06940995624926097, "grad_norm": 0.23776789009571075, "learning_rate": 5.9782038858012895e-05, "loss": 0.4163, "num_tokens": 371716282.0, "step": 587 }, { "epoch": 0.06952820148989003, "grad_norm": 0.2423408031463623, "learning_rate": 5.978072564444346e-05, "loss": 0.4189, "num_tokens": 372347661.0, "step": 588 }, { "epoch": 0.06964644673051909, "grad_norm": 0.21097609400749207, "learning_rate": 5.977940850282718e-05, "loss": 0.4066, "num_tokens": 372977936.0, "step": 589 }, { "epoch": 0.06976469197114817, "grad_norm": 0.24239498376846313, "learning_rate": 5.977808743335726e-05, "loss": 0.4283, "num_tokens": 373613192.0, "step": 590 }, { "epoch": 0.06988293721177723, "grad_norm": 0.2559630274772644, "learning_rate": 5.977676243622747e-05, "loss": 0.4194, "num_tokens": 374251716.0, "step": 591 }, { "epoch": 0.07000118245240629, "grad_norm": 0.22951319813728333, "learning_rate": 5.9775433511632146e-05, "loss": 0.3748, "num_tokens": 374880470.0, "step": 592 }, { "epoch": 0.07011942769303535, "grad_norm": 0.22360825538635254, "learning_rate": 5.9774100659766195e-05, "loss": 0.4169, "num_tokens": 375518014.0, "step": 593 }, { "epoch": 0.07023767293366442, "grad_norm": 0.2517605721950531, "learning_rate": 5.9772763880825134e-05, "loss": 0.4188, "num_tokens": 376156899.0, "step": 594 }, { "epoch": 0.07035591817429349, "grad_norm": 0.2209700345993042, "learning_rate": 5.977142317500502e-05, "loss": 0.4015, "num_tokens": 376796416.0, "step": 595 }, { "epoch": 0.07047416341492255, "grad_norm": 0.2531750202178955, "learning_rate": 5.977007854250251e-05, "loss": 0.4008, "num_tokens": 377430299.0, "step": 596 }, { "epoch": 0.07059240865555161, "grad_norm": 0.20702487230300903, "learning_rate": 5.9768729983514826e-05, "loss": 0.3497, "num_tokens": 378036010.0, "step": 597 }, { "epoch": 0.07071065389618068, "grad_norm": 0.25798746943473816, "learning_rate": 5.976737749823975e-05, "loss": 0.4419, "num_tokens": 378662576.0, "step": 598 }, { "epoch": 0.07082889913680974, "grad_norm": 0.24296574294567108, "learning_rate": 5.976602108687568e-05, "loss": 0.3694, "num_tokens": 379292706.0, "step": 599 }, { "epoch": 0.0709471443774388, "grad_norm": 0.2633711099624634, "learning_rate": 5.976466074962156e-05, "loss": 0.4307, "num_tokens": 379923752.0, "step": 600 }, { "epoch": 0.07106538961806787, "grad_norm": 0.2388352006673813, "learning_rate": 5.97632964866769e-05, "loss": 0.4005, "num_tokens": 380556540.0, "step": 601 }, { "epoch": 0.07118363485869694, "grad_norm": 0.2621101140975952, "learning_rate": 5.976192829824182e-05, "loss": 0.3745, "num_tokens": 381191645.0, "step": 602 }, { "epoch": 0.071301880099326, "grad_norm": 0.26202258467674255, "learning_rate": 5.976055618451701e-05, "loss": 0.432, "num_tokens": 381827763.0, "step": 603 }, { "epoch": 0.07142012533995507, "grad_norm": 0.2130504846572876, "learning_rate": 5.9759180145703694e-05, "loss": 0.3972, "num_tokens": 382466825.0, "step": 604 }, { "epoch": 0.07153837058058413, "grad_norm": 0.25796785950660706, "learning_rate": 5.975780018200372e-05, "loss": 0.399, "num_tokens": 383103907.0, "step": 605 }, { "epoch": 0.0716566158212132, "grad_norm": 0.2350090742111206, "learning_rate": 5.975641629361949e-05, "loss": 0.4189, "num_tokens": 383736705.0, "step": 606 }, { "epoch": 0.07177486106184226, "grad_norm": 0.2584359049797058, "learning_rate": 5.9755028480753976e-05, "loss": 0.4265, "num_tokens": 384376310.0, "step": 607 }, { "epoch": 0.07189310630247132, "grad_norm": 0.23181407153606415, "learning_rate": 5.9753636743610755e-05, "loss": 0.3983, "num_tokens": 385007094.0, "step": 608 }, { "epoch": 0.07201135154310039, "grad_norm": 0.21590688824653625, "learning_rate": 5.9752241082393936e-05, "loss": 0.3779, "num_tokens": 385646534.0, "step": 609 }, { "epoch": 0.07212959678372946, "grad_norm": 0.21996422111988068, "learning_rate": 5.9750841497308245e-05, "loss": 0.371, "num_tokens": 386283106.0, "step": 610 }, { "epoch": 0.07224784202435852, "grad_norm": 0.22569195926189423, "learning_rate": 5.9749437988558955e-05, "loss": 0.3682, "num_tokens": 386916395.0, "step": 611 }, { "epoch": 0.07236608726498758, "grad_norm": 0.21085919439792633, "learning_rate": 5.9748030556351934e-05, "loss": 0.4181, "num_tokens": 387553614.0, "step": 612 }, { "epoch": 0.07248433250561664, "grad_norm": 0.22579286992549896, "learning_rate": 5.97466192008936e-05, "loss": 0.3826, "num_tokens": 388189908.0, "step": 613 }, { "epoch": 0.07260257774624572, "grad_norm": 0.2409718781709671, "learning_rate": 5.974520392239098e-05, "loss": 0.4241, "num_tokens": 388827274.0, "step": 614 }, { "epoch": 0.07272082298687478, "grad_norm": 0.19454914331436157, "learning_rate": 5.974378472105165e-05, "loss": 0.3907, "num_tokens": 389460786.0, "step": 615 }, { "epoch": 0.07283906822750384, "grad_norm": 0.2482302486896515, "learning_rate": 5.974236159708377e-05, "loss": 0.4163, "num_tokens": 390066154.0, "step": 616 }, { "epoch": 0.0729573134681329, "grad_norm": 0.22031879425048828, "learning_rate": 5.974093455069608e-05, "loss": 0.3824, "num_tokens": 390700370.0, "step": 617 }, { "epoch": 0.07307555870876198, "grad_norm": 0.24333076179027557, "learning_rate": 5.97395035820979e-05, "loss": 0.3878, "num_tokens": 391333815.0, "step": 618 }, { "epoch": 0.07319380394939104, "grad_norm": 0.23806525766849518, "learning_rate": 5.9738068691499086e-05, "loss": 0.4055, "num_tokens": 391972641.0, "step": 619 }, { "epoch": 0.0733120491900201, "grad_norm": 0.26481977105140686, "learning_rate": 5.973662987911013e-05, "loss": 0.4293, "num_tokens": 392607122.0, "step": 620 }, { "epoch": 0.07343029443064916, "grad_norm": 0.18870246410369873, "learning_rate": 5.9735187145142065e-05, "loss": 0.3782, "num_tokens": 393234821.0, "step": 621 }, { "epoch": 0.07354853967127824, "grad_norm": 0.2833087146282196, "learning_rate": 5.973374048980649e-05, "loss": 0.3813, "num_tokens": 393867338.0, "step": 622 }, { "epoch": 0.0736667849119073, "grad_norm": 0.22519761323928833, "learning_rate": 5.973228991331561e-05, "loss": 0.3936, "num_tokens": 394505968.0, "step": 623 }, { "epoch": 0.07378503015253636, "grad_norm": 0.24126821756362915, "learning_rate": 5.9730835415882155e-05, "loss": 0.4397, "num_tokens": 395143798.0, "step": 624 }, { "epoch": 0.07390327539316542, "grad_norm": 0.23300473392009735, "learning_rate": 5.972937699771949e-05, "loss": 0.3851, "num_tokens": 395781916.0, "step": 625 }, { "epoch": 0.0740215206337945, "grad_norm": 0.22893673181533813, "learning_rate": 5.972791465904154e-05, "loss": 0.4296, "num_tokens": 396419440.0, "step": 626 }, { "epoch": 0.07413976587442356, "grad_norm": 0.21381692588329315, "learning_rate": 5.972644840006275e-05, "loss": 0.3707, "num_tokens": 397053137.0, "step": 627 }, { "epoch": 0.07425801111505262, "grad_norm": 0.2146274596452713, "learning_rate": 5.972497822099822e-05, "loss": 0.4105, "num_tokens": 397685003.0, "step": 628 }, { "epoch": 0.07437625635568168, "grad_norm": 0.22609341144561768, "learning_rate": 5.9723504122063576e-05, "loss": 0.4235, "num_tokens": 398323686.0, "step": 629 }, { "epoch": 0.07449450159631076, "grad_norm": 0.23529234528541565, "learning_rate": 5.972202610347503e-05, "loss": 0.4221, "num_tokens": 398958685.0, "step": 630 }, { "epoch": 0.07461274683693982, "grad_norm": 0.22462113201618195, "learning_rate": 5.9720544165449366e-05, "loss": 0.4128, "num_tokens": 399595823.0, "step": 631 }, { "epoch": 0.07473099207756888, "grad_norm": 0.2564484179019928, "learning_rate": 5.971905830820395e-05, "loss": 0.4037, "num_tokens": 400234363.0, "step": 632 }, { "epoch": 0.07484923731819794, "grad_norm": 0.2249382883310318, "learning_rate": 5.971756853195672e-05, "loss": 0.4188, "num_tokens": 400860312.0, "step": 633 }, { "epoch": 0.074967482558827, "grad_norm": 0.22319148480892181, "learning_rate": 5.971607483692618e-05, "loss": 0.411, "num_tokens": 401493314.0, "step": 634 }, { "epoch": 0.07508572779945608, "grad_norm": 0.2561909854412079, "learning_rate": 5.971457722333143e-05, "loss": 0.4235, "num_tokens": 402131410.0, "step": 635 }, { "epoch": 0.07520397304008514, "grad_norm": 0.20809005200862885, "learning_rate": 5.971307569139212e-05, "loss": 0.4353, "num_tokens": 402767694.0, "step": 636 }, { "epoch": 0.0753222182807142, "grad_norm": 0.20245227217674255, "learning_rate": 5.971157024132849e-05, "loss": 0.421, "num_tokens": 403401170.0, "step": 637 }, { "epoch": 0.07544046352134326, "grad_norm": 0.22636578977108002, "learning_rate": 5.9710060873361354e-05, "loss": 0.4079, "num_tokens": 404033223.0, "step": 638 }, { "epoch": 0.07555870876197233, "grad_norm": 0.21431688964366913, "learning_rate": 5.97085475877121e-05, "loss": 0.4, "num_tokens": 404669142.0, "step": 639 }, { "epoch": 0.0756769540026014, "grad_norm": 0.2226439118385315, "learning_rate": 5.970703038460267e-05, "loss": 0.4159, "num_tokens": 405305698.0, "step": 640 }, { "epoch": 0.07579519924323046, "grad_norm": 0.22632507979869843, "learning_rate": 5.970550926425563e-05, "loss": 0.4416, "num_tokens": 405935366.0, "step": 641 }, { "epoch": 0.07591344448385952, "grad_norm": 0.2210267037153244, "learning_rate": 5.970398422689406e-05, "loss": 0.4125, "num_tokens": 406563939.0, "step": 642 }, { "epoch": 0.0760316897244886, "grad_norm": 0.20809195935726166, "learning_rate": 5.970245527274166e-05, "loss": 0.3969, "num_tokens": 407201876.0, "step": 643 }, { "epoch": 0.07614993496511765, "grad_norm": 0.2390618920326233, "learning_rate": 5.970092240202269e-05, "loss": 0.3904, "num_tokens": 407834876.0, "step": 644 }, { "epoch": 0.07626818020574672, "grad_norm": 0.21365554630756378, "learning_rate": 5.969938561496197e-05, "loss": 0.3918, "num_tokens": 408474126.0, "step": 645 }, { "epoch": 0.07638642544637578, "grad_norm": 0.20896582305431366, "learning_rate": 5.969784491178492e-05, "loss": 0.3527, "num_tokens": 409110752.0, "step": 646 }, { "epoch": 0.07650467068700485, "grad_norm": 0.26439669728279114, "learning_rate": 5.969630029271751e-05, "loss": 0.4171, "num_tokens": 409744230.0, "step": 647 }, { "epoch": 0.07662291592763391, "grad_norm": 0.23103934526443481, "learning_rate": 5.96947517579863e-05, "loss": 0.4061, "num_tokens": 410377730.0, "step": 648 }, { "epoch": 0.07674116116826298, "grad_norm": 0.24195146560668945, "learning_rate": 5.969319930781844e-05, "loss": 0.3851, "num_tokens": 411014041.0, "step": 649 }, { "epoch": 0.07685940640889204, "grad_norm": 0.2641776502132416, "learning_rate": 5.969164294244161e-05, "loss": 0.3957, "num_tokens": 411653738.0, "step": 650 }, { "epoch": 0.07697765164952111, "grad_norm": 0.18694916367530823, "learning_rate": 5.969008266208409e-05, "loss": 0.3636, "num_tokens": 412288459.0, "step": 651 }, { "epoch": 0.07709589689015017, "grad_norm": 0.2271735966205597, "learning_rate": 5.9688518466974745e-05, "loss": 0.4039, "num_tokens": 412924573.0, "step": 652 }, { "epoch": 0.07721414213077923, "grad_norm": 0.20750701427459717, "learning_rate": 5.968695035734299e-05, "loss": 0.3585, "num_tokens": 413557882.0, "step": 653 }, { "epoch": 0.0773323873714083, "grad_norm": 0.21297581493854523, "learning_rate": 5.9685378333418835e-05, "loss": 0.3602, "num_tokens": 414190765.0, "step": 654 }, { "epoch": 0.07745063261203737, "grad_norm": 0.19417662918567657, "learning_rate": 5.9683802395432864e-05, "loss": 0.3683, "num_tokens": 414824481.0, "step": 655 }, { "epoch": 0.07756887785266643, "grad_norm": 0.23400019109249115, "learning_rate": 5.9682222543616214e-05, "loss": 0.3927, "num_tokens": 415462470.0, "step": 656 }, { "epoch": 0.0776871230932955, "grad_norm": 0.22486793994903564, "learning_rate": 5.968063877820061e-05, "loss": 0.4229, "num_tokens": 416098823.0, "step": 657 }, { "epoch": 0.07780536833392455, "grad_norm": 0.21691784262657166, "learning_rate": 5.9679051099418346e-05, "loss": 0.402, "num_tokens": 416727909.0, "step": 658 }, { "epoch": 0.07792361357455363, "grad_norm": 0.2643430829048157, "learning_rate": 5.9677459507502305e-05, "loss": 0.4285, "num_tokens": 417362676.0, "step": 659 }, { "epoch": 0.07804185881518269, "grad_norm": 0.21169278025627136, "learning_rate": 5.967586400268593e-05, "loss": 0.3811, "num_tokens": 417996258.0, "step": 660 }, { "epoch": 0.07816010405581175, "grad_norm": 0.23603154718875885, "learning_rate": 5.967426458520324e-05, "loss": 0.4241, "num_tokens": 418626034.0, "step": 661 }, { "epoch": 0.07827834929644081, "grad_norm": 0.23596768081188202, "learning_rate": 5.967266125528881e-05, "loss": 0.4484, "num_tokens": 419263225.0, "step": 662 }, { "epoch": 0.07839659453706989, "grad_norm": 0.21102780103683472, "learning_rate": 5.967105401317784e-05, "loss": 0.4084, "num_tokens": 419892898.0, "step": 663 }, { "epoch": 0.07851483977769895, "grad_norm": 0.20284058153629303, "learning_rate": 5.966944285910606e-05, "loss": 0.395, "num_tokens": 420527528.0, "step": 664 }, { "epoch": 0.07863308501832801, "grad_norm": 0.22077961266040802, "learning_rate": 5.9667827793309766e-05, "loss": 0.3992, "num_tokens": 421156121.0, "step": 665 }, { "epoch": 0.07875133025895707, "grad_norm": 0.20429295301437378, "learning_rate": 5.9666208816025866e-05, "loss": 0.3473, "num_tokens": 421790061.0, "step": 666 }, { "epoch": 0.07886957549958615, "grad_norm": 0.22327901422977448, "learning_rate": 5.966458592749182e-05, "loss": 0.4075, "num_tokens": 422426991.0, "step": 667 }, { "epoch": 0.07898782074021521, "grad_norm": 0.20008808374404907, "learning_rate": 5.966295912794566e-05, "loss": 0.3636, "num_tokens": 423063834.0, "step": 668 }, { "epoch": 0.07910606598084427, "grad_norm": 0.22449634969234467, "learning_rate": 5.9661328417626e-05, "loss": 0.3986, "num_tokens": 423703537.0, "step": 669 }, { "epoch": 0.07922431122147333, "grad_norm": 0.24579022824764252, "learning_rate": 5.965969379677202e-05, "loss": 0.4163, "num_tokens": 424330180.0, "step": 670 }, { "epoch": 0.0793425564621024, "grad_norm": 0.22332175076007843, "learning_rate": 5.9658055265623475e-05, "loss": 0.4307, "num_tokens": 424966976.0, "step": 671 }, { "epoch": 0.07946080170273147, "grad_norm": 0.2505580484867096, "learning_rate": 5.9656412824420705e-05, "loss": 0.4019, "num_tokens": 425604191.0, "step": 672 }, { "epoch": 0.07957904694336053, "grad_norm": 0.20224231481552124, "learning_rate": 5.9654766473404604e-05, "loss": 0.3508, "num_tokens": 426240218.0, "step": 673 }, { "epoch": 0.07969729218398959, "grad_norm": 0.26638996601104736, "learning_rate": 5.9653116212816666e-05, "loss": 0.4153, "num_tokens": 426878525.0, "step": 674 }, { "epoch": 0.07981553742461867, "grad_norm": 0.19225278496742249, "learning_rate": 5.965146204289891e-05, "loss": 0.3839, "num_tokens": 427512790.0, "step": 675 }, { "epoch": 0.07993378266524773, "grad_norm": 0.25565943121910095, "learning_rate": 5.9649803963893984e-05, "loss": 0.4281, "num_tokens": 428150456.0, "step": 676 }, { "epoch": 0.08005202790587679, "grad_norm": 0.2132185995578766, "learning_rate": 5.9648141976045086e-05, "loss": 0.3624, "num_tokens": 428783253.0, "step": 677 }, { "epoch": 0.08017027314650585, "grad_norm": 0.23190529644489288, "learning_rate": 5.9646476079595984e-05, "loss": 0.4036, "num_tokens": 429419007.0, "step": 678 }, { "epoch": 0.08028851838713492, "grad_norm": 0.20084795355796814, "learning_rate": 5.9644806274791025e-05, "loss": 0.3941, "num_tokens": 430051255.0, "step": 679 }, { "epoch": 0.08040676362776399, "grad_norm": 0.2357756346464157, "learning_rate": 5.964313256187512e-05, "loss": 0.4399, "num_tokens": 430683994.0, "step": 680 }, { "epoch": 0.08052500886839305, "grad_norm": 0.19553275406360626, "learning_rate": 5.9641454941093756e-05, "loss": 0.3705, "num_tokens": 431314161.0, "step": 681 }, { "epoch": 0.08064325410902211, "grad_norm": 0.22715002298355103, "learning_rate": 5.963977341269301e-05, "loss": 0.395, "num_tokens": 431943550.0, "step": 682 }, { "epoch": 0.08076149934965117, "grad_norm": 0.20378069579601288, "learning_rate": 5.9638087976919505e-05, "loss": 0.3853, "num_tokens": 432580022.0, "step": 683 }, { "epoch": 0.08087974459028024, "grad_norm": 0.22423827648162842, "learning_rate": 5.963639863402047e-05, "loss": 0.4335, "num_tokens": 433215860.0, "step": 684 }, { "epoch": 0.0809979898309093, "grad_norm": 0.2042044848203659, "learning_rate": 5.963470538424366e-05, "loss": 0.387, "num_tokens": 433849766.0, "step": 685 }, { "epoch": 0.08111623507153837, "grad_norm": 0.18783143162727356, "learning_rate": 5.963300822783747e-05, "loss": 0.3661, "num_tokens": 434484743.0, "step": 686 }, { "epoch": 0.08123448031216743, "grad_norm": 0.224294975399971, "learning_rate": 5.963130716505079e-05, "loss": 0.3773, "num_tokens": 435100372.0, "step": 687 }, { "epoch": 0.0813527255527965, "grad_norm": 0.21837015450000763, "learning_rate": 5.962960219613316e-05, "loss": 0.4446, "num_tokens": 435733891.0, "step": 688 }, { "epoch": 0.08147097079342557, "grad_norm": 0.23237627744674683, "learning_rate": 5.962789332133462e-05, "loss": 0.3818, "num_tokens": 436365690.0, "step": 689 }, { "epoch": 0.08158921603405463, "grad_norm": 0.21637146174907684, "learning_rate": 5.9626180540905835e-05, "loss": 0.369, "num_tokens": 437003639.0, "step": 690 }, { "epoch": 0.08170746127468369, "grad_norm": 0.22261546552181244, "learning_rate": 5.9624463855098026e-05, "loss": 0.3997, "num_tokens": 437642920.0, "step": 691 }, { "epoch": 0.08182570651531276, "grad_norm": 0.227298304438591, "learning_rate": 5.962274326416299e-05, "loss": 0.4015, "num_tokens": 438277063.0, "step": 692 }, { "epoch": 0.08194395175594182, "grad_norm": 0.21183635294437408, "learning_rate": 5.9621018768353087e-05, "loss": 0.4108, "num_tokens": 438915083.0, "step": 693 }, { "epoch": 0.08206219699657089, "grad_norm": 0.22464387118816376, "learning_rate": 5.9619290367921256e-05, "loss": 0.423, "num_tokens": 439554233.0, "step": 694 }, { "epoch": 0.08218044223719995, "grad_norm": 0.19152754545211792, "learning_rate": 5.9617558063121026e-05, "loss": 0.376, "num_tokens": 440187716.0, "step": 695 }, { "epoch": 0.08229868747782902, "grad_norm": 0.21420860290527344, "learning_rate": 5.961582185420645e-05, "loss": 0.4034, "num_tokens": 440823462.0, "step": 696 }, { "epoch": 0.08241693271845808, "grad_norm": 0.19221828877925873, "learning_rate": 5.961408174143221e-05, "loss": 0.3925, "num_tokens": 441461261.0, "step": 697 }, { "epoch": 0.08253517795908714, "grad_norm": 0.20715348422527313, "learning_rate": 5.9612337725053535e-05, "loss": 0.4165, "num_tokens": 442092406.0, "step": 698 }, { "epoch": 0.0826534231997162, "grad_norm": 0.2044675350189209, "learning_rate": 5.961058980532621e-05, "loss": 0.4209, "num_tokens": 442729228.0, "step": 699 }, { "epoch": 0.08277166844034528, "grad_norm": 0.19767135381698608, "learning_rate": 5.960883798250663e-05, "loss": 0.4343, "num_tokens": 443368453.0, "step": 700 }, { "epoch": 0.08288991368097434, "grad_norm": 0.19732125103473663, "learning_rate": 5.9607082256851735e-05, "loss": 0.3781, "num_tokens": 444004993.0, "step": 701 }, { "epoch": 0.0830081589216034, "grad_norm": 0.21824349462985992, "learning_rate": 5.960532262861904e-05, "loss": 0.4474, "num_tokens": 444641198.0, "step": 702 }, { "epoch": 0.08312640416223246, "grad_norm": 0.1879548281431198, "learning_rate": 5.960355909806665e-05, "loss": 0.4014, "num_tokens": 445272456.0, "step": 703 }, { "epoch": 0.08324464940286154, "grad_norm": 0.25764986872673035, "learning_rate": 5.9601791665453205e-05, "loss": 0.4467, "num_tokens": 445909612.0, "step": 704 }, { "epoch": 0.0833628946434906, "grad_norm": 0.23542919754981995, "learning_rate": 5.960002033103797e-05, "loss": 0.4653, "num_tokens": 446544323.0, "step": 705 }, { "epoch": 0.08348113988411966, "grad_norm": 0.23595286905765533, "learning_rate": 5.959824509508074e-05, "loss": 0.4137, "num_tokens": 447177697.0, "step": 706 }, { "epoch": 0.08359938512474872, "grad_norm": 0.22727781534194946, "learning_rate": 5.95964659578419e-05, "loss": 0.4227, "num_tokens": 447813578.0, "step": 707 }, { "epoch": 0.0837176303653778, "grad_norm": 0.20374025404453278, "learning_rate": 5.959468291958241e-05, "loss": 0.3862, "num_tokens": 448448247.0, "step": 708 }, { "epoch": 0.08383587560600686, "grad_norm": 0.21747368574142456, "learning_rate": 5.9592895980563784e-05, "loss": 0.4077, "num_tokens": 449081260.0, "step": 709 }, { "epoch": 0.08395412084663592, "grad_norm": 0.21284298598766327, "learning_rate": 5.959110514104813e-05, "loss": 0.3696, "num_tokens": 449718960.0, "step": 710 }, { "epoch": 0.08407236608726498, "grad_norm": 0.21694108843803406, "learning_rate": 5.9589310401298104e-05, "loss": 0.3825, "num_tokens": 450354221.0, "step": 711 }, { "epoch": 0.08419061132789406, "grad_norm": 0.2283201515674591, "learning_rate": 5.958751176157696e-05, "loss": 0.4335, "num_tokens": 450990864.0, "step": 712 }, { "epoch": 0.08430885656852312, "grad_norm": 0.21608145534992218, "learning_rate": 5.958570922214852e-05, "loss": 0.365, "num_tokens": 451629963.0, "step": 713 }, { "epoch": 0.08442710180915218, "grad_norm": 0.23234720528125763, "learning_rate": 5.958390278327715e-05, "loss": 0.3681, "num_tokens": 452263375.0, "step": 714 }, { "epoch": 0.08454534704978124, "grad_norm": 0.26225054264068604, "learning_rate": 5.958209244522784e-05, "loss": 0.4451, "num_tokens": 452899788.0, "step": 715 }, { "epoch": 0.08466359229041032, "grad_norm": 0.21448248624801636, "learning_rate": 5.9580278208266085e-05, "loss": 0.3698, "num_tokens": 453532800.0, "step": 716 }, { "epoch": 0.08478183753103938, "grad_norm": 0.21870549023151398, "learning_rate": 5.9578460072658e-05, "loss": 0.4199, "num_tokens": 454167051.0, "step": 717 }, { "epoch": 0.08490008277166844, "grad_norm": 0.2482336312532425, "learning_rate": 5.9576638038670265e-05, "loss": 0.4147, "num_tokens": 454801891.0, "step": 718 }, { "epoch": 0.0850183280122975, "grad_norm": 0.20604267716407776, "learning_rate": 5.9574812106570125e-05, "loss": 0.3978, "num_tokens": 455434940.0, "step": 719 }, { "epoch": 0.08513657325292658, "grad_norm": 0.2363281548023224, "learning_rate": 5.9572982276625386e-05, "loss": 0.4342, "num_tokens": 456070522.0, "step": 720 }, { "epoch": 0.08525481849355564, "grad_norm": 0.22392651438713074, "learning_rate": 5.957114854910445e-05, "loss": 0.4304, "num_tokens": 456700607.0, "step": 721 }, { "epoch": 0.0853730637341847, "grad_norm": 0.19227036833763123, "learning_rate": 5.9569310924276273e-05, "loss": 0.3907, "num_tokens": 457333591.0, "step": 722 }, { "epoch": 0.08549130897481376, "grad_norm": 0.2363775074481964, "learning_rate": 5.956746940241039e-05, "loss": 0.4115, "num_tokens": 457972538.0, "step": 723 }, { "epoch": 0.08560955421544283, "grad_norm": 0.20116376876831055, "learning_rate": 5.9565623983776897e-05, "loss": 0.3989, "num_tokens": 458608333.0, "step": 724 }, { "epoch": 0.0857277994560719, "grad_norm": 0.22240105271339417, "learning_rate": 5.956377466864649e-05, "loss": 0.4019, "num_tokens": 459238593.0, "step": 725 }, { "epoch": 0.08584604469670096, "grad_norm": 0.20206324756145477, "learning_rate": 5.956192145729039e-05, "loss": 0.384, "num_tokens": 459848240.0, "step": 726 }, { "epoch": 0.08596428993733002, "grad_norm": 0.2597886621952057, "learning_rate": 5.956006434998043e-05, "loss": 0.4325, "num_tokens": 460483298.0, "step": 727 }, { "epoch": 0.0860825351779591, "grad_norm": 0.21016588807106018, "learning_rate": 5.955820334698899e-05, "loss": 0.3941, "num_tokens": 461109500.0, "step": 728 }, { "epoch": 0.08620078041858815, "grad_norm": 0.226288303732872, "learning_rate": 5.9556338448589044e-05, "loss": 0.3961, "num_tokens": 461746594.0, "step": 729 }, { "epoch": 0.08631902565921722, "grad_norm": 0.20841780304908752, "learning_rate": 5.955446965505413e-05, "loss": 0.3998, "num_tokens": 462380799.0, "step": 730 }, { "epoch": 0.08643727089984628, "grad_norm": 0.2084282636642456, "learning_rate": 5.955259696665833e-05, "loss": 0.4041, "num_tokens": 463016862.0, "step": 731 }, { "epoch": 0.08655551614047535, "grad_norm": 0.22025752067565918, "learning_rate": 5.9550720383676333e-05, "loss": 0.3738, "num_tokens": 463650306.0, "step": 732 }, { "epoch": 0.08667376138110441, "grad_norm": 0.20598146319389343, "learning_rate": 5.954883990638339e-05, "loss": 0.3943, "num_tokens": 464282371.0, "step": 733 }, { "epoch": 0.08679200662173348, "grad_norm": 0.232939213514328, "learning_rate": 5.954695553505531e-05, "loss": 0.3856, "num_tokens": 464918493.0, "step": 734 }, { "epoch": 0.08691025186236254, "grad_norm": 0.22439885139465332, "learning_rate": 5.954506726996848e-05, "loss": 0.3927, "num_tokens": 465551682.0, "step": 735 }, { "epoch": 0.0870284971029916, "grad_norm": 0.19752070307731628, "learning_rate": 5.954317511139986e-05, "loss": 0.3917, "num_tokens": 466189560.0, "step": 736 }, { "epoch": 0.08714674234362067, "grad_norm": 0.2472476065158844, "learning_rate": 5.954127905962701e-05, "loss": 0.3937, "num_tokens": 466826010.0, "step": 737 }, { "epoch": 0.08726498758424973, "grad_norm": 0.18340525031089783, "learning_rate": 5.953937911492799e-05, "loss": 0.362, "num_tokens": 467461700.0, "step": 738 }, { "epoch": 0.0873832328248788, "grad_norm": 0.20885103940963745, "learning_rate": 5.9537475277581494e-05, "loss": 0.3504, "num_tokens": 468097256.0, "step": 739 }, { "epoch": 0.08750147806550786, "grad_norm": 0.23874777555465698, "learning_rate": 5.953556754786677e-05, "loss": 0.4248, "num_tokens": 468731535.0, "step": 740 }, { "epoch": 0.08761972330613693, "grad_norm": 0.20849305391311646, "learning_rate": 5.953365592606362e-05, "loss": 0.3721, "num_tokens": 469363357.0, "step": 741 }, { "epoch": 0.087737968546766, "grad_norm": 0.20586763322353363, "learning_rate": 5.953174041245245e-05, "loss": 0.3951, "num_tokens": 469996228.0, "step": 742 }, { "epoch": 0.08785621378739505, "grad_norm": 0.2180345505475998, "learning_rate": 5.952982100731419e-05, "loss": 0.4243, "num_tokens": 470628575.0, "step": 743 }, { "epoch": 0.08797445902802412, "grad_norm": 0.20929546654224396, "learning_rate": 5.952789771093039e-05, "loss": 0.4054, "num_tokens": 471265485.0, "step": 744 }, { "epoch": 0.08809270426865319, "grad_norm": 0.20646362006664276, "learning_rate": 5.952597052358314e-05, "loss": 0.3947, "num_tokens": 471898145.0, "step": 745 }, { "epoch": 0.08821094950928225, "grad_norm": 0.20441462099552155, "learning_rate": 5.952403944555509e-05, "loss": 0.3917, "num_tokens": 472533209.0, "step": 746 }, { "epoch": 0.08832919474991131, "grad_norm": 0.19860997796058655, "learning_rate": 5.952210447712952e-05, "loss": 0.4011, "num_tokens": 473171790.0, "step": 747 }, { "epoch": 0.08844743999054037, "grad_norm": 0.22276534140110016, "learning_rate": 5.952016561859022e-05, "loss": 0.3672, "num_tokens": 473806600.0, "step": 748 }, { "epoch": 0.08856568523116945, "grad_norm": 0.21372990310192108, "learning_rate": 5.951822287022157e-05, "loss": 0.4014, "num_tokens": 474446221.0, "step": 749 }, { "epoch": 0.08868393047179851, "grad_norm": 0.1953679621219635, "learning_rate": 5.951627623230851e-05, "loss": 0.3698, "num_tokens": 475080946.0, "step": 750 }, { "epoch": 0.08880217571242757, "grad_norm": 0.2028491050004959, "learning_rate": 5.951432570513657e-05, "loss": 0.3407, "num_tokens": 475715589.0, "step": 751 }, { "epoch": 0.08892042095305663, "grad_norm": 0.22133274376392365, "learning_rate": 5.951237128899187e-05, "loss": 0.405, "num_tokens": 476350099.0, "step": 752 }, { "epoch": 0.08903866619368571, "grad_norm": 0.20396260917186737, "learning_rate": 5.951041298416103e-05, "loss": 0.4188, "num_tokens": 476986608.0, "step": 753 }, { "epoch": 0.08915691143431477, "grad_norm": 0.19532376527786255, "learning_rate": 5.950845079093129e-05, "loss": 0.4001, "num_tokens": 477626286.0, "step": 754 }, { "epoch": 0.08927515667494383, "grad_norm": 0.20720648765563965, "learning_rate": 5.9506484709590494e-05, "loss": 0.4017, "num_tokens": 478262327.0, "step": 755 }, { "epoch": 0.08939340191557289, "grad_norm": 0.20395033061504364, "learning_rate": 5.9504514740426964e-05, "loss": 0.3769, "num_tokens": 478899878.0, "step": 756 }, { "epoch": 0.08951164715620197, "grad_norm": 0.20014165341854095, "learning_rate": 5.950254088372968e-05, "loss": 0.3921, "num_tokens": 479538362.0, "step": 757 }, { "epoch": 0.08962989239683103, "grad_norm": 0.21296697854995728, "learning_rate": 5.950056313978814e-05, "loss": 0.3793, "num_tokens": 480177217.0, "step": 758 }, { "epoch": 0.08974813763746009, "grad_norm": 0.22354593873023987, "learning_rate": 5.949858150889243e-05, "loss": 0.403, "num_tokens": 480809891.0, "step": 759 }, { "epoch": 0.08986638287808915, "grad_norm": 0.19848179817199707, "learning_rate": 5.949659599133321e-05, "loss": 0.3935, "num_tokens": 481446104.0, "step": 760 }, { "epoch": 0.08998462811871823, "grad_norm": 0.20830827951431274, "learning_rate": 5.94946065874017e-05, "loss": 0.3744, "num_tokens": 482084578.0, "step": 761 }, { "epoch": 0.09010287335934729, "grad_norm": 0.20338177680969238, "learning_rate": 5.94926132973897e-05, "loss": 0.3851, "num_tokens": 482713767.0, "step": 762 }, { "epoch": 0.09022111859997635, "grad_norm": 0.2425883710384369, "learning_rate": 5.949061612158956e-05, "loss": 0.4547, "num_tokens": 483353280.0, "step": 763 }, { "epoch": 0.09033936384060541, "grad_norm": 0.177634596824646, "learning_rate": 5.9488615060294224e-05, "loss": 0.3807, "num_tokens": 483985107.0, "step": 764 }, { "epoch": 0.09045760908123449, "grad_norm": 0.2314428687095642, "learning_rate": 5.9486610113797204e-05, "loss": 0.4037, "num_tokens": 484615672.0, "step": 765 }, { "epoch": 0.09057585432186355, "grad_norm": 0.2066926509141922, "learning_rate": 5.9484601282392564e-05, "loss": 0.4002, "num_tokens": 485253394.0, "step": 766 }, { "epoch": 0.09069409956249261, "grad_norm": 0.19318552315235138, "learning_rate": 5.9482588566374956e-05, "loss": 0.4055, "num_tokens": 485886884.0, "step": 767 }, { "epoch": 0.09081234480312167, "grad_norm": 0.21655116975307465, "learning_rate": 5.948057196603958e-05, "loss": 0.3949, "num_tokens": 486516495.0, "step": 768 }, { "epoch": 0.09093059004375074, "grad_norm": 0.22732804715633392, "learning_rate": 5.947855148168224e-05, "loss": 0.4424, "num_tokens": 487151670.0, "step": 769 }, { "epoch": 0.0910488352843798, "grad_norm": 0.24614840745925903, "learning_rate": 5.9476527113599274e-05, "loss": 0.3997, "num_tokens": 487786678.0, "step": 770 }, { "epoch": 0.09116708052500887, "grad_norm": 0.22694902122020721, "learning_rate": 5.947449886208762e-05, "loss": 0.4159, "num_tokens": 488423430.0, "step": 771 }, { "epoch": 0.09128532576563793, "grad_norm": 0.21561278402805328, "learning_rate": 5.947246672744474e-05, "loss": 0.3873, "num_tokens": 489055057.0, "step": 772 }, { "epoch": 0.091403571006267, "grad_norm": 0.19266895949840546, "learning_rate": 5.9470430709968726e-05, "loss": 0.3785, "num_tokens": 489690318.0, "step": 773 }, { "epoch": 0.09152181624689606, "grad_norm": 0.21929582953453064, "learning_rate": 5.94683908099582e-05, "loss": 0.4299, "num_tokens": 490315503.0, "step": 774 }, { "epoch": 0.09164006148752513, "grad_norm": 0.2022363394498825, "learning_rate": 5.946634702771236e-05, "loss": 0.3507, "num_tokens": 490951658.0, "step": 775 }, { "epoch": 0.09175830672815419, "grad_norm": 0.18368880450725555, "learning_rate": 5.9464299363530986e-05, "loss": 0.3546, "num_tokens": 491582936.0, "step": 776 }, { "epoch": 0.09187655196878326, "grad_norm": 0.24958765506744385, "learning_rate": 5.946224781771441e-05, "loss": 0.4229, "num_tokens": 492221808.0, "step": 777 }, { "epoch": 0.09199479720941232, "grad_norm": 0.2304445505142212, "learning_rate": 5.946019239056354e-05, "loss": 0.4023, "num_tokens": 492851928.0, "step": 778 }, { "epoch": 0.09211304245004139, "grad_norm": 0.18877649307250977, "learning_rate": 5.945813308237986e-05, "loss": 0.3555, "num_tokens": 493484521.0, "step": 779 }, { "epoch": 0.09223128769067045, "grad_norm": 0.2220849245786667, "learning_rate": 5.945606989346541e-05, "loss": 0.4324, "num_tokens": 494120278.0, "step": 780 }, { "epoch": 0.09234953293129952, "grad_norm": 0.22457221150398254, "learning_rate": 5.945400282412281e-05, "loss": 0.4273, "num_tokens": 494748375.0, "step": 781 }, { "epoch": 0.09246777817192858, "grad_norm": 0.2456682324409485, "learning_rate": 5.945193187465526e-05, "loss": 0.4164, "num_tokens": 495383913.0, "step": 782 }, { "epoch": 0.09258602341255764, "grad_norm": 0.1961415410041809, "learning_rate": 5.944985704536649e-05, "loss": 0.3836, "num_tokens": 496008813.0, "step": 783 }, { "epoch": 0.0927042686531867, "grad_norm": 0.2240913361310959, "learning_rate": 5.944777833656085e-05, "loss": 0.4124, "num_tokens": 496639648.0, "step": 784 }, { "epoch": 0.09282251389381578, "grad_norm": 0.20168708264827728, "learning_rate": 5.9445695748543214e-05, "loss": 0.3579, "num_tokens": 497274171.0, "step": 785 }, { "epoch": 0.09294075913444484, "grad_norm": 0.1972842663526535, "learning_rate": 5.944360928161904e-05, "loss": 0.3868, "num_tokens": 497913138.0, "step": 786 }, { "epoch": 0.0930590043750739, "grad_norm": 0.20366361737251282, "learning_rate": 5.9441518936094395e-05, "loss": 0.4232, "num_tokens": 498551639.0, "step": 787 }, { "epoch": 0.09317724961570296, "grad_norm": 0.21147920191287994, "learning_rate": 5.943942471227584e-05, "loss": 0.4136, "num_tokens": 499182979.0, "step": 788 }, { "epoch": 0.09329549485633203, "grad_norm": 0.19454365968704224, "learning_rate": 5.9437326610470555e-05, "loss": 0.3909, "num_tokens": 499817574.0, "step": 789 }, { "epoch": 0.0934137400969611, "grad_norm": 0.22112563252449036, "learning_rate": 5.9435224630986286e-05, "loss": 0.439, "num_tokens": 500450987.0, "step": 790 }, { "epoch": 0.09353198533759016, "grad_norm": 0.2163226455450058, "learning_rate": 5.9433118774131343e-05, "loss": 0.4268, "num_tokens": 501086255.0, "step": 791 }, { "epoch": 0.09365023057821922, "grad_norm": 0.2187754511833191, "learning_rate": 5.943100904021459e-05, "loss": 0.3912, "num_tokens": 501724060.0, "step": 792 }, { "epoch": 0.09376847581884828, "grad_norm": 0.21357394754886627, "learning_rate": 5.9428895429545474e-05, "loss": 0.3863, "num_tokens": 502336426.0, "step": 793 }, { "epoch": 0.09388672105947736, "grad_norm": 0.22484149038791656, "learning_rate": 5.9426777942434006e-05, "loss": 0.4193, "num_tokens": 502970977.0, "step": 794 }, { "epoch": 0.09400496630010642, "grad_norm": 0.18038183450698853, "learning_rate": 5.942465657919077e-05, "loss": 0.3476, "num_tokens": 503604914.0, "step": 795 }, { "epoch": 0.09412321154073548, "grad_norm": 0.21893523633480072, "learning_rate": 5.942253134012691e-05, "loss": 0.3834, "num_tokens": 504241729.0, "step": 796 }, { "epoch": 0.09424145678136454, "grad_norm": 0.21444416046142578, "learning_rate": 5.942040222555416e-05, "loss": 0.3842, "num_tokens": 504874311.0, "step": 797 }, { "epoch": 0.09435970202199362, "grad_norm": 0.2258690893650055, "learning_rate": 5.941826923578479e-05, "loss": 0.4061, "num_tokens": 505507864.0, "step": 798 }, { "epoch": 0.09447794726262268, "grad_norm": 0.19788385927677155, "learning_rate": 5.9416132371131656e-05, "loss": 0.4086, "num_tokens": 506141369.0, "step": 799 }, { "epoch": 0.09459619250325174, "grad_norm": 0.2037307620048523, "learning_rate": 5.9413991631908194e-05, "loss": 0.385, "num_tokens": 506777001.0, "step": 800 }, { "epoch": 0.0947144377438808, "grad_norm": 0.18437469005584717, "learning_rate": 5.9411847018428375e-05, "loss": 0.3451, "num_tokens": 507406711.0, "step": 801 }, { "epoch": 0.09483268298450988, "grad_norm": 0.1965869963169098, "learning_rate": 5.940969853100678e-05, "loss": 0.4006, "num_tokens": 508042267.0, "step": 802 }, { "epoch": 0.09495092822513894, "grad_norm": 0.20581954717636108, "learning_rate": 5.940754616995853e-05, "loss": 0.3705, "num_tokens": 508675609.0, "step": 803 }, { "epoch": 0.095069173465768, "grad_norm": 0.21519795060157776, "learning_rate": 5.940538993559932e-05, "loss": 0.3731, "num_tokens": 509312065.0, "step": 804 }, { "epoch": 0.09518741870639706, "grad_norm": 0.18444538116455078, "learning_rate": 5.940322982824542e-05, "loss": 0.3836, "num_tokens": 509946717.0, "step": 805 }, { "epoch": 0.09530566394702614, "grad_norm": 0.2025160938501358, "learning_rate": 5.940106584821365e-05, "loss": 0.3866, "num_tokens": 510583518.0, "step": 806 }, { "epoch": 0.0954239091876552, "grad_norm": 0.2339797168970108, "learning_rate": 5.939889799582141e-05, "loss": 0.416, "num_tokens": 511221107.0, "step": 807 }, { "epoch": 0.09554215442828426, "grad_norm": 0.20244067907333374, "learning_rate": 5.9396726271386684e-05, "loss": 0.4014, "num_tokens": 511860697.0, "step": 808 }, { "epoch": 0.09566039966891332, "grad_norm": 0.213352769613266, "learning_rate": 5.9394550675228e-05, "loss": 0.3901, "num_tokens": 512498128.0, "step": 809 }, { "epoch": 0.0957786449095424, "grad_norm": 0.22154740989208221, "learning_rate": 5.9392371207664464e-05, "loss": 0.4268, "num_tokens": 513128610.0, "step": 810 }, { "epoch": 0.09589689015017146, "grad_norm": 0.21336127817630768, "learning_rate": 5.939018786901574e-05, "loss": 0.3531, "num_tokens": 513765105.0, "step": 811 }, { "epoch": 0.09601513539080052, "grad_norm": 0.2185400277376175, "learning_rate": 5.9388000659602074e-05, "loss": 0.4146, "num_tokens": 514394034.0, "step": 812 }, { "epoch": 0.09613338063142958, "grad_norm": 0.18028222024440765, "learning_rate": 5.938580957974428e-05, "loss": 0.3909, "num_tokens": 515033384.0, "step": 813 }, { "epoch": 0.09625162587205865, "grad_norm": 0.19437555968761444, "learning_rate": 5.938361462976373e-05, "loss": 0.3745, "num_tokens": 515667789.0, "step": 814 }, { "epoch": 0.09636987111268772, "grad_norm": 0.1947416216135025, "learning_rate": 5.938141580998236e-05, "loss": 0.3779, "num_tokens": 516301878.0, "step": 815 }, { "epoch": 0.09648811635331678, "grad_norm": 0.17571400105953217, "learning_rate": 5.937921312072268e-05, "loss": 0.3683, "num_tokens": 516936923.0, "step": 816 }, { "epoch": 0.09660636159394584, "grad_norm": 0.20403383672237396, "learning_rate": 5.9377006562307776e-05, "loss": 0.4171, "num_tokens": 517574893.0, "step": 817 }, { "epoch": 0.09672460683457491, "grad_norm": 0.18126535415649414, "learning_rate": 5.9374796135061296e-05, "loss": 0.3845, "num_tokens": 518213624.0, "step": 818 }, { "epoch": 0.09684285207520398, "grad_norm": 0.19547349214553833, "learning_rate": 5.9372581839307434e-05, "loss": 0.3689, "num_tokens": 518852531.0, "step": 819 }, { "epoch": 0.09696109731583304, "grad_norm": 0.19579048454761505, "learning_rate": 5.9370363675371e-05, "loss": 0.4054, "num_tokens": 519492204.0, "step": 820 }, { "epoch": 0.0970793425564621, "grad_norm": 0.18471238017082214, "learning_rate": 5.936814164357732e-05, "loss": 0.4124, "num_tokens": 520130296.0, "step": 821 }, { "epoch": 0.09719758779709117, "grad_norm": 0.20744086802005768, "learning_rate": 5.9365915744252304e-05, "loss": 0.3786, "num_tokens": 520765986.0, "step": 822 }, { "epoch": 0.09731583303772023, "grad_norm": 0.20836985111236572, "learning_rate": 5.9363685977722455e-05, "loss": 0.4384, "num_tokens": 521400644.0, "step": 823 }, { "epoch": 0.0974340782783493, "grad_norm": 0.21213865280151367, "learning_rate": 5.936145234431482e-05, "loss": 0.3799, "num_tokens": 522039463.0, "step": 824 }, { "epoch": 0.09755232351897836, "grad_norm": 0.2076306790113449, "learning_rate": 5.9359214844357005e-05, "loss": 0.3829, "num_tokens": 522671508.0, "step": 825 }, { "epoch": 0.09767056875960743, "grad_norm": 0.18418815732002258, "learning_rate": 5.93569734781772e-05, "loss": 0.3749, "num_tokens": 523311225.0, "step": 826 }, { "epoch": 0.0977888140002365, "grad_norm": 0.19653356075286865, "learning_rate": 5.935472824610414e-05, "loss": 0.3576, "num_tokens": 523949133.0, "step": 827 }, { "epoch": 0.09790705924086555, "grad_norm": 0.19827792048454285, "learning_rate": 5.935247914846717e-05, "loss": 0.3953, "num_tokens": 524577955.0, "step": 828 }, { "epoch": 0.09802530448149462, "grad_norm": 0.184154212474823, "learning_rate": 5.935022618559616e-05, "loss": 0.3943, "num_tokens": 525211514.0, "step": 829 }, { "epoch": 0.09814354972212369, "grad_norm": 0.18998359143733978, "learning_rate": 5.9347969357821564e-05, "loss": 0.4186, "num_tokens": 525848086.0, "step": 830 }, { "epoch": 0.09826179496275275, "grad_norm": 0.20624585449695587, "learning_rate": 5.9345708665474395e-05, "loss": 0.379, "num_tokens": 526481722.0, "step": 831 }, { "epoch": 0.09838004020338181, "grad_norm": 0.21540367603302002, "learning_rate": 5.9343444108886253e-05, "loss": 0.3971, "num_tokens": 527117956.0, "step": 832 }, { "epoch": 0.09849828544401087, "grad_norm": 0.20378103852272034, "learning_rate": 5.9341175688389286e-05, "loss": 0.4198, "num_tokens": 527750234.0, "step": 833 }, { "epoch": 0.09861653068463995, "grad_norm": 0.21263852715492249, "learning_rate": 5.93389034043162e-05, "loss": 0.3916, "num_tokens": 528386561.0, "step": 834 }, { "epoch": 0.09873477592526901, "grad_norm": 0.2068236619234085, "learning_rate": 5.933662725700029e-05, "loss": 0.4277, "num_tokens": 529021750.0, "step": 835 }, { "epoch": 0.09885302116589807, "grad_norm": 0.18366239964962006, "learning_rate": 5.933434724677541e-05, "loss": 0.3694, "num_tokens": 529661276.0, "step": 836 }, { "epoch": 0.09897126640652713, "grad_norm": 0.20166461169719696, "learning_rate": 5.933206337397598e-05, "loss": 0.4092, "num_tokens": 530297609.0, "step": 837 }, { "epoch": 0.09908951164715621, "grad_norm": 0.18944892287254333, "learning_rate": 5.932977563893699e-05, "loss": 0.3818, "num_tokens": 530933315.0, "step": 838 }, { "epoch": 0.09920775688778527, "grad_norm": 0.18379046022891998, "learning_rate": 5.9327484041993976e-05, "loss": 0.3963, "num_tokens": 531564788.0, "step": 839 }, { "epoch": 0.09932600212841433, "grad_norm": 0.2127266377210617, "learning_rate": 5.932518858348307e-05, "loss": 0.4011, "num_tokens": 532193430.0, "step": 840 }, { "epoch": 0.09944424736904339, "grad_norm": 0.21608415246009827, "learning_rate": 5.932288926374095e-05, "loss": 0.4081, "num_tokens": 532828601.0, "step": 841 }, { "epoch": 0.09956249260967245, "grad_norm": 0.20951804518699646, "learning_rate": 5.932058608310488e-05, "loss": 0.3648, "num_tokens": 533466099.0, "step": 842 }, { "epoch": 0.09968073785030153, "grad_norm": 0.19429035484790802, "learning_rate": 5.931827904191266e-05, "loss": 0.4017, "num_tokens": 534102611.0, "step": 843 }, { "epoch": 0.09979898309093059, "grad_norm": 0.20188310742378235, "learning_rate": 5.9315968140502686e-05, "loss": 0.389, "num_tokens": 534731488.0, "step": 844 }, { "epoch": 0.09991722833155965, "grad_norm": 0.22233061492443085, "learning_rate": 5.9313653379213905e-05, "loss": 0.3933, "num_tokens": 535370116.0, "step": 845 }, { "epoch": 0.10003547357218871, "grad_norm": 0.1926896721124649, "learning_rate": 5.931133475838582e-05, "loss": 0.3976, "num_tokens": 536006561.0, "step": 846 }, { "epoch": 0.10015371881281779, "grad_norm": 0.20466208457946777, "learning_rate": 5.9309012278358544e-05, "loss": 0.4124, "num_tokens": 536639992.0, "step": 847 }, { "epoch": 0.10027196405344685, "grad_norm": 0.20461921393871307, "learning_rate": 5.93066859394727e-05, "loss": 0.3791, "num_tokens": 537253609.0, "step": 848 }, { "epoch": 0.10039020929407591, "grad_norm": 0.19282756745815277, "learning_rate": 5.930435574206951e-05, "loss": 0.3892, "num_tokens": 537890122.0, "step": 849 }, { "epoch": 0.10050845453470497, "grad_norm": 0.18683715164661407, "learning_rate": 5.930202168649075e-05, "loss": 0.4342, "num_tokens": 538526166.0, "step": 850 }, { "epoch": 0.10062669977533405, "grad_norm": 0.20167586207389832, "learning_rate": 5.929968377307877e-05, "loss": 0.4097, "num_tokens": 539164864.0, "step": 851 }, { "epoch": 0.10074494501596311, "grad_norm": 0.19572073221206665, "learning_rate": 5.929734200217648e-05, "loss": 0.3772, "num_tokens": 539802408.0, "step": 852 }, { "epoch": 0.10086319025659217, "grad_norm": 0.20114095509052277, "learning_rate": 5.929499637412737e-05, "loss": 0.4015, "num_tokens": 540432320.0, "step": 853 }, { "epoch": 0.10098143549722123, "grad_norm": 0.20984825491905212, "learning_rate": 5.929264688927547e-05, "loss": 0.4058, "num_tokens": 541067861.0, "step": 854 }, { "epoch": 0.1010996807378503, "grad_norm": 0.19220128655433655, "learning_rate": 5.92902935479654e-05, "loss": 0.4156, "num_tokens": 541704638.0, "step": 855 }, { "epoch": 0.10121792597847937, "grad_norm": 0.1991347223520279, "learning_rate": 5.928793635054231e-05, "loss": 0.3934, "num_tokens": 542339733.0, "step": 856 }, { "epoch": 0.10133617121910843, "grad_norm": 0.20600467920303345, "learning_rate": 5.928557529735197e-05, "loss": 0.3792, "num_tokens": 542975991.0, "step": 857 }, { "epoch": 0.10145441645973749, "grad_norm": 0.19091619551181793, "learning_rate": 5.928321038874068e-05, "loss": 0.3763, "num_tokens": 543610157.0, "step": 858 }, { "epoch": 0.10157266170036656, "grad_norm": 0.18597568571567535, "learning_rate": 5.9280841625055294e-05, "loss": 0.3936, "num_tokens": 544242202.0, "step": 859 }, { "epoch": 0.10169090694099563, "grad_norm": 0.1994190514087677, "learning_rate": 5.9278469006643266e-05, "loss": 0.4089, "num_tokens": 544876956.0, "step": 860 }, { "epoch": 0.10180915218162469, "grad_norm": 0.22919932007789612, "learning_rate": 5.927609253385259e-05, "loss": 0.387, "num_tokens": 545512522.0, "step": 861 }, { "epoch": 0.10192739742225375, "grad_norm": 0.22826938331127167, "learning_rate": 5.927371220703184e-05, "loss": 0.4203, "num_tokens": 546149492.0, "step": 862 }, { "epoch": 0.10204564266288282, "grad_norm": 0.18368317186832428, "learning_rate": 5.927132802653014e-05, "loss": 0.3718, "num_tokens": 546785991.0, "step": 863 }, { "epoch": 0.10216388790351189, "grad_norm": 0.20906513929367065, "learning_rate": 5.92689399926972e-05, "loss": 0.4113, "num_tokens": 547421213.0, "step": 864 }, { "epoch": 0.10228213314414095, "grad_norm": 0.22728331387043, "learning_rate": 5.9266548105883276e-05, "loss": 0.4149, "num_tokens": 548057540.0, "step": 865 }, { "epoch": 0.10240037838477001, "grad_norm": 0.2050984650850296, "learning_rate": 5.92641523664392e-05, "loss": 0.4282, "num_tokens": 548694755.0, "step": 866 }, { "epoch": 0.10251862362539908, "grad_norm": 0.2409392148256302, "learning_rate": 5.926175277471636e-05, "loss": 0.4601, "num_tokens": 549330967.0, "step": 867 }, { "epoch": 0.10263686886602814, "grad_norm": 0.20705898106098175, "learning_rate": 5.925934933106672e-05, "loss": 0.3884, "num_tokens": 549965342.0, "step": 868 }, { "epoch": 0.1027551141066572, "grad_norm": 0.18907377123832703, "learning_rate": 5.925694203584281e-05, "loss": 0.3751, "num_tokens": 550595153.0, "step": 869 }, { "epoch": 0.10287335934728627, "grad_norm": 0.23540030419826508, "learning_rate": 5.92545308893977e-05, "loss": 0.4009, "num_tokens": 551228741.0, "step": 870 }, { "epoch": 0.10299160458791534, "grad_norm": 0.22005510330200195, "learning_rate": 5.925211589208506e-05, "loss": 0.4114, "num_tokens": 551838042.0, "step": 871 }, { "epoch": 0.1031098498285444, "grad_norm": 0.20690566301345825, "learning_rate": 5.92496970442591e-05, "loss": 0.3894, "num_tokens": 552467738.0, "step": 872 }, { "epoch": 0.10322809506917346, "grad_norm": 0.2133379429578781, "learning_rate": 5.924727434627461e-05, "loss": 0.3969, "num_tokens": 553103599.0, "step": 873 }, { "epoch": 0.10334634030980253, "grad_norm": 0.20975719392299652, "learning_rate": 5.924484779848693e-05, "loss": 0.3894, "num_tokens": 553736538.0, "step": 874 }, { "epoch": 0.1034645855504316, "grad_norm": 0.2121574431657791, "learning_rate": 5.924241740125197e-05, "loss": 0.3784, "num_tokens": 554369713.0, "step": 875 }, { "epoch": 0.10358283079106066, "grad_norm": 0.20777979493141174, "learning_rate": 5.923998315492622e-05, "loss": 0.3857, "num_tokens": 555000107.0, "step": 876 }, { "epoch": 0.10370107603168972, "grad_norm": 0.20001624524593353, "learning_rate": 5.9237545059866716e-05, "loss": 0.3744, "num_tokens": 555632229.0, "step": 877 }, { "epoch": 0.10381932127231878, "grad_norm": 0.2205020636320114, "learning_rate": 5.923510311643106e-05, "loss": 0.3948, "num_tokens": 556267726.0, "step": 878 }, { "epoch": 0.10393756651294786, "grad_norm": 0.21397434175014496, "learning_rate": 5.9232657324977426e-05, "loss": 0.3578, "num_tokens": 556878328.0, "step": 879 }, { "epoch": 0.10405581175357692, "grad_norm": 0.18168963491916656, "learning_rate": 5.923020768586454e-05, "loss": 0.3806, "num_tokens": 557513764.0, "step": 880 }, { "epoch": 0.10417405699420598, "grad_norm": 0.2187216877937317, "learning_rate": 5.922775419945172e-05, "loss": 0.3781, "num_tokens": 558143004.0, "step": 881 }, { "epoch": 0.10429230223483504, "grad_norm": 0.19809655845165253, "learning_rate": 5.922529686609882e-05, "loss": 0.3768, "num_tokens": 558778105.0, "step": 882 }, { "epoch": 0.10441054747546412, "grad_norm": 0.22245179116725922, "learning_rate": 5.922283568616625e-05, "loss": 0.3946, "num_tokens": 559416233.0, "step": 883 }, { "epoch": 0.10452879271609318, "grad_norm": 0.19087687134742737, "learning_rate": 5.9220370660015036e-05, "loss": 0.3925, "num_tokens": 560051126.0, "step": 884 }, { "epoch": 0.10464703795672224, "grad_norm": 0.23630313575267792, "learning_rate": 5.9217901788006715e-05, "loss": 0.4052, "num_tokens": 560687449.0, "step": 885 }, { "epoch": 0.1047652831973513, "grad_norm": 0.20360274612903595, "learning_rate": 5.9215429070503406e-05, "loss": 0.379, "num_tokens": 561317929.0, "step": 886 }, { "epoch": 0.10488352843798038, "grad_norm": 0.20254698395729065, "learning_rate": 5.92129525078678e-05, "loss": 0.4044, "num_tokens": 561956472.0, "step": 887 }, { "epoch": 0.10500177367860944, "grad_norm": 0.20740734040737152, "learning_rate": 5.921047210046314e-05, "loss": 0.4007, "num_tokens": 562593057.0, "step": 888 }, { "epoch": 0.1051200189192385, "grad_norm": 0.2019333690404892, "learning_rate": 5.9207987848653244e-05, "loss": 0.4153, "num_tokens": 563227832.0, "step": 889 }, { "epoch": 0.10523826415986756, "grad_norm": 0.16545109450817108, "learning_rate": 5.9205499752802475e-05, "loss": 0.3792, "num_tokens": 563864035.0, "step": 890 }, { "epoch": 0.10535650940049664, "grad_norm": 0.23113656044006348, "learning_rate": 5.920300781327579e-05, "loss": 0.4394, "num_tokens": 564478244.0, "step": 891 }, { "epoch": 0.1054747546411257, "grad_norm": 0.20446434617042542, "learning_rate": 5.920051203043869e-05, "loss": 0.3514, "num_tokens": 565113319.0, "step": 892 }, { "epoch": 0.10559299988175476, "grad_norm": 0.21588028967380524, "learning_rate": 5.919801240465723e-05, "loss": 0.4044, "num_tokens": 565746893.0, "step": 893 }, { "epoch": 0.10571124512238382, "grad_norm": 0.18379488587379456, "learning_rate": 5.919550893629805e-05, "loss": 0.3939, "num_tokens": 566356947.0, "step": 894 }, { "epoch": 0.10582949036301288, "grad_norm": 0.23202674090862274, "learning_rate": 5.919300162572835e-05, "loss": 0.4387, "num_tokens": 566994122.0, "step": 895 }, { "epoch": 0.10594773560364196, "grad_norm": 0.24331165850162506, "learning_rate": 5.919049047331588e-05, "loss": 0.4299, "num_tokens": 567631151.0, "step": 896 }, { "epoch": 0.10606598084427102, "grad_norm": 0.2084115445613861, "learning_rate": 5.918797547942895e-05, "loss": 0.3936, "num_tokens": 568268077.0, "step": 897 }, { "epoch": 0.10618422608490008, "grad_norm": 0.2248111069202423, "learning_rate": 5.9185456644436474e-05, "loss": 0.4056, "num_tokens": 568906566.0, "step": 898 }, { "epoch": 0.10630247132552914, "grad_norm": 0.20781925320625305, "learning_rate": 5.9182933968707884e-05, "loss": 0.3956, "num_tokens": 569542062.0, "step": 899 }, { "epoch": 0.10642071656615822, "grad_norm": 0.19531913101673126, "learning_rate": 5.9180407452613186e-05, "loss": 0.3917, "num_tokens": 570177721.0, "step": 900 }, { "epoch": 0.10653896180678728, "grad_norm": 0.22657686471939087, "learning_rate": 5.917787709652297e-05, "loss": 0.4542, "num_tokens": 570816714.0, "step": 901 }, { "epoch": 0.10665720704741634, "grad_norm": 0.1770208477973938, "learning_rate": 5.917534290080837e-05, "loss": 0.3801, "num_tokens": 571449734.0, "step": 902 }, { "epoch": 0.1067754522880454, "grad_norm": 0.2136555016040802, "learning_rate": 5.917280486584108e-05, "loss": 0.4117, "num_tokens": 572088174.0, "step": 903 }, { "epoch": 0.10689369752867448, "grad_norm": 0.1817179024219513, "learning_rate": 5.917026299199337e-05, "loss": 0.3784, "num_tokens": 572726629.0, "step": 904 }, { "epoch": 0.10701194276930354, "grad_norm": 0.1832682341337204, "learning_rate": 5.916771727963807e-05, "loss": 0.3769, "num_tokens": 573356851.0, "step": 905 }, { "epoch": 0.1071301880099326, "grad_norm": 0.1859453171491623, "learning_rate": 5.916516772914856e-05, "loss": 0.3774, "num_tokens": 573993246.0, "step": 906 }, { "epoch": 0.10724843325056166, "grad_norm": 0.21618540585041046, "learning_rate": 5.9162614340898804e-05, "loss": 0.4093, "num_tokens": 574632107.0, "step": 907 }, { "epoch": 0.10736667849119073, "grad_norm": 0.18722978234291077, "learning_rate": 5.916005711526333e-05, "loss": 0.4053, "num_tokens": 575267606.0, "step": 908 }, { "epoch": 0.1074849237318198, "grad_norm": 0.17559999227523804, "learning_rate": 5.915749605261719e-05, "loss": 0.3894, "num_tokens": 575898213.0, "step": 909 }, { "epoch": 0.10760316897244886, "grad_norm": 0.22479428350925446, "learning_rate": 5.915493115333604e-05, "loss": 0.4412, "num_tokens": 576535415.0, "step": 910 }, { "epoch": 0.10772141421307792, "grad_norm": 0.20265212655067444, "learning_rate": 5.915236241779609e-05, "loss": 0.4005, "num_tokens": 577170916.0, "step": 911 }, { "epoch": 0.10783965945370699, "grad_norm": 0.2082071453332901, "learning_rate": 5.914978984637411e-05, "loss": 0.3973, "num_tokens": 577801174.0, "step": 912 }, { "epoch": 0.10795790469433605, "grad_norm": 0.21270740032196045, "learning_rate": 5.914721343944741e-05, "loss": 0.4299, "num_tokens": 578437720.0, "step": 913 }, { "epoch": 0.10807614993496512, "grad_norm": 0.20403820276260376, "learning_rate": 5.9144633197393906e-05, "loss": 0.3905, "num_tokens": 579073461.0, "step": 914 }, { "epoch": 0.10819439517559418, "grad_norm": 0.20049042999744415, "learning_rate": 5.9142049120592045e-05, "loss": 0.4052, "num_tokens": 579712385.0, "step": 915 }, { "epoch": 0.10831264041622325, "grad_norm": 0.20228621363639832, "learning_rate": 5.9139461209420844e-05, "loss": 0.3874, "num_tokens": 580346374.0, "step": 916 }, { "epoch": 0.10843088565685231, "grad_norm": 0.20957380533218384, "learning_rate": 5.913686946425988e-05, "loss": 0.3938, "num_tokens": 580976923.0, "step": 917 }, { "epoch": 0.10854913089748137, "grad_norm": 0.20983189344406128, "learning_rate": 5.9134273885489293e-05, "loss": 0.3738, "num_tokens": 581611122.0, "step": 918 }, { "epoch": 0.10866737613811044, "grad_norm": 0.18045011162757874, "learning_rate": 5.91316744734898e-05, "loss": 0.3994, "num_tokens": 582241753.0, "step": 919 }, { "epoch": 0.10878562137873951, "grad_norm": 0.2465183287858963, "learning_rate": 5.912907122864266e-05, "loss": 0.4093, "num_tokens": 582880022.0, "step": 920 }, { "epoch": 0.10890386661936857, "grad_norm": 0.1796322762966156, "learning_rate": 5.9126464151329706e-05, "loss": 0.3688, "num_tokens": 583516737.0, "step": 921 }, { "epoch": 0.10902211185999763, "grad_norm": 0.1997017115354538, "learning_rate": 5.912385324193334e-05, "loss": 0.3627, "num_tokens": 584155270.0, "step": 922 }, { "epoch": 0.1091403571006267, "grad_norm": 0.2108139544725418, "learning_rate": 5.912123850083648e-05, "loss": 0.3967, "num_tokens": 584788413.0, "step": 923 }, { "epoch": 0.10925860234125577, "grad_norm": 0.21540546417236328, "learning_rate": 5.911861992842269e-05, "loss": 0.4142, "num_tokens": 585424357.0, "step": 924 }, { "epoch": 0.10937684758188483, "grad_norm": 0.21150155365467072, "learning_rate": 5.911599752507601e-05, "loss": 0.3981, "num_tokens": 586058909.0, "step": 925 }, { "epoch": 0.10949509282251389, "grad_norm": 0.1995604932308197, "learning_rate": 5.9113371291181084e-05, "loss": 0.3973, "num_tokens": 586688711.0, "step": 926 }, { "epoch": 0.10961333806314295, "grad_norm": 0.20506781339645386, "learning_rate": 5.911074122712314e-05, "loss": 0.3313, "num_tokens": 587318129.0, "step": 927 }, { "epoch": 0.10973158330377203, "grad_norm": 0.2037949115037918, "learning_rate": 5.9108107333287905e-05, "loss": 0.3711, "num_tokens": 587954063.0, "step": 928 }, { "epoch": 0.10984982854440109, "grad_norm": 0.2114054411649704, "learning_rate": 5.910546961006174e-05, "loss": 0.4064, "num_tokens": 588586978.0, "step": 929 }, { "epoch": 0.10996807378503015, "grad_norm": 0.19443312287330627, "learning_rate": 5.910282805783149e-05, "loss": 0.3874, "num_tokens": 589223333.0, "step": 930 }, { "epoch": 0.11008631902565921, "grad_norm": 0.2032671570777893, "learning_rate": 5.910018267698464e-05, "loss": 0.3615, "num_tokens": 589857753.0, "step": 931 }, { "epoch": 0.11020456426628829, "grad_norm": 0.19129875302314758, "learning_rate": 5.9097533467909184e-05, "loss": 0.3776, "num_tokens": 590492668.0, "step": 932 }, { "epoch": 0.11032280950691735, "grad_norm": 0.1976146101951599, "learning_rate": 5.9094880430993695e-05, "loss": 0.3873, "num_tokens": 591130240.0, "step": 933 }, { "epoch": 0.11044105474754641, "grad_norm": 0.2143929898738861, "learning_rate": 5.9092223566627304e-05, "loss": 0.4338, "num_tokens": 591744210.0, "step": 934 }, { "epoch": 0.11055929998817547, "grad_norm": 0.17198820412158966, "learning_rate": 5.908956287519971e-05, "loss": 0.3571, "num_tokens": 592379832.0, "step": 935 }, { "epoch": 0.11067754522880455, "grad_norm": 0.18812568485736847, "learning_rate": 5.9086898357101155e-05, "loss": 0.3883, "num_tokens": 593014264.0, "step": 936 }, { "epoch": 0.11079579046943361, "grad_norm": 0.23020492494106293, "learning_rate": 5.908423001272247e-05, "loss": 0.4166, "num_tokens": 593647737.0, "step": 937 }, { "epoch": 0.11091403571006267, "grad_norm": 0.206504687666893, "learning_rate": 5.9081557842455035e-05, "loss": 0.4274, "num_tokens": 594284342.0, "step": 938 }, { "epoch": 0.11103228095069173, "grad_norm": 0.18730279803276062, "learning_rate": 5.9078881846690776e-05, "loss": 0.3984, "num_tokens": 594917472.0, "step": 939 }, { "epoch": 0.1111505261913208, "grad_norm": 0.19778351485729218, "learning_rate": 5.90762020258222e-05, "loss": 0.3935, "num_tokens": 595555577.0, "step": 940 }, { "epoch": 0.11126877143194987, "grad_norm": 0.1937170773744583, "learning_rate": 5.907351838024236e-05, "loss": 0.415, "num_tokens": 596194919.0, "step": 941 }, { "epoch": 0.11138701667257893, "grad_norm": 0.20279517769813538, "learning_rate": 5.9070830910344885e-05, "loss": 0.4033, "num_tokens": 596828877.0, "step": 942 }, { "epoch": 0.11150526191320799, "grad_norm": 0.21621660888195038, "learning_rate": 5.9068139616523974e-05, "loss": 0.4439, "num_tokens": 597463751.0, "step": 943 }, { "epoch": 0.11162350715383706, "grad_norm": 0.1936977654695511, "learning_rate": 5.9065444499174336e-05, "loss": 0.379, "num_tokens": 598101711.0, "step": 944 }, { "epoch": 0.11174175239446613, "grad_norm": 0.20167700946331024, "learning_rate": 5.9062745558691307e-05, "loss": 0.3693, "num_tokens": 598735765.0, "step": 945 }, { "epoch": 0.11185999763509519, "grad_norm": 0.19760532677173615, "learning_rate": 5.906004279547074e-05, "loss": 0.4132, "num_tokens": 599372011.0, "step": 946 }, { "epoch": 0.11197824287572425, "grad_norm": 0.20617800951004028, "learning_rate": 5.905733620990906e-05, "loss": 0.374, "num_tokens": 600001896.0, "step": 947 }, { "epoch": 0.11209648811635331, "grad_norm": 0.1828487366437912, "learning_rate": 5.905462580240325e-05, "loss": 0.3682, "num_tokens": 600632801.0, "step": 948 }, { "epoch": 0.11221473335698239, "grad_norm": 0.19734539091587067, "learning_rate": 5.905191157335087e-05, "loss": 0.3601, "num_tokens": 601266569.0, "step": 949 }, { "epoch": 0.11233297859761145, "grad_norm": 0.1946217268705368, "learning_rate": 5.9049193523150016e-05, "loss": 0.3939, "num_tokens": 601904732.0, "step": 950 }, { "epoch": 0.11245122383824051, "grad_norm": 0.16676105558872223, "learning_rate": 5.9046471652199356e-05, "loss": 0.368, "num_tokens": 602535124.0, "step": 951 }, { "epoch": 0.11256946907886957, "grad_norm": 0.2097654640674591, "learning_rate": 5.904374596089814e-05, "loss": 0.4002, "num_tokens": 603172120.0, "step": 952 }, { "epoch": 0.11268771431949864, "grad_norm": 0.20065370202064514, "learning_rate": 5.904101644964613e-05, "loss": 0.415, "num_tokens": 603810653.0, "step": 953 }, { "epoch": 0.1128059595601277, "grad_norm": 0.18786388635635376, "learning_rate": 5.903828311884368e-05, "loss": 0.3544, "num_tokens": 604445259.0, "step": 954 }, { "epoch": 0.11292420480075677, "grad_norm": 0.20331786572933197, "learning_rate": 5.903554596889172e-05, "loss": 0.3666, "num_tokens": 605079841.0, "step": 955 }, { "epoch": 0.11304245004138583, "grad_norm": 0.21914105117321014, "learning_rate": 5.9032805000191696e-05, "loss": 0.4166, "num_tokens": 605716847.0, "step": 956 }, { "epoch": 0.1131606952820149, "grad_norm": 0.19006988406181335, "learning_rate": 5.9030060213145656e-05, "loss": 0.402, "num_tokens": 606349039.0, "step": 957 }, { "epoch": 0.11327894052264396, "grad_norm": 0.19913968443870544, "learning_rate": 5.902731160815617e-05, "loss": 0.3948, "num_tokens": 606979557.0, "step": 958 }, { "epoch": 0.11339718576327303, "grad_norm": 0.1824326068162918, "learning_rate": 5.9024559185626415e-05, "loss": 0.3377, "num_tokens": 607606199.0, "step": 959 }, { "epoch": 0.11351543100390209, "grad_norm": 0.20349013805389404, "learning_rate": 5.902180294596007e-05, "loss": 0.3769, "num_tokens": 608215251.0, "step": 960 }, { "epoch": 0.11363367624453116, "grad_norm": 0.20139801502227783, "learning_rate": 5.901904288956143e-05, "loss": 0.4112, "num_tokens": 608851164.0, "step": 961 }, { "epoch": 0.11375192148516022, "grad_norm": 0.18247152864933014, "learning_rate": 5.901627901683531e-05, "loss": 0.3915, "num_tokens": 609484264.0, "step": 962 }, { "epoch": 0.11387016672578928, "grad_norm": 0.19457964599132538, "learning_rate": 5.90135113281871e-05, "loss": 0.3765, "num_tokens": 610095969.0, "step": 963 }, { "epoch": 0.11398841196641835, "grad_norm": 0.19099676609039307, "learning_rate": 5.901073982402276e-05, "loss": 0.3674, "num_tokens": 610726014.0, "step": 964 }, { "epoch": 0.11410665720704742, "grad_norm": 0.1778656244277954, "learning_rate": 5.90079645047488e-05, "loss": 0.3344, "num_tokens": 611354133.0, "step": 965 }, { "epoch": 0.11422490244767648, "grad_norm": 0.17714790999889374, "learning_rate": 5.900518537077227e-05, "loss": 0.3764, "num_tokens": 611983546.0, "step": 966 }, { "epoch": 0.11434314768830554, "grad_norm": 0.19128604233264923, "learning_rate": 5.9002402422500815e-05, "loss": 0.388, "num_tokens": 612614615.0, "step": 967 }, { "epoch": 0.1144613929289346, "grad_norm": 0.18347622454166412, "learning_rate": 5.8999615660342614e-05, "loss": 0.4076, "num_tokens": 613251367.0, "step": 968 }, { "epoch": 0.11457963816956368, "grad_norm": 0.22676466405391693, "learning_rate": 5.8996825084706404e-05, "loss": 0.4366, "num_tokens": 613882406.0, "step": 969 }, { "epoch": 0.11469788341019274, "grad_norm": 0.18649126589298248, "learning_rate": 5.899403069600151e-05, "loss": 0.3888, "num_tokens": 614516798.0, "step": 970 }, { "epoch": 0.1148161286508218, "grad_norm": 0.19619029760360718, "learning_rate": 5.89912324946378e-05, "loss": 0.4073, "num_tokens": 615155542.0, "step": 971 }, { "epoch": 0.11493437389145086, "grad_norm": 0.158069908618927, "learning_rate": 5.898843048102567e-05, "loss": 0.3173, "num_tokens": 615790325.0, "step": 972 }, { "epoch": 0.11505261913207994, "grad_norm": 0.1830310970544815, "learning_rate": 5.8985624655576124e-05, "loss": 0.353, "num_tokens": 616420924.0, "step": 973 }, { "epoch": 0.115170864372709, "grad_norm": 0.1999857872724533, "learning_rate": 5.8982815018700714e-05, "loss": 0.3621, "num_tokens": 617058249.0, "step": 974 }, { "epoch": 0.11528910961333806, "grad_norm": 0.18664878606796265, "learning_rate": 5.898000157081152e-05, "loss": 0.3712, "num_tokens": 617692601.0, "step": 975 }, { "epoch": 0.11540735485396712, "grad_norm": 0.19772273302078247, "learning_rate": 5.8977184312321225e-05, "loss": 0.3727, "num_tokens": 618329266.0, "step": 976 }, { "epoch": 0.1155256000945962, "grad_norm": 0.2023383229970932, "learning_rate": 5.8974363243643025e-05, "loss": 0.3727, "num_tokens": 618961318.0, "step": 977 }, { "epoch": 0.11564384533522526, "grad_norm": 0.19946004450321198, "learning_rate": 5.897153836519071e-05, "loss": 0.4375, "num_tokens": 619595493.0, "step": 978 }, { "epoch": 0.11576209057585432, "grad_norm": 0.18358178436756134, "learning_rate": 5.8968709677378626e-05, "loss": 0.3877, "num_tokens": 620231952.0, "step": 979 }, { "epoch": 0.11588033581648338, "grad_norm": 0.18453611433506012, "learning_rate": 5.896587718062165e-05, "loss": 0.3688, "num_tokens": 620865927.0, "step": 980 }, { "epoch": 0.11599858105711246, "grad_norm": 0.233365997672081, "learning_rate": 5.896304087533526e-05, "loss": 0.4312, "num_tokens": 621502818.0, "step": 981 }, { "epoch": 0.11611682629774152, "grad_norm": 0.18773207068443298, "learning_rate": 5.8960200761935455e-05, "loss": 0.3986, "num_tokens": 622139430.0, "step": 982 }, { "epoch": 0.11623507153837058, "grad_norm": 0.20038506388664246, "learning_rate": 5.8957356840838804e-05, "loss": 0.3551, "num_tokens": 622775467.0, "step": 983 }, { "epoch": 0.11635331677899964, "grad_norm": 0.18527834117412567, "learning_rate": 5.8954509112462446e-05, "loss": 0.3923, "num_tokens": 623412555.0, "step": 984 }, { "epoch": 0.11647156201962872, "grad_norm": 0.19297479093074799, "learning_rate": 5.895165757722406e-05, "loss": 0.4078, "num_tokens": 624051117.0, "step": 985 }, { "epoch": 0.11658980726025778, "grad_norm": 0.19642016291618347, "learning_rate": 5.894880223554189e-05, "loss": 0.3701, "num_tokens": 624689341.0, "step": 986 }, { "epoch": 0.11670805250088684, "grad_norm": 0.17201705276966095, "learning_rate": 5.894594308783477e-05, "loss": 0.374, "num_tokens": 625321056.0, "step": 987 }, { "epoch": 0.1168262977415159, "grad_norm": 0.1829884946346283, "learning_rate": 5.894308013452204e-05, "loss": 0.3784, "num_tokens": 625951744.0, "step": 988 }, { "epoch": 0.11694454298214498, "grad_norm": 0.20056377351284027, "learning_rate": 5.8940213376023625e-05, "loss": 0.3988, "num_tokens": 626587647.0, "step": 989 }, { "epoch": 0.11706278822277404, "grad_norm": 0.18736428022384644, "learning_rate": 5.893734281276e-05, "loss": 0.3826, "num_tokens": 627226191.0, "step": 990 }, { "epoch": 0.1171810334634031, "grad_norm": 0.1869923323392868, "learning_rate": 5.893446844515222e-05, "loss": 0.4083, "num_tokens": 627854887.0, "step": 991 }, { "epoch": 0.11729927870403216, "grad_norm": 0.18189452588558197, "learning_rate": 5.893159027362186e-05, "loss": 0.3881, "num_tokens": 628470814.0, "step": 992 }, { "epoch": 0.11741752394466123, "grad_norm": 0.19038516283035278, "learning_rate": 5.892870829859109e-05, "loss": 0.3954, "num_tokens": 629095432.0, "step": 993 }, { "epoch": 0.1175357691852903, "grad_norm": 0.1841634213924408, "learning_rate": 5.892582252048262e-05, "loss": 0.4136, "num_tokens": 629735034.0, "step": 994 }, { "epoch": 0.11765401442591936, "grad_norm": 0.18203872442245483, "learning_rate": 5.892293293971971e-05, "loss": 0.3929, "num_tokens": 630370989.0, "step": 995 }, { "epoch": 0.11777225966654842, "grad_norm": 0.17584508657455444, "learning_rate": 5.89200395567262e-05, "loss": 0.4062, "num_tokens": 631007330.0, "step": 996 }, { "epoch": 0.11789050490717749, "grad_norm": 0.20625509321689606, "learning_rate": 5.8917142371926475e-05, "loss": 0.3936, "num_tokens": 631644616.0, "step": 997 }, { "epoch": 0.11800875014780655, "grad_norm": 0.19223004579544067, "learning_rate": 5.891424138574546e-05, "loss": 0.392, "num_tokens": 632278554.0, "step": 998 }, { "epoch": 0.11812699538843562, "grad_norm": 0.18212991952896118, "learning_rate": 5.8911336598608665e-05, "loss": 0.3949, "num_tokens": 632912104.0, "step": 999 }, { "epoch": 0.11824524062906468, "grad_norm": 0.1958996057510376, "learning_rate": 5.8908428010942165e-05, "loss": 0.4221, "num_tokens": 633548167.0, "step": 1000 }, { "epoch": 0.11836348586969374, "grad_norm": 0.1716260313987732, "learning_rate": 5.8905515623172556e-05, "loss": 0.3677, "num_tokens": 634181746.0, "step": 1001 }, { "epoch": 0.11848173111032281, "grad_norm": 0.1925710290670395, "learning_rate": 5.8902599435727014e-05, "loss": 0.4227, "num_tokens": 634816694.0, "step": 1002 }, { "epoch": 0.11859997635095187, "grad_norm": 0.18390604853630066, "learning_rate": 5.8899679449033264e-05, "loss": 0.3901, "num_tokens": 635455070.0, "step": 1003 }, { "epoch": 0.11871822159158094, "grad_norm": 0.1970352828502655, "learning_rate": 5.8896755663519605e-05, "loss": 0.4228, "num_tokens": 636090133.0, "step": 1004 }, { "epoch": 0.11883646683221, "grad_norm": 0.1776798516511917, "learning_rate": 5.889382807961488e-05, "loss": 0.4133, "num_tokens": 636718996.0, "step": 1005 }, { "epoch": 0.11895471207283907, "grad_norm": 0.207365021109581, "learning_rate": 5.8890896697748484e-05, "loss": 0.4145, "num_tokens": 637355713.0, "step": 1006 }, { "epoch": 0.11907295731346813, "grad_norm": 0.1959090679883957, "learning_rate": 5.888796151835038e-05, "loss": 0.3942, "num_tokens": 637990698.0, "step": 1007 }, { "epoch": 0.1191912025540972, "grad_norm": 0.19560785591602325, "learning_rate": 5.888502254185108e-05, "loss": 0.3917, "num_tokens": 638622647.0, "step": 1008 }, { "epoch": 0.11930944779472626, "grad_norm": 0.20387545228004456, "learning_rate": 5.888207976868167e-05, "loss": 0.4084, "num_tokens": 639251231.0, "step": 1009 }, { "epoch": 0.11942769303535533, "grad_norm": 0.19802500307559967, "learning_rate": 5.887913319927375e-05, "loss": 0.3932, "num_tokens": 639886902.0, "step": 1010 }, { "epoch": 0.11954593827598439, "grad_norm": 0.19033659994602203, "learning_rate": 5.887618283405953e-05, "loss": 0.3782, "num_tokens": 640524463.0, "step": 1011 }, { "epoch": 0.11966418351661345, "grad_norm": 0.19589732587337494, "learning_rate": 5.887322867347176e-05, "loss": 0.4095, "num_tokens": 641163335.0, "step": 1012 }, { "epoch": 0.11978242875724252, "grad_norm": 0.1952582448720932, "learning_rate": 5.887027071794371e-05, "loss": 0.4045, "num_tokens": 641802244.0, "step": 1013 }, { "epoch": 0.11990067399787159, "grad_norm": 0.21646951138973236, "learning_rate": 5.8867308967909265e-05, "loss": 0.4156, "num_tokens": 642437930.0, "step": 1014 }, { "epoch": 0.12001891923850065, "grad_norm": 0.19207905232906342, "learning_rate": 5.886434342380282e-05, "loss": 0.394, "num_tokens": 643076089.0, "step": 1015 }, { "epoch": 0.12013716447912971, "grad_norm": 0.20507968962192535, "learning_rate": 5.8861374086059355e-05, "loss": 0.4246, "num_tokens": 643711689.0, "step": 1016 }, { "epoch": 0.12025540971975877, "grad_norm": 0.20484769344329834, "learning_rate": 5.885840095511439e-05, "loss": 0.3921, "num_tokens": 644349772.0, "step": 1017 }, { "epoch": 0.12037365496038785, "grad_norm": 0.17594784498214722, "learning_rate": 5.8855424031404015e-05, "loss": 0.3881, "num_tokens": 644988741.0, "step": 1018 }, { "epoch": 0.12049190020101691, "grad_norm": 0.20519398152828217, "learning_rate": 5.885244331536486e-05, "loss": 0.365, "num_tokens": 645614498.0, "step": 1019 }, { "epoch": 0.12061014544164597, "grad_norm": 0.17110878229141235, "learning_rate": 5.884945880743412e-05, "loss": 0.33, "num_tokens": 646252937.0, "step": 1020 }, { "epoch": 0.12072839068227503, "grad_norm": 0.19074691832065582, "learning_rate": 5.884647050804955e-05, "loss": 0.4269, "num_tokens": 646886273.0, "step": 1021 }, { "epoch": 0.12084663592290411, "grad_norm": 0.19936510920524597, "learning_rate": 5.8843478417649456e-05, "loss": 0.4203, "num_tokens": 647519729.0, "step": 1022 }, { "epoch": 0.12096488116353317, "grad_norm": 0.17155998945236206, "learning_rate": 5.88404825366727e-05, "loss": 0.3688, "num_tokens": 648154792.0, "step": 1023 }, { "epoch": 0.12108312640416223, "grad_norm": 0.18858250975608826, "learning_rate": 5.88374828655587e-05, "loss": 0.3752, "num_tokens": 648787406.0, "step": 1024 }, { "epoch": 0.12120137164479129, "grad_norm": 0.18652376532554626, "learning_rate": 5.883447940474744e-05, "loss": 0.3989, "num_tokens": 649419550.0, "step": 1025 }, { "epoch": 0.12131961688542037, "grad_norm": 0.16763560473918915, "learning_rate": 5.883147215467943e-05, "loss": 0.3662, "num_tokens": 650055332.0, "step": 1026 }, { "epoch": 0.12143786212604943, "grad_norm": 0.20067349076271057, "learning_rate": 5.8828461115795775e-05, "loss": 0.427, "num_tokens": 650692693.0, "step": 1027 }, { "epoch": 0.12155610736667849, "grad_norm": 0.19913692772388458, "learning_rate": 5.8825446288538114e-05, "loss": 0.399, "num_tokens": 651324330.0, "step": 1028 }, { "epoch": 0.12167435260730755, "grad_norm": 0.16819269955158234, "learning_rate": 5.8822427673348647e-05, "loss": 0.3434, "num_tokens": 651953179.0, "step": 1029 }, { "epoch": 0.12179259784793663, "grad_norm": 0.21736393868923187, "learning_rate": 5.881940527067012e-05, "loss": 0.3698, "num_tokens": 652577781.0, "step": 1030 }, { "epoch": 0.12191084308856569, "grad_norm": 0.18837624788284302, "learning_rate": 5.881637908094584e-05, "loss": 0.3719, "num_tokens": 653215791.0, "step": 1031 }, { "epoch": 0.12202908832919475, "grad_norm": 0.18725302815437317, "learning_rate": 5.8813349104619684e-05, "loss": 0.4066, "num_tokens": 653817011.0, "step": 1032 }, { "epoch": 0.12214733356982381, "grad_norm": 0.16949571669101715, "learning_rate": 5.881031534213607e-05, "loss": 0.3278, "num_tokens": 654453498.0, "step": 1033 }, { "epoch": 0.12226557881045289, "grad_norm": 0.23363782465457916, "learning_rate": 5.880727779393996e-05, "loss": 0.4153, "num_tokens": 655090159.0, "step": 1034 }, { "epoch": 0.12238382405108195, "grad_norm": 0.201279416680336, "learning_rate": 5.88042364604769e-05, "loss": 0.4339, "num_tokens": 655725879.0, "step": 1035 }, { "epoch": 0.12250206929171101, "grad_norm": 0.18663513660430908, "learning_rate": 5.880119134219296e-05, "loss": 0.3708, "num_tokens": 656361031.0, "step": 1036 }, { "epoch": 0.12262031453234007, "grad_norm": 0.19537858664989471, "learning_rate": 5.879814243953479e-05, "loss": 0.4409, "num_tokens": 656995890.0, "step": 1037 }, { "epoch": 0.12273855977296914, "grad_norm": 0.17773759365081787, "learning_rate": 5.8795089752949595e-05, "loss": 0.3723, "num_tokens": 657629671.0, "step": 1038 }, { "epoch": 0.1228568050135982, "grad_norm": 0.18035361170768738, "learning_rate": 5.8792033282885104e-05, "loss": 0.3929, "num_tokens": 658262765.0, "step": 1039 }, { "epoch": 0.12297505025422727, "grad_norm": 0.186748668551445, "learning_rate": 5.878897302978964e-05, "loss": 0.3808, "num_tokens": 658892913.0, "step": 1040 }, { "epoch": 0.12309329549485633, "grad_norm": 0.18138442933559418, "learning_rate": 5.878590899411207e-05, "loss": 0.3761, "num_tokens": 659528177.0, "step": 1041 }, { "epoch": 0.1232115407354854, "grad_norm": 0.20199914276599884, "learning_rate": 5.878284117630179e-05, "loss": 0.3887, "num_tokens": 660159039.0, "step": 1042 }, { "epoch": 0.12332978597611446, "grad_norm": 0.1713065505027771, "learning_rate": 5.877976957680878e-05, "loss": 0.3781, "num_tokens": 660793735.0, "step": 1043 }, { "epoch": 0.12344803121674353, "grad_norm": 0.20553681254386902, "learning_rate": 5.8776694196083566e-05, "loss": 0.3969, "num_tokens": 661430126.0, "step": 1044 }, { "epoch": 0.12356627645737259, "grad_norm": 0.1817728877067566, "learning_rate": 5.877361503457723e-05, "loss": 0.4056, "num_tokens": 662069040.0, "step": 1045 }, { "epoch": 0.12368452169800166, "grad_norm": 0.20783452689647675, "learning_rate": 5.877053209274139e-05, "loss": 0.3619, "num_tokens": 662703135.0, "step": 1046 }, { "epoch": 0.12380276693863072, "grad_norm": 0.19852496683597565, "learning_rate": 5.876744537102826e-05, "loss": 0.4121, "num_tokens": 663337022.0, "step": 1047 }, { "epoch": 0.12392101217925978, "grad_norm": 0.19973742961883545, "learning_rate": 5.876435486989056e-05, "loss": 0.3824, "num_tokens": 663969631.0, "step": 1048 }, { "epoch": 0.12403925741988885, "grad_norm": 0.21149806678295135, "learning_rate": 5.876126058978161e-05, "loss": 0.436, "num_tokens": 664607928.0, "step": 1049 }, { "epoch": 0.12415750266051792, "grad_norm": 0.17766813933849335, "learning_rate": 5.875816253115523e-05, "loss": 0.4192, "num_tokens": 665246971.0, "step": 1050 }, { "epoch": 0.12427574790114698, "grad_norm": 0.2028605341911316, "learning_rate": 5.875506069446586e-05, "loss": 0.3775, "num_tokens": 665880124.0, "step": 1051 }, { "epoch": 0.12439399314177604, "grad_norm": 0.18141509592533112, "learning_rate": 5.8751955080168436e-05, "loss": 0.3931, "num_tokens": 666516590.0, "step": 1052 }, { "epoch": 0.1245122383824051, "grad_norm": 0.17826445400714874, "learning_rate": 5.8748845688718483e-05, "loss": 0.3597, "num_tokens": 667141423.0, "step": 1053 }, { "epoch": 0.12463048362303417, "grad_norm": 0.19209261238574982, "learning_rate": 5.874573252057208e-05, "loss": 0.4019, "num_tokens": 667775999.0, "step": 1054 }, { "epoch": 0.12474872886366324, "grad_norm": 0.1815352737903595, "learning_rate": 5.8742615576185824e-05, "loss": 0.3907, "num_tokens": 668413601.0, "step": 1055 }, { "epoch": 0.1248669741042923, "grad_norm": 0.18189500272274017, "learning_rate": 5.8739494856016896e-05, "loss": 0.3797, "num_tokens": 669045590.0, "step": 1056 }, { "epoch": 0.12498521934492136, "grad_norm": 0.23037736117839813, "learning_rate": 5.873637036052305e-05, "loss": 0.4057, "num_tokens": 669680260.0, "step": 1057 }, { "epoch": 0.12510346458555044, "grad_norm": 0.19524945318698883, "learning_rate": 5.873324209016254e-05, "loss": 0.3748, "num_tokens": 670314552.0, "step": 1058 }, { "epoch": 0.1252217098261795, "grad_norm": 0.20395110547542572, "learning_rate": 5.873011004539422e-05, "loss": 0.3778, "num_tokens": 670948491.0, "step": 1059 }, { "epoch": 0.12533995506680856, "grad_norm": 0.1821562647819519, "learning_rate": 5.8726974226677466e-05, "loss": 0.3603, "num_tokens": 671580682.0, "step": 1060 }, { "epoch": 0.12545820030743762, "grad_norm": 0.20132775604724884, "learning_rate": 5.872383463447224e-05, "loss": 0.3655, "num_tokens": 672210589.0, "step": 1061 }, { "epoch": 0.12557644554806668, "grad_norm": 0.1914817988872528, "learning_rate": 5.872069126923904e-05, "loss": 0.3894, "num_tokens": 672840526.0, "step": 1062 }, { "epoch": 0.12569469078869575, "grad_norm": 0.185361847281456, "learning_rate": 5.8717544131438895e-05, "loss": 0.3779, "num_tokens": 673466186.0, "step": 1063 }, { "epoch": 0.1258129360293248, "grad_norm": 0.18650561571121216, "learning_rate": 5.871439322153344e-05, "loss": 0.3122, "num_tokens": 674089397.0, "step": 1064 }, { "epoch": 0.1259311812699539, "grad_norm": 0.18817034363746643, "learning_rate": 5.871123853998481e-05, "loss": 0.39, "num_tokens": 674726487.0, "step": 1065 }, { "epoch": 0.12604942651058296, "grad_norm": 0.17024920880794525, "learning_rate": 5.8708080087255717e-05, "loss": 0.3598, "num_tokens": 675360889.0, "step": 1066 }, { "epoch": 0.12616767175121202, "grad_norm": 0.199906587600708, "learning_rate": 5.870491786380944e-05, "loss": 0.3686, "num_tokens": 675999411.0, "step": 1067 }, { "epoch": 0.12628591699184108, "grad_norm": 0.17569608986377716, "learning_rate": 5.8701751870109784e-05, "loss": 0.3542, "num_tokens": 676631267.0, "step": 1068 }, { "epoch": 0.12640416223247014, "grad_norm": 0.20070427656173706, "learning_rate": 5.8698582106621124e-05, "loss": 0.3815, "num_tokens": 677241383.0, "step": 1069 }, { "epoch": 0.1265224074730992, "grad_norm": 0.19804538786411285, "learning_rate": 5.869540857380838e-05, "loss": 0.3943, "num_tokens": 677877358.0, "step": 1070 }, { "epoch": 0.12664065271372826, "grad_norm": 0.18061521649360657, "learning_rate": 5.869223127213703e-05, "loss": 0.3724, "num_tokens": 678505498.0, "step": 1071 }, { "epoch": 0.12675889795435732, "grad_norm": 0.19706299901008606, "learning_rate": 5.8689050202073096e-05, "loss": 0.3658, "num_tokens": 679124916.0, "step": 1072 }, { "epoch": 0.1268771431949864, "grad_norm": 0.22217020392417908, "learning_rate": 5.868586536408317e-05, "loss": 0.4113, "num_tokens": 679749193.0, "step": 1073 }, { "epoch": 0.12699538843561547, "grad_norm": 0.1882840394973755, "learning_rate": 5.868267675863439e-05, "loss": 0.3482, "num_tokens": 680380293.0, "step": 1074 }, { "epoch": 0.12711363367624454, "grad_norm": 0.19420884549617767, "learning_rate": 5.867948438619443e-05, "loss": 0.3853, "num_tokens": 681017380.0, "step": 1075 }, { "epoch": 0.1272318789168736, "grad_norm": 0.2163049280643463, "learning_rate": 5.867628824723153e-05, "loss": 0.3867, "num_tokens": 681647130.0, "step": 1076 }, { "epoch": 0.12735012415750266, "grad_norm": 0.18931736052036285, "learning_rate": 5.867308834221449e-05, "loss": 0.3659, "num_tokens": 682285081.0, "step": 1077 }, { "epoch": 0.12746836939813172, "grad_norm": 0.19782030582427979, "learning_rate": 5.866988467161265e-05, "loss": 0.3835, "num_tokens": 682919537.0, "step": 1078 }, { "epoch": 0.12758661463876078, "grad_norm": 0.20192410051822662, "learning_rate": 5.866667723589591e-05, "loss": 0.3932, "num_tokens": 683554203.0, "step": 1079 }, { "epoch": 0.12770485987938984, "grad_norm": 0.18825575709342957, "learning_rate": 5.866346603553471e-05, "loss": 0.4002, "num_tokens": 684192561.0, "step": 1080 }, { "epoch": 0.12782310512001893, "grad_norm": 0.18523067235946655, "learning_rate": 5.866025107100007e-05, "loss": 0.395, "num_tokens": 684826318.0, "step": 1081 }, { "epoch": 0.127941350360648, "grad_norm": 0.19380231201648712, "learning_rate": 5.865703234276351e-05, "loss": 0.4069, "num_tokens": 685457932.0, "step": 1082 }, { "epoch": 0.12805959560127705, "grad_norm": 0.19216714799404144, "learning_rate": 5.8653809851297164e-05, "loss": 0.3715, "num_tokens": 686094682.0, "step": 1083 }, { "epoch": 0.12817784084190612, "grad_norm": 0.1776556819677353, "learning_rate": 5.865058359707368e-05, "loss": 0.3867, "num_tokens": 686724923.0, "step": 1084 }, { "epoch": 0.12829608608253518, "grad_norm": 0.19032572209835052, "learning_rate": 5.864735358056626e-05, "loss": 0.3856, "num_tokens": 687358762.0, "step": 1085 }, { "epoch": 0.12841433132316424, "grad_norm": 0.1809643805027008, "learning_rate": 5.864411980224868e-05, "loss": 0.382, "num_tokens": 687997038.0, "step": 1086 }, { "epoch": 0.1285325765637933, "grad_norm": 0.1635531336069107, "learning_rate": 5.864088226259523e-05, "loss": 0.3965, "num_tokens": 688632728.0, "step": 1087 }, { "epoch": 0.12865082180442236, "grad_norm": 0.18728560209274292, "learning_rate": 5.86376409620808e-05, "loss": 0.3995, "num_tokens": 689237615.0, "step": 1088 }, { "epoch": 0.12876906704505145, "grad_norm": 0.1824684888124466, "learning_rate": 5.863439590118079e-05, "loss": 0.402, "num_tokens": 689872830.0, "step": 1089 }, { "epoch": 0.1288873122856805, "grad_norm": 0.17901511490345, "learning_rate": 5.863114708037117e-05, "loss": 0.3787, "num_tokens": 690505835.0, "step": 1090 }, { "epoch": 0.12900555752630957, "grad_norm": 0.17714473605155945, "learning_rate": 5.862789450012846e-05, "loss": 0.407, "num_tokens": 691138889.0, "step": 1091 }, { "epoch": 0.12912380276693863, "grad_norm": 0.18312568962574005, "learning_rate": 5.862463816092973e-05, "loss": 0.3893, "num_tokens": 691778212.0, "step": 1092 }, { "epoch": 0.1292420480075677, "grad_norm": 0.16895712912082672, "learning_rate": 5.862137806325261e-05, "loss": 0.3629, "num_tokens": 692400014.0, "step": 1093 }, { "epoch": 0.12936029324819676, "grad_norm": 0.17453403770923615, "learning_rate": 5.861811420757525e-05, "loss": 0.3656, "num_tokens": 693035177.0, "step": 1094 }, { "epoch": 0.12947853848882582, "grad_norm": 0.17386431992053986, "learning_rate": 5.8614846594376395e-05, "loss": 0.3868, "num_tokens": 693672098.0, "step": 1095 }, { "epoch": 0.12959678372945488, "grad_norm": 0.1625804752111435, "learning_rate": 5.861157522413531e-05, "loss": 0.3419, "num_tokens": 694306278.0, "step": 1096 }, { "epoch": 0.12971502897008397, "grad_norm": 0.20103080570697784, "learning_rate": 5.860830009733182e-05, "loss": 0.4371, "num_tokens": 694943202.0, "step": 1097 }, { "epoch": 0.12983327421071303, "grad_norm": 0.1749889850616455, "learning_rate": 5.860502121444632e-05, "loss": 0.4052, "num_tokens": 695581748.0, "step": 1098 }, { "epoch": 0.1299515194513421, "grad_norm": 0.18577910959720612, "learning_rate": 5.8601738575959726e-05, "loss": 0.4176, "num_tokens": 696217907.0, "step": 1099 }, { "epoch": 0.13006976469197115, "grad_norm": 0.18519137799739838, "learning_rate": 5.859845218235351e-05, "loss": 0.3528, "num_tokens": 696832400.0, "step": 1100 }, { "epoch": 0.1301880099326002, "grad_norm": 0.204584002494812, "learning_rate": 5.859516203410971e-05, "loss": 0.4119, "num_tokens": 697462714.0, "step": 1101 }, { "epoch": 0.13030625517322927, "grad_norm": 0.16900528967380524, "learning_rate": 5.859186813171091e-05, "loss": 0.3364, "num_tokens": 698096072.0, "step": 1102 }, { "epoch": 0.13042450041385834, "grad_norm": 0.18135744333267212, "learning_rate": 5.8588570475640226e-05, "loss": 0.3715, "num_tokens": 698732104.0, "step": 1103 }, { "epoch": 0.1305427456544874, "grad_norm": 0.17664462327957153, "learning_rate": 5.8585269066381354e-05, "loss": 0.371, "num_tokens": 699360778.0, "step": 1104 }, { "epoch": 0.13066099089511646, "grad_norm": 0.18523205816745758, "learning_rate": 5.8581963904418516e-05, "loss": 0.3707, "num_tokens": 699998919.0, "step": 1105 }, { "epoch": 0.13077923613574555, "grad_norm": 0.19784501194953918, "learning_rate": 5.8578654990236514e-05, "loss": 0.3795, "num_tokens": 700636435.0, "step": 1106 }, { "epoch": 0.1308974813763746, "grad_norm": 0.1751593053340912, "learning_rate": 5.857534232432066e-05, "loss": 0.3741, "num_tokens": 701266798.0, "step": 1107 }, { "epoch": 0.13101572661700367, "grad_norm": 0.19628801941871643, "learning_rate": 5.857202590715684e-05, "loss": 0.4047, "num_tokens": 701896284.0, "step": 1108 }, { "epoch": 0.13113397185763273, "grad_norm": 0.1718233823776245, "learning_rate": 5.856870573923149e-05, "loss": 0.4212, "num_tokens": 702532392.0, "step": 1109 }, { "epoch": 0.1312522170982618, "grad_norm": 0.16952401399612427, "learning_rate": 5.8565381821031605e-05, "loss": 0.3731, "num_tokens": 703167803.0, "step": 1110 }, { "epoch": 0.13137046233889085, "grad_norm": 0.2077304869890213, "learning_rate": 5.8562054153044705e-05, "loss": 0.429, "num_tokens": 703804312.0, "step": 1111 }, { "epoch": 0.13148870757951991, "grad_norm": 0.1744917929172516, "learning_rate": 5.855872273575888e-05, "loss": 0.36, "num_tokens": 704434487.0, "step": 1112 }, { "epoch": 0.13160695282014898, "grad_norm": 0.19674289226531982, "learning_rate": 5.8555387569662755e-05, "loss": 0.407, "num_tokens": 705069074.0, "step": 1113 }, { "epoch": 0.13172519806077806, "grad_norm": 0.17777958512306213, "learning_rate": 5.8552048655245534e-05, "loss": 0.3853, "num_tokens": 705694863.0, "step": 1114 }, { "epoch": 0.13184344330140713, "grad_norm": 0.18883197009563446, "learning_rate": 5.854870599299693e-05, "loss": 0.3873, "num_tokens": 706332062.0, "step": 1115 }, { "epoch": 0.1319616885420362, "grad_norm": 0.17751985788345337, "learning_rate": 5.8545359583407235e-05, "loss": 0.3798, "num_tokens": 706961719.0, "step": 1116 }, { "epoch": 0.13207993378266525, "grad_norm": 0.17789576947689056, "learning_rate": 5.854200942696727e-05, "loss": 0.3613, "num_tokens": 707593241.0, "step": 1117 }, { "epoch": 0.1321981790232943, "grad_norm": 0.18914833664894104, "learning_rate": 5.8538655524168436e-05, "loss": 0.424, "num_tokens": 708223670.0, "step": 1118 }, { "epoch": 0.13231642426392337, "grad_norm": 0.18527646362781525, "learning_rate": 5.853529787550265e-05, "loss": 0.3774, "num_tokens": 708853803.0, "step": 1119 }, { "epoch": 0.13243466950455243, "grad_norm": 0.1986585259437561, "learning_rate": 5.85319364814624e-05, "loss": 0.3929, "num_tokens": 709488191.0, "step": 1120 }, { "epoch": 0.1325529147451815, "grad_norm": 0.16915933787822723, "learning_rate": 5.8528571342540715e-05, "loss": 0.3668, "num_tokens": 710121271.0, "step": 1121 }, { "epoch": 0.13267115998581058, "grad_norm": 0.16356077790260315, "learning_rate": 5.852520245923117e-05, "loss": 0.3845, "num_tokens": 710755348.0, "step": 1122 }, { "epoch": 0.13278940522643964, "grad_norm": 0.19125157594680786, "learning_rate": 5.8521829832027906e-05, "loss": 0.4277, "num_tokens": 711388900.0, "step": 1123 }, { "epoch": 0.1329076504670687, "grad_norm": 0.18533861637115479, "learning_rate": 5.8518453461425586e-05, "loss": 0.3984, "num_tokens": 712027710.0, "step": 1124 }, { "epoch": 0.13302589570769777, "grad_norm": 0.19419923424720764, "learning_rate": 5.851507334791945e-05, "loss": 0.3928, "num_tokens": 712662431.0, "step": 1125 }, { "epoch": 0.13314414094832683, "grad_norm": 0.17307303845882416, "learning_rate": 5.851168949200527e-05, "loss": 0.3845, "num_tokens": 713298321.0, "step": 1126 }, { "epoch": 0.1332623861889559, "grad_norm": 0.1701747626066208, "learning_rate": 5.850830189417937e-05, "loss": 0.4012, "num_tokens": 713929502.0, "step": 1127 }, { "epoch": 0.13338063142958495, "grad_norm": 0.17819592356681824, "learning_rate": 5.8504910554938606e-05, "loss": 0.3995, "num_tokens": 714563684.0, "step": 1128 }, { "epoch": 0.133498876670214, "grad_norm": 0.1839131861925125, "learning_rate": 5.850151547478044e-05, "loss": 0.4205, "num_tokens": 715194139.0, "step": 1129 }, { "epoch": 0.1336171219108431, "grad_norm": 0.16751742362976074, "learning_rate": 5.849811665420281e-05, "loss": 0.3942, "num_tokens": 715829633.0, "step": 1130 }, { "epoch": 0.13373536715147216, "grad_norm": 0.17934882640838623, "learning_rate": 5.849471409370426e-05, "loss": 0.3927, "num_tokens": 716437097.0, "step": 1131 }, { "epoch": 0.13385361239210122, "grad_norm": 0.1632968783378601, "learning_rate": 5.8491307793783834e-05, "loss": 0.3307, "num_tokens": 717069185.0, "step": 1132 }, { "epoch": 0.13397185763273028, "grad_norm": 0.17871174216270447, "learning_rate": 5.848789775494116e-05, "loss": 0.4044, "num_tokens": 717707723.0, "step": 1133 }, { "epoch": 0.13409010287335935, "grad_norm": 0.16258849203586578, "learning_rate": 5.848448397767641e-05, "loss": 0.3793, "num_tokens": 718339982.0, "step": 1134 }, { "epoch": 0.1342083481139884, "grad_norm": 0.18856140971183777, "learning_rate": 5.848106646249029e-05, "loss": 0.3978, "num_tokens": 718970674.0, "step": 1135 }, { "epoch": 0.13432659335461747, "grad_norm": 0.18031026422977448, "learning_rate": 5.847764520988406e-05, "loss": 0.4246, "num_tokens": 719606724.0, "step": 1136 }, { "epoch": 0.13444483859524653, "grad_norm": 0.17049449682235718, "learning_rate": 5.8474220220359536e-05, "loss": 0.3931, "num_tokens": 720209640.0, "step": 1137 }, { "epoch": 0.13456308383587562, "grad_norm": 0.1810472011566162, "learning_rate": 5.847079149441908e-05, "loss": 0.3879, "num_tokens": 720845332.0, "step": 1138 }, { "epoch": 0.13468132907650468, "grad_norm": 0.17387984693050385, "learning_rate": 5.846735903256559e-05, "loss": 0.3788, "num_tokens": 721471001.0, "step": 1139 }, { "epoch": 0.13479957431713374, "grad_norm": 0.17349916696548462, "learning_rate": 5.8463922835302525e-05, "loss": 0.3633, "num_tokens": 722105712.0, "step": 1140 }, { "epoch": 0.1349178195577628, "grad_norm": 0.20115481317043304, "learning_rate": 5.8460482903133875e-05, "loss": 0.4114, "num_tokens": 722735260.0, "step": 1141 }, { "epoch": 0.13503606479839186, "grad_norm": 0.16364425420761108, "learning_rate": 5.8457039236564206e-05, "loss": 0.3714, "num_tokens": 723369238.0, "step": 1142 }, { "epoch": 0.13515431003902093, "grad_norm": 0.21767592430114746, "learning_rate": 5.845359183609862e-05, "loss": 0.3914, "num_tokens": 724005091.0, "step": 1143 }, { "epoch": 0.13527255527965, "grad_norm": 0.1838195025920868, "learning_rate": 5.845014070224275e-05, "loss": 0.3638, "num_tokens": 724644703.0, "step": 1144 }, { "epoch": 0.13539080052027905, "grad_norm": 0.18516889214515686, "learning_rate": 5.844668583550278e-05, "loss": 0.3934, "num_tokens": 725277008.0, "step": 1145 }, { "epoch": 0.13550904576090814, "grad_norm": 0.1950863152742386, "learning_rate": 5.8443227236385466e-05, "loss": 0.3637, "num_tokens": 725912854.0, "step": 1146 }, { "epoch": 0.1356272910015372, "grad_norm": 0.19289898872375488, "learning_rate": 5.84397649053981e-05, "loss": 0.4099, "num_tokens": 726552374.0, "step": 1147 }, { "epoch": 0.13574553624216626, "grad_norm": 0.17906807363033295, "learning_rate": 5.84362988430485e-05, "loss": 0.3657, "num_tokens": 727190596.0, "step": 1148 }, { "epoch": 0.13586378148279532, "grad_norm": 0.17261406779289246, "learning_rate": 5.8432829049845056e-05, "loss": 0.3499, "num_tokens": 727820231.0, "step": 1149 }, { "epoch": 0.13598202672342438, "grad_norm": 0.17040127515792847, "learning_rate": 5.84293555262967e-05, "loss": 0.3397, "num_tokens": 728440540.0, "step": 1150 }, { "epoch": 0.13610027196405344, "grad_norm": 0.2098436951637268, "learning_rate": 5.842587827291292e-05, "loss": 0.4293, "num_tokens": 729072563.0, "step": 1151 }, { "epoch": 0.1362185172046825, "grad_norm": 0.18615762889385223, "learning_rate": 5.842239729020372e-05, "loss": 0.4259, "num_tokens": 729704300.0, "step": 1152 }, { "epoch": 0.13633676244531157, "grad_norm": 0.18648044764995575, "learning_rate": 5.841891257867968e-05, "loss": 0.4029, "num_tokens": 730308427.0, "step": 1153 }, { "epoch": 0.13645500768594065, "grad_norm": 0.1908675581216812, "learning_rate": 5.841542413885192e-05, "loss": 0.4426, "num_tokens": 730946156.0, "step": 1154 }, { "epoch": 0.13657325292656972, "grad_norm": 0.16413751244544983, "learning_rate": 5.8411931971232094e-05, "loss": 0.3596, "num_tokens": 731577262.0, "step": 1155 }, { "epoch": 0.13669149816719878, "grad_norm": 0.18698996305465698, "learning_rate": 5.840843607633244e-05, "loss": 0.4108, "num_tokens": 732204517.0, "step": 1156 }, { "epoch": 0.13680974340782784, "grad_norm": 0.17655009031295776, "learning_rate": 5.840493645466568e-05, "loss": 0.3597, "num_tokens": 732841654.0, "step": 1157 }, { "epoch": 0.1369279886484569, "grad_norm": 0.17955833673477173, "learning_rate": 5.840143310674515e-05, "loss": 0.3866, "num_tokens": 733481344.0, "step": 1158 }, { "epoch": 0.13704623388908596, "grad_norm": 0.17413663864135742, "learning_rate": 5.8397926033084676e-05, "loss": 0.3924, "num_tokens": 734116157.0, "step": 1159 }, { "epoch": 0.13716447912971502, "grad_norm": 0.17806783318519592, "learning_rate": 5.8394415234198686e-05, "loss": 0.3619, "num_tokens": 734752960.0, "step": 1160 }, { "epoch": 0.13728272437034408, "grad_norm": 0.18375813961029053, "learning_rate": 5.8390900710602086e-05, "loss": 0.3766, "num_tokens": 735390605.0, "step": 1161 }, { "epoch": 0.13740096961097314, "grad_norm": 0.1933940351009369, "learning_rate": 5.83873824628104e-05, "loss": 0.3571, "num_tokens": 736025653.0, "step": 1162 }, { "epoch": 0.13751921485160223, "grad_norm": 0.19638876616954803, "learning_rate": 5.8383860491339654e-05, "loss": 0.42, "num_tokens": 736662381.0, "step": 1163 }, { "epoch": 0.1376374600922313, "grad_norm": 0.18557952344417572, "learning_rate": 5.838033479670643e-05, "loss": 0.3859, "num_tokens": 737298285.0, "step": 1164 }, { "epoch": 0.13775570533286036, "grad_norm": 0.18788571655750275, "learning_rate": 5.8376805379427846e-05, "loss": 0.3715, "num_tokens": 737932027.0, "step": 1165 }, { "epoch": 0.13787395057348942, "grad_norm": 0.19567787647247314, "learning_rate": 5.8373272240021594e-05, "loss": 0.4105, "num_tokens": 738566204.0, "step": 1166 }, { "epoch": 0.13799219581411848, "grad_norm": 0.17667856812477112, "learning_rate": 5.836973537900589e-05, "loss": 0.3654, "num_tokens": 739201202.0, "step": 1167 }, { "epoch": 0.13811044105474754, "grad_norm": 0.17980270087718964, "learning_rate": 5.836619479689951e-05, "loss": 0.3833, "num_tokens": 739833572.0, "step": 1168 }, { "epoch": 0.1382286862953766, "grad_norm": 0.19048798084259033, "learning_rate": 5.8362650494221735e-05, "loss": 0.3892, "num_tokens": 740467809.0, "step": 1169 }, { "epoch": 0.13834693153600566, "grad_norm": 0.19775250554084778, "learning_rate": 5.8359102471492454e-05, "loss": 0.4034, "num_tokens": 741101167.0, "step": 1170 }, { "epoch": 0.13846517677663475, "grad_norm": 0.18212459981441498, "learning_rate": 5.835555072923207e-05, "loss": 0.3739, "num_tokens": 741738147.0, "step": 1171 }, { "epoch": 0.1385834220172638, "grad_norm": 0.1815088838338852, "learning_rate": 5.835199526796151e-05, "loss": 0.3825, "num_tokens": 742371844.0, "step": 1172 }, { "epoch": 0.13870166725789287, "grad_norm": 0.19300366938114166, "learning_rate": 5.83484360882023e-05, "loss": 0.4057, "num_tokens": 743011533.0, "step": 1173 }, { "epoch": 0.13881991249852194, "grad_norm": 0.20350226759910583, "learning_rate": 5.8344873190476457e-05, "loss": 0.4141, "num_tokens": 743646159.0, "step": 1174 }, { "epoch": 0.138938157739151, "grad_norm": 0.18870776891708374, "learning_rate": 5.834130657530657e-05, "loss": 0.34, "num_tokens": 744282963.0, "step": 1175 }, { "epoch": 0.13905640297978006, "grad_norm": 0.2118806093931198, "learning_rate": 5.833773624321578e-05, "loss": 0.4178, "num_tokens": 744920861.0, "step": 1176 }, { "epoch": 0.13917464822040912, "grad_norm": 0.2019195407629013, "learning_rate": 5.833416219472776e-05, "loss": 0.4136, "num_tokens": 745552170.0, "step": 1177 }, { "epoch": 0.13929289346103818, "grad_norm": 0.17696236073970795, "learning_rate": 5.833058443036672e-05, "loss": 0.3708, "num_tokens": 746184040.0, "step": 1178 }, { "epoch": 0.13941113870166727, "grad_norm": 0.20400632917881012, "learning_rate": 5.8327002950657445e-05, "loss": 0.3848, "num_tokens": 746821128.0, "step": 1179 }, { "epoch": 0.13952938394229633, "grad_norm": 0.21883828938007355, "learning_rate": 5.8323417756125236e-05, "loss": 0.4011, "num_tokens": 747451607.0, "step": 1180 }, { "epoch": 0.1396476291829254, "grad_norm": 0.19244424998760223, "learning_rate": 5.831982884729595e-05, "loss": 0.3853, "num_tokens": 748083163.0, "step": 1181 }, { "epoch": 0.13976587442355445, "grad_norm": 0.18018127977848053, "learning_rate": 5.831623622469598e-05, "loss": 0.3562, "num_tokens": 748721979.0, "step": 1182 }, { "epoch": 0.13988411966418351, "grad_norm": 0.22138157486915588, "learning_rate": 5.831263988885229e-05, "loss": 0.4059, "num_tokens": 749355743.0, "step": 1183 }, { "epoch": 0.14000236490481258, "grad_norm": 0.20464839041233063, "learning_rate": 5.8309039840292354e-05, "loss": 0.3677, "num_tokens": 749990459.0, "step": 1184 }, { "epoch": 0.14012061014544164, "grad_norm": 0.1863955557346344, "learning_rate": 5.830543607954422e-05, "loss": 0.3851, "num_tokens": 750625529.0, "step": 1185 }, { "epoch": 0.1402388553860707, "grad_norm": 0.19712600111961365, "learning_rate": 5.830182860713646e-05, "loss": 0.3692, "num_tokens": 751257308.0, "step": 1186 }, { "epoch": 0.1403571006266998, "grad_norm": 0.178093820810318, "learning_rate": 5.829821742359819e-05, "loss": 0.3598, "num_tokens": 751890563.0, "step": 1187 }, { "epoch": 0.14047534586732885, "grad_norm": 0.1857881247997284, "learning_rate": 5.8294602529459094e-05, "loss": 0.3568, "num_tokens": 752523163.0, "step": 1188 }, { "epoch": 0.1405935911079579, "grad_norm": 0.17565834522247314, "learning_rate": 5.8290983925249374e-05, "loss": 0.372, "num_tokens": 753157270.0, "step": 1189 }, { "epoch": 0.14071183634858697, "grad_norm": 0.19136302173137665, "learning_rate": 5.82873616114998e-05, "loss": 0.3796, "num_tokens": 753790631.0, "step": 1190 }, { "epoch": 0.14083008158921603, "grad_norm": 0.19209633767604828, "learning_rate": 5.828373558874165e-05, "loss": 0.4301, "num_tokens": 754422344.0, "step": 1191 }, { "epoch": 0.1409483268298451, "grad_norm": 0.17911899089813232, "learning_rate": 5.8280105857506786e-05, "loss": 0.415, "num_tokens": 755054602.0, "step": 1192 }, { "epoch": 0.14106657207047416, "grad_norm": 0.18169888854026794, "learning_rate": 5.827647241832759e-05, "loss": 0.4042, "num_tokens": 755686194.0, "step": 1193 }, { "epoch": 0.14118481731110322, "grad_norm": 0.18486203253269196, "learning_rate": 5.8272835271737004e-05, "loss": 0.4007, "num_tokens": 756324764.0, "step": 1194 }, { "epoch": 0.1413030625517323, "grad_norm": 0.19197550415992737, "learning_rate": 5.826919441826849e-05, "loss": 0.4199, "num_tokens": 756963038.0, "step": 1195 }, { "epoch": 0.14142130779236137, "grad_norm": 0.18793098628520966, "learning_rate": 5.8265549858456075e-05, "loss": 0.3326, "num_tokens": 757590896.0, "step": 1196 }, { "epoch": 0.14153955303299043, "grad_norm": 0.19355428218841553, "learning_rate": 5.826190159283432e-05, "loss": 0.3976, "num_tokens": 758226989.0, "step": 1197 }, { "epoch": 0.1416577982736195, "grad_norm": 0.1778559386730194, "learning_rate": 5.8258249621938344e-05, "loss": 0.4047, "num_tokens": 758863618.0, "step": 1198 }, { "epoch": 0.14177604351424855, "grad_norm": 0.16422145068645477, "learning_rate": 5.825459394630379e-05, "loss": 0.3552, "num_tokens": 759499176.0, "step": 1199 }, { "epoch": 0.1418942887548776, "grad_norm": 0.2082889974117279, "learning_rate": 5.825093456646684e-05, "loss": 0.4095, "num_tokens": 760138725.0, "step": 1200 }, { "epoch": 0.14201253399550667, "grad_norm": 0.16329261660575867, "learning_rate": 5.824727148296424e-05, "loss": 0.386, "num_tokens": 760778173.0, "step": 1201 }, { "epoch": 0.14213077923613573, "grad_norm": 0.1950369030237198, "learning_rate": 5.8243604696333274e-05, "loss": 0.436, "num_tokens": 761411949.0, "step": 1202 }, { "epoch": 0.14224902447676482, "grad_norm": 0.1799474060535431, "learning_rate": 5.823993420711177e-05, "loss": 0.3782, "num_tokens": 762049507.0, "step": 1203 }, { "epoch": 0.14236726971739389, "grad_norm": 0.20556974411010742, "learning_rate": 5.823626001583808e-05, "loss": 0.4183, "num_tokens": 762685373.0, "step": 1204 }, { "epoch": 0.14248551495802295, "grad_norm": 0.17798088490962982, "learning_rate": 5.8232582123051125e-05, "loss": 0.3938, "num_tokens": 763323824.0, "step": 1205 }, { "epoch": 0.142603760198652, "grad_norm": 0.19575227797031403, "learning_rate": 5.8228900529290364e-05, "loss": 0.4289, "num_tokens": 763962195.0, "step": 1206 }, { "epoch": 0.14272200543928107, "grad_norm": 0.17322410643100739, "learning_rate": 5.822521523509578e-05, "loss": 0.3949, "num_tokens": 764599228.0, "step": 1207 }, { "epoch": 0.14284025067991013, "grad_norm": 0.17104260623455048, "learning_rate": 5.822152624100791e-05, "loss": 0.3493, "num_tokens": 765233848.0, "step": 1208 }, { "epoch": 0.1429584959205392, "grad_norm": 0.17549818754196167, "learning_rate": 5.821783354756784e-05, "loss": 0.3981, "num_tokens": 765873589.0, "step": 1209 }, { "epoch": 0.14307674116116825, "grad_norm": 0.16941574215888977, "learning_rate": 5.8214137155317196e-05, "loss": 0.3394, "num_tokens": 766504261.0, "step": 1210 }, { "epoch": 0.14319498640179731, "grad_norm": 0.1799101084470749, "learning_rate": 5.8210437064798145e-05, "loss": 0.4131, "num_tokens": 767140922.0, "step": 1211 }, { "epoch": 0.1433132316424264, "grad_norm": 0.17754989862442017, "learning_rate": 5.820673327655339e-05, "loss": 0.4059, "num_tokens": 767775931.0, "step": 1212 }, { "epoch": 0.14343147688305546, "grad_norm": 0.1822412610054016, "learning_rate": 5.8203025791126184e-05, "loss": 0.4171, "num_tokens": 768411905.0, "step": 1213 }, { "epoch": 0.14354972212368453, "grad_norm": 0.18451784551143646, "learning_rate": 5.8199314609060317e-05, "loss": 0.3774, "num_tokens": 769046890.0, "step": 1214 }, { "epoch": 0.1436679673643136, "grad_norm": 0.18620455265045166, "learning_rate": 5.819559973090013e-05, "loss": 0.386, "num_tokens": 769671163.0, "step": 1215 }, { "epoch": 0.14378621260494265, "grad_norm": 0.1666678637266159, "learning_rate": 5.81918811571905e-05, "loss": 0.3833, "num_tokens": 770306885.0, "step": 1216 }, { "epoch": 0.1439044578455717, "grad_norm": 0.17063529789447784, "learning_rate": 5.8188158888476844e-05, "loss": 0.3482, "num_tokens": 770935952.0, "step": 1217 }, { "epoch": 0.14402270308620077, "grad_norm": 0.18935726583003998, "learning_rate": 5.8184432925305126e-05, "loss": 0.3708, "num_tokens": 771566252.0, "step": 1218 }, { "epoch": 0.14414094832682983, "grad_norm": 0.1599024087190628, "learning_rate": 5.818070326822184e-05, "loss": 0.3662, "num_tokens": 772196019.0, "step": 1219 }, { "epoch": 0.14425919356745892, "grad_norm": 0.17319998145103455, "learning_rate": 5.8176969917774046e-05, "loss": 0.3853, "num_tokens": 772829557.0, "step": 1220 }, { "epoch": 0.14437743880808798, "grad_norm": 0.19027172029018402, "learning_rate": 5.817323287450933e-05, "loss": 0.3942, "num_tokens": 773462306.0, "step": 1221 }, { "epoch": 0.14449568404871704, "grad_norm": 0.185812845826149, "learning_rate": 5.816949213897581e-05, "loss": 0.4263, "num_tokens": 774094333.0, "step": 1222 }, { "epoch": 0.1446139292893461, "grad_norm": 0.17623886466026306, "learning_rate": 5.816574771172216e-05, "loss": 0.3971, "num_tokens": 774727506.0, "step": 1223 }, { "epoch": 0.14473217452997517, "grad_norm": 0.22737926244735718, "learning_rate": 5.816199959329759e-05, "loss": 0.4321, "num_tokens": 775365455.0, "step": 1224 }, { "epoch": 0.14485041977060423, "grad_norm": 0.1845507174730301, "learning_rate": 5.815824778425185e-05, "loss": 0.4198, "num_tokens": 775989745.0, "step": 1225 }, { "epoch": 0.1449686650112333, "grad_norm": 0.18559856712818146, "learning_rate": 5.815449228513525e-05, "loss": 0.3676, "num_tokens": 776627802.0, "step": 1226 }, { "epoch": 0.14508691025186235, "grad_norm": 0.2027086764574051, "learning_rate": 5.815073309649861e-05, "loss": 0.3611, "num_tokens": 777261077.0, "step": 1227 }, { "epoch": 0.14520515549249144, "grad_norm": 0.21276064217090607, "learning_rate": 5.814697021889331e-05, "loss": 0.4041, "num_tokens": 777900544.0, "step": 1228 }, { "epoch": 0.1453234007331205, "grad_norm": 0.2052856683731079, "learning_rate": 5.814320365287127e-05, "loss": 0.3414, "num_tokens": 778526351.0, "step": 1229 }, { "epoch": 0.14544164597374956, "grad_norm": 0.1973879486322403, "learning_rate": 5.8139433398984954e-05, "loss": 0.387, "num_tokens": 779161010.0, "step": 1230 }, { "epoch": 0.14555989121437862, "grad_norm": 0.17447389662265778, "learning_rate": 5.813565945778735e-05, "loss": 0.3885, "num_tokens": 779795294.0, "step": 1231 }, { "epoch": 0.14567813645500768, "grad_norm": 0.17841024696826935, "learning_rate": 5.813188182983201e-05, "loss": 0.3685, "num_tokens": 780434525.0, "step": 1232 }, { "epoch": 0.14579638169563675, "grad_norm": 0.16201606392860413, "learning_rate": 5.8128100515673004e-05, "loss": 0.3575, "num_tokens": 781047733.0, "step": 1233 }, { "epoch": 0.1459146269362658, "grad_norm": 0.17151504755020142, "learning_rate": 5.812431551586497e-05, "loss": 0.3745, "num_tokens": 781680882.0, "step": 1234 }, { "epoch": 0.14603287217689487, "grad_norm": 0.17381155490875244, "learning_rate": 5.8120526830963057e-05, "loss": 0.3849, "num_tokens": 782316078.0, "step": 1235 }, { "epoch": 0.14615111741752396, "grad_norm": 0.19896014034748077, "learning_rate": 5.811673446152297e-05, "loss": 0.4242, "num_tokens": 782945690.0, "step": 1236 }, { "epoch": 0.14626936265815302, "grad_norm": 0.17818230390548706, "learning_rate": 5.811293840810095e-05, "loss": 0.4191, "num_tokens": 783578548.0, "step": 1237 }, { "epoch": 0.14638760789878208, "grad_norm": 0.19138866662979126, "learning_rate": 5.810913867125379e-05, "loss": 0.3629, "num_tokens": 784211272.0, "step": 1238 }, { "epoch": 0.14650585313941114, "grad_norm": 0.17537151277065277, "learning_rate": 5.810533525153881e-05, "loss": 0.4045, "num_tokens": 784844206.0, "step": 1239 }, { "epoch": 0.1466240983800402, "grad_norm": 0.18834330141544342, "learning_rate": 5.8101528149513874e-05, "loss": 0.3918, "num_tokens": 785479541.0, "step": 1240 }, { "epoch": 0.14674234362066926, "grad_norm": 0.18729843199253082, "learning_rate": 5.8097717365737386e-05, "loss": 0.3621, "num_tokens": 786105562.0, "step": 1241 }, { "epoch": 0.14686058886129832, "grad_norm": 0.18254274129867554, "learning_rate": 5.8093902900768285e-05, "loss": 0.4008, "num_tokens": 786739257.0, "step": 1242 }, { "epoch": 0.14697883410192739, "grad_norm": 0.16369964182376862, "learning_rate": 5.809008475516606e-05, "loss": 0.3514, "num_tokens": 787367527.0, "step": 1243 }, { "epoch": 0.14709707934255647, "grad_norm": 0.20594632625579834, "learning_rate": 5.808626292949075e-05, "loss": 0.3643, "num_tokens": 787999760.0, "step": 1244 }, { "epoch": 0.14721532458318554, "grad_norm": 0.19036948680877686, "learning_rate": 5.808243742430289e-05, "loss": 0.364, "num_tokens": 788637623.0, "step": 1245 }, { "epoch": 0.1473335698238146, "grad_norm": 0.16600456833839417, "learning_rate": 5.80786082401636e-05, "loss": 0.3564, "num_tokens": 789276023.0, "step": 1246 }, { "epoch": 0.14745181506444366, "grad_norm": 0.18281590938568115, "learning_rate": 5.8074775377634535e-05, "loss": 0.3879, "num_tokens": 789907579.0, "step": 1247 }, { "epoch": 0.14757006030507272, "grad_norm": 0.18071088194847107, "learning_rate": 5.8070938837277856e-05, "loss": 0.4122, "num_tokens": 790542282.0, "step": 1248 }, { "epoch": 0.14768830554570178, "grad_norm": 0.17504839599132538, "learning_rate": 5.806709861965629e-05, "loss": 0.3924, "num_tokens": 791177851.0, "step": 1249 }, { "epoch": 0.14780655078633084, "grad_norm": 0.1808909922838211, "learning_rate": 5.806325472533311e-05, "loss": 0.3724, "num_tokens": 791809529.0, "step": 1250 }, { "epoch": 0.1479247960269599, "grad_norm": 0.16908492147922516, "learning_rate": 5.80594071548721e-05, "loss": 0.4081, "num_tokens": 792448822.0, "step": 1251 }, { "epoch": 0.148043041267589, "grad_norm": 0.17591680586338043, "learning_rate": 5.805555590883762e-05, "loss": 0.3982, "num_tokens": 793083686.0, "step": 1252 }, { "epoch": 0.14816128650821805, "grad_norm": 0.19707359373569489, "learning_rate": 5.8051700987794534e-05, "loss": 0.4493, "num_tokens": 793720893.0, "step": 1253 }, { "epoch": 0.14827953174884712, "grad_norm": 0.16740645468235016, "learning_rate": 5.804784239230826e-05, "loss": 0.3658, "num_tokens": 794350771.0, "step": 1254 }, { "epoch": 0.14839777698947618, "grad_norm": 0.18248078227043152, "learning_rate": 5.804398012294476e-05, "loss": 0.3911, "num_tokens": 794988418.0, "step": 1255 }, { "epoch": 0.14851602223010524, "grad_norm": 0.18627490103244781, "learning_rate": 5.8040114180270544e-05, "loss": 0.4266, "num_tokens": 795619860.0, "step": 1256 }, { "epoch": 0.1486342674707343, "grad_norm": 0.19804096221923828, "learning_rate": 5.803624456485262e-05, "loss": 0.3809, "num_tokens": 796258236.0, "step": 1257 }, { "epoch": 0.14875251271136336, "grad_norm": 0.18971596658229828, "learning_rate": 5.803237127725858e-05, "loss": 0.3701, "num_tokens": 796889178.0, "step": 1258 }, { "epoch": 0.14887075795199242, "grad_norm": 0.1789889633655548, "learning_rate": 5.8028494318056534e-05, "loss": 0.395, "num_tokens": 797526666.0, "step": 1259 }, { "epoch": 0.1489890031926215, "grad_norm": 0.17677906155586243, "learning_rate": 5.802461368781512e-05, "loss": 0.3796, "num_tokens": 798162731.0, "step": 1260 }, { "epoch": 0.14910724843325057, "grad_norm": 0.1816256046295166, "learning_rate": 5.802072938710354e-05, "loss": 0.3939, "num_tokens": 798797285.0, "step": 1261 }, { "epoch": 0.14922549367387963, "grad_norm": 0.17264655232429504, "learning_rate": 5.8016841416491516e-05, "loss": 0.3877, "num_tokens": 799430881.0, "step": 1262 }, { "epoch": 0.1493437389145087, "grad_norm": 0.18055939674377441, "learning_rate": 5.8012949776549325e-05, "loss": 0.3444, "num_tokens": 800065809.0, "step": 1263 }, { "epoch": 0.14946198415513776, "grad_norm": 0.18371006846427917, "learning_rate": 5.8009054467847746e-05, "loss": 0.3336, "num_tokens": 800695505.0, "step": 1264 }, { "epoch": 0.14958022939576682, "grad_norm": 0.17915686964988708, "learning_rate": 5.800515549095816e-05, "loss": 0.393, "num_tokens": 801324547.0, "step": 1265 }, { "epoch": 0.14969847463639588, "grad_norm": 0.20150579512119293, "learning_rate": 5.8001252846452396e-05, "loss": 0.3835, "num_tokens": 801954766.0, "step": 1266 }, { "epoch": 0.14981671987702494, "grad_norm": 0.17538531124591827, "learning_rate": 5.799734653490292e-05, "loss": 0.3458, "num_tokens": 802585735.0, "step": 1267 }, { "epoch": 0.149934965117654, "grad_norm": 0.18659847974777222, "learning_rate": 5.799343655688266e-05, "loss": 0.4268, "num_tokens": 803222978.0, "step": 1268 }, { "epoch": 0.1500532103582831, "grad_norm": 0.1732473224401474, "learning_rate": 5.7989522912965116e-05, "loss": 0.3058, "num_tokens": 803861621.0, "step": 1269 }, { "epoch": 0.15017145559891215, "grad_norm": 0.1537596434354782, "learning_rate": 5.798560560372433e-05, "loss": 0.2912, "num_tokens": 804493467.0, "step": 1270 }, { "epoch": 0.1502897008395412, "grad_norm": 0.1769077181816101, "learning_rate": 5.798168462973485e-05, "loss": 0.3734, "num_tokens": 805126588.0, "step": 1271 }, { "epoch": 0.15040794608017027, "grad_norm": 0.19403451681137085, "learning_rate": 5.79777599915718e-05, "loss": 0.3717, "num_tokens": 805765213.0, "step": 1272 }, { "epoch": 0.15052619132079934, "grad_norm": 0.19550135731697083, "learning_rate": 5.7973831689810814e-05, "loss": 0.3824, "num_tokens": 806404016.0, "step": 1273 }, { "epoch": 0.1506444365614284, "grad_norm": 0.16883718967437744, "learning_rate": 5.796989972502808e-05, "loss": 0.3592, "num_tokens": 807036599.0, "step": 1274 }, { "epoch": 0.15076268180205746, "grad_norm": 0.1836583912372589, "learning_rate": 5.796596409780031e-05, "loss": 0.3763, "num_tokens": 807672182.0, "step": 1275 }, { "epoch": 0.15088092704268652, "grad_norm": 0.17509691417217255, "learning_rate": 5.796202480870477e-05, "loss": 0.4035, "num_tokens": 808281458.0, "step": 1276 }, { "epoch": 0.1509991722833156, "grad_norm": 0.19384072721004486, "learning_rate": 5.795808185831925e-05, "loss": 0.3929, "num_tokens": 808881586.0, "step": 1277 }, { "epoch": 0.15111741752394467, "grad_norm": 0.20052595436573029, "learning_rate": 5.795413524722207e-05, "loss": 0.4075, "num_tokens": 809481267.0, "step": 1278 }, { "epoch": 0.15123566276457373, "grad_norm": 0.15665853023529053, "learning_rate": 5.79501849759921e-05, "loss": 0.3561, "num_tokens": 810113774.0, "step": 1279 }, { "epoch": 0.1513539080052028, "grad_norm": 0.18756869435310364, "learning_rate": 5.794623104520875e-05, "loss": 0.3853, "num_tokens": 810745803.0, "step": 1280 }, { "epoch": 0.15147215324583185, "grad_norm": 0.18970626592636108, "learning_rate": 5.794227345545196e-05, "loss": 0.4111, "num_tokens": 811385263.0, "step": 1281 }, { "epoch": 0.15159039848646091, "grad_norm": 0.17431409657001495, "learning_rate": 5.7938312207302207e-05, "loss": 0.3514, "num_tokens": 812016051.0, "step": 1282 }, { "epoch": 0.15170864372708998, "grad_norm": 0.17664341628551483, "learning_rate": 5.79343473013405e-05, "loss": 0.331, "num_tokens": 812649108.0, "step": 1283 }, { "epoch": 0.15182688896771904, "grad_norm": 0.181692436337471, "learning_rate": 5.793037873814838e-05, "loss": 0.3666, "num_tokens": 813283409.0, "step": 1284 }, { "epoch": 0.15194513420834813, "grad_norm": 0.20560723543167114, "learning_rate": 5.792640651830797e-05, "loss": 0.3773, "num_tokens": 813917493.0, "step": 1285 }, { "epoch": 0.1520633794489772, "grad_norm": 0.18313612043857574, "learning_rate": 5.792243064240185e-05, "loss": 0.3863, "num_tokens": 814556038.0, "step": 1286 }, { "epoch": 0.15218162468960625, "grad_norm": 0.1859603077173233, "learning_rate": 5.791845111101319e-05, "loss": 0.3749, "num_tokens": 815190976.0, "step": 1287 }, { "epoch": 0.1522998699302353, "grad_norm": 0.187571182847023, "learning_rate": 5.791446792472571e-05, "loss": 0.3751, "num_tokens": 815830227.0, "step": 1288 }, { "epoch": 0.15241811517086437, "grad_norm": 0.17868036031723022, "learning_rate": 5.791048108412362e-05, "loss": 0.3799, "num_tokens": 816464763.0, "step": 1289 }, { "epoch": 0.15253636041149343, "grad_norm": 0.1951415091753006, "learning_rate": 5.790649058979168e-05, "loss": 0.3874, "num_tokens": 817072689.0, "step": 1290 }, { "epoch": 0.1526546056521225, "grad_norm": 0.1585177481174469, "learning_rate": 5.7902496442315224e-05, "loss": 0.3887, "num_tokens": 817703995.0, "step": 1291 }, { "epoch": 0.15277285089275155, "grad_norm": 0.17791469395160675, "learning_rate": 5.7898498642280055e-05, "loss": 0.3401, "num_tokens": 818306204.0, "step": 1292 }, { "epoch": 0.15289109613338064, "grad_norm": 0.17755120992660522, "learning_rate": 5.789449719027257e-05, "loss": 0.4031, "num_tokens": 818944794.0, "step": 1293 }, { "epoch": 0.1530093413740097, "grad_norm": 0.2117358297109604, "learning_rate": 5.789049208687968e-05, "loss": 0.4309, "num_tokens": 819580090.0, "step": 1294 }, { "epoch": 0.15312758661463877, "grad_norm": 0.19366182386875153, "learning_rate": 5.788648333268882e-05, "loss": 0.3976, "num_tokens": 820195511.0, "step": 1295 }, { "epoch": 0.15324583185526783, "grad_norm": 0.1668238490819931, "learning_rate": 5.788247092828798e-05, "loss": 0.3784, "num_tokens": 820832912.0, "step": 1296 }, { "epoch": 0.1533640770958969, "grad_norm": 0.21351125836372375, "learning_rate": 5.787845487426567e-05, "loss": 0.3883, "num_tokens": 821463804.0, "step": 1297 }, { "epoch": 0.15348232233652595, "grad_norm": 0.1870070844888687, "learning_rate": 5.787443517121095e-05, "loss": 0.3848, "num_tokens": 822096771.0, "step": 1298 }, { "epoch": 0.153600567577155, "grad_norm": 0.18743258714675903, "learning_rate": 5.78704118197134e-05, "loss": 0.3981, "num_tokens": 822724976.0, "step": 1299 }, { "epoch": 0.15371881281778407, "grad_norm": 0.18136751651763916, "learning_rate": 5.7866384820363154e-05, "loss": 0.3696, "num_tokens": 823360078.0, "step": 1300 }, { "epoch": 0.15383705805841316, "grad_norm": 0.16728819906711578, "learning_rate": 5.786235417375085e-05, "loss": 0.3661, "num_tokens": 823995903.0, "step": 1301 }, { "epoch": 0.15395530329904222, "grad_norm": 0.1906318962574005, "learning_rate": 5.78583198804677e-05, "loss": 0.4192, "num_tokens": 824634564.0, "step": 1302 }, { "epoch": 0.15407354853967128, "grad_norm": 0.18334315717220306, "learning_rate": 5.785428194110544e-05, "loss": 0.4411, "num_tokens": 825267057.0, "step": 1303 }, { "epoch": 0.15419179378030035, "grad_norm": 0.17707347869873047, "learning_rate": 5.78502403562563e-05, "loss": 0.3668, "num_tokens": 825899088.0, "step": 1304 }, { "epoch": 0.1543100390209294, "grad_norm": 0.17369075119495392, "learning_rate": 5.784619512651309e-05, "loss": 0.4, "num_tokens": 826535290.0, "step": 1305 }, { "epoch": 0.15442828426155847, "grad_norm": 0.1673600971698761, "learning_rate": 5.784214625246915e-05, "loss": 0.3808, "num_tokens": 827173746.0, "step": 1306 }, { "epoch": 0.15454652950218753, "grad_norm": 0.1781369000673294, "learning_rate": 5.783809373471835e-05, "loss": 0.3747, "num_tokens": 827804731.0, "step": 1307 }, { "epoch": 0.1546647747428166, "grad_norm": 0.18070824444293976, "learning_rate": 5.7834037573855074e-05, "loss": 0.3767, "num_tokens": 828436832.0, "step": 1308 }, { "epoch": 0.15478301998344568, "grad_norm": 0.1717315912246704, "learning_rate": 5.7829977770474265e-05, "loss": 0.352, "num_tokens": 829065711.0, "step": 1309 }, { "epoch": 0.15490126522407474, "grad_norm": 0.17846590280532837, "learning_rate": 5.78259143251714e-05, "loss": 0.3738, "num_tokens": 829699491.0, "step": 1310 }, { "epoch": 0.1550195104647038, "grad_norm": 0.2016000598669052, "learning_rate": 5.7821847238542473e-05, "loss": 0.3918, "num_tokens": 830301332.0, "step": 1311 }, { "epoch": 0.15513775570533286, "grad_norm": 0.16683803498744965, "learning_rate": 5.781777651118401e-05, "loss": 0.3543, "num_tokens": 830932269.0, "step": 1312 }, { "epoch": 0.15525600094596193, "grad_norm": 0.208168625831604, "learning_rate": 5.781370214369311e-05, "loss": 0.3977, "num_tokens": 831571426.0, "step": 1313 }, { "epoch": 0.155374246186591, "grad_norm": 0.18353724479675293, "learning_rate": 5.7809624136667365e-05, "loss": 0.3956, "num_tokens": 832211161.0, "step": 1314 }, { "epoch": 0.15549249142722005, "grad_norm": 0.18182198703289032, "learning_rate": 5.7805542490704905e-05, "loss": 0.4159, "num_tokens": 832846569.0, "step": 1315 }, { "epoch": 0.1556107366678491, "grad_norm": 0.1856834590435028, "learning_rate": 5.780145720640442e-05, "loss": 0.3792, "num_tokens": 833477855.0, "step": 1316 }, { "epoch": 0.15572898190847817, "grad_norm": 0.16982270777225494, "learning_rate": 5.779736828436511e-05, "loss": 0.3856, "num_tokens": 834112533.0, "step": 1317 }, { "epoch": 0.15584722714910726, "grad_norm": 0.17862676084041595, "learning_rate": 5.77932757251867e-05, "loss": 0.3455, "num_tokens": 834750383.0, "step": 1318 }, { "epoch": 0.15596547238973632, "grad_norm": 0.1760154515504837, "learning_rate": 5.778917952946949e-05, "loss": 0.3948, "num_tokens": 835382487.0, "step": 1319 }, { "epoch": 0.15608371763036538, "grad_norm": 0.16499872505664825, "learning_rate": 5.778507969781427e-05, "loss": 0.3629, "num_tokens": 836011005.0, "step": 1320 }, { "epoch": 0.15620196287099444, "grad_norm": 0.16312913596630096, "learning_rate": 5.778097623082238e-05, "loss": 0.3633, "num_tokens": 836645534.0, "step": 1321 }, { "epoch": 0.1563202081116235, "grad_norm": 0.1623142510652542, "learning_rate": 5.77768691290957e-05, "loss": 0.3422, "num_tokens": 837281263.0, "step": 1322 }, { "epoch": 0.15643845335225257, "grad_norm": 0.17715275287628174, "learning_rate": 5.777275839323664e-05, "loss": 0.3649, "num_tokens": 837911899.0, "step": 1323 }, { "epoch": 0.15655669859288163, "grad_norm": 0.20797595381736755, "learning_rate": 5.7768644023848126e-05, "loss": 0.416, "num_tokens": 838549137.0, "step": 1324 }, { "epoch": 0.1566749438335107, "grad_norm": 0.16768136620521545, "learning_rate": 5.776452602153365e-05, "loss": 0.3964, "num_tokens": 839185355.0, "step": 1325 }, { "epoch": 0.15679318907413978, "grad_norm": 0.1655835658311844, "learning_rate": 5.7760404386897194e-05, "loss": 0.3785, "num_tokens": 839817125.0, "step": 1326 }, { "epoch": 0.15691143431476884, "grad_norm": 0.18343134224414825, "learning_rate": 5.7756279120543316e-05, "loss": 0.3687, "num_tokens": 840448393.0, "step": 1327 }, { "epoch": 0.1570296795553979, "grad_norm": 0.1677461862564087, "learning_rate": 5.775215022307708e-05, "loss": 0.36, "num_tokens": 841086731.0, "step": 1328 }, { "epoch": 0.15714792479602696, "grad_norm": 0.18419942259788513, "learning_rate": 5.7748017695104086e-05, "loss": 0.4064, "num_tokens": 841723258.0, "step": 1329 }, { "epoch": 0.15726617003665602, "grad_norm": 0.17661339044570923, "learning_rate": 5.774388153723048e-05, "loss": 0.3581, "num_tokens": 842354306.0, "step": 1330 }, { "epoch": 0.15738441527728508, "grad_norm": 0.15641751885414124, "learning_rate": 5.773974175006291e-05, "loss": 0.3721, "num_tokens": 842990332.0, "step": 1331 }, { "epoch": 0.15750266051791414, "grad_norm": 0.1716194599866867, "learning_rate": 5.7735598334208604e-05, "loss": 0.3892, "num_tokens": 843624261.0, "step": 1332 }, { "epoch": 0.1576209057585432, "grad_norm": 0.1854345053434372, "learning_rate": 5.7731451290275284e-05, "loss": 0.3835, "num_tokens": 844262144.0, "step": 1333 }, { "epoch": 0.1577391509991723, "grad_norm": 0.18976964056491852, "learning_rate": 5.7727300618871205e-05, "loss": 0.3788, "num_tokens": 844897584.0, "step": 1334 }, { "epoch": 0.15785739623980136, "grad_norm": 0.17165213823318481, "learning_rate": 5.772314632060517e-05, "loss": 0.3907, "num_tokens": 845532833.0, "step": 1335 }, { "epoch": 0.15797564148043042, "grad_norm": 0.16549891233444214, "learning_rate": 5.771898839608652e-05, "loss": 0.3683, "num_tokens": 846132555.0, "step": 1336 }, { "epoch": 0.15809388672105948, "grad_norm": 0.20871177315711975, "learning_rate": 5.771482684592509e-05, "loss": 0.4402, "num_tokens": 846771073.0, "step": 1337 }, { "epoch": 0.15821213196168854, "grad_norm": 0.15606831014156342, "learning_rate": 5.771066167073131e-05, "loss": 0.3608, "num_tokens": 847407047.0, "step": 1338 }, { "epoch": 0.1583303772023176, "grad_norm": 0.17621371150016785, "learning_rate": 5.7706492871116064e-05, "loss": 0.3995, "num_tokens": 848038762.0, "step": 1339 }, { "epoch": 0.15844862244294666, "grad_norm": 0.1689538210630417, "learning_rate": 5.7702320447690834e-05, "loss": 0.3665, "num_tokens": 848671922.0, "step": 1340 }, { "epoch": 0.15856686768357572, "grad_norm": 0.17364315688610077, "learning_rate": 5.76981444010676e-05, "loss": 0.3828, "num_tokens": 849309141.0, "step": 1341 }, { "epoch": 0.1586851129242048, "grad_norm": 0.19010542333126068, "learning_rate": 5.76939647318589e-05, "loss": 0.3907, "num_tokens": 849947357.0, "step": 1342 }, { "epoch": 0.15880335816483387, "grad_norm": 0.1707213968038559, "learning_rate": 5.7689781440677747e-05, "loss": 0.371, "num_tokens": 850586919.0, "step": 1343 }, { "epoch": 0.15892160340546294, "grad_norm": 0.1973489373922348, "learning_rate": 5.7685594528137754e-05, "loss": 0.414, "num_tokens": 851164524.0, "step": 1344 }, { "epoch": 0.159039848646092, "grad_norm": 0.17892633378505707, "learning_rate": 5.768140399485302e-05, "loss": 0.3826, "num_tokens": 851795535.0, "step": 1345 }, { "epoch": 0.15915809388672106, "grad_norm": 0.16512104868888855, "learning_rate": 5.767720984143819e-05, "loss": 0.3736, "num_tokens": 852429267.0, "step": 1346 }, { "epoch": 0.15927633912735012, "grad_norm": 0.18079063296318054, "learning_rate": 5.7673012068508446e-05, "loss": 0.4171, "num_tokens": 853066581.0, "step": 1347 }, { "epoch": 0.15939458436797918, "grad_norm": 0.16424012184143066, "learning_rate": 5.7668810676679476e-05, "loss": 0.3758, "num_tokens": 853705006.0, "step": 1348 }, { "epoch": 0.15951282960860824, "grad_norm": 0.15844646096229553, "learning_rate": 5.766460566656755e-05, "loss": 0.3901, "num_tokens": 854335569.0, "step": 1349 }, { "epoch": 0.15963107484923733, "grad_norm": 0.16080792248249054, "learning_rate": 5.766039703878939e-05, "loss": 0.3629, "num_tokens": 854974579.0, "step": 1350 }, { "epoch": 0.1597493200898664, "grad_norm": 0.18353760242462158, "learning_rate": 5.765618479396233e-05, "loss": 0.397, "num_tokens": 855612244.0, "step": 1351 }, { "epoch": 0.15986756533049545, "grad_norm": 0.16359633207321167, "learning_rate": 5.76519689327042e-05, "loss": 0.3726, "num_tokens": 856244164.0, "step": 1352 }, { "epoch": 0.15998581057112451, "grad_norm": 0.1780007928609848, "learning_rate": 5.764774945563332e-05, "loss": 0.3856, "num_tokens": 856879791.0, "step": 1353 }, { "epoch": 0.16010405581175358, "grad_norm": 0.17300951480865479, "learning_rate": 5.7643526363368626e-05, "loss": 0.3782, "num_tokens": 857513354.0, "step": 1354 }, { "epoch": 0.16022230105238264, "grad_norm": 0.17118407785892487, "learning_rate": 5.7639299656529504e-05, "loss": 0.3571, "num_tokens": 858148669.0, "step": 1355 }, { "epoch": 0.1603405462930117, "grad_norm": 0.17600245773792267, "learning_rate": 5.7635069335735923e-05, "loss": 0.3595, "num_tokens": 858784311.0, "step": 1356 }, { "epoch": 0.16045879153364076, "grad_norm": 0.16972529888153076, "learning_rate": 5.763083540160836e-05, "loss": 0.3633, "num_tokens": 859417782.0, "step": 1357 }, { "epoch": 0.16057703677426985, "grad_norm": 0.18301771581172943, "learning_rate": 5.7626597854767815e-05, "loss": 0.3654, "num_tokens": 860048747.0, "step": 1358 }, { "epoch": 0.1606952820148989, "grad_norm": 0.18079617619514465, "learning_rate": 5.762235669583583e-05, "loss": 0.4047, "num_tokens": 860684168.0, "step": 1359 }, { "epoch": 0.16081352725552797, "grad_norm": 0.1902022361755371, "learning_rate": 5.7618111925434486e-05, "loss": 0.4, "num_tokens": 861317334.0, "step": 1360 }, { "epoch": 0.16093177249615703, "grad_norm": 0.1677486002445221, "learning_rate": 5.7613863544186366e-05, "loss": 0.4034, "num_tokens": 861948731.0, "step": 1361 }, { "epoch": 0.1610500177367861, "grad_norm": 0.18391026556491852, "learning_rate": 5.760961155271461e-05, "loss": 0.4216, "num_tokens": 862586570.0, "step": 1362 }, { "epoch": 0.16116826297741516, "grad_norm": 0.1642833948135376, "learning_rate": 5.760535595164287e-05, "loss": 0.3783, "num_tokens": 863222204.0, "step": 1363 }, { "epoch": 0.16128650821804422, "grad_norm": 0.19462759792804718, "learning_rate": 5.7601096741595345e-05, "loss": 0.3808, "num_tokens": 863857243.0, "step": 1364 }, { "epoch": 0.16140475345867328, "grad_norm": 0.14950813353061676, "learning_rate": 5.759683392319674e-05, "loss": 0.3564, "num_tokens": 864495982.0, "step": 1365 }, { "epoch": 0.16152299869930234, "grad_norm": 0.15803667902946472, "learning_rate": 5.7592567497072304e-05, "loss": 0.3271, "num_tokens": 865131852.0, "step": 1366 }, { "epoch": 0.16164124393993143, "grad_norm": 0.17030693590641022, "learning_rate": 5.758829746384781e-05, "loss": 0.3543, "num_tokens": 865767401.0, "step": 1367 }, { "epoch": 0.1617594891805605, "grad_norm": 0.1849510818719864, "learning_rate": 5.758402382414957e-05, "loss": 0.3773, "num_tokens": 866405290.0, "step": 1368 }, { "epoch": 0.16187773442118955, "grad_norm": 0.16604110598564148, "learning_rate": 5.757974657860441e-05, "loss": 0.3962, "num_tokens": 867039158.0, "step": 1369 }, { "epoch": 0.1619959796618186, "grad_norm": 0.17715173959732056, "learning_rate": 5.7575465727839706e-05, "loss": 0.3754, "num_tokens": 867673395.0, "step": 1370 }, { "epoch": 0.16211422490244767, "grad_norm": 0.1767461746931076, "learning_rate": 5.757118127248333e-05, "loss": 0.3986, "num_tokens": 868310685.0, "step": 1371 }, { "epoch": 0.16223247014307673, "grad_norm": 0.1700175553560257, "learning_rate": 5.756689321316372e-05, "loss": 0.3939, "num_tokens": 868923042.0, "step": 1372 }, { "epoch": 0.1623507153837058, "grad_norm": 0.1728692352771759, "learning_rate": 5.7562601550509805e-05, "loss": 0.4214, "num_tokens": 869556905.0, "step": 1373 }, { "epoch": 0.16246896062433486, "grad_norm": 0.15773224830627441, "learning_rate": 5.7558306285151084e-05, "loss": 0.329, "num_tokens": 870190559.0, "step": 1374 }, { "epoch": 0.16258720586496395, "grad_norm": 0.1808062493801117, "learning_rate": 5.7554007417717555e-05, "loss": 0.3622, "num_tokens": 870829637.0, "step": 1375 }, { "epoch": 0.162705451105593, "grad_norm": 0.1665572077035904, "learning_rate": 5.754970494883973e-05, "loss": 0.3561, "num_tokens": 871466630.0, "step": 1376 }, { "epoch": 0.16282369634622207, "grad_norm": 0.17975753545761108, "learning_rate": 5.7545398879148704e-05, "loss": 0.3862, "num_tokens": 872077498.0, "step": 1377 }, { "epoch": 0.16294194158685113, "grad_norm": 0.20208002626895905, "learning_rate": 5.7541089209276044e-05, "loss": 0.3958, "num_tokens": 872706532.0, "step": 1378 }, { "epoch": 0.1630601868274802, "grad_norm": 0.16003376245498657, "learning_rate": 5.753677593985388e-05, "loss": 0.364, "num_tokens": 873345301.0, "step": 1379 }, { "epoch": 0.16317843206810925, "grad_norm": 0.16806146502494812, "learning_rate": 5.7532459071514856e-05, "loss": 0.3614, "num_tokens": 873977689.0, "step": 1380 }, { "epoch": 0.16329667730873831, "grad_norm": 0.184342160820961, "learning_rate": 5.7528138604892146e-05, "loss": 0.3763, "num_tokens": 874615260.0, "step": 1381 }, { "epoch": 0.16341492254936738, "grad_norm": 0.16699367761611938, "learning_rate": 5.752381454061945e-05, "loss": 0.3839, "num_tokens": 875248450.0, "step": 1382 }, { "epoch": 0.16353316778999646, "grad_norm": 0.17636771500110626, "learning_rate": 5.7519486879331004e-05, "loss": 0.4221, "num_tokens": 875858896.0, "step": 1383 }, { "epoch": 0.16365141303062553, "grad_norm": 0.16423584520816803, "learning_rate": 5.751515562166155e-05, "loss": 0.3577, "num_tokens": 876495972.0, "step": 1384 }, { "epoch": 0.1637696582712546, "grad_norm": 0.17457082867622375, "learning_rate": 5.7510820768246374e-05, "loss": 0.4052, "num_tokens": 877126376.0, "step": 1385 }, { "epoch": 0.16388790351188365, "grad_norm": 0.15950888395309448, "learning_rate": 5.7506482319721314e-05, "loss": 0.3422, "num_tokens": 877761789.0, "step": 1386 }, { "epoch": 0.1640061487525127, "grad_norm": 0.1940324604511261, "learning_rate": 5.750214027672267e-05, "loss": 0.4023, "num_tokens": 878356527.0, "step": 1387 }, { "epoch": 0.16412439399314177, "grad_norm": 0.1596798449754715, "learning_rate": 5.749779463988735e-05, "loss": 0.3839, "num_tokens": 878990451.0, "step": 1388 }, { "epoch": 0.16424263923377083, "grad_norm": 0.16422350704669952, "learning_rate": 5.749344540985272e-05, "loss": 0.34, "num_tokens": 879620939.0, "step": 1389 }, { "epoch": 0.1643608844743999, "grad_norm": 0.1858249455690384, "learning_rate": 5.7489092587256714e-05, "loss": 0.3808, "num_tokens": 880257254.0, "step": 1390 }, { "epoch": 0.16447912971502898, "grad_norm": 0.19918718934059143, "learning_rate": 5.748473617273777e-05, "loss": 0.4159, "num_tokens": 880896268.0, "step": 1391 }, { "epoch": 0.16459737495565804, "grad_norm": 0.1766170710325241, "learning_rate": 5.7480376166934866e-05, "loss": 0.3674, "num_tokens": 881531804.0, "step": 1392 }, { "epoch": 0.1647156201962871, "grad_norm": 0.1807500123977661, "learning_rate": 5.74760125704875e-05, "loss": 0.3662, "num_tokens": 882135234.0, "step": 1393 }, { "epoch": 0.16483386543691617, "grad_norm": 0.18267473578453064, "learning_rate": 5.747164538403571e-05, "loss": 0.3586, "num_tokens": 882730906.0, "step": 1394 }, { "epoch": 0.16495211067754523, "grad_norm": 0.18741489946842194, "learning_rate": 5.746727460822004e-05, "loss": 0.4045, "num_tokens": 883362306.0, "step": 1395 }, { "epoch": 0.1650703559181743, "grad_norm": 0.17373774945735931, "learning_rate": 5.7462900243681575e-05, "loss": 0.3628, "num_tokens": 884001035.0, "step": 1396 }, { "epoch": 0.16518860115880335, "grad_norm": 0.17788158357143402, "learning_rate": 5.745852229106192e-05, "loss": 0.3894, "num_tokens": 884635015.0, "step": 1397 }, { "epoch": 0.1653068463994324, "grad_norm": 0.19019939005374908, "learning_rate": 5.7454140751003215e-05, "loss": 0.3977, "num_tokens": 885272613.0, "step": 1398 }, { "epoch": 0.1654250916400615, "grad_norm": 0.1818268895149231, "learning_rate": 5.7449755624148116e-05, "loss": 0.3315, "num_tokens": 885905643.0, "step": 1399 }, { "epoch": 0.16554333688069056, "grad_norm": 0.16635698080062866, "learning_rate": 5.744536691113981e-05, "loss": 0.3702, "num_tokens": 886536895.0, "step": 1400 }, { "epoch": 0.16566158212131962, "grad_norm": 0.15699191391468048, "learning_rate": 5.7440974612622e-05, "loss": 0.3503, "num_tokens": 887173320.0, "step": 1401 }, { "epoch": 0.16577982736194868, "grad_norm": 0.1800384670495987, "learning_rate": 5.7436578729238926e-05, "loss": 0.3877, "num_tokens": 887807056.0, "step": 1402 }, { "epoch": 0.16589807260257775, "grad_norm": 0.17141304910182953, "learning_rate": 5.743217926163538e-05, "loss": 0.3951, "num_tokens": 888446558.0, "step": 1403 }, { "epoch": 0.1660163178432068, "grad_norm": 0.1724863350391388, "learning_rate": 5.7427776210456605e-05, "loss": 0.369, "num_tokens": 889079741.0, "step": 1404 }, { "epoch": 0.16613456308383587, "grad_norm": 0.1625068634748459, "learning_rate": 5.742336957634844e-05, "loss": 0.4094, "num_tokens": 889708375.0, "step": 1405 }, { "epoch": 0.16625280832446493, "grad_norm": 0.1718367487192154, "learning_rate": 5.741895935995724e-05, "loss": 0.3531, "num_tokens": 890343205.0, "step": 1406 }, { "epoch": 0.16637105356509402, "grad_norm": 0.1586138904094696, "learning_rate": 5.741454556192984e-05, "loss": 0.3562, "num_tokens": 890980377.0, "step": 1407 }, { "epoch": 0.16648929880572308, "grad_norm": 0.15216132998466492, "learning_rate": 5.741012818291365e-05, "loss": 0.3552, "num_tokens": 891610939.0, "step": 1408 }, { "epoch": 0.16660754404635214, "grad_norm": 0.1776556521654129, "learning_rate": 5.740570722355659e-05, "loss": 0.4038, "num_tokens": 892248590.0, "step": 1409 }, { "epoch": 0.1667257892869812, "grad_norm": 0.20102806389331818, "learning_rate": 5.7401282684507084e-05, "loss": 0.4652, "num_tokens": 892884534.0, "step": 1410 }, { "epoch": 0.16684403452761026, "grad_norm": 0.17488045990467072, "learning_rate": 5.739685456641411e-05, "loss": 0.35, "num_tokens": 893522439.0, "step": 1411 }, { "epoch": 0.16696227976823932, "grad_norm": 0.17362560331821442, "learning_rate": 5.739242286992717e-05, "loss": 0.3602, "num_tokens": 894159327.0, "step": 1412 }, { "epoch": 0.16708052500886839, "grad_norm": 0.1951437145471573, "learning_rate": 5.7387987595696254e-05, "loss": 0.4054, "num_tokens": 894796406.0, "step": 1413 }, { "epoch": 0.16719877024949745, "grad_norm": 0.17659123241901398, "learning_rate": 5.738354874437192e-05, "loss": 0.3769, "num_tokens": 895433169.0, "step": 1414 }, { "epoch": 0.16731701549012654, "grad_norm": 0.1672336757183075, "learning_rate": 5.737910631660524e-05, "loss": 0.3811, "num_tokens": 896065522.0, "step": 1415 }, { "epoch": 0.1674352607307556, "grad_norm": 0.17975319921970367, "learning_rate": 5.737466031304778e-05, "loss": 0.3564, "num_tokens": 896697566.0, "step": 1416 }, { "epoch": 0.16755350597138466, "grad_norm": 0.16962675750255585, "learning_rate": 5.737021073435168e-05, "loss": 0.3735, "num_tokens": 897331944.0, "step": 1417 }, { "epoch": 0.16767175121201372, "grad_norm": 0.19549699127674103, "learning_rate": 5.736575758116958e-05, "loss": 0.4277, "num_tokens": 897965210.0, "step": 1418 }, { "epoch": 0.16778999645264278, "grad_norm": 0.17434746026992798, "learning_rate": 5.736130085415462e-05, "loss": 0.3862, "num_tokens": 898598679.0, "step": 1419 }, { "epoch": 0.16790824169327184, "grad_norm": 0.19372202455997467, "learning_rate": 5.735684055396051e-05, "loss": 0.4192, "num_tokens": 899230065.0, "step": 1420 }, { "epoch": 0.1680264869339009, "grad_norm": 0.1553725153207779, "learning_rate": 5.7352376681241445e-05, "loss": 0.3354, "num_tokens": 899866736.0, "step": 1421 }, { "epoch": 0.16814473217452997, "grad_norm": 0.1704157590866089, "learning_rate": 5.734790923665218e-05, "loss": 0.3561, "num_tokens": 900503362.0, "step": 1422 }, { "epoch": 0.16826297741515903, "grad_norm": 0.16170266270637512, "learning_rate": 5.734343822084796e-05, "loss": 0.3806, "num_tokens": 901142459.0, "step": 1423 }, { "epoch": 0.16838122265578812, "grad_norm": 0.1833435744047165, "learning_rate": 5.733896363448457e-05, "loss": 0.3791, "num_tokens": 901768662.0, "step": 1424 }, { "epoch": 0.16849946789641718, "grad_norm": 0.1675805002450943, "learning_rate": 5.733448547821832e-05, "loss": 0.3735, "num_tokens": 902398246.0, "step": 1425 }, { "epoch": 0.16861771313704624, "grad_norm": 0.17453108727931976, "learning_rate": 5.733000375270605e-05, "loss": 0.364, "num_tokens": 903032227.0, "step": 1426 }, { "epoch": 0.1687359583776753, "grad_norm": 0.1849447637796402, "learning_rate": 5.7325518458605086e-05, "loss": 0.395, "num_tokens": 903666346.0, "step": 1427 }, { "epoch": 0.16885420361830436, "grad_norm": 0.1807112991809845, "learning_rate": 5.732102959657334e-05, "loss": 0.4028, "num_tokens": 904303678.0, "step": 1428 }, { "epoch": 0.16897244885893342, "grad_norm": 0.16616033017635345, "learning_rate": 5.731653716726919e-05, "loss": 0.3705, "num_tokens": 904930363.0, "step": 1429 }, { "epoch": 0.16909069409956248, "grad_norm": 0.2000718116760254, "learning_rate": 5.731204117135158e-05, "loss": 0.4329, "num_tokens": 905564490.0, "step": 1430 }, { "epoch": 0.16920893934019154, "grad_norm": 0.15403352677822113, "learning_rate": 5.7307541609479927e-05, "loss": 0.3976, "num_tokens": 906200069.0, "step": 1431 }, { "epoch": 0.16932718458082063, "grad_norm": 0.17509843409061432, "learning_rate": 5.7303038482314235e-05, "loss": 0.3538, "num_tokens": 906837954.0, "step": 1432 }, { "epoch": 0.1694454298214497, "grad_norm": 0.1721985638141632, "learning_rate": 5.7298531790514984e-05, "loss": 0.3455, "num_tokens": 907469285.0, "step": 1433 }, { "epoch": 0.16956367506207876, "grad_norm": 0.1995219588279724, "learning_rate": 5.7294021534743184e-05, "loss": 0.4362, "num_tokens": 908105346.0, "step": 1434 }, { "epoch": 0.16968192030270782, "grad_norm": 0.15750497579574585, "learning_rate": 5.7289507715660373e-05, "loss": 0.3633, "num_tokens": 908736680.0, "step": 1435 }, { "epoch": 0.16980016554333688, "grad_norm": 0.1895129382610321, "learning_rate": 5.728499033392863e-05, "loss": 0.3769, "num_tokens": 909376079.0, "step": 1436 }, { "epoch": 0.16991841078396594, "grad_norm": 0.16165003180503845, "learning_rate": 5.7280469390210525e-05, "loss": 0.3934, "num_tokens": 910014511.0, "step": 1437 }, { "epoch": 0.170036656024595, "grad_norm": 0.15552614629268646, "learning_rate": 5.727594488516917e-05, "loss": 0.3628, "num_tokens": 910650479.0, "step": 1438 }, { "epoch": 0.17015490126522406, "grad_norm": 0.18032468855381012, "learning_rate": 5.727141681946819e-05, "loss": 0.3682, "num_tokens": 911289548.0, "step": 1439 }, { "epoch": 0.17027314650585315, "grad_norm": 0.1740652173757553, "learning_rate": 5.7266885193771734e-05, "loss": 0.3857, "num_tokens": 911920115.0, "step": 1440 }, { "epoch": 0.1703913917464822, "grad_norm": 0.1717079132795334, "learning_rate": 5.726235000874448e-05, "loss": 0.389, "num_tokens": 912552910.0, "step": 1441 }, { "epoch": 0.17050963698711127, "grad_norm": 0.15245652198791504, "learning_rate": 5.725781126505163e-05, "loss": 0.3654, "num_tokens": 913186992.0, "step": 1442 }, { "epoch": 0.17062788222774034, "grad_norm": 0.16250964999198914, "learning_rate": 5.725326896335889e-05, "loss": 0.3737, "num_tokens": 913822413.0, "step": 1443 }, { "epoch": 0.1707461274683694, "grad_norm": 0.16571637988090515, "learning_rate": 5.7248723104332507e-05, "loss": 0.3461, "num_tokens": 914458604.0, "step": 1444 }, { "epoch": 0.17086437270899846, "grad_norm": 0.17581582069396973, "learning_rate": 5.724417368863924e-05, "loss": 0.3876, "num_tokens": 915097439.0, "step": 1445 }, { "epoch": 0.17098261794962752, "grad_norm": 0.16777122020721436, "learning_rate": 5.723962071694637e-05, "loss": 0.3953, "num_tokens": 915732434.0, "step": 1446 }, { "epoch": 0.17110086319025658, "grad_norm": 0.1666746586561203, "learning_rate": 5.72350641899217e-05, "loss": 0.3867, "num_tokens": 916361639.0, "step": 1447 }, { "epoch": 0.17121910843088567, "grad_norm": 0.1826799213886261, "learning_rate": 5.723050410823355e-05, "loss": 0.3721, "num_tokens": 916961319.0, "step": 1448 }, { "epoch": 0.17133735367151473, "grad_norm": 0.18067409098148346, "learning_rate": 5.722594047255079e-05, "loss": 0.3882, "num_tokens": 917600686.0, "step": 1449 }, { "epoch": 0.1714555989121438, "grad_norm": 0.15862010419368744, "learning_rate": 5.722137328354277e-05, "loss": 0.3788, "num_tokens": 918239869.0, "step": 1450 }, { "epoch": 0.17157384415277285, "grad_norm": 0.16484549641609192, "learning_rate": 5.721680254187939e-05, "loss": 0.3707, "num_tokens": 918877611.0, "step": 1451 }, { "epoch": 0.17169208939340191, "grad_norm": 0.16367289423942566, "learning_rate": 5.7212228248231046e-05, "loss": 0.4007, "num_tokens": 919517243.0, "step": 1452 }, { "epoch": 0.17181033463403098, "grad_norm": 0.1565965861082077, "learning_rate": 5.720765040326868e-05, "loss": 0.3036, "num_tokens": 920155279.0, "step": 1453 }, { "epoch": 0.17192857987466004, "grad_norm": 0.18695995211601257, "learning_rate": 5.720306900766374e-05, "loss": 0.361, "num_tokens": 920791529.0, "step": 1454 }, { "epoch": 0.1720468251152891, "grad_norm": 0.18436199426651, "learning_rate": 5.719848406208821e-05, "loss": 0.4121, "num_tokens": 921421972.0, "step": 1455 }, { "epoch": 0.1721650703559182, "grad_norm": 0.18362797796726227, "learning_rate": 5.719389556721457e-05, "loss": 0.4048, "num_tokens": 922059966.0, "step": 1456 }, { "epoch": 0.17228331559654725, "grad_norm": 0.15684004127979279, "learning_rate": 5.7189303523715836e-05, "loss": 0.3631, "num_tokens": 922694070.0, "step": 1457 }, { "epoch": 0.1724015608371763, "grad_norm": 0.17251646518707275, "learning_rate": 5.718470793226557e-05, "loss": 0.3808, "num_tokens": 923326220.0, "step": 1458 }, { "epoch": 0.17251980607780537, "grad_norm": 0.17562457919120789, "learning_rate": 5.718010879353778e-05, "loss": 0.408, "num_tokens": 923963438.0, "step": 1459 }, { "epoch": 0.17263805131843443, "grad_norm": 0.17615176737308502, "learning_rate": 5.717550610820709e-05, "loss": 0.4055, "num_tokens": 924593547.0, "step": 1460 }, { "epoch": 0.1727562965590635, "grad_norm": 0.18110738694667816, "learning_rate": 5.717089987694857e-05, "loss": 0.3541, "num_tokens": 925228798.0, "step": 1461 }, { "epoch": 0.17287454179969255, "grad_norm": 0.18025967478752136, "learning_rate": 5.7166290100437834e-05, "loss": 0.3951, "num_tokens": 925861981.0, "step": 1462 }, { "epoch": 0.17299278704032162, "grad_norm": 0.17541316151618958, "learning_rate": 5.716167677935102e-05, "loss": 0.3857, "num_tokens": 926497587.0, "step": 1463 }, { "epoch": 0.1731110322809507, "grad_norm": 0.15674187242984772, "learning_rate": 5.71570599143648e-05, "loss": 0.3657, "num_tokens": 927132750.0, "step": 1464 }, { "epoch": 0.17322927752157977, "grad_norm": 0.16255369782447815, "learning_rate": 5.7152439506156334e-05, "loss": 0.3687, "num_tokens": 927764160.0, "step": 1465 }, { "epoch": 0.17334752276220883, "grad_norm": 0.16576018929481506, "learning_rate": 5.714781555540332e-05, "loss": 0.3541, "num_tokens": 928403130.0, "step": 1466 }, { "epoch": 0.1734657680028379, "grad_norm": 0.1667606085538864, "learning_rate": 5.714318806278398e-05, "loss": 0.3763, "num_tokens": 929035983.0, "step": 1467 }, { "epoch": 0.17358401324346695, "grad_norm": 0.17444296181201935, "learning_rate": 5.7138557028977036e-05, "loss": 0.4094, "num_tokens": 929673737.0, "step": 1468 }, { "epoch": 0.173702258484096, "grad_norm": 0.1654641181230545, "learning_rate": 5.713392245466175e-05, "loss": 0.3911, "num_tokens": 930303540.0, "step": 1469 }, { "epoch": 0.17382050372472507, "grad_norm": 0.19520190358161926, "learning_rate": 5.712928434051791e-05, "loss": 0.4113, "num_tokens": 930934690.0, "step": 1470 }, { "epoch": 0.17393874896535413, "grad_norm": 0.1614913046360016, "learning_rate": 5.712464268722578e-05, "loss": 0.3931, "num_tokens": 931570813.0, "step": 1471 }, { "epoch": 0.1740569942059832, "grad_norm": 0.168425515294075, "learning_rate": 5.711999749546619e-05, "loss": 0.3763, "num_tokens": 932203713.0, "step": 1472 }, { "epoch": 0.17417523944661228, "grad_norm": 0.17640207707881927, "learning_rate": 5.711534876592048e-05, "loss": 0.3885, "num_tokens": 932842100.0, "step": 1473 }, { "epoch": 0.17429348468724135, "grad_norm": 0.1586141735315323, "learning_rate": 5.711069649927046e-05, "loss": 0.3552, "num_tokens": 933475258.0, "step": 1474 }, { "epoch": 0.1744117299278704, "grad_norm": 0.15493513643741608, "learning_rate": 5.7106040696198546e-05, "loss": 0.366, "num_tokens": 934112965.0, "step": 1475 }, { "epoch": 0.17452997516849947, "grad_norm": 0.17881877720355988, "learning_rate": 5.71013813573876e-05, "loss": 0.4003, "num_tokens": 934737655.0, "step": 1476 }, { "epoch": 0.17464822040912853, "grad_norm": 0.16451947391033173, "learning_rate": 5.709671848352103e-05, "loss": 0.3694, "num_tokens": 935377274.0, "step": 1477 }, { "epoch": 0.1747664656497576, "grad_norm": 0.17068316042423248, "learning_rate": 5.7092052075282754e-05, "loss": 0.3617, "num_tokens": 936011871.0, "step": 1478 }, { "epoch": 0.17488471089038665, "grad_norm": 0.18479599058628082, "learning_rate": 5.708738213335723e-05, "loss": 0.4204, "num_tokens": 936647042.0, "step": 1479 }, { "epoch": 0.1750029561310157, "grad_norm": 0.16451942920684814, "learning_rate": 5.708270865842941e-05, "loss": 0.3689, "num_tokens": 937283368.0, "step": 1480 }, { "epoch": 0.1751212013716448, "grad_norm": 0.16067013144493103, "learning_rate": 5.707803165118477e-05, "loss": 0.378, "num_tokens": 937919785.0, "step": 1481 }, { "epoch": 0.17523944661227386, "grad_norm": 0.16603684425354004, "learning_rate": 5.707335111230931e-05, "loss": 0.3692, "num_tokens": 938554025.0, "step": 1482 }, { "epoch": 0.17535769185290292, "grad_norm": 0.1576988250017166, "learning_rate": 5.706866704248955e-05, "loss": 0.3661, "num_tokens": 939191403.0, "step": 1483 }, { "epoch": 0.175475937093532, "grad_norm": 0.1843055933713913, "learning_rate": 5.7063979442412536e-05, "loss": 0.4091, "num_tokens": 939827159.0, "step": 1484 }, { "epoch": 0.17559418233416105, "grad_norm": 0.1624571830034256, "learning_rate": 5.705928831276578e-05, "loss": 0.3569, "num_tokens": 940465371.0, "step": 1485 }, { "epoch": 0.1757124275747901, "grad_norm": 0.19993843138217926, "learning_rate": 5.705459365423739e-05, "loss": 0.4194, "num_tokens": 941102114.0, "step": 1486 }, { "epoch": 0.17583067281541917, "grad_norm": 0.17558766901493073, "learning_rate": 5.704989546751592e-05, "loss": 0.3928, "num_tokens": 941738142.0, "step": 1487 }, { "epoch": 0.17594891805604823, "grad_norm": 0.15563024580478668, "learning_rate": 5.70451937532905e-05, "loss": 0.339, "num_tokens": 942366943.0, "step": 1488 }, { "epoch": 0.17606716329667732, "grad_norm": 0.18981914222240448, "learning_rate": 5.704048851225074e-05, "loss": 0.3893, "num_tokens": 942995207.0, "step": 1489 }, { "epoch": 0.17618540853730638, "grad_norm": 0.19932571053504944, "learning_rate": 5.7035779745086775e-05, "loss": 0.3988, "num_tokens": 943631334.0, "step": 1490 }, { "epoch": 0.17630365377793544, "grad_norm": 0.1697188913822174, "learning_rate": 5.703106745248927e-05, "loss": 0.375, "num_tokens": 944264751.0, "step": 1491 }, { "epoch": 0.1764218990185645, "grad_norm": 0.20995450019836426, "learning_rate": 5.702635163514939e-05, "loss": 0.4091, "num_tokens": 944901410.0, "step": 1492 }, { "epoch": 0.17654014425919357, "grad_norm": 0.16497501730918884, "learning_rate": 5.702163229375882e-05, "loss": 0.3829, "num_tokens": 945538374.0, "step": 1493 }, { "epoch": 0.17665838949982263, "grad_norm": 0.17477144300937653, "learning_rate": 5.7016909429009785e-05, "loss": 0.362, "num_tokens": 946170416.0, "step": 1494 }, { "epoch": 0.1767766347404517, "grad_norm": 0.18215693533420563, "learning_rate": 5.7012183041594986e-05, "loss": 0.3675, "num_tokens": 946805320.0, "step": 1495 }, { "epoch": 0.17689487998108075, "grad_norm": 0.2018221914768219, "learning_rate": 5.700745313220768e-05, "loss": 0.4189, "num_tokens": 947437269.0, "step": 1496 }, { "epoch": 0.17701312522170984, "grad_norm": 0.1752748042345047, "learning_rate": 5.7002719701541616e-05, "loss": 0.408, "num_tokens": 948074252.0, "step": 1497 }, { "epoch": 0.1771313704623389, "grad_norm": 0.15957944095134735, "learning_rate": 5.6997982750291074e-05, "loss": 0.3306, "num_tokens": 948701457.0, "step": 1498 }, { "epoch": 0.17724961570296796, "grad_norm": 0.16693972051143646, "learning_rate": 5.699324227915083e-05, "loss": 0.3767, "num_tokens": 949332037.0, "step": 1499 }, { "epoch": 0.17736786094359702, "grad_norm": 0.15751278400421143, "learning_rate": 5.69884982888162e-05, "loss": 0.3692, "num_tokens": 949967678.0, "step": 1500 }, { "epoch": 0.17748610618422608, "grad_norm": 0.16588011384010315, "learning_rate": 5.6983750779983e-05, "loss": 0.3643, "num_tokens": 950600709.0, "step": 1501 }, { "epoch": 0.17760435142485514, "grad_norm": 0.1865762621164322, "learning_rate": 5.697899975334758e-05, "loss": 0.3993, "num_tokens": 951227404.0, "step": 1502 }, { "epoch": 0.1777225966654842, "grad_norm": 0.17431242763996124, "learning_rate": 5.697424520960679e-05, "loss": 0.3819, "num_tokens": 951866625.0, "step": 1503 }, { "epoch": 0.17784084190611327, "grad_norm": 0.16422583162784576, "learning_rate": 5.696948714945798e-05, "loss": 0.3688, "num_tokens": 952502095.0, "step": 1504 }, { "epoch": 0.17795908714674236, "grad_norm": 0.16826240718364716, "learning_rate": 5.696472557359905e-05, "loss": 0.3871, "num_tokens": 953134327.0, "step": 1505 }, { "epoch": 0.17807733238737142, "grad_norm": 0.16753126680850983, "learning_rate": 5.695996048272842e-05, "loss": 0.3663, "num_tokens": 953773701.0, "step": 1506 }, { "epoch": 0.17819557762800048, "grad_norm": 0.16235628724098206, "learning_rate": 5.695519187754498e-05, "loss": 0.3929, "num_tokens": 954406325.0, "step": 1507 }, { "epoch": 0.17831382286862954, "grad_norm": 0.1876831352710724, "learning_rate": 5.695041975874817e-05, "loss": 0.3734, "num_tokens": 955041716.0, "step": 1508 }, { "epoch": 0.1784320681092586, "grad_norm": 0.1801062524318695, "learning_rate": 5.694564412703795e-05, "loss": 0.3834, "num_tokens": 955678383.0, "step": 1509 }, { "epoch": 0.17855031334988766, "grad_norm": 0.18182380497455597, "learning_rate": 5.694086498311475e-05, "loss": 0.3986, "num_tokens": 956315561.0, "step": 1510 }, { "epoch": 0.17866855859051672, "grad_norm": 0.21927852928638458, "learning_rate": 5.693608232767959e-05, "loss": 0.4004, "num_tokens": 956944551.0, "step": 1511 }, { "epoch": 0.17878680383114579, "grad_norm": 0.18574084341526031, "learning_rate": 5.693129616143393e-05, "loss": 0.3903, "num_tokens": 957583170.0, "step": 1512 }, { "epoch": 0.17890504907177487, "grad_norm": 0.17611847817897797, "learning_rate": 5.6926506485079805e-05, "loss": 0.3461, "num_tokens": 958218955.0, "step": 1513 }, { "epoch": 0.17902329431240394, "grad_norm": 0.17276743054389954, "learning_rate": 5.692171329931971e-05, "loss": 0.361, "num_tokens": 958852938.0, "step": 1514 }, { "epoch": 0.179141539553033, "grad_norm": 0.19187121093273163, "learning_rate": 5.691691660485671e-05, "loss": 0.374, "num_tokens": 959490199.0, "step": 1515 }, { "epoch": 0.17925978479366206, "grad_norm": 0.18945401906967163, "learning_rate": 5.691211640239434e-05, "loss": 0.3869, "num_tokens": 960127283.0, "step": 1516 }, { "epoch": 0.17937803003429112, "grad_norm": 0.19083687663078308, "learning_rate": 5.6907312692636665e-05, "loss": 0.3831, "num_tokens": 960756987.0, "step": 1517 }, { "epoch": 0.17949627527492018, "grad_norm": 0.1904890090227127, "learning_rate": 5.690250547628828e-05, "loss": 0.3904, "num_tokens": 961395831.0, "step": 1518 }, { "epoch": 0.17961452051554924, "grad_norm": 0.1716516762971878, "learning_rate": 5.689769475405427e-05, "loss": 0.3725, "num_tokens": 962031388.0, "step": 1519 }, { "epoch": 0.1797327657561783, "grad_norm": 0.16732878983020782, "learning_rate": 5.6892880526640245e-05, "loss": 0.3293, "num_tokens": 962666836.0, "step": 1520 }, { "epoch": 0.1798510109968074, "grad_norm": 0.17494244873523712, "learning_rate": 5.6888062794752344e-05, "loss": 0.3568, "num_tokens": 963305624.0, "step": 1521 }, { "epoch": 0.17996925623743645, "grad_norm": 0.17904774844646454, "learning_rate": 5.688324155909717e-05, "loss": 0.372, "num_tokens": 963941024.0, "step": 1522 }, { "epoch": 0.18008750147806551, "grad_norm": 0.15880945324897766, "learning_rate": 5.6878416820381914e-05, "loss": 0.3756, "num_tokens": 964574609.0, "step": 1523 }, { "epoch": 0.18020574671869458, "grad_norm": 0.17286312580108643, "learning_rate": 5.687358857931422e-05, "loss": 0.4225, "num_tokens": 965214035.0, "step": 1524 }, { "epoch": 0.18032399195932364, "grad_norm": 0.170638307929039, "learning_rate": 5.686875683660227e-05, "loss": 0.3646, "num_tokens": 965848776.0, "step": 1525 }, { "epoch": 0.1804422371999527, "grad_norm": 0.1648791879415512, "learning_rate": 5.686392159295476e-05, "loss": 0.363, "num_tokens": 966478626.0, "step": 1526 }, { "epoch": 0.18056048244058176, "grad_norm": 0.18919819593429565, "learning_rate": 5.68590828490809e-05, "loss": 0.4077, "num_tokens": 967079379.0, "step": 1527 }, { "epoch": 0.18067872768121082, "grad_norm": 0.15450045466423035, "learning_rate": 5.68542406056904e-05, "loss": 0.3593, "num_tokens": 967715213.0, "step": 1528 }, { "epoch": 0.18079697292183988, "grad_norm": 0.17305758595466614, "learning_rate": 5.68493948634935e-05, "loss": 0.3868, "num_tokens": 968326010.0, "step": 1529 }, { "epoch": 0.18091521816246897, "grad_norm": 0.1639513224363327, "learning_rate": 5.684454562320095e-05, "loss": 0.364, "num_tokens": 968964068.0, "step": 1530 }, { "epoch": 0.18103346340309803, "grad_norm": 0.16958145797252655, "learning_rate": 5.6839692885524e-05, "loss": 0.3995, "num_tokens": 969598545.0, "step": 1531 }, { "epoch": 0.1811517086437271, "grad_norm": 0.15846242010593414, "learning_rate": 5.683483665117443e-05, "loss": 0.405, "num_tokens": 970231579.0, "step": 1532 }, { "epoch": 0.18126995388435616, "grad_norm": 0.1584145575761795, "learning_rate": 5.6829976920864524e-05, "loss": 0.3657, "num_tokens": 970868020.0, "step": 1533 }, { "epoch": 0.18138819912498522, "grad_norm": 0.1566583216190338, "learning_rate": 5.6825113695307074e-05, "loss": 0.3898, "num_tokens": 971501577.0, "step": 1534 }, { "epoch": 0.18150644436561428, "grad_norm": 0.18424497544765472, "learning_rate": 5.682024697521539e-05, "loss": 0.4149, "num_tokens": 972136461.0, "step": 1535 }, { "epoch": 0.18162468960624334, "grad_norm": 0.15541110932826996, "learning_rate": 5.6815376761303305e-05, "loss": 0.347, "num_tokens": 972774594.0, "step": 1536 }, { "epoch": 0.1817429348468724, "grad_norm": 0.1932457685470581, "learning_rate": 5.6810503054285145e-05, "loss": 0.3792, "num_tokens": 973410075.0, "step": 1537 }, { "epoch": 0.1818611800875015, "grad_norm": 0.15818209946155548, "learning_rate": 5.680562585487577e-05, "loss": 0.3929, "num_tokens": 974049163.0, "step": 1538 }, { "epoch": 0.18197942532813055, "grad_norm": 0.15644583106040955, "learning_rate": 5.6800745163790534e-05, "loss": 0.3581, "num_tokens": 974681502.0, "step": 1539 }, { "epoch": 0.1820976705687596, "grad_norm": 0.1770258992910385, "learning_rate": 5.67958609817453e-05, "loss": 0.4022, "num_tokens": 975312361.0, "step": 1540 }, { "epoch": 0.18221591580938867, "grad_norm": 0.15786851942539215, "learning_rate": 5.679097330945647e-05, "loss": 0.3827, "num_tokens": 975944571.0, "step": 1541 }, { "epoch": 0.18233416105001773, "grad_norm": 0.17109793424606323, "learning_rate": 5.6786082147640925e-05, "loss": 0.3534, "num_tokens": 976578990.0, "step": 1542 }, { "epoch": 0.1824524062906468, "grad_norm": 0.16347132623195648, "learning_rate": 5.678118749701608e-05, "loss": 0.3524, "num_tokens": 977213113.0, "step": 1543 }, { "epoch": 0.18257065153127586, "grad_norm": 0.18392309546470642, "learning_rate": 5.677628935829986e-05, "loss": 0.3809, "num_tokens": 977846686.0, "step": 1544 }, { "epoch": 0.18268889677190492, "grad_norm": 0.16528384387493134, "learning_rate": 5.677138773221068e-05, "loss": 0.3599, "num_tokens": 978484082.0, "step": 1545 }, { "epoch": 0.182807142012534, "grad_norm": 0.168715238571167, "learning_rate": 5.676648261946751e-05, "loss": 0.381, "num_tokens": 979120007.0, "step": 1546 }, { "epoch": 0.18292538725316307, "grad_norm": 0.14987052977085114, "learning_rate": 5.676157402078978e-05, "loss": 0.3306, "num_tokens": 979754257.0, "step": 1547 }, { "epoch": 0.18304363249379213, "grad_norm": 0.1776624172925949, "learning_rate": 5.6756661936897456e-05, "loss": 0.3738, "num_tokens": 980379873.0, "step": 1548 }, { "epoch": 0.1831618777344212, "grad_norm": 0.16085457801818848, "learning_rate": 5.6751746368511035e-05, "loss": 0.3591, "num_tokens": 981012470.0, "step": 1549 }, { "epoch": 0.18328012297505025, "grad_norm": 0.18967899680137634, "learning_rate": 5.674682731635149e-05, "loss": 0.4213, "num_tokens": 981645741.0, "step": 1550 }, { "epoch": 0.1833983682156793, "grad_norm": 0.17952141165733337, "learning_rate": 5.674190478114032e-05, "loss": 0.3837, "num_tokens": 982273942.0, "step": 1551 }, { "epoch": 0.18351661345630838, "grad_norm": 0.1630575805902481, "learning_rate": 5.673697876359954e-05, "loss": 0.3797, "num_tokens": 982905114.0, "step": 1552 }, { "epoch": 0.18363485869693744, "grad_norm": 44.51564407348633, "learning_rate": 5.6732049264451674e-05, "loss": 1.7862, "num_tokens": 983508182.0, "step": 1553 }, { "epoch": 0.18375310393756653, "grad_norm": 0.23031260073184967, "learning_rate": 5.672711628441974e-05, "loss": 0.3798, "num_tokens": 984147363.0, "step": 1554 }, { "epoch": 0.1838713491781956, "grad_norm": 0.16688291728496552, "learning_rate": 5.672217982422729e-05, "loss": 0.3776, "num_tokens": 984781250.0, "step": 1555 }, { "epoch": 0.18398959441882465, "grad_norm": 0.18365970253944397, "learning_rate": 5.671723988459838e-05, "loss": 0.3771, "num_tokens": 985418934.0, "step": 1556 }, { "epoch": 0.1841078396594537, "grad_norm": 0.18355931341648102, "learning_rate": 5.671229646625756e-05, "loss": 0.3705, "num_tokens": 986058183.0, "step": 1557 }, { "epoch": 0.18422608490008277, "grad_norm": 0.16490042209625244, "learning_rate": 5.670734956992992e-05, "loss": 0.3378, "num_tokens": 986689892.0, "step": 1558 }, { "epoch": 0.18434433014071183, "grad_norm": 0.1942044347524643, "learning_rate": 5.670239919634102e-05, "loss": 0.3622, "num_tokens": 987323095.0, "step": 1559 }, { "epoch": 0.1844625753813409, "grad_norm": 0.17794747650623322, "learning_rate": 5.669744534621698e-05, "loss": 0.3506, "num_tokens": 987953264.0, "step": 1560 }, { "epoch": 0.18458082062196995, "grad_norm": 0.16860783100128174, "learning_rate": 5.6692488020284386e-05, "loss": 0.3586, "num_tokens": 988590158.0, "step": 1561 }, { "epoch": 0.18469906586259904, "grad_norm": 0.17349503934383392, "learning_rate": 5.668752721927034e-05, "loss": 0.3878, "num_tokens": 989215297.0, "step": 1562 }, { "epoch": 0.1848173111032281, "grad_norm": 0.16144496202468872, "learning_rate": 5.6682562943902496e-05, "loss": 0.373, "num_tokens": 989854912.0, "step": 1563 }, { "epoch": 0.18493555634385717, "grad_norm": 0.1743748039007187, "learning_rate": 5.6677595194908956e-05, "loss": 0.3364, "num_tokens": 990494043.0, "step": 1564 }, { "epoch": 0.18505380158448623, "grad_norm": 0.19618842005729675, "learning_rate": 5.667262397301838e-05, "loss": 0.3797, "num_tokens": 991126843.0, "step": 1565 }, { "epoch": 0.1851720468251153, "grad_norm": 0.16037946939468384, "learning_rate": 5.666764927895991e-05, "loss": 0.3638, "num_tokens": 991761760.0, "step": 1566 }, { "epoch": 0.18529029206574435, "grad_norm": 0.17959721386432648, "learning_rate": 5.666267111346321e-05, "loss": 0.3464, "num_tokens": 992401240.0, "step": 1567 }, { "epoch": 0.1854085373063734, "grad_norm": 0.16185957193374634, "learning_rate": 5.665768947725845e-05, "loss": 0.3802, "num_tokens": 993035492.0, "step": 1568 }, { "epoch": 0.18552678254700247, "grad_norm": 0.21383321285247803, "learning_rate": 5.665270437107631e-05, "loss": 0.4096, "num_tokens": 993670701.0, "step": 1569 }, { "epoch": 0.18564502778763156, "grad_norm": 0.19013111293315887, "learning_rate": 5.664771579564797e-05, "loss": 0.4086, "num_tokens": 994304128.0, "step": 1570 }, { "epoch": 0.18576327302826062, "grad_norm": 0.17301039397716522, "learning_rate": 5.664272375170513e-05, "loss": 0.337, "num_tokens": 994941330.0, "step": 1571 }, { "epoch": 0.18588151826888968, "grad_norm": 0.23406770825386047, "learning_rate": 5.6637728239980004e-05, "loss": 0.4014, "num_tokens": 995567115.0, "step": 1572 }, { "epoch": 0.18599976350951875, "grad_norm": 0.17187941074371338, "learning_rate": 5.6632729261205286e-05, "loss": 0.4041, "num_tokens": 996201713.0, "step": 1573 }, { "epoch": 0.1861180087501478, "grad_norm": 0.19999489188194275, "learning_rate": 5.662772681611423e-05, "loss": 0.3495, "num_tokens": 996834913.0, "step": 1574 }, { "epoch": 0.18623625399077687, "grad_norm": 0.18090218305587769, "learning_rate": 5.662272090544053e-05, "loss": 0.3718, "num_tokens": 997471417.0, "step": 1575 }, { "epoch": 0.18635449923140593, "grad_norm": 0.18771344423294067, "learning_rate": 5.6617711529918444e-05, "loss": 0.4151, "num_tokens": 998105800.0, "step": 1576 }, { "epoch": 0.186472744472035, "grad_norm": 0.18846431374549866, "learning_rate": 5.661269869028271e-05, "loss": 0.4128, "num_tokens": 998742897.0, "step": 1577 }, { "epoch": 0.18659098971266405, "grad_norm": 0.20827901363372803, "learning_rate": 5.660768238726861e-05, "loss": 0.4001, "num_tokens": 999375870.0, "step": 1578 }, { "epoch": 0.18670923495329314, "grad_norm": 0.16869118809700012, "learning_rate": 5.660266262161187e-05, "loss": 0.3725, "num_tokens": 1000009533.0, "step": 1579 }, { "epoch": 0.1868274801939222, "grad_norm": 0.20359516143798828, "learning_rate": 5.659763939404878e-05, "loss": 0.3717, "num_tokens": 1000648319.0, "step": 1580 }, { "epoch": 0.18694572543455126, "grad_norm": 0.18024982511997223, "learning_rate": 5.659261270531612e-05, "loss": 0.3861, "num_tokens": 1001283756.0, "step": 1581 }, { "epoch": 0.18706397067518032, "grad_norm": 0.16098256409168243, "learning_rate": 5.658758255615118e-05, "loss": 0.3358, "num_tokens": 1001920534.0, "step": 1582 }, { "epoch": 0.18718221591580939, "grad_norm": 0.1886758953332901, "learning_rate": 5.658254894729174e-05, "loss": 0.349, "num_tokens": 1002538598.0, "step": 1583 }, { "epoch": 0.18730046115643845, "grad_norm": 0.15826751291751862, "learning_rate": 5.6577511879476113e-05, "loss": 0.3442, "num_tokens": 1003174599.0, "step": 1584 }, { "epoch": 0.1874187063970675, "grad_norm": 0.16147328913211823, "learning_rate": 5.65724713534431e-05, "loss": 0.3565, "num_tokens": 1003810442.0, "step": 1585 }, { "epoch": 0.18753695163769657, "grad_norm": 0.20615555346012115, "learning_rate": 5.656742736993202e-05, "loss": 0.4275, "num_tokens": 1004449210.0, "step": 1586 }, { "epoch": 0.18765519687832566, "grad_norm": 0.15462739765644073, "learning_rate": 5.6562379929682704e-05, "loss": 0.3286, "num_tokens": 1005080267.0, "step": 1587 }, { "epoch": 0.18777344211895472, "grad_norm": 0.196158766746521, "learning_rate": 5.6557329033435474e-05, "loss": 0.4252, "num_tokens": 1005714238.0, "step": 1588 }, { "epoch": 0.18789168735958378, "grad_norm": 0.17845283448696136, "learning_rate": 5.655227468193117e-05, "loss": 0.3767, "num_tokens": 1006353420.0, "step": 1589 }, { "epoch": 0.18800993260021284, "grad_norm": 0.19313257932662964, "learning_rate": 5.654721687591112e-05, "loss": 0.3948, "num_tokens": 1006983318.0, "step": 1590 }, { "epoch": 0.1881281778408419, "grad_norm": 0.16796085238456726, "learning_rate": 5.6542155616117216e-05, "loss": 0.3621, "num_tokens": 1007615434.0, "step": 1591 }, { "epoch": 0.18824642308147096, "grad_norm": 0.17490707337856293, "learning_rate": 5.6537090903291775e-05, "loss": 0.3738, "num_tokens": 1008247783.0, "step": 1592 }, { "epoch": 0.18836466832210003, "grad_norm": 0.15461547672748566, "learning_rate": 5.653202273817768e-05, "loss": 0.3566, "num_tokens": 1008884013.0, "step": 1593 }, { "epoch": 0.1884829135627291, "grad_norm": 0.16873563826084137, "learning_rate": 5.652695112151829e-05, "loss": 0.3887, "num_tokens": 1009517526.0, "step": 1594 }, { "epoch": 0.18860115880335818, "grad_norm": 0.15170042216777802, "learning_rate": 5.6521876054057486e-05, "loss": 0.373, "num_tokens": 1010147212.0, "step": 1595 }, { "epoch": 0.18871940404398724, "grad_norm": 0.1640443354845047, "learning_rate": 5.651679753653966e-05, "loss": 0.3797, "num_tokens": 1010778276.0, "step": 1596 }, { "epoch": 0.1888376492846163, "grad_norm": 0.15017777681350708, "learning_rate": 5.651171556970968e-05, "loss": 0.3617, "num_tokens": 1011412350.0, "step": 1597 }, { "epoch": 0.18895589452524536, "grad_norm": 0.17587436735630035, "learning_rate": 5.650663015431296e-05, "loss": 0.3798, "num_tokens": 1012042617.0, "step": 1598 }, { "epoch": 0.18907413976587442, "grad_norm": 0.1668560802936554, "learning_rate": 5.6501541291095395e-05, "loss": 0.3672, "num_tokens": 1012678514.0, "step": 1599 }, { "epoch": 0.18919238500650348, "grad_norm": 0.16462525725364685, "learning_rate": 5.649644898080338e-05, "loss": 0.4039, "num_tokens": 1013317458.0, "step": 1600 }, { "epoch": 0.18931063024713254, "grad_norm": 0.17160676419734955, "learning_rate": 5.649135322418384e-05, "loss": 0.329, "num_tokens": 1013951330.0, "step": 1601 }, { "epoch": 0.1894288754877616, "grad_norm": 0.16911710798740387, "learning_rate": 5.648625402198418e-05, "loss": 0.3665, "num_tokens": 1014583639.0, "step": 1602 }, { "epoch": 0.1895471207283907, "grad_norm": 0.18022656440734863, "learning_rate": 5.6481151374952344e-05, "loss": 0.4089, "num_tokens": 1015222884.0, "step": 1603 }, { "epoch": 0.18966536596901976, "grad_norm": 0.1629490852355957, "learning_rate": 5.6476045283836745e-05, "loss": 0.389, "num_tokens": 1015860526.0, "step": 1604 }, { "epoch": 0.18978361120964882, "grad_norm": 0.17405815422534943, "learning_rate": 5.64709357493863e-05, "loss": 0.3614, "num_tokens": 1016498048.0, "step": 1605 }, { "epoch": 0.18990185645027788, "grad_norm": 0.18966513872146606, "learning_rate": 5.646582277235047e-05, "loss": 0.3923, "num_tokens": 1017129789.0, "step": 1606 }, { "epoch": 0.19002010169090694, "grad_norm": 0.1767895519733429, "learning_rate": 5.64607063534792e-05, "loss": 0.4372, "num_tokens": 1017761831.0, "step": 1607 }, { "epoch": 0.190138346931536, "grad_norm": 0.19019336998462677, "learning_rate": 5.645558649352292e-05, "loss": 0.3848, "num_tokens": 1018397544.0, "step": 1608 }, { "epoch": 0.19025659217216506, "grad_norm": 3.179877758026123, "learning_rate": 5.6450463193232586e-05, "loss": 0.4766, "num_tokens": 1018998279.0, "step": 1609 }, { "epoch": 0.19037483741279412, "grad_norm": 0.225839763879776, "learning_rate": 5.644533645335967e-05, "loss": 0.3774, "num_tokens": 1019634188.0, "step": 1610 }, { "epoch": 0.1904930826534232, "grad_norm": 0.1825924664735794, "learning_rate": 5.6440206274656124e-05, "loss": 0.3657, "num_tokens": 1020259191.0, "step": 1611 }, { "epoch": 0.19061132789405227, "grad_norm": 0.16921837627887726, "learning_rate": 5.643507265787441e-05, "loss": 0.35, "num_tokens": 1020890735.0, "step": 1612 }, { "epoch": 0.19072957313468134, "grad_norm": 0.19447536766529083, "learning_rate": 5.642993560376749e-05, "loss": 0.3538, "num_tokens": 1021516346.0, "step": 1613 }, { "epoch": 0.1908478183753104, "grad_norm": 0.18670037388801575, "learning_rate": 5.642479511308887e-05, "loss": 0.3863, "num_tokens": 1022154101.0, "step": 1614 }, { "epoch": 0.19096606361593946, "grad_norm": 0.18784339725971222, "learning_rate": 5.64196511865925e-05, "loss": 0.4138, "num_tokens": 1022784679.0, "step": 1615 }, { "epoch": 0.19108430885656852, "grad_norm": 0.1630748212337494, "learning_rate": 5.641450382503286e-05, "loss": 0.3459, "num_tokens": 1023419003.0, "step": 1616 }, { "epoch": 0.19120255409719758, "grad_norm": 0.19578048586845398, "learning_rate": 5.640935302916496e-05, "loss": 0.3866, "num_tokens": 1024053560.0, "step": 1617 }, { "epoch": 0.19132079933782664, "grad_norm": 0.17380568385124207, "learning_rate": 5.640419879974427e-05, "loss": 0.3303, "num_tokens": 1024689470.0, "step": 1618 }, { "epoch": 0.19143904457845573, "grad_norm": 0.16582848131656647, "learning_rate": 5.6399041137526796e-05, "loss": 0.3538, "num_tokens": 1025315733.0, "step": 1619 }, { "epoch": 0.1915572898190848, "grad_norm": 0.1898595690727234, "learning_rate": 5.639388004326903e-05, "loss": 0.3788, "num_tokens": 1025951911.0, "step": 1620 }, { "epoch": 0.19167553505971385, "grad_norm": 0.1676625907421112, "learning_rate": 5.638871551772796e-05, "loss": 0.3857, "num_tokens": 1026586199.0, "step": 1621 }, { "epoch": 0.19179378030034291, "grad_norm": 0.1723065972328186, "learning_rate": 5.6383547561661105e-05, "loss": 0.369, "num_tokens": 1027217632.0, "step": 1622 }, { "epoch": 0.19191202554097198, "grad_norm": 0.18904320895671844, "learning_rate": 5.637837617582648e-05, "loss": 0.3824, "num_tokens": 1027847572.0, "step": 1623 }, { "epoch": 0.19203027078160104, "grad_norm": 0.18980403244495392, "learning_rate": 5.6373201360982566e-05, "loss": 0.3795, "num_tokens": 1028475842.0, "step": 1624 }, { "epoch": 0.1921485160222301, "grad_norm": 0.17266632616519928, "learning_rate": 5.636802311788841e-05, "loss": 0.3671, "num_tokens": 1029111898.0, "step": 1625 }, { "epoch": 0.19226676126285916, "grad_norm": 0.17338383197784424, "learning_rate": 5.63628414473035e-05, "loss": 0.3838, "num_tokens": 1029743386.0, "step": 1626 }, { "epoch": 0.19238500650348825, "grad_norm": 0.18085215985774994, "learning_rate": 5.6357656349987865e-05, "loss": 0.3803, "num_tokens": 1030378115.0, "step": 1627 }, { "epoch": 0.1925032517441173, "grad_norm": 0.18351654708385468, "learning_rate": 5.635246782670202e-05, "loss": 0.4144, "num_tokens": 1031003252.0, "step": 1628 }, { "epoch": 0.19262149698474637, "grad_norm": 0.1564149409532547, "learning_rate": 5.6347275878207006e-05, "loss": 0.3812, "num_tokens": 1031616777.0, "step": 1629 }, { "epoch": 0.19273974222537543, "grad_norm": 0.19233672320842743, "learning_rate": 5.6342080505264324e-05, "loss": 0.4005, "num_tokens": 1032254017.0, "step": 1630 }, { "epoch": 0.1928579874660045, "grad_norm": 0.18492649495601654, "learning_rate": 5.6336881708636025e-05, "loss": 0.4156, "num_tokens": 1032888699.0, "step": 1631 }, { "epoch": 0.19297623270663355, "grad_norm": 0.17568689584732056, "learning_rate": 5.6331679489084634e-05, "loss": 0.3702, "num_tokens": 1033518207.0, "step": 1632 }, { "epoch": 0.19309447794726262, "grad_norm": 0.1601502150297165, "learning_rate": 5.632647384737317e-05, "loss": 0.3704, "num_tokens": 1034156394.0, "step": 1633 }, { "epoch": 0.19321272318789168, "grad_norm": 0.1796914041042328, "learning_rate": 5.6321264784265184e-05, "loss": 0.3546, "num_tokens": 1034757861.0, "step": 1634 }, { "epoch": 0.19333096842852074, "grad_norm": 0.16432447731494904, "learning_rate": 5.6316052300524704e-05, "loss": 0.3414, "num_tokens": 1035390904.0, "step": 1635 }, { "epoch": 0.19344921366914983, "grad_norm": 0.17800767719745636, "learning_rate": 5.6310836396916266e-05, "loss": 0.3574, "num_tokens": 1036022608.0, "step": 1636 }, { "epoch": 0.1935674589097789, "grad_norm": 0.15416550636291504, "learning_rate": 5.6305617074204916e-05, "loss": 0.3339, "num_tokens": 1036653565.0, "step": 1637 }, { "epoch": 0.19368570415040795, "grad_norm": 0.18750128149986267, "learning_rate": 5.630039433315618e-05, "loss": 0.3712, "num_tokens": 1037289640.0, "step": 1638 }, { "epoch": 0.193803949391037, "grad_norm": 0.1731875240802765, "learning_rate": 5.629516817453613e-05, "loss": 0.3716, "num_tokens": 1037923126.0, "step": 1639 }, { "epoch": 0.19392219463166607, "grad_norm": 0.15479791164398193, "learning_rate": 5.628993859911128e-05, "loss": 0.3231, "num_tokens": 1038552223.0, "step": 1640 }, { "epoch": 0.19404043987229513, "grad_norm": 0.17309975624084473, "learning_rate": 5.628470560764869e-05, "loss": 0.3669, "num_tokens": 1039187143.0, "step": 1641 }, { "epoch": 0.1941586851129242, "grad_norm": 0.1811775118112564, "learning_rate": 5.62794692009159e-05, "loss": 0.4147, "num_tokens": 1039825115.0, "step": 1642 }, { "epoch": 0.19427693035355326, "grad_norm": 0.16321071982383728, "learning_rate": 5.627422937968095e-05, "loss": 0.3724, "num_tokens": 1040457103.0, "step": 1643 }, { "epoch": 0.19439517559418235, "grad_norm": 0.15942496061325073, "learning_rate": 5.626898614471241e-05, "loss": 0.3362, "num_tokens": 1041089601.0, "step": 1644 }, { "epoch": 0.1945134208348114, "grad_norm": 0.16278398036956787, "learning_rate": 5.62637394967793e-05, "loss": 0.3729, "num_tokens": 1041725934.0, "step": 1645 }, { "epoch": 0.19463166607544047, "grad_norm": 0.17011965811252594, "learning_rate": 5.6258489436651204e-05, "loss": 0.3559, "num_tokens": 1042365339.0, "step": 1646 }, { "epoch": 0.19474991131606953, "grad_norm": 0.15716242790222168, "learning_rate": 5.625323596509814e-05, "loss": 0.3731, "num_tokens": 1042996166.0, "step": 1647 }, { "epoch": 0.1948681565566986, "grad_norm": 0.18518084287643433, "learning_rate": 5.624797908289067e-05, "loss": 0.3434, "num_tokens": 1043623270.0, "step": 1648 }, { "epoch": 0.19498640179732765, "grad_norm": 0.16130898892879486, "learning_rate": 5.6242718790799845e-05, "loss": 0.3571, "num_tokens": 1044254863.0, "step": 1649 }, { "epoch": 0.1951046470379567, "grad_norm": 0.1838882863521576, "learning_rate": 5.623745508959721e-05, "loss": 0.4343, "num_tokens": 1044894564.0, "step": 1650 }, { "epoch": 0.19522289227858577, "grad_norm": 0.16889618337154388, "learning_rate": 5.623218798005483e-05, "loss": 0.3585, "num_tokens": 1045527684.0, "step": 1651 }, { "epoch": 0.19534113751921486, "grad_norm": 0.15412765741348267, "learning_rate": 5.622691746294523e-05, "loss": 0.3693, "num_tokens": 1046158648.0, "step": 1652 }, { "epoch": 0.19545938275984392, "grad_norm": 0.15957015752792358, "learning_rate": 5.6221643539041476e-05, "loss": 0.3645, "num_tokens": 1046792143.0, "step": 1653 }, { "epoch": 0.195577628000473, "grad_norm": 0.1711938977241516, "learning_rate": 5.621636620911711e-05, "loss": 0.3939, "num_tokens": 1047425498.0, "step": 1654 }, { "epoch": 0.19569587324110205, "grad_norm": 0.1671687811613083, "learning_rate": 5.621108547394619e-05, "loss": 0.3737, "num_tokens": 1048059505.0, "step": 1655 }, { "epoch": 0.1958141184817311, "grad_norm": 0.15672016143798828, "learning_rate": 5.6205801334303277e-05, "loss": 0.3887, "num_tokens": 1048693153.0, "step": 1656 }, { "epoch": 0.19593236372236017, "grad_norm": 0.16777455806732178, "learning_rate": 5.6200513790963385e-05, "loss": 0.4105, "num_tokens": 1049329857.0, "step": 1657 }, { "epoch": 0.19605060896298923, "grad_norm": 0.16693848371505737, "learning_rate": 5.619522284470208e-05, "loss": 0.3465, "num_tokens": 1049961195.0, "step": 1658 }, { "epoch": 0.1961688542036183, "grad_norm": 0.187652125954628, "learning_rate": 5.618992849629542e-05, "loss": 0.4061, "num_tokens": 1050595023.0, "step": 1659 }, { "epoch": 0.19628709944424738, "grad_norm": 0.18449878692626953, "learning_rate": 5.6184630746519924e-05, "loss": 0.4181, "num_tokens": 1051234499.0, "step": 1660 }, { "epoch": 0.19640534468487644, "grad_norm": 0.16416864097118378, "learning_rate": 5.617932959615266e-05, "loss": 0.3849, "num_tokens": 1051865728.0, "step": 1661 }, { "epoch": 0.1965235899255055, "grad_norm": 0.1639140397310257, "learning_rate": 5.617402504597116e-05, "loss": 0.3659, "num_tokens": 1052494691.0, "step": 1662 }, { "epoch": 0.19664183516613457, "grad_norm": 0.15407100319862366, "learning_rate": 5.616871709675346e-05, "loss": 0.3563, "num_tokens": 1053129843.0, "step": 1663 }, { "epoch": 0.19676008040676363, "grad_norm": 0.16401374340057373, "learning_rate": 5.6163405749278114e-05, "loss": 0.3711, "num_tokens": 1053762168.0, "step": 1664 }, { "epoch": 0.1968783256473927, "grad_norm": 0.17386414110660553, "learning_rate": 5.615809100432415e-05, "loss": 0.396, "num_tokens": 1054399760.0, "step": 1665 }, { "epoch": 0.19699657088802175, "grad_norm": 0.16414833068847656, "learning_rate": 5.615277286267111e-05, "loss": 0.3824, "num_tokens": 1055028654.0, "step": 1666 }, { "epoch": 0.1971148161286508, "grad_norm": 0.16925270855426788, "learning_rate": 5.6147451325099034e-05, "loss": 0.3669, "num_tokens": 1055660000.0, "step": 1667 }, { "epoch": 0.1972330613692799, "grad_norm": 0.16197408735752106, "learning_rate": 5.614212639238845e-05, "loss": 0.3711, "num_tokens": 1056296438.0, "step": 1668 }, { "epoch": 0.19735130660990896, "grad_norm": 0.17272470891475677, "learning_rate": 5.613679806532038e-05, "loss": 0.3614, "num_tokens": 1056927317.0, "step": 1669 }, { "epoch": 0.19746955185053802, "grad_norm": 0.16594335436820984, "learning_rate": 5.613146634467636e-05, "loss": 0.3887, "num_tokens": 1057562315.0, "step": 1670 }, { "epoch": 0.19758779709116708, "grad_norm": 0.1587894707918167, "learning_rate": 5.612613123123843e-05, "loss": 0.3784, "num_tokens": 1058196627.0, "step": 1671 }, { "epoch": 0.19770604233179614, "grad_norm": 0.16417083144187927, "learning_rate": 5.6120792725789096e-05, "loss": 0.3398, "num_tokens": 1058830504.0, "step": 1672 }, { "epoch": 0.1978242875724252, "grad_norm": 0.18508689105510712, "learning_rate": 5.611545082911139e-05, "loss": 0.3827, "num_tokens": 1059458402.0, "step": 1673 }, { "epoch": 0.19794253281305427, "grad_norm": 0.17113204300403595, "learning_rate": 5.611010554198883e-05, "loss": 0.3398, "num_tokens": 1060092121.0, "step": 1674 }, { "epoch": 0.19806077805368333, "grad_norm": 0.16387419402599335, "learning_rate": 5.610475686520542e-05, "loss": 0.3603, "num_tokens": 1060729280.0, "step": 1675 }, { "epoch": 0.19817902329431242, "grad_norm": 0.15762260556221008, "learning_rate": 5.60994047995457e-05, "loss": 0.3662, "num_tokens": 1061357345.0, "step": 1676 }, { "epoch": 0.19829726853494148, "grad_norm": 0.1683008372783661, "learning_rate": 5.6094049345794664e-05, "loss": 0.3605, "num_tokens": 1061986290.0, "step": 1677 }, { "epoch": 0.19841551377557054, "grad_norm": 0.1778731346130371, "learning_rate": 5.608869050473783e-05, "loss": 0.3778, "num_tokens": 1062616696.0, "step": 1678 }, { "epoch": 0.1985337590161996, "grad_norm": 0.15409034490585327, "learning_rate": 5.608332827716118e-05, "loss": 0.3747, "num_tokens": 1063253499.0, "step": 1679 }, { "epoch": 0.19865200425682866, "grad_norm": 0.16674906015396118, "learning_rate": 5.607796266385124e-05, "loss": 0.3973, "num_tokens": 1063890393.0, "step": 1680 }, { "epoch": 0.19877024949745772, "grad_norm": 0.1663948893547058, "learning_rate": 5.6072593665595e-05, "loss": 0.4103, "num_tokens": 1064526042.0, "step": 1681 }, { "epoch": 0.19888849473808679, "grad_norm": 0.17831626534461975, "learning_rate": 5.606722128317994e-05, "loss": 0.3994, "num_tokens": 1065162440.0, "step": 1682 }, { "epoch": 0.19900673997871585, "grad_norm": 0.15515778958797455, "learning_rate": 5.606184551739408e-05, "loss": 0.3694, "num_tokens": 1065798256.0, "step": 1683 }, { "epoch": 0.1991249852193449, "grad_norm": 0.15295614302158356, "learning_rate": 5.60564663690259e-05, "loss": 0.3641, "num_tokens": 1066431917.0, "step": 1684 }, { "epoch": 0.199243230459974, "grad_norm": 0.16488422453403473, "learning_rate": 5.6051083838864354e-05, "loss": 0.3533, "num_tokens": 1067062494.0, "step": 1685 }, { "epoch": 0.19936147570060306, "grad_norm": 0.17154201865196228, "learning_rate": 5.6045697927698954e-05, "loss": 0.3611, "num_tokens": 1067697069.0, "step": 1686 }, { "epoch": 0.19947972094123212, "grad_norm": 0.16390235722064972, "learning_rate": 5.604030863631965e-05, "loss": 0.4081, "num_tokens": 1068295187.0, "step": 1687 }, { "epoch": 0.19959796618186118, "grad_norm": 0.16959942877292633, "learning_rate": 5.603491596551695e-05, "loss": 0.3575, "num_tokens": 1068932515.0, "step": 1688 }, { "epoch": 0.19971621142249024, "grad_norm": 0.16777712106704712, "learning_rate": 5.602951991608178e-05, "loss": 0.352, "num_tokens": 1069570945.0, "step": 1689 }, { "epoch": 0.1998344566631193, "grad_norm": 0.15861544013023376, "learning_rate": 5.602412048880562e-05, "loss": 0.3373, "num_tokens": 1070176282.0, "step": 1690 }, { "epoch": 0.19995270190374836, "grad_norm": 0.18047991394996643, "learning_rate": 5.6018717684480446e-05, "loss": 0.3951, "num_tokens": 1070811105.0, "step": 1691 }, { "epoch": 0.20007094714437743, "grad_norm": 0.16524425148963928, "learning_rate": 5.601331150389868e-05, "loss": 0.3387, "num_tokens": 1071447938.0, "step": 1692 }, { "epoch": 0.20018919238500651, "grad_norm": 0.17959363758563995, "learning_rate": 5.600790194785328e-05, "loss": 0.3592, "num_tokens": 1072084659.0, "step": 1693 }, { "epoch": 0.20030743762563558, "grad_norm": 0.1542888879776001, "learning_rate": 5.60024890171377e-05, "loss": 0.3493, "num_tokens": 1072721591.0, "step": 1694 }, { "epoch": 0.20042568286626464, "grad_norm": 0.19947941601276398, "learning_rate": 5.599707271254588e-05, "loss": 0.405, "num_tokens": 1073359151.0, "step": 1695 }, { "epoch": 0.2005439281068937, "grad_norm": 0.1575387567281723, "learning_rate": 5.5991653034872224e-05, "loss": 0.3114, "num_tokens": 1073993201.0, "step": 1696 }, { "epoch": 0.20066217334752276, "grad_norm": 0.16751012206077576, "learning_rate": 5.5986229984911685e-05, "loss": 0.3848, "num_tokens": 1074627609.0, "step": 1697 }, { "epoch": 0.20078041858815182, "grad_norm": 0.2010495960712433, "learning_rate": 5.5980803563459685e-05, "loss": 0.3814, "num_tokens": 1075266812.0, "step": 1698 }, { "epoch": 0.20089866382878088, "grad_norm": 0.17102493345737457, "learning_rate": 5.597537377131214e-05, "loss": 0.3868, "num_tokens": 1075904344.0, "step": 1699 }, { "epoch": 0.20101690906940994, "grad_norm": 0.16345565021038055, "learning_rate": 5.596994060926545e-05, "loss": 0.3671, "num_tokens": 1076539387.0, "step": 1700 }, { "epoch": 0.20113515431003903, "grad_norm": 0.15770921111106873, "learning_rate": 5.596450407811653e-05, "loss": 0.3395, "num_tokens": 1077171298.0, "step": 1701 }, { "epoch": 0.2012533995506681, "grad_norm": 0.16773982346057892, "learning_rate": 5.595906417866278e-05, "loss": 0.3596, "num_tokens": 1077802493.0, "step": 1702 }, { "epoch": 0.20137164479129716, "grad_norm": 0.16538496315479279, "learning_rate": 5.59536209117021e-05, "loss": 0.3877, "num_tokens": 1078441368.0, "step": 1703 }, { "epoch": 0.20148989003192622, "grad_norm": 0.1700970083475113, "learning_rate": 5.594817427803287e-05, "loss": 0.3409, "num_tokens": 1079072648.0, "step": 1704 }, { "epoch": 0.20160813527255528, "grad_norm": 0.16228941082954407, "learning_rate": 5.594272427845396e-05, "loss": 0.3514, "num_tokens": 1079706981.0, "step": 1705 }, { "epoch": 0.20172638051318434, "grad_norm": 0.186961367726326, "learning_rate": 5.593727091376477e-05, "loss": 0.4241, "num_tokens": 1080339530.0, "step": 1706 }, { "epoch": 0.2018446257538134, "grad_norm": 0.18173547089099884, "learning_rate": 5.5931814184765144e-05, "loss": 0.3486, "num_tokens": 1080970260.0, "step": 1707 }, { "epoch": 0.20196287099444246, "grad_norm": 0.17249839007854462, "learning_rate": 5.5926354092255476e-05, "loss": 0.3825, "num_tokens": 1081590177.0, "step": 1708 }, { "epoch": 0.20208111623507155, "grad_norm": 0.19153840839862823, "learning_rate": 5.5920890637036594e-05, "loss": 0.3736, "num_tokens": 1082225090.0, "step": 1709 }, { "epoch": 0.2021993614757006, "grad_norm": 0.16184039413928986, "learning_rate": 5.5915423819909854e-05, "loss": 0.3453, "num_tokens": 1082857496.0, "step": 1710 }, { "epoch": 0.20231760671632967, "grad_norm": 0.19660115242004395, "learning_rate": 5.5909953641677104e-05, "loss": 0.4243, "num_tokens": 1083488770.0, "step": 1711 }, { "epoch": 0.20243585195695873, "grad_norm": 0.1727016717195511, "learning_rate": 5.590448010314067e-05, "loss": 0.3477, "num_tokens": 1084124695.0, "step": 1712 }, { "epoch": 0.2025540971975878, "grad_norm": 0.16290037333965302, "learning_rate": 5.589900320510339e-05, "loss": 0.3414, "num_tokens": 1084757117.0, "step": 1713 }, { "epoch": 0.20267234243821686, "grad_norm": 0.20220103859901428, "learning_rate": 5.5893522948368584e-05, "loss": 0.3652, "num_tokens": 1085390682.0, "step": 1714 }, { "epoch": 0.20279058767884592, "grad_norm": 0.16659769415855408, "learning_rate": 5.588803933374006e-05, "loss": 0.341, "num_tokens": 1086017829.0, "step": 1715 }, { "epoch": 0.20290883291947498, "grad_norm": 0.17582020163536072, "learning_rate": 5.588255236202212e-05, "loss": 0.3932, "num_tokens": 1086653372.0, "step": 1716 }, { "epoch": 0.20302707816010407, "grad_norm": 0.17737135291099548, "learning_rate": 5.587706203401958e-05, "loss": 0.3669, "num_tokens": 1087290946.0, "step": 1717 }, { "epoch": 0.20314532340073313, "grad_norm": 0.1839505136013031, "learning_rate": 5.5871568350537695e-05, "loss": 0.3956, "num_tokens": 1087920118.0, "step": 1718 }, { "epoch": 0.2032635686413622, "grad_norm": 0.1613587588071823, "learning_rate": 5.586607131238229e-05, "loss": 0.3552, "num_tokens": 1088552138.0, "step": 1719 }, { "epoch": 0.20338181388199125, "grad_norm": 0.17319151759147644, "learning_rate": 5.586057092035961e-05, "loss": 0.3865, "num_tokens": 1089182532.0, "step": 1720 }, { "epoch": 0.2035000591226203, "grad_norm": 0.17063890397548676, "learning_rate": 5.585506717527644e-05, "loss": 0.3559, "num_tokens": 1089818206.0, "step": 1721 }, { "epoch": 0.20361830436324937, "grad_norm": 0.1641162931919098, "learning_rate": 5.584956007794004e-05, "loss": 0.3745, "num_tokens": 1090453385.0, "step": 1722 }, { "epoch": 0.20373654960387844, "grad_norm": 0.15469761192798615, "learning_rate": 5.5844049629158126e-05, "loss": 0.3431, "num_tokens": 1091090465.0, "step": 1723 }, { "epoch": 0.2038547948445075, "grad_norm": 0.15648992359638214, "learning_rate": 5.583853582973899e-05, "loss": 0.3578, "num_tokens": 1091724132.0, "step": 1724 }, { "epoch": 0.2039730400851366, "grad_norm": 0.17042061686515808, "learning_rate": 5.583301868049132e-05, "loss": 0.4019, "num_tokens": 1092359583.0, "step": 1725 }, { "epoch": 0.20409128532576565, "grad_norm": 0.16862136125564575, "learning_rate": 5.5827498182224375e-05, "loss": 0.3666, "num_tokens": 1092994941.0, "step": 1726 }, { "epoch": 0.2042095305663947, "grad_norm": 0.1738826185464859, "learning_rate": 5.582197433574785e-05, "loss": 0.3829, "num_tokens": 1093633821.0, "step": 1727 }, { "epoch": 0.20432777580702377, "grad_norm": 0.1648256927728653, "learning_rate": 5.5816447141871966e-05, "loss": 0.4065, "num_tokens": 1094271599.0, "step": 1728 }, { "epoch": 0.20444602104765283, "grad_norm": 0.17406491935253143, "learning_rate": 5.581091660140741e-05, "loss": 0.3624, "num_tokens": 1094899179.0, "step": 1729 }, { "epoch": 0.2045642662882819, "grad_norm": 0.17174841463565826, "learning_rate": 5.5805382715165364e-05, "loss": 0.3491, "num_tokens": 1095533149.0, "step": 1730 }, { "epoch": 0.20468251152891095, "grad_norm": 0.19063274562358856, "learning_rate": 5.5799845483957516e-05, "loss": 0.4337, "num_tokens": 1096169826.0, "step": 1731 }, { "epoch": 0.20480075676954002, "grad_norm": 0.17348699271678925, "learning_rate": 5.579430490859605e-05, "loss": 0.4119, "num_tokens": 1096805072.0, "step": 1732 }, { "epoch": 0.2049190020101691, "grad_norm": 0.18787938356399536, "learning_rate": 5.578876098989361e-05, "loss": 0.3745, "num_tokens": 1097431523.0, "step": 1733 }, { "epoch": 0.20503724725079817, "grad_norm": 0.17936678230762482, "learning_rate": 5.578321372866335e-05, "loss": 0.356, "num_tokens": 1098064979.0, "step": 1734 }, { "epoch": 0.20515549249142723, "grad_norm": 0.17686747014522552, "learning_rate": 5.577766312571891e-05, "loss": 0.342, "num_tokens": 1098696612.0, "step": 1735 }, { "epoch": 0.2052737377320563, "grad_norm": 0.17104050517082214, "learning_rate": 5.577210918187443e-05, "loss": 0.3915, "num_tokens": 1099334882.0, "step": 1736 }, { "epoch": 0.20539198297268535, "grad_norm": 0.16770519316196442, "learning_rate": 5.576655189794452e-05, "loss": 0.3937, "num_tokens": 1099965725.0, "step": 1737 }, { "epoch": 0.2055102282133144, "grad_norm": 0.15419445931911469, "learning_rate": 5.576099127474429e-05, "loss": 0.3354, "num_tokens": 1100599444.0, "step": 1738 }, { "epoch": 0.20562847345394347, "grad_norm": 0.1736830323934555, "learning_rate": 5.5755427313089356e-05, "loss": 0.39, "num_tokens": 1101235842.0, "step": 1739 }, { "epoch": 0.20574671869457253, "grad_norm": 0.1474846601486206, "learning_rate": 5.574986001379579e-05, "loss": 0.3712, "num_tokens": 1101872512.0, "step": 1740 }, { "epoch": 0.2058649639352016, "grad_norm": 0.16483238339424133, "learning_rate": 5.5744289377680205e-05, "loss": 0.3946, "num_tokens": 1102511392.0, "step": 1741 }, { "epoch": 0.20598320917583068, "grad_norm": 0.14727474749088287, "learning_rate": 5.573871540555963e-05, "loss": 0.322, "num_tokens": 1103143502.0, "step": 1742 }, { "epoch": 0.20610145441645975, "grad_norm": 0.1590547412633896, "learning_rate": 5.573313809825164e-05, "loss": 0.3369, "num_tokens": 1103778115.0, "step": 1743 }, { "epoch": 0.2062196996570888, "grad_norm": 0.14879490435123444, "learning_rate": 5.5727557456574286e-05, "loss": 0.3492, "num_tokens": 1104417351.0, "step": 1744 }, { "epoch": 0.20633794489771787, "grad_norm": 0.1646801084280014, "learning_rate": 5.5721973481346116e-05, "loss": 0.3719, "num_tokens": 1105051447.0, "step": 1745 }, { "epoch": 0.20645619013834693, "grad_norm": 0.1706719994544983, "learning_rate": 5.571638617338613e-05, "loss": 0.4052, "num_tokens": 1105684481.0, "step": 1746 }, { "epoch": 0.206574435378976, "grad_norm": 0.1649491935968399, "learning_rate": 5.571079553351387e-05, "loss": 0.3588, "num_tokens": 1106316069.0, "step": 1747 }, { "epoch": 0.20669268061960505, "grad_norm": 0.17096470296382904, "learning_rate": 5.5705201562549315e-05, "loss": 0.371, "num_tokens": 1106946321.0, "step": 1748 }, { "epoch": 0.2068109258602341, "grad_norm": 0.17612944543361664, "learning_rate": 5.569960426131297e-05, "loss": 0.3731, "num_tokens": 1107578837.0, "step": 1749 }, { "epoch": 0.2069291711008632, "grad_norm": 0.1620485782623291, "learning_rate": 5.569400363062582e-05, "loss": 0.3841, "num_tokens": 1108209790.0, "step": 1750 }, { "epoch": 0.20704741634149226, "grad_norm": 0.1728174239397049, "learning_rate": 5.568839967130934e-05, "loss": 0.3757, "num_tokens": 1108848427.0, "step": 1751 }, { "epoch": 0.20716566158212132, "grad_norm": 0.18179410696029663, "learning_rate": 5.568279238418546e-05, "loss": 0.3599, "num_tokens": 1109482368.0, "step": 1752 }, { "epoch": 0.20728390682275039, "grad_norm": 0.19228853285312653, "learning_rate": 5.567718177007665e-05, "loss": 0.3484, "num_tokens": 1110116891.0, "step": 1753 }, { "epoch": 0.20740215206337945, "grad_norm": 0.17104071378707886, "learning_rate": 5.5671567829805836e-05, "loss": 0.3528, "num_tokens": 1110751086.0, "step": 1754 }, { "epoch": 0.2075203973040085, "grad_norm": 0.191243976354599, "learning_rate": 5.5665950564196444e-05, "loss": 0.3684, "num_tokens": 1111379871.0, "step": 1755 }, { "epoch": 0.20763864254463757, "grad_norm": 0.16696473956108093, "learning_rate": 5.566032997407237e-05, "loss": 0.3806, "num_tokens": 1112019590.0, "step": 1756 }, { "epoch": 0.20775688778526663, "grad_norm": 0.1670149713754654, "learning_rate": 5.565470606025802e-05, "loss": 0.363, "num_tokens": 1112657954.0, "step": 1757 }, { "epoch": 0.20787513302589572, "grad_norm": 0.18600250780582428, "learning_rate": 5.564907882357828e-05, "loss": 0.3608, "num_tokens": 1113297065.0, "step": 1758 }, { "epoch": 0.20799337826652478, "grad_norm": 0.1696121245622635, "learning_rate": 5.564344826485852e-05, "loss": 0.3801, "num_tokens": 1113925418.0, "step": 1759 }, { "epoch": 0.20811162350715384, "grad_norm": 0.1575317233800888, "learning_rate": 5.563781438492459e-05, "loss": 0.3716, "num_tokens": 1114557861.0, "step": 1760 }, { "epoch": 0.2082298687477829, "grad_norm": 0.16374079883098602, "learning_rate": 5.5632177184602845e-05, "loss": 0.3459, "num_tokens": 1115190865.0, "step": 1761 }, { "epoch": 0.20834811398841196, "grad_norm": 0.1712082028388977, "learning_rate": 5.562653666472013e-05, "loss": 0.3534, "num_tokens": 1115828417.0, "step": 1762 }, { "epoch": 0.20846635922904103, "grad_norm": 0.16090470552444458, "learning_rate": 5.562089282610373e-05, "loss": 0.3799, "num_tokens": 1116467782.0, "step": 1763 }, { "epoch": 0.2085846044696701, "grad_norm": 0.15335479378700256, "learning_rate": 5.561524566958149e-05, "loss": 0.3504, "num_tokens": 1117103530.0, "step": 1764 }, { "epoch": 0.20870284971029915, "grad_norm": 0.18574309349060059, "learning_rate": 5.5609595195981676e-05, "loss": 0.3864, "num_tokens": 1117737680.0, "step": 1765 }, { "epoch": 0.20882109495092824, "grad_norm": 0.1544754058122635, "learning_rate": 5.560394140613308e-05, "loss": 0.3882, "num_tokens": 1118373673.0, "step": 1766 }, { "epoch": 0.2089393401915573, "grad_norm": 0.162854865193367, "learning_rate": 5.559828430086497e-05, "loss": 0.3484, "num_tokens": 1118998834.0, "step": 1767 }, { "epoch": 0.20905758543218636, "grad_norm": 0.15691205859184265, "learning_rate": 5.559262388100709e-05, "loss": 0.3867, "num_tokens": 1119628929.0, "step": 1768 }, { "epoch": 0.20917583067281542, "grad_norm": 0.17412465810775757, "learning_rate": 5.558696014738969e-05, "loss": 0.3309, "num_tokens": 1120265763.0, "step": 1769 }, { "epoch": 0.20929407591344448, "grad_norm": 0.19345293939113617, "learning_rate": 5.558129310084348e-05, "loss": 0.376, "num_tokens": 1120900582.0, "step": 1770 }, { "epoch": 0.20941232115407354, "grad_norm": 0.180593341588974, "learning_rate": 5.557562274219968e-05, "loss": 0.3568, "num_tokens": 1121525988.0, "step": 1771 }, { "epoch": 0.2095305663947026, "grad_norm": 0.1684592068195343, "learning_rate": 5.556994907228999e-05, "loss": 0.3664, "num_tokens": 1122163965.0, "step": 1772 }, { "epoch": 0.20964881163533167, "grad_norm": 0.18159092962741852, "learning_rate": 5.556427209194658e-05, "loss": 0.3735, "num_tokens": 1122801406.0, "step": 1773 }, { "epoch": 0.20976705687596076, "grad_norm": 0.17665807902812958, "learning_rate": 5.555859180200214e-05, "loss": 0.3618, "num_tokens": 1123437251.0, "step": 1774 }, { "epoch": 0.20988530211658982, "grad_norm": 0.16268689930438995, "learning_rate": 5.55529082032898e-05, "loss": 0.4008, "num_tokens": 1124069900.0, "step": 1775 }, { "epoch": 0.21000354735721888, "grad_norm": 0.15612858533859253, "learning_rate": 5.5547221296643196e-05, "loss": 0.3517, "num_tokens": 1124707468.0, "step": 1776 }, { "epoch": 0.21012179259784794, "grad_norm": 0.15570764243602753, "learning_rate": 5.554153108289647e-05, "loss": 0.3467, "num_tokens": 1125310855.0, "step": 1777 }, { "epoch": 0.210240037838477, "grad_norm": 0.1669013947248459, "learning_rate": 5.553583756288423e-05, "loss": 0.3583, "num_tokens": 1125920063.0, "step": 1778 }, { "epoch": 0.21035828307910606, "grad_norm": 0.14521604776382446, "learning_rate": 5.5530140737441556e-05, "loss": 0.3366, "num_tokens": 1126553148.0, "step": 1779 }, { "epoch": 0.21047652831973512, "grad_norm": 0.16427499055862427, "learning_rate": 5.552444060740403e-05, "loss": 0.3626, "num_tokens": 1127180645.0, "step": 1780 }, { "epoch": 0.21059477356036418, "grad_norm": 0.16613474488258362, "learning_rate": 5.5518737173607726e-05, "loss": 0.4001, "num_tokens": 1127814988.0, "step": 1781 }, { "epoch": 0.21071301880099327, "grad_norm": 0.17835578322410583, "learning_rate": 5.551303043688917e-05, "loss": 0.3666, "num_tokens": 1128450513.0, "step": 1782 }, { "epoch": 0.21083126404162233, "grad_norm": 0.16965000331401825, "learning_rate": 5.550732039808542e-05, "loss": 0.3838, "num_tokens": 1129084385.0, "step": 1783 }, { "epoch": 0.2109495092822514, "grad_norm": 0.15127909183502197, "learning_rate": 5.550160705803398e-05, "loss": 0.363, "num_tokens": 1129723803.0, "step": 1784 }, { "epoch": 0.21106775452288046, "grad_norm": 0.18379729986190796, "learning_rate": 5.549589041757285e-05, "loss": 0.4143, "num_tokens": 1130361719.0, "step": 1785 }, { "epoch": 0.21118599976350952, "grad_norm": 0.1575160175561905, "learning_rate": 5.549017047754051e-05, "loss": 0.3786, "num_tokens": 1130996067.0, "step": 1786 }, { "epoch": 0.21130424500413858, "grad_norm": 0.1674695461988449, "learning_rate": 5.548444723877594e-05, "loss": 0.4087, "num_tokens": 1131630388.0, "step": 1787 }, { "epoch": 0.21142249024476764, "grad_norm": 0.16178591549396515, "learning_rate": 5.547872070211858e-05, "loss": 0.3535, "num_tokens": 1132265944.0, "step": 1788 }, { "epoch": 0.2115407354853967, "grad_norm": 0.15059073269367218, "learning_rate": 5.547299086840839e-05, "loss": 0.3533, "num_tokens": 1132901366.0, "step": 1789 }, { "epoch": 0.21165898072602576, "grad_norm": 0.17124061286449432, "learning_rate": 5.546725773848576e-05, "loss": 0.3416, "num_tokens": 1133533829.0, "step": 1790 }, { "epoch": 0.21177722596665485, "grad_norm": 0.18833769857883453, "learning_rate": 5.546152131319161e-05, "loss": 0.374, "num_tokens": 1134167426.0, "step": 1791 }, { "epoch": 0.21189547120728391, "grad_norm": 0.16163519024848938, "learning_rate": 5.545578159336733e-05, "loss": 0.3455, "num_tokens": 1134798148.0, "step": 1792 }, { "epoch": 0.21201371644791298, "grad_norm": 0.15681228041648865, "learning_rate": 5.545003857985477e-05, "loss": 0.3696, "num_tokens": 1135435451.0, "step": 1793 }, { "epoch": 0.21213196168854204, "grad_norm": 0.1973257213830948, "learning_rate": 5.544429227349631e-05, "loss": 0.4227, "num_tokens": 1136074929.0, "step": 1794 }, { "epoch": 0.2122502069291711, "grad_norm": 0.1705009937286377, "learning_rate": 5.543854267513477e-05, "loss": 0.3892, "num_tokens": 1136709588.0, "step": 1795 }, { "epoch": 0.21236845216980016, "grad_norm": 0.1503731608390808, "learning_rate": 5.543278978561347e-05, "loss": 0.3516, "num_tokens": 1137345661.0, "step": 1796 }, { "epoch": 0.21248669741042922, "grad_norm": 0.16565866768360138, "learning_rate": 5.5427033605776205e-05, "loss": 0.3585, "num_tokens": 1137949799.0, "step": 1797 }, { "epoch": 0.21260494265105828, "grad_norm": 0.16144925355911255, "learning_rate": 5.542127413646728e-05, "loss": 0.3574, "num_tokens": 1138575265.0, "step": 1798 }, { "epoch": 0.21272318789168737, "grad_norm": 0.14865180850028992, "learning_rate": 5.541551137853144e-05, "loss": 0.3536, "num_tokens": 1139211263.0, "step": 1799 }, { "epoch": 0.21284143313231643, "grad_norm": 0.16791199147701263, "learning_rate": 5.5409745332813945e-05, "loss": 0.3724, "num_tokens": 1139850606.0, "step": 1800 }, { "epoch": 0.2129596783729455, "grad_norm": 0.149713933467865, "learning_rate": 5.540397600016053e-05, "loss": 0.333, "num_tokens": 1140482025.0, "step": 1801 }, { "epoch": 0.21307792361357455, "grad_norm": 0.15735702216625214, "learning_rate": 5.53982033814174e-05, "loss": 0.3503, "num_tokens": 1141111370.0, "step": 1802 }, { "epoch": 0.21319616885420362, "grad_norm": 0.15558961033821106, "learning_rate": 5.539242747743125e-05, "loss": 0.3645, "num_tokens": 1141741981.0, "step": 1803 }, { "epoch": 0.21331441409483268, "grad_norm": 0.15568359196186066, "learning_rate": 5.538664828904927e-05, "loss": 0.3818, "num_tokens": 1142381483.0, "step": 1804 }, { "epoch": 0.21343265933546174, "grad_norm": 0.15518690645694733, "learning_rate": 5.53808658171191e-05, "loss": 0.3378, "num_tokens": 1143015721.0, "step": 1805 }, { "epoch": 0.2135509045760908, "grad_norm": 0.1548079550266266, "learning_rate": 5.537508006248889e-05, "loss": 0.319, "num_tokens": 1143647685.0, "step": 1806 }, { "epoch": 0.2136691498167199, "grad_norm": 0.16514387726783752, "learning_rate": 5.5369291026007266e-05, "loss": 0.3569, "num_tokens": 1144279423.0, "step": 1807 }, { "epoch": 0.21378739505734895, "grad_norm": 0.16932187974452972, "learning_rate": 5.536349870852333e-05, "loss": 0.3652, "num_tokens": 1144914752.0, "step": 1808 }, { "epoch": 0.213905640297978, "grad_norm": 0.16285857558250427, "learning_rate": 5.535770311088666e-05, "loss": 0.3848, "num_tokens": 1145548289.0, "step": 1809 }, { "epoch": 0.21402388553860707, "grad_norm": 0.1568160206079483, "learning_rate": 5.535190423394733e-05, "loss": 0.4111, "num_tokens": 1146177907.0, "step": 1810 }, { "epoch": 0.21414213077923613, "grad_norm": 0.1457107812166214, "learning_rate": 5.5346102078555876e-05, "loss": 0.3571, "num_tokens": 1146811669.0, "step": 1811 }, { "epoch": 0.2142603760198652, "grad_norm": 0.1561731994152069, "learning_rate": 5.534029664556334e-05, "loss": 0.3651, "num_tokens": 1147450564.0, "step": 1812 }, { "epoch": 0.21437862126049426, "grad_norm": 0.15244807302951813, "learning_rate": 5.5334487935821226e-05, "loss": 0.3674, "num_tokens": 1148081304.0, "step": 1813 }, { "epoch": 0.21449686650112332, "grad_norm": 0.17489269375801086, "learning_rate": 5.532867595018151e-05, "loss": 0.399, "num_tokens": 1148714735.0, "step": 1814 }, { "epoch": 0.2146151117417524, "grad_norm": 0.15375182032585144, "learning_rate": 5.532286068949667e-05, "loss": 0.3936, "num_tokens": 1149346233.0, "step": 1815 }, { "epoch": 0.21473335698238147, "grad_norm": 0.15176524221897125, "learning_rate": 5.5317042154619665e-05, "loss": 0.341, "num_tokens": 1149980039.0, "step": 1816 }, { "epoch": 0.21485160222301053, "grad_norm": 0.1615682691335678, "learning_rate": 5.531122034640392e-05, "loss": 0.3513, "num_tokens": 1150610366.0, "step": 1817 }, { "epoch": 0.2149698474636396, "grad_norm": 0.16329064965248108, "learning_rate": 5.5305395265703326e-05, "loss": 0.3913, "num_tokens": 1151249431.0, "step": 1818 }, { "epoch": 0.21508809270426865, "grad_norm": 0.16560256481170654, "learning_rate": 5.529956691337231e-05, "loss": 0.3591, "num_tokens": 1151882110.0, "step": 1819 }, { "epoch": 0.2152063379448977, "grad_norm": 0.1703309267759323, "learning_rate": 5.529373529026571e-05, "loss": 0.3799, "num_tokens": 1152521530.0, "step": 1820 }, { "epoch": 0.21532458318552677, "grad_norm": 0.14722923934459686, "learning_rate": 5.5287900397238885e-05, "loss": 0.3618, "num_tokens": 1153156986.0, "step": 1821 }, { "epoch": 0.21544282842615584, "grad_norm": 0.16444632411003113, "learning_rate": 5.528206223514768e-05, "loss": 0.3978, "num_tokens": 1153794759.0, "step": 1822 }, { "epoch": 0.21556107366678492, "grad_norm": 0.1431114375591278, "learning_rate": 5.527622080484838e-05, "loss": 0.332, "num_tokens": 1154433046.0, "step": 1823 }, { "epoch": 0.21567931890741399, "grad_norm": 0.14335665106773376, "learning_rate": 5.527037610719779e-05, "loss": 0.3375, "num_tokens": 1155067168.0, "step": 1824 }, { "epoch": 0.21579756414804305, "grad_norm": 0.16260433197021484, "learning_rate": 5.5264528143053174e-05, "loss": 0.3744, "num_tokens": 1155703377.0, "step": 1825 }, { "epoch": 0.2159158093886721, "grad_norm": 0.14873884618282318, "learning_rate": 5.525867691327227e-05, "loss": 0.3542, "num_tokens": 1156333194.0, "step": 1826 }, { "epoch": 0.21603405462930117, "grad_norm": 0.15651258826255798, "learning_rate": 5.5252822418713317e-05, "loss": 0.3608, "num_tokens": 1156965595.0, "step": 1827 }, { "epoch": 0.21615229986993023, "grad_norm": 0.16862799227237701, "learning_rate": 5.524696466023501e-05, "loss": 0.4055, "num_tokens": 1157595055.0, "step": 1828 }, { "epoch": 0.2162705451105593, "grad_norm": 0.16530156135559082, "learning_rate": 5.524110363869653e-05, "loss": 0.4096, "num_tokens": 1158227500.0, "step": 1829 }, { "epoch": 0.21638879035118835, "grad_norm": 0.16185835003852844, "learning_rate": 5.523523935495755e-05, "loss": 0.3841, "num_tokens": 1158863732.0, "step": 1830 }, { "epoch": 0.21650703559181744, "grad_norm": 0.16488629579544067, "learning_rate": 5.52293718098782e-05, "loss": 0.373, "num_tokens": 1159489840.0, "step": 1831 }, { "epoch": 0.2166252808324465, "grad_norm": 0.16744542121887207, "learning_rate": 5.52235010043191e-05, "loss": 0.3819, "num_tokens": 1160128980.0, "step": 1832 }, { "epoch": 0.21674352607307557, "grad_norm": 0.1519583910703659, "learning_rate": 5.521762693914136e-05, "loss": 0.3538, "num_tokens": 1160764145.0, "step": 1833 }, { "epoch": 0.21686177131370463, "grad_norm": 0.17682109773159027, "learning_rate": 5.521174961520653e-05, "loss": 0.4051, "num_tokens": 1161400464.0, "step": 1834 }, { "epoch": 0.2169800165543337, "grad_norm": 0.1632201373577118, "learning_rate": 5.520586903337669e-05, "loss": 0.3733, "num_tokens": 1162027629.0, "step": 1835 }, { "epoch": 0.21709826179496275, "grad_norm": 0.1692347526550293, "learning_rate": 5.519998519451435e-05, "loss": 0.4042, "num_tokens": 1162661769.0, "step": 1836 }, { "epoch": 0.2172165070355918, "grad_norm": 0.15698187053203583, "learning_rate": 5.519409809948254e-05, "loss": 0.3627, "num_tokens": 1163294488.0, "step": 1837 }, { "epoch": 0.21733475227622087, "grad_norm": 0.17315460741519928, "learning_rate": 5.518820774914472e-05, "loss": 0.3645, "num_tokens": 1163907989.0, "step": 1838 }, { "epoch": 0.21745299751684993, "grad_norm": 0.15877170860767365, "learning_rate": 5.518231414436486e-05, "loss": 0.3684, "num_tokens": 1164541061.0, "step": 1839 }, { "epoch": 0.21757124275747902, "grad_norm": 0.1746237426996231, "learning_rate": 5.517641728600742e-05, "loss": 0.3774, "num_tokens": 1165172101.0, "step": 1840 }, { "epoch": 0.21768948799810808, "grad_norm": 0.17720474302768707, "learning_rate": 5.5170517174937304e-05, "loss": 0.3247, "num_tokens": 1165801010.0, "step": 1841 }, { "epoch": 0.21780773323873714, "grad_norm": 0.15425847470760345, "learning_rate": 5.5164613812019904e-05, "loss": 0.3477, "num_tokens": 1166401559.0, "step": 1842 }, { "epoch": 0.2179259784793662, "grad_norm": 0.1663532257080078, "learning_rate": 5.51587071981211e-05, "loss": 0.3142, "num_tokens": 1167038615.0, "step": 1843 }, { "epoch": 0.21804422371999527, "grad_norm": 0.20956267416477203, "learning_rate": 5.515279733410724e-05, "loss": 0.3832, "num_tokens": 1167676463.0, "step": 1844 }, { "epoch": 0.21816246896062433, "grad_norm": 0.17030753195285797, "learning_rate": 5.514688422084514e-05, "loss": 0.3853, "num_tokens": 1168309031.0, "step": 1845 }, { "epoch": 0.2182807142012534, "grad_norm": 0.17038972675800323, "learning_rate": 5.5140967859202116e-05, "loss": 0.3858, "num_tokens": 1168935781.0, "step": 1846 }, { "epoch": 0.21839895944188245, "grad_norm": 0.24669261276721954, "learning_rate": 5.5135048250045947e-05, "loss": 0.4045, "num_tokens": 1169567656.0, "step": 1847 }, { "epoch": 0.21851720468251154, "grad_norm": 0.18820074200630188, "learning_rate": 5.512912539424487e-05, "loss": 0.4094, "num_tokens": 1170206328.0, "step": 1848 }, { "epoch": 0.2186354499231406, "grad_norm": 0.14663389325141907, "learning_rate": 5.5123199292667635e-05, "loss": 0.3917, "num_tokens": 1170845768.0, "step": 1849 }, { "epoch": 0.21875369516376966, "grad_norm": 0.18154430389404297, "learning_rate": 5.511726994618344e-05, "loss": 0.3452, "num_tokens": 1171477604.0, "step": 1850 }, { "epoch": 0.21887194040439872, "grad_norm": 0.19304318726062775, "learning_rate": 5.5111337355661975e-05, "loss": 0.4155, "num_tokens": 1172110395.0, "step": 1851 }, { "epoch": 0.21899018564502779, "grad_norm": 0.17825494706630707, "learning_rate": 5.5105401521973387e-05, "loss": 0.3467, "num_tokens": 1172747360.0, "step": 1852 }, { "epoch": 0.21910843088565685, "grad_norm": 0.16617631912231445, "learning_rate": 5.5099462445988325e-05, "loss": 0.402, "num_tokens": 1173383816.0, "step": 1853 }, { "epoch": 0.2192266761262859, "grad_norm": 0.1714310199022293, "learning_rate": 5.509352012857789e-05, "loss": 0.3545, "num_tokens": 1174017986.0, "step": 1854 }, { "epoch": 0.21934492136691497, "grad_norm": 0.1578076034784317, "learning_rate": 5.508757457061368e-05, "loss": 0.3374, "num_tokens": 1174648098.0, "step": 1855 }, { "epoch": 0.21946316660754406, "grad_norm": 0.1549782156944275, "learning_rate": 5.508162577296774e-05, "loss": 0.3666, "num_tokens": 1175285162.0, "step": 1856 }, { "epoch": 0.21958141184817312, "grad_norm": 0.1661461889743805, "learning_rate": 5.5075673736512615e-05, "loss": 0.4045, "num_tokens": 1175918416.0, "step": 1857 }, { "epoch": 0.21969965708880218, "grad_norm": 0.15641838312149048, "learning_rate": 5.5069718462121316e-05, "loss": 0.3452, "num_tokens": 1176553703.0, "step": 1858 }, { "epoch": 0.21981790232943124, "grad_norm": 0.17078624665737152, "learning_rate": 5.506375995066733e-05, "loss": 0.3635, "num_tokens": 1177188385.0, "step": 1859 }, { "epoch": 0.2199361475700603, "grad_norm": 0.15560130774974823, "learning_rate": 5.505779820302461e-05, "loss": 0.3436, "num_tokens": 1177825789.0, "step": 1860 }, { "epoch": 0.22005439281068936, "grad_norm": 0.1476420760154724, "learning_rate": 5.50518332200676e-05, "loss": 0.3713, "num_tokens": 1178461722.0, "step": 1861 }, { "epoch": 0.22017263805131843, "grad_norm": 0.158548966050148, "learning_rate": 5.5045865002671215e-05, "loss": 0.3713, "num_tokens": 1179095968.0, "step": 1862 }, { "epoch": 0.2202908832919475, "grad_norm": 0.14006307721138, "learning_rate": 5.5039893551710826e-05, "loss": 0.3428, "num_tokens": 1179730603.0, "step": 1863 }, { "epoch": 0.22040912853257658, "grad_norm": 0.16388210654258728, "learning_rate": 5.50339188680623e-05, "loss": 0.4023, "num_tokens": 1180365636.0, "step": 1864 }, { "epoch": 0.22052737377320564, "grad_norm": 0.15038001537322998, "learning_rate": 5.5027940952601975e-05, "loss": 0.3591, "num_tokens": 1181002112.0, "step": 1865 }, { "epoch": 0.2206456190138347, "grad_norm": 0.149652361869812, "learning_rate": 5.502195980620665e-05, "loss": 0.3337, "num_tokens": 1181635764.0, "step": 1866 }, { "epoch": 0.22076386425446376, "grad_norm": 0.15370416641235352, "learning_rate": 5.50159754297536e-05, "loss": 0.384, "num_tokens": 1182274273.0, "step": 1867 }, { "epoch": 0.22088210949509282, "grad_norm": 0.16633738577365875, "learning_rate": 5.50099878241206e-05, "loss": 0.362, "num_tokens": 1182907997.0, "step": 1868 }, { "epoch": 0.22100035473572188, "grad_norm": 0.1556580811738968, "learning_rate": 5.5003996990185865e-05, "loss": 0.3818, "num_tokens": 1183539711.0, "step": 1869 }, { "epoch": 0.22111859997635094, "grad_norm": 0.1547265648841858, "learning_rate": 5.4998002928828096e-05, "loss": 0.3514, "num_tokens": 1184178482.0, "step": 1870 }, { "epoch": 0.22123684521698, "grad_norm": 0.15106819570064545, "learning_rate": 5.499200564092647e-05, "loss": 0.3673, "num_tokens": 1184815893.0, "step": 1871 }, { "epoch": 0.2213550904576091, "grad_norm": 0.16126137971878052, "learning_rate": 5.498600512736064e-05, "loss": 0.387, "num_tokens": 1185451168.0, "step": 1872 }, { "epoch": 0.22147333569823816, "grad_norm": 0.15680567920207977, "learning_rate": 5.498000138901072e-05, "loss": 0.3956, "num_tokens": 1186086488.0, "step": 1873 }, { "epoch": 0.22159158093886722, "grad_norm": 0.16245433688163757, "learning_rate": 5.4973994426757306e-05, "loss": 0.3925, "num_tokens": 1186722050.0, "step": 1874 }, { "epoch": 0.22170982617949628, "grad_norm": 0.1591397523880005, "learning_rate": 5.496798424148148e-05, "loss": 0.3617, "num_tokens": 1187352361.0, "step": 1875 }, { "epoch": 0.22182807142012534, "grad_norm": 0.14030338823795319, "learning_rate": 5.496197083406476e-05, "loss": 0.3359, "num_tokens": 1187987963.0, "step": 1876 }, { "epoch": 0.2219463166607544, "grad_norm": 0.15089866518974304, "learning_rate": 5.495595420538918e-05, "loss": 0.3302, "num_tokens": 1188622119.0, "step": 1877 }, { "epoch": 0.22206456190138346, "grad_norm": 0.15419599413871765, "learning_rate": 5.494993435633721e-05, "loss": 0.3786, "num_tokens": 1189257899.0, "step": 1878 }, { "epoch": 0.22218280714201252, "grad_norm": 0.15243268013000488, "learning_rate": 5.494391128779182e-05, "loss": 0.3818, "num_tokens": 1189890204.0, "step": 1879 }, { "epoch": 0.2223010523826416, "grad_norm": 0.16612209379673004, "learning_rate": 5.493788500063643e-05, "loss": 0.374, "num_tokens": 1190523164.0, "step": 1880 }, { "epoch": 0.22241929762327067, "grad_norm": 0.15218707919120789, "learning_rate": 5.493185549575493e-05, "loss": 0.3456, "num_tokens": 1191157689.0, "step": 1881 }, { "epoch": 0.22253754286389973, "grad_norm": 0.15330468118190765, "learning_rate": 5.4925822774031726e-05, "loss": 0.3054, "num_tokens": 1191786401.0, "step": 1882 }, { "epoch": 0.2226557881045288, "grad_norm": 0.16546884179115295, "learning_rate": 5.4919786836351635e-05, "loss": 0.3275, "num_tokens": 1192420292.0, "step": 1883 }, { "epoch": 0.22277403334515786, "grad_norm": 0.17202454805374146, "learning_rate": 5.49137476836e-05, "loss": 0.3788, "num_tokens": 1193050347.0, "step": 1884 }, { "epoch": 0.22289227858578692, "grad_norm": 0.16212061047554016, "learning_rate": 5.4907705316662586e-05, "loss": 0.3486, "num_tokens": 1193683618.0, "step": 1885 }, { "epoch": 0.22301052382641598, "grad_norm": 0.16484704613685608, "learning_rate": 5.490165973642567e-05, "loss": 0.3721, "num_tokens": 1194316482.0, "step": 1886 }, { "epoch": 0.22312876906704504, "grad_norm": 0.1464393436908722, "learning_rate": 5.489561094377597e-05, "loss": 0.3685, "num_tokens": 1194953767.0, "step": 1887 }, { "epoch": 0.22324701430767413, "grad_norm": 0.1652904450893402, "learning_rate": 5.48895589396007e-05, "loss": 0.3608, "num_tokens": 1195589094.0, "step": 1888 }, { "epoch": 0.2233652595483032, "grad_norm": 0.15964029729366302, "learning_rate": 5.488350372478754e-05, "loss": 0.4216, "num_tokens": 1196219735.0, "step": 1889 }, { "epoch": 0.22348350478893225, "grad_norm": 0.1495048850774765, "learning_rate": 5.4877445300224615e-05, "loss": 0.3511, "num_tokens": 1196849912.0, "step": 1890 }, { "epoch": 0.2236017500295613, "grad_norm": 0.16153854131698608, "learning_rate": 5.487138366680056e-05, "loss": 0.3799, "num_tokens": 1197483986.0, "step": 1891 }, { "epoch": 0.22371999527019037, "grad_norm": 0.1538904458284378, "learning_rate": 5.4865318825404456e-05, "loss": 0.3961, "num_tokens": 1198120332.0, "step": 1892 }, { "epoch": 0.22383824051081944, "grad_norm": 0.1512598991394043, "learning_rate": 5.485925077692585e-05, "loss": 0.3168, "num_tokens": 1198750956.0, "step": 1893 }, { "epoch": 0.2239564857514485, "grad_norm": 0.17255137860774994, "learning_rate": 5.4853179522254773e-05, "loss": 0.436, "num_tokens": 1199388319.0, "step": 1894 }, { "epoch": 0.22407473099207756, "grad_norm": 0.14017070829868317, "learning_rate": 5.4847105062281735e-05, "loss": 0.3488, "num_tokens": 1200026396.0, "step": 1895 }, { "epoch": 0.22419297623270662, "grad_norm": 0.14663483202457428, "learning_rate": 5.48410273978977e-05, "loss": 0.3499, "num_tokens": 1200653214.0, "step": 1896 }, { "epoch": 0.2243112214733357, "grad_norm": 0.15354713797569275, "learning_rate": 5.4834946529994083e-05, "loss": 0.3782, "num_tokens": 1201283854.0, "step": 1897 }, { "epoch": 0.22442946671396477, "grad_norm": 0.1725357472896576, "learning_rate": 5.482886245946281e-05, "loss": 0.3669, "num_tokens": 1201916015.0, "step": 1898 }, { "epoch": 0.22454771195459383, "grad_norm": 0.18190482258796692, "learning_rate": 5.4822775187196265e-05, "loss": 0.3963, "num_tokens": 1202554692.0, "step": 1899 }, { "epoch": 0.2246659571952229, "grad_norm": 0.16775840520858765, "learning_rate": 5.481668471408729e-05, "loss": 0.3589, "num_tokens": 1203191938.0, "step": 1900 }, { "epoch": 0.22478420243585195, "grad_norm": 0.1781112551689148, "learning_rate": 5.48105910410292e-05, "loss": 0.3639, "num_tokens": 1203824362.0, "step": 1901 }, { "epoch": 0.22490244767648102, "grad_norm": 0.18170200288295746, "learning_rate": 5.480449416891577e-05, "loss": 0.3555, "num_tokens": 1204462936.0, "step": 1902 }, { "epoch": 0.22502069291711008, "grad_norm": 0.20846673846244812, "learning_rate": 5.479839409864127e-05, "loss": 0.3727, "num_tokens": 1205095441.0, "step": 1903 }, { "epoch": 0.22513893815773914, "grad_norm": 0.15848124027252197, "learning_rate": 5.479229083110043e-05, "loss": 0.3259, "num_tokens": 1205729677.0, "step": 1904 }, { "epoch": 0.22525718339836823, "grad_norm": 0.16097433865070343, "learning_rate": 5.478618436718843e-05, "loss": 0.3367, "num_tokens": 1206366321.0, "step": 1905 }, { "epoch": 0.2253754286389973, "grad_norm": 0.19073092937469482, "learning_rate": 5.4780074707800914e-05, "loss": 0.3719, "num_tokens": 1207005752.0, "step": 1906 }, { "epoch": 0.22549367387962635, "grad_norm": 0.1781982034444809, "learning_rate": 5.477396185383406e-05, "loss": 0.4113, "num_tokens": 1207633804.0, "step": 1907 }, { "epoch": 0.2256119191202554, "grad_norm": 0.1554650068283081, "learning_rate": 5.4767845806184415e-05, "loss": 0.364, "num_tokens": 1208270549.0, "step": 1908 }, { "epoch": 0.22573016436088447, "grad_norm": 0.16480515897274017, "learning_rate": 5.476172656574909e-05, "loss": 0.3413, "num_tokens": 1208909763.0, "step": 1909 }, { "epoch": 0.22584840960151353, "grad_norm": 0.1664728820323944, "learning_rate": 5.4755604133425605e-05, "loss": 0.3733, "num_tokens": 1209548729.0, "step": 1910 }, { "epoch": 0.2259666548421426, "grad_norm": 0.1746877133846283, "learning_rate": 5.4749478510111946e-05, "loss": 0.3978, "num_tokens": 1210184584.0, "step": 1911 }, { "epoch": 0.22608490008277166, "grad_norm": 0.14822730422019958, "learning_rate": 5.4743349696706604e-05, "loss": 0.3583, "num_tokens": 1210820281.0, "step": 1912 }, { "epoch": 0.22620314532340074, "grad_norm": 0.1769832819700241, "learning_rate": 5.473721769410852e-05, "loss": 0.3732, "num_tokens": 1211459450.0, "step": 1913 }, { "epoch": 0.2263213905640298, "grad_norm": 0.1664923131465912, "learning_rate": 5.473108250321711e-05, "loss": 0.3546, "num_tokens": 1212089281.0, "step": 1914 }, { "epoch": 0.22643963580465887, "grad_norm": 0.1475830078125, "learning_rate": 5.472494412493222e-05, "loss": 0.3447, "num_tokens": 1212723355.0, "step": 1915 }, { "epoch": 0.22655788104528793, "grad_norm": 0.16894294321537018, "learning_rate": 5.471880256015421e-05, "loss": 0.3711, "num_tokens": 1213354384.0, "step": 1916 }, { "epoch": 0.226676126285917, "grad_norm": 0.1459546536207199, "learning_rate": 5.47126578097839e-05, "loss": 0.3443, "num_tokens": 1213990075.0, "step": 1917 }, { "epoch": 0.22679437152654605, "grad_norm": 0.1568668782711029, "learning_rate": 5.4706509874722556e-05, "loss": 0.3946, "num_tokens": 1214629125.0, "step": 1918 }, { "epoch": 0.2269126167671751, "grad_norm": 0.15302623808383942, "learning_rate": 5.4700358755871915e-05, "loss": 0.3569, "num_tokens": 1215264589.0, "step": 1919 }, { "epoch": 0.22703086200780417, "grad_norm": 0.18077534437179565, "learning_rate": 5.469420445413421e-05, "loss": 0.3765, "num_tokens": 1215902900.0, "step": 1920 }, { "epoch": 0.22714910724843326, "grad_norm": 0.1580323427915573, "learning_rate": 5.46880469704121e-05, "loss": 0.3781, "num_tokens": 1216535762.0, "step": 1921 }, { "epoch": 0.22726735248906232, "grad_norm": 0.17579665780067444, "learning_rate": 5.468188630560874e-05, "loss": 0.3726, "num_tokens": 1217171423.0, "step": 1922 }, { "epoch": 0.22738559772969139, "grad_norm": 0.157264843583107, "learning_rate": 5.4675722460627734e-05, "loss": 0.3705, "num_tokens": 1217808427.0, "step": 1923 }, { "epoch": 0.22750384297032045, "grad_norm": 0.16392987966537476, "learning_rate": 5.466955543637318e-05, "loss": 0.3892, "num_tokens": 1218442317.0, "step": 1924 }, { "epoch": 0.2276220882109495, "grad_norm": 0.15679478645324707, "learning_rate": 5.46633852337496e-05, "loss": 0.3483, "num_tokens": 1219075224.0, "step": 1925 }, { "epoch": 0.22774033345157857, "grad_norm": 0.15674087405204773, "learning_rate": 5.465721185366201e-05, "loss": 0.3837, "num_tokens": 1219706682.0, "step": 1926 }, { "epoch": 0.22785857869220763, "grad_norm": 0.14894507825374603, "learning_rate": 5.46510352970159e-05, "loss": 0.3657, "num_tokens": 1220346000.0, "step": 1927 }, { "epoch": 0.2279768239328367, "grad_norm": 0.1725359708070755, "learning_rate": 5.4644855564717204e-05, "loss": 0.4, "num_tokens": 1220976247.0, "step": 1928 }, { "epoch": 0.22809506917346578, "grad_norm": 0.15751789510250092, "learning_rate": 5.4638672657672314e-05, "loss": 0.3716, "num_tokens": 1221609494.0, "step": 1929 }, { "epoch": 0.22821331441409484, "grad_norm": 0.1516016721725464, "learning_rate": 5.4632486576788126e-05, "loss": 0.358, "num_tokens": 1222243620.0, "step": 1930 }, { "epoch": 0.2283315596547239, "grad_norm": 0.1692955046892166, "learning_rate": 5.4626297322971976e-05, "loss": 0.3796, "num_tokens": 1222872003.0, "step": 1931 }, { "epoch": 0.22844980489535296, "grad_norm": 0.14610891044139862, "learning_rate": 5.462010489713167e-05, "loss": 0.3678, "num_tokens": 1223509301.0, "step": 1932 }, { "epoch": 0.22856805013598203, "grad_norm": 0.1580503284931183, "learning_rate": 5.461390930017547e-05, "loss": 0.3318, "num_tokens": 1224141171.0, "step": 1933 }, { "epoch": 0.2286862953766111, "grad_norm": 0.1733987033367157, "learning_rate": 5.4607710533012114e-05, "loss": 0.3764, "num_tokens": 1224777797.0, "step": 1934 }, { "epoch": 0.22880454061724015, "grad_norm": 0.15294653177261353, "learning_rate": 5.460150859655081e-05, "loss": 0.3778, "num_tokens": 1225410917.0, "step": 1935 }, { "epoch": 0.2289227858578692, "grad_norm": 0.14203496277332306, "learning_rate": 5.4595303491701213e-05, "loss": 0.3463, "num_tokens": 1226044770.0, "step": 1936 }, { "epoch": 0.2290410310984983, "grad_norm": 0.190177783370018, "learning_rate": 5.4589095219373465e-05, "loss": 0.3876, "num_tokens": 1226684345.0, "step": 1937 }, { "epoch": 0.22915927633912736, "grad_norm": 0.1539534032344818, "learning_rate": 5.458288378047815e-05, "loss": 0.363, "num_tokens": 1227323937.0, "step": 1938 }, { "epoch": 0.22927752157975642, "grad_norm": 0.1458713561296463, "learning_rate": 5.457666917592633e-05, "loss": 0.3391, "num_tokens": 1227957801.0, "step": 1939 }, { "epoch": 0.22939576682038548, "grad_norm": 0.1764982044696808, "learning_rate": 5.4570451406629525e-05, "loss": 0.3848, "num_tokens": 1228594724.0, "step": 1940 }, { "epoch": 0.22951401206101454, "grad_norm": 0.16702842712402344, "learning_rate": 5.456423047349973e-05, "loss": 0.3701, "num_tokens": 1229220164.0, "step": 1941 }, { "epoch": 0.2296322573016436, "grad_norm": 0.14900796115398407, "learning_rate": 5.45580063774494e-05, "loss": 0.3486, "num_tokens": 1229851524.0, "step": 1942 }, { "epoch": 0.22975050254227267, "grad_norm": 0.1488669514656067, "learning_rate": 5.455177911939144e-05, "loss": 0.3684, "num_tokens": 1230484467.0, "step": 1943 }, { "epoch": 0.22986874778290173, "grad_norm": 0.14481857419013977, "learning_rate": 5.454554870023923e-05, "loss": 0.3376, "num_tokens": 1231116359.0, "step": 1944 }, { "epoch": 0.2299869930235308, "grad_norm": 0.16663751006126404, "learning_rate": 5.453931512090661e-05, "loss": 0.3897, "num_tokens": 1231753859.0, "step": 1945 }, { "epoch": 0.23010523826415988, "grad_norm": 0.16572678089141846, "learning_rate": 5.45330783823079e-05, "loss": 0.3386, "num_tokens": 1232388378.0, "step": 1946 }, { "epoch": 0.23022348350478894, "grad_norm": 0.1593601256608963, "learning_rate": 5.4526838485357866e-05, "loss": 0.3741, "num_tokens": 1233026448.0, "step": 1947 }, { "epoch": 0.230341728745418, "grad_norm": 0.14038948714733124, "learning_rate": 5.4520595430971726e-05, "loss": 0.3484, "num_tokens": 1233660542.0, "step": 1948 }, { "epoch": 0.23045997398604706, "grad_norm": 0.1513201892375946, "learning_rate": 5.45143492200652e-05, "loss": 0.3539, "num_tokens": 1234287661.0, "step": 1949 }, { "epoch": 0.23057821922667612, "grad_norm": 0.13773423433303833, "learning_rate": 5.4508099853554435e-05, "loss": 0.3513, "num_tokens": 1234922170.0, "step": 1950 }, { "epoch": 0.23069646446730518, "grad_norm": 0.153417706489563, "learning_rate": 5.450184733235604e-05, "loss": 0.3293, "num_tokens": 1235555317.0, "step": 1951 }, { "epoch": 0.23081470970793425, "grad_norm": 0.1520823836326599, "learning_rate": 5.4495591657387125e-05, "loss": 0.3216, "num_tokens": 1236192199.0, "step": 1952 }, { "epoch": 0.2309329549485633, "grad_norm": 0.15144705772399902, "learning_rate": 5.4489332829565225e-05, "loss": 0.3625, "num_tokens": 1236799053.0, "step": 1953 }, { "epoch": 0.2310512001891924, "grad_norm": 0.13955703377723694, "learning_rate": 5.448307084980834e-05, "loss": 0.3574, "num_tokens": 1237432544.0, "step": 1954 }, { "epoch": 0.23116944542982146, "grad_norm": 0.16970664262771606, "learning_rate": 5.447680571903496e-05, "loss": 0.4497, "num_tokens": 1238070500.0, "step": 1955 }, { "epoch": 0.23128769067045052, "grad_norm": 0.16598466038703918, "learning_rate": 5.447053743816402e-05, "loss": 0.3632, "num_tokens": 1238698864.0, "step": 1956 }, { "epoch": 0.23140593591107958, "grad_norm": 0.1503814309835434, "learning_rate": 5.4464266008114894e-05, "loss": 0.3801, "num_tokens": 1239326142.0, "step": 1957 }, { "epoch": 0.23152418115170864, "grad_norm": 0.1451963484287262, "learning_rate": 5.4457991429807457e-05, "loss": 0.3452, "num_tokens": 1239965759.0, "step": 1958 }, { "epoch": 0.2316424263923377, "grad_norm": 0.15187358856201172, "learning_rate": 5.445171370416203e-05, "loss": 0.3638, "num_tokens": 1240593937.0, "step": 1959 }, { "epoch": 0.23176067163296676, "grad_norm": 0.14551091194152832, "learning_rate": 5.444543283209939e-05, "loss": 0.3507, "num_tokens": 1241232600.0, "step": 1960 }, { "epoch": 0.23187891687359583, "grad_norm": 0.18228963017463684, "learning_rate": 5.443914881454078e-05, "loss": 0.3933, "num_tokens": 1241868072.0, "step": 1961 }, { "epoch": 0.23199716211422491, "grad_norm": 0.165349081158638, "learning_rate": 5.44328616524079e-05, "loss": 0.3879, "num_tokens": 1242499329.0, "step": 1962 }, { "epoch": 0.23211540735485398, "grad_norm": 0.15861810743808746, "learning_rate": 5.442657134662292e-05, "loss": 0.333, "num_tokens": 1243132947.0, "step": 1963 }, { "epoch": 0.23223365259548304, "grad_norm": 0.16581396758556366, "learning_rate": 5.442027789810847e-05, "loss": 0.3878, "num_tokens": 1243768759.0, "step": 1964 }, { "epoch": 0.2323518978361121, "grad_norm": 0.16230174899101257, "learning_rate": 5.441398130778765e-05, "loss": 0.355, "num_tokens": 1244397968.0, "step": 1965 }, { "epoch": 0.23247014307674116, "grad_norm": 0.16032636165618896, "learning_rate": 5.4407681576583986e-05, "loss": 0.3583, "num_tokens": 1245030355.0, "step": 1966 }, { "epoch": 0.23258838831737022, "grad_norm": 0.15290401875972748, "learning_rate": 5.440137870542149e-05, "loss": 0.368, "num_tokens": 1245652710.0, "step": 1967 }, { "epoch": 0.23270663355799928, "grad_norm": 0.14915311336517334, "learning_rate": 5.439507269522464e-05, "loss": 0.3318, "num_tokens": 1246288548.0, "step": 1968 }, { "epoch": 0.23282487879862834, "grad_norm": 0.15037572383880615, "learning_rate": 5.438876354691836e-05, "loss": 0.3475, "num_tokens": 1246925966.0, "step": 1969 }, { "epoch": 0.23294312403925743, "grad_norm": 0.15037477016448975, "learning_rate": 5.4382451261428055e-05, "loss": 0.358, "num_tokens": 1247554529.0, "step": 1970 }, { "epoch": 0.2330613692798865, "grad_norm": 0.1574881225824356, "learning_rate": 5.4376135839679554e-05, "loss": 0.4064, "num_tokens": 1248186600.0, "step": 1971 }, { "epoch": 0.23317961452051555, "grad_norm": 0.15518930554389954, "learning_rate": 5.436981728259918e-05, "loss": 0.3713, "num_tokens": 1248802763.0, "step": 1972 }, { "epoch": 0.23329785976114462, "grad_norm": 0.14689789712429047, "learning_rate": 5.43634955911137e-05, "loss": 0.319, "num_tokens": 1249434773.0, "step": 1973 }, { "epoch": 0.23341610500177368, "grad_norm": 0.15772512555122375, "learning_rate": 5.435717076615036e-05, "loss": 0.3655, "num_tokens": 1250071859.0, "step": 1974 }, { "epoch": 0.23353435024240274, "grad_norm": 0.15202851593494415, "learning_rate": 5.4350842808636825e-05, "loss": 0.3556, "num_tokens": 1250709705.0, "step": 1975 }, { "epoch": 0.2336525954830318, "grad_norm": 0.1812056452035904, "learning_rate": 5.434451171950126e-05, "loss": 0.4044, "num_tokens": 1251344489.0, "step": 1976 }, { "epoch": 0.23377084072366086, "grad_norm": 0.1545693576335907, "learning_rate": 5.4338177499672276e-05, "loss": 0.3607, "num_tokens": 1251982650.0, "step": 1977 }, { "epoch": 0.23388908596428995, "grad_norm": 0.16198091208934784, "learning_rate": 5.433184015007892e-05, "loss": 0.3745, "num_tokens": 1252618531.0, "step": 1978 }, { "epoch": 0.234007331204919, "grad_norm": 0.169450581073761, "learning_rate": 5.4325499671650734e-05, "loss": 0.3557, "num_tokens": 1253250959.0, "step": 1979 }, { "epoch": 0.23412557644554807, "grad_norm": 0.15970279276371002, "learning_rate": 5.431915606531771e-05, "loss": 0.3749, "num_tokens": 1253889968.0, "step": 1980 }, { "epoch": 0.23424382168617713, "grad_norm": 0.15565136075019836, "learning_rate": 5.431280933201029e-05, "loss": 0.3591, "num_tokens": 1254520645.0, "step": 1981 }, { "epoch": 0.2343620669268062, "grad_norm": 0.16840307414531708, "learning_rate": 5.430645947265936e-05, "loss": 0.3917, "num_tokens": 1255148745.0, "step": 1982 }, { "epoch": 0.23448031216743526, "grad_norm": 0.15070202946662903, "learning_rate": 5.4300106488196304e-05, "loss": 0.3755, "num_tokens": 1255784484.0, "step": 1983 }, { "epoch": 0.23459855740806432, "grad_norm": 0.1479189097881317, "learning_rate": 5.429375037955293e-05, "loss": 0.3958, "num_tokens": 1256417769.0, "step": 1984 }, { "epoch": 0.23471680264869338, "grad_norm": 0.15323278307914734, "learning_rate": 5.428739114766152e-05, "loss": 0.3557, "num_tokens": 1257052424.0, "step": 1985 }, { "epoch": 0.23483504788932247, "grad_norm": 0.14574050903320312, "learning_rate": 5.4281028793454814e-05, "loss": 0.345, "num_tokens": 1257689558.0, "step": 1986 }, { "epoch": 0.23495329312995153, "grad_norm": 0.16904637217521667, "learning_rate": 5.4274663317866e-05, "loss": 0.3809, "num_tokens": 1258323494.0, "step": 1987 }, { "epoch": 0.2350715383705806, "grad_norm": 0.15759454667568207, "learning_rate": 5.4268294721828725e-05, "loss": 0.3494, "num_tokens": 1258955356.0, "step": 1988 }, { "epoch": 0.23518978361120965, "grad_norm": 0.17300264537334442, "learning_rate": 5.4261923006277116e-05, "loss": 0.3677, "num_tokens": 1259588215.0, "step": 1989 }, { "epoch": 0.2353080288518387, "grad_norm": 0.18069575726985931, "learning_rate": 5.425554817214573e-05, "loss": 0.3917, "num_tokens": 1260226408.0, "step": 1990 }, { "epoch": 0.23542627409246777, "grad_norm": 0.16216754913330078, "learning_rate": 5.4249170220369594e-05, "loss": 0.3652, "num_tokens": 1260860288.0, "step": 1991 }, { "epoch": 0.23554451933309684, "grad_norm": 0.17436784505844116, "learning_rate": 5.424278915188419e-05, "loss": 0.3766, "num_tokens": 1261494410.0, "step": 1992 }, { "epoch": 0.2356627645737259, "grad_norm": 0.1748049557209015, "learning_rate": 5.423640496762545e-05, "loss": 0.3702, "num_tokens": 1262129492.0, "step": 1993 }, { "epoch": 0.23578100981435499, "grad_norm": 0.1513872891664505, "learning_rate": 5.4230017668529774e-05, "loss": 0.3467, "num_tokens": 1262767078.0, "step": 1994 }, { "epoch": 0.23589925505498405, "grad_norm": 0.17539353668689728, "learning_rate": 5.422362725553403e-05, "loss": 0.3425, "num_tokens": 1263402389.0, "step": 1995 }, { "epoch": 0.2360175002956131, "grad_norm": 0.17234133183956146, "learning_rate": 5.4217233729575515e-05, "loss": 0.4215, "num_tokens": 1264035180.0, "step": 1996 }, { "epoch": 0.23613574553624217, "grad_norm": 0.1417018324136734, "learning_rate": 5.421083709159199e-05, "loss": 0.3323, "num_tokens": 1264670288.0, "step": 1997 }, { "epoch": 0.23625399077687123, "grad_norm": 0.16061554849147797, "learning_rate": 5.420443734252169e-05, "loss": 0.3373, "num_tokens": 1265305830.0, "step": 1998 }, { "epoch": 0.2363722360175003, "grad_norm": 0.16978295147418976, "learning_rate": 5.4198034483303296e-05, "loss": 0.3548, "num_tokens": 1265939517.0, "step": 1999 }, { "epoch": 0.23649048125812935, "grad_norm": 0.1711961328983307, "learning_rate": 5.4191628514875924e-05, "loss": 0.4026, "num_tokens": 1266577606.0, "step": 2000 }, { "epoch": 0.23660872649875841, "grad_norm": 0.15285345911979675, "learning_rate": 5.418521943817919e-05, "loss": 0.3863, "num_tokens": 1267214198.0, "step": 2001 }, { "epoch": 0.23672697173938748, "grad_norm": 0.1700797975063324, "learning_rate": 5.417880725415312e-05, "loss": 0.377, "num_tokens": 1267844709.0, "step": 2002 }, { "epoch": 0.23684521698001657, "grad_norm": 0.1472562849521637, "learning_rate": 5.417239196373823e-05, "loss": 0.3384, "num_tokens": 1268478703.0, "step": 2003 }, { "epoch": 0.23696346222064563, "grad_norm": 0.15963377058506012, "learning_rate": 5.416597356787548e-05, "loss": 0.3657, "num_tokens": 1269115700.0, "step": 2004 }, { "epoch": 0.2370817074612747, "grad_norm": 0.15071351826190948, "learning_rate": 5.4159552067506274e-05, "loss": 0.3635, "num_tokens": 1269747201.0, "step": 2005 }, { "epoch": 0.23719995270190375, "grad_norm": 0.16851656138896942, "learning_rate": 5.4153127463572494e-05, "loss": 0.3412, "num_tokens": 1270382337.0, "step": 2006 }, { "epoch": 0.2373181979425328, "grad_norm": 0.17994379997253418, "learning_rate": 5.414669975701646e-05, "loss": 0.3668, "num_tokens": 1271020341.0, "step": 2007 }, { "epoch": 0.23743644318316187, "grad_norm": 0.16902025043964386, "learning_rate": 5.4140268948780943e-05, "loss": 0.3597, "num_tokens": 1271652135.0, "step": 2008 }, { "epoch": 0.23755468842379093, "grad_norm": 0.15479910373687744, "learning_rate": 5.4133835039809196e-05, "loss": 0.3538, "num_tokens": 1272287068.0, "step": 2009 }, { "epoch": 0.23767293366442, "grad_norm": 0.15584295988082886, "learning_rate": 5.412739803104489e-05, "loss": 0.3591, "num_tokens": 1272924545.0, "step": 2010 }, { "epoch": 0.23779117890504908, "grad_norm": 0.1701490581035614, "learning_rate": 5.412095792343218e-05, "loss": 0.4007, "num_tokens": 1273561738.0, "step": 2011 }, { "epoch": 0.23790942414567814, "grad_norm": 0.14669926464557648, "learning_rate": 5.4114514717915666e-05, "loss": 0.3491, "num_tokens": 1274195285.0, "step": 2012 }, { "epoch": 0.2380276693863072, "grad_norm": 0.16035956144332886, "learning_rate": 5.41080684154404e-05, "loss": 0.3607, "num_tokens": 1274817746.0, "step": 2013 }, { "epoch": 0.23814591462693627, "grad_norm": 0.14970235526561737, "learning_rate": 5.410161901695188e-05, "loss": 0.3073, "num_tokens": 1275435439.0, "step": 2014 }, { "epoch": 0.23826415986756533, "grad_norm": 0.16121137142181396, "learning_rate": 5.409516652339609e-05, "loss": 0.3907, "num_tokens": 1276068813.0, "step": 2015 }, { "epoch": 0.2383824051081944, "grad_norm": 0.14724740386009216, "learning_rate": 5.4088710935719414e-05, "loss": 0.355, "num_tokens": 1276698654.0, "step": 2016 }, { "epoch": 0.23850065034882345, "grad_norm": 0.19315670430660248, "learning_rate": 5.408225225486875e-05, "loss": 0.3982, "num_tokens": 1277333784.0, "step": 2017 }, { "epoch": 0.2386188955894525, "grad_norm": 0.15700021386146545, "learning_rate": 5.407579048179141e-05, "loss": 0.3558, "num_tokens": 1277962469.0, "step": 2018 }, { "epoch": 0.2387371408300816, "grad_norm": 0.18013453483581543, "learning_rate": 5.406932561743516e-05, "loss": 0.3878, "num_tokens": 1278598036.0, "step": 2019 }, { "epoch": 0.23885538607071066, "grad_norm": 0.1724926233291626, "learning_rate": 5.4062857662748254e-05, "loss": 0.3796, "num_tokens": 1279232150.0, "step": 2020 }, { "epoch": 0.23897363131133972, "grad_norm": 0.1612100899219513, "learning_rate": 5.405638661867935e-05, "loss": 0.3616, "num_tokens": 1279866936.0, "step": 2021 }, { "epoch": 0.23909187655196878, "grad_norm": 0.15893042087554932, "learning_rate": 5.404991248617761e-05, "loss": 0.3375, "num_tokens": 1280501697.0, "step": 2022 }, { "epoch": 0.23921012179259785, "grad_norm": 0.1754988431930542, "learning_rate": 5.4043435266192586e-05, "loss": 0.4213, "num_tokens": 1281129166.0, "step": 2023 }, { "epoch": 0.2393283670332269, "grad_norm": 0.1480528712272644, "learning_rate": 5.403695495967437e-05, "loss": 0.35, "num_tokens": 1281765313.0, "step": 2024 }, { "epoch": 0.23944661227385597, "grad_norm": 0.17405565083026886, "learning_rate": 5.4030471567573425e-05, "loss": 0.3928, "num_tokens": 1282401699.0, "step": 2025 }, { "epoch": 0.23956485751448503, "grad_norm": 0.17622433602809906, "learning_rate": 5.40239850908407e-05, "loss": 0.4057, "num_tokens": 1283036777.0, "step": 2026 }, { "epoch": 0.23968310275511412, "grad_norm": 0.14597110450267792, "learning_rate": 5.40174955304276e-05, "loss": 0.3595, "num_tokens": 1283671025.0, "step": 2027 }, { "epoch": 0.23980134799574318, "grad_norm": 0.15811839699745178, "learning_rate": 5.401100288728597e-05, "loss": 0.3479, "num_tokens": 1284302915.0, "step": 2028 }, { "epoch": 0.23991959323637224, "grad_norm": 0.1497052162885666, "learning_rate": 5.4004507162368125e-05, "loss": 0.3751, "num_tokens": 1284941325.0, "step": 2029 }, { "epoch": 0.2400378384770013, "grad_norm": 0.15771523118019104, "learning_rate": 5.3998008356626826e-05, "loss": 0.3705, "num_tokens": 1285577381.0, "step": 2030 }, { "epoch": 0.24015608371763036, "grad_norm": 0.15651516616344452, "learning_rate": 5.399150647101528e-05, "loss": 0.3896, "num_tokens": 1286210952.0, "step": 2031 }, { "epoch": 0.24027432895825943, "grad_norm": 0.14082595705986023, "learning_rate": 5.398500150648713e-05, "loss": 0.3516, "num_tokens": 1286839896.0, "step": 2032 }, { "epoch": 0.2403925741988885, "grad_norm": 0.162435844540596, "learning_rate": 5.3978493463996496e-05, "loss": 0.3746, "num_tokens": 1287473506.0, "step": 2033 }, { "epoch": 0.24051081943951755, "grad_norm": 0.15836970508098602, "learning_rate": 5.3971982344497944e-05, "loss": 0.3824, "num_tokens": 1288110471.0, "step": 2034 }, { "epoch": 0.24062906468014664, "grad_norm": 0.1578359454870224, "learning_rate": 5.3965468148946505e-05, "loss": 0.3773, "num_tokens": 1288746833.0, "step": 2035 }, { "epoch": 0.2407473099207757, "grad_norm": 0.14599491655826569, "learning_rate": 5.395895087829761e-05, "loss": 0.3699, "num_tokens": 1289384237.0, "step": 2036 }, { "epoch": 0.24086555516140476, "grad_norm": 0.14697672426700592, "learning_rate": 5.39524305335072e-05, "loss": 0.3825, "num_tokens": 1290019192.0, "step": 2037 }, { "epoch": 0.24098380040203382, "grad_norm": 0.14842620491981506, "learning_rate": 5.3945907115531635e-05, "loss": 0.3655, "num_tokens": 1290658276.0, "step": 2038 }, { "epoch": 0.24110204564266288, "grad_norm": 0.14941760897636414, "learning_rate": 5.393938062532774e-05, "loss": 0.3706, "num_tokens": 1291293510.0, "step": 2039 }, { "epoch": 0.24122029088329194, "grad_norm": 0.14156727492809296, "learning_rate": 5.393285106385278e-05, "loss": 0.3414, "num_tokens": 1291917409.0, "step": 2040 }, { "epoch": 0.241338536123921, "grad_norm": 0.14439865946769714, "learning_rate": 5.392631843206447e-05, "loss": 0.3526, "num_tokens": 1292552987.0, "step": 2041 }, { "epoch": 0.24145678136455007, "grad_norm": 0.15525022149085999, "learning_rate": 5.391978273092098e-05, "loss": 0.3502, "num_tokens": 1293186477.0, "step": 2042 }, { "epoch": 0.24157502660517916, "grad_norm": 0.16831472516059875, "learning_rate": 5.391324396138093e-05, "loss": 0.4008, "num_tokens": 1293819286.0, "step": 2043 }, { "epoch": 0.24169327184580822, "grad_norm": 0.13581213355064392, "learning_rate": 5.3906702124403406e-05, "loss": 0.3196, "num_tokens": 1294453861.0, "step": 2044 }, { "epoch": 0.24181151708643728, "grad_norm": 0.1604950726032257, "learning_rate": 5.3900157220947906e-05, "loss": 0.4111, "num_tokens": 1295089864.0, "step": 2045 }, { "epoch": 0.24192976232706634, "grad_norm": 0.1553572118282318, "learning_rate": 5.389360925197442e-05, "loss": 0.3582, "num_tokens": 1295726569.0, "step": 2046 }, { "epoch": 0.2420480075676954, "grad_norm": 0.15278811752796173, "learning_rate": 5.388705821844334e-05, "loss": 0.3386, "num_tokens": 1296360993.0, "step": 2047 }, { "epoch": 0.24216625280832446, "grad_norm": 0.15493986010551453, "learning_rate": 5.3880504121315567e-05, "loss": 0.3942, "num_tokens": 1297000395.0, "step": 2048 }, { "epoch": 0.24228449804895352, "grad_norm": 0.15086907148361206, "learning_rate": 5.3873946961552396e-05, "loss": 0.355, "num_tokens": 1297637794.0, "step": 2049 }, { "epoch": 0.24240274328958258, "grad_norm": 0.1396048367023468, "learning_rate": 5.386738674011561e-05, "loss": 0.3602, "num_tokens": 1298274450.0, "step": 2050 }, { "epoch": 0.24252098853021165, "grad_norm": 0.15514451265335083, "learning_rate": 5.386082345796741e-05, "loss": 0.4049, "num_tokens": 1298912342.0, "step": 2051 }, { "epoch": 0.24263923377084073, "grad_norm": 0.1802719384431839, "learning_rate": 5.3854257116070476e-05, "loss": 0.3943, "num_tokens": 1299542171.0, "step": 2052 }, { "epoch": 0.2427574790114698, "grad_norm": 0.14600828289985657, "learning_rate": 5.384768771538791e-05, "loss": 0.3856, "num_tokens": 1300176165.0, "step": 2053 }, { "epoch": 0.24287572425209886, "grad_norm": 0.1754714548587799, "learning_rate": 5.384111525688328e-05, "loss": 0.3746, "num_tokens": 1300812545.0, "step": 2054 }, { "epoch": 0.24299396949272792, "grad_norm": 0.1517096310853958, "learning_rate": 5.383453974152061e-05, "loss": 0.4027, "num_tokens": 1301447175.0, "step": 2055 }, { "epoch": 0.24311221473335698, "grad_norm": 0.15638382732868195, "learning_rate": 5.3827961170264336e-05, "loss": 0.3807, "num_tokens": 1302081362.0, "step": 2056 }, { "epoch": 0.24323045997398604, "grad_norm": 0.16212023794651031, "learning_rate": 5.382137954407938e-05, "loss": 0.3821, "num_tokens": 1302715490.0, "step": 2057 }, { "epoch": 0.2433487052146151, "grad_norm": 0.14419527351856232, "learning_rate": 5.3814794863931094e-05, "loss": 0.3678, "num_tokens": 1303349090.0, "step": 2058 }, { "epoch": 0.24346695045524416, "grad_norm": 0.1741582751274109, "learning_rate": 5.380820713078528e-05, "loss": 0.4249, "num_tokens": 1303981074.0, "step": 2059 }, { "epoch": 0.24358519569587325, "grad_norm": 0.15199047327041626, "learning_rate": 5.38016163456082e-05, "loss": 0.3748, "num_tokens": 1304616565.0, "step": 2060 }, { "epoch": 0.2437034409365023, "grad_norm": 0.1453985869884491, "learning_rate": 5.379502250936655e-05, "loss": 0.3361, "num_tokens": 1305250305.0, "step": 2061 }, { "epoch": 0.24382168617713137, "grad_norm": 0.18713077902793884, "learning_rate": 5.3788425623027464e-05, "loss": 0.3669, "num_tokens": 1305885171.0, "step": 2062 }, { "epoch": 0.24393993141776044, "grad_norm": 0.17551301419734955, "learning_rate": 5.378182568755855e-05, "loss": 0.3934, "num_tokens": 1306515269.0, "step": 2063 }, { "epoch": 0.2440581766583895, "grad_norm": 0.15984942018985748, "learning_rate": 5.3775222703927856e-05, "loss": 0.3574, "num_tokens": 1307140965.0, "step": 2064 }, { "epoch": 0.24417642189901856, "grad_norm": 0.15906912088394165, "learning_rate": 5.3768616673103855e-05, "loss": 0.3618, "num_tokens": 1307772843.0, "step": 2065 }, { "epoch": 0.24429466713964762, "grad_norm": 0.1604430228471756, "learning_rate": 5.376200759605548e-05, "loss": 0.3818, "num_tokens": 1308403711.0, "step": 2066 }, { "epoch": 0.24441291238027668, "grad_norm": 0.14180250465869904, "learning_rate": 5.375539547375213e-05, "loss": 0.3352, "num_tokens": 1309036194.0, "step": 2067 }, { "epoch": 0.24453115762090577, "grad_norm": 0.1704554557800293, "learning_rate": 5.374878030716362e-05, "loss": 0.3794, "num_tokens": 1309668349.0, "step": 2068 }, { "epoch": 0.24464940286153483, "grad_norm": 0.16279055178165436, "learning_rate": 5.374216209726023e-05, "loss": 0.3824, "num_tokens": 1310305545.0, "step": 2069 }, { "epoch": 0.2447676481021639, "grad_norm": 0.16345319151878357, "learning_rate": 5.373554084501269e-05, "loss": 0.3478, "num_tokens": 1310939242.0, "step": 2070 }, { "epoch": 0.24488589334279295, "grad_norm": 0.13663792610168457, "learning_rate": 5.3728916551392154e-05, "loss": 0.3644, "num_tokens": 1311572053.0, "step": 2071 }, { "epoch": 0.24500413858342202, "grad_norm": 0.1513679027557373, "learning_rate": 5.372228921737025e-05, "loss": 0.3596, "num_tokens": 1312205190.0, "step": 2072 }, { "epoch": 0.24512238382405108, "grad_norm": 0.172732874751091, "learning_rate": 5.3715658843919034e-05, "loss": 0.4035, "num_tokens": 1312841010.0, "step": 2073 }, { "epoch": 0.24524062906468014, "grad_norm": 0.15379314124584198, "learning_rate": 5.3709025432011007e-05, "loss": 0.3801, "num_tokens": 1313472468.0, "step": 2074 }, { "epoch": 0.2453588743053092, "grad_norm": 0.14200414717197418, "learning_rate": 5.370238898261913e-05, "loss": 0.3581, "num_tokens": 1314111009.0, "step": 2075 }, { "epoch": 0.2454771195459383, "grad_norm": 0.17472080886363983, "learning_rate": 5.369574949671678e-05, "loss": 0.3638, "num_tokens": 1314744729.0, "step": 2076 }, { "epoch": 0.24559536478656735, "grad_norm": 0.15372346341609955, "learning_rate": 5.368910697527782e-05, "loss": 0.3453, "num_tokens": 1315375503.0, "step": 2077 }, { "epoch": 0.2457136100271964, "grad_norm": 0.16470204293727875, "learning_rate": 5.3682461419276525e-05, "loss": 0.3776, "num_tokens": 1316012926.0, "step": 2078 }, { "epoch": 0.24583185526782547, "grad_norm": 0.15991996228694916, "learning_rate": 5.367581282968764e-05, "loss": 0.3843, "num_tokens": 1316650789.0, "step": 2079 }, { "epoch": 0.24595010050845453, "grad_norm": 0.15553231537342072, "learning_rate": 5.366916120748634e-05, "loss": 0.3529, "num_tokens": 1317282748.0, "step": 2080 }, { "epoch": 0.2460683457490836, "grad_norm": 0.14532029628753662, "learning_rate": 5.3662506553648246e-05, "loss": 0.3533, "num_tokens": 1317922154.0, "step": 2081 }, { "epoch": 0.24618659098971266, "grad_norm": 0.13556896150112152, "learning_rate": 5.365584886914941e-05, "loss": 0.3532, "num_tokens": 1318555926.0, "step": 2082 }, { "epoch": 0.24630483623034172, "grad_norm": 0.15878699719905853, "learning_rate": 5.3649188154966374e-05, "loss": 0.4059, "num_tokens": 1319192513.0, "step": 2083 }, { "epoch": 0.2464230814709708, "grad_norm": 0.1363053172826767, "learning_rate": 5.3642524412076066e-05, "loss": 0.3262, "num_tokens": 1319824850.0, "step": 2084 }, { "epoch": 0.24654132671159987, "grad_norm": 0.16873478889465332, "learning_rate": 5.363585764145591e-05, "loss": 0.4022, "num_tokens": 1320458462.0, "step": 2085 }, { "epoch": 0.24665957195222893, "grad_norm": 0.1472693383693695, "learning_rate": 5.362918784408373e-05, "loss": 0.3571, "num_tokens": 1321092111.0, "step": 2086 }, { "epoch": 0.246777817192858, "grad_norm": 0.14396964013576508, "learning_rate": 5.362251502093783e-05, "loss": 0.337, "num_tokens": 1321724270.0, "step": 2087 }, { "epoch": 0.24689606243348705, "grad_norm": 0.14331543445587158, "learning_rate": 5.3615839172996934e-05, "loss": 0.3745, "num_tokens": 1322359786.0, "step": 2088 }, { "epoch": 0.2470143076741161, "grad_norm": 0.15597862005233765, "learning_rate": 5.360916030124022e-05, "loss": 0.3624, "num_tokens": 1322984880.0, "step": 2089 }, { "epoch": 0.24713255291474517, "grad_norm": 0.151664599776268, "learning_rate": 5.360247840664731e-05, "loss": 0.3682, "num_tokens": 1323623217.0, "step": 2090 }, { "epoch": 0.24725079815537424, "grad_norm": 0.14420808851718903, "learning_rate": 5.359579349019826e-05, "loss": 0.3499, "num_tokens": 1324257199.0, "step": 2091 }, { "epoch": 0.24736904339600332, "grad_norm": 0.16332575678825378, "learning_rate": 5.358910555287359e-05, "loss": 0.3602, "num_tokens": 1324889167.0, "step": 2092 }, { "epoch": 0.24748728863663239, "grad_norm": 0.14524239301681519, "learning_rate": 5.3582414595654225e-05, "loss": 0.3627, "num_tokens": 1325522031.0, "step": 2093 }, { "epoch": 0.24760553387726145, "grad_norm": 0.15779301524162292, "learning_rate": 5.357572061952158e-05, "loss": 0.3947, "num_tokens": 1326153928.0, "step": 2094 }, { "epoch": 0.2477237791178905, "grad_norm": 0.16470865905284882, "learning_rate": 5.356902362545749e-05, "loss": 0.4203, "num_tokens": 1326785341.0, "step": 2095 }, { "epoch": 0.24784202435851957, "grad_norm": 0.15758727490901947, "learning_rate": 5.3562323614444215e-05, "loss": 0.405, "num_tokens": 1327423021.0, "step": 2096 }, { "epoch": 0.24796026959914863, "grad_norm": 0.14064037799835205, "learning_rate": 5.355562058746449e-05, "loss": 0.329, "num_tokens": 1328060086.0, "step": 2097 }, { "epoch": 0.2480785148397777, "grad_norm": 0.15590064227581024, "learning_rate": 5.354891454550146e-05, "loss": 0.3802, "num_tokens": 1328698549.0, "step": 2098 }, { "epoch": 0.24819676008040675, "grad_norm": 0.13441018760204315, "learning_rate": 5.354220548953875e-05, "loss": 0.3602, "num_tokens": 1329331694.0, "step": 2099 }, { "epoch": 0.24831500532103584, "grad_norm": 0.14545440673828125, "learning_rate": 5.35354934205604e-05, "loss": 0.3621, "num_tokens": 1329962005.0, "step": 2100 }, { "epoch": 0.2484332505616649, "grad_norm": 0.17721132934093475, "learning_rate": 5.352877833955089e-05, "loss": 0.3849, "num_tokens": 1330597204.0, "step": 2101 }, { "epoch": 0.24855149580229396, "grad_norm": 0.1511656492948532, "learning_rate": 5.3522060247495164e-05, "loss": 0.3608, "num_tokens": 1331236517.0, "step": 2102 }, { "epoch": 0.24866974104292303, "grad_norm": 0.14000004529953003, "learning_rate": 5.351533914537859e-05, "loss": 0.3028, "num_tokens": 1331871778.0, "step": 2103 }, { "epoch": 0.2487879862835521, "grad_norm": 0.15406225621700287, "learning_rate": 5.350861503418697e-05, "loss": 0.3705, "num_tokens": 1332509099.0, "step": 2104 }, { "epoch": 0.24890623152418115, "grad_norm": 0.16031169891357422, "learning_rate": 5.350188791490658e-05, "loss": 0.3563, "num_tokens": 1333140652.0, "step": 2105 }, { "epoch": 0.2490244767648102, "grad_norm": 0.17556558549404144, "learning_rate": 5.3495157788524083e-05, "loss": 0.4279, "num_tokens": 1333778201.0, "step": 2106 }, { "epoch": 0.24914272200543927, "grad_norm": 0.16550473868846893, "learning_rate": 5.3488424656026665e-05, "loss": 0.3657, "num_tokens": 1334415211.0, "step": 2107 }, { "epoch": 0.24926096724606833, "grad_norm": 0.14152948558330536, "learning_rate": 5.348168851840186e-05, "loss": 0.3069, "num_tokens": 1335048790.0, "step": 2108 }, { "epoch": 0.24937921248669742, "grad_norm": 0.17543792724609375, "learning_rate": 5.3474949376637705e-05, "loss": 0.3784, "num_tokens": 1335683687.0, "step": 2109 }, { "epoch": 0.24949745772732648, "grad_norm": 0.14950953423976898, "learning_rate": 5.3468207231722664e-05, "loss": 0.3646, "num_tokens": 1336320525.0, "step": 2110 }, { "epoch": 0.24961570296795554, "grad_norm": 0.15556392073631287, "learning_rate": 5.346146208464562e-05, "loss": 0.3628, "num_tokens": 1336952088.0, "step": 2111 }, { "epoch": 0.2497339482085846, "grad_norm": 0.16873623430728912, "learning_rate": 5.345471393639594e-05, "loss": 0.3892, "num_tokens": 1337591066.0, "step": 2112 }, { "epoch": 0.24985219344921367, "grad_norm": 0.1372711956501007, "learning_rate": 5.344796278796338e-05, "loss": 0.3194, "num_tokens": 1338227429.0, "step": 2113 }, { "epoch": 0.24997043868984273, "grad_norm": 0.1443687379360199, "learning_rate": 5.344120864033817e-05, "loss": 0.3481, "num_tokens": 1338857099.0, "step": 2114 }, { "epoch": 0.2500886839304718, "grad_norm": 0.14881138503551483, "learning_rate": 5.343445149451098e-05, "loss": 0.339, "num_tokens": 1339491225.0, "step": 2115 }, { "epoch": 0.2502069291711009, "grad_norm": 0.168214812874794, "learning_rate": 5.342769135147289e-05, "loss": 0.3546, "num_tokens": 1340126659.0, "step": 2116 }, { "epoch": 0.2503251744117299, "grad_norm": 0.141407772898674, "learning_rate": 5.342092821221546e-05, "loss": 0.3378, "num_tokens": 1340752701.0, "step": 2117 }, { "epoch": 0.250443419652359, "grad_norm": 0.17049533128738403, "learning_rate": 5.341416207773065e-05, "loss": 0.3765, "num_tokens": 1341388806.0, "step": 2118 }, { "epoch": 0.25056166489298803, "grad_norm": 0.15516772866249084, "learning_rate": 5.3407392949010894e-05, "loss": 0.3771, "num_tokens": 1342025572.0, "step": 2119 }, { "epoch": 0.2506799101336171, "grad_norm": 0.1480228751897812, "learning_rate": 5.340062082704905e-05, "loss": 0.3658, "num_tokens": 1342621450.0, "step": 2120 }, { "epoch": 0.2507981553742462, "grad_norm": 0.16725991666316986, "learning_rate": 5.33938457128384e-05, "loss": 0.3692, "num_tokens": 1343251996.0, "step": 2121 }, { "epoch": 0.25091640061487525, "grad_norm": 0.15373362600803375, "learning_rate": 5.338706760737269e-05, "loss": 0.3903, "num_tokens": 1343888063.0, "step": 2122 }, { "epoch": 0.25103464585550433, "grad_norm": 0.14497415721416473, "learning_rate": 5.3380286511646094e-05, "loss": 0.3275, "num_tokens": 1344524355.0, "step": 2123 }, { "epoch": 0.25115289109613337, "grad_norm": 0.1689607799053192, "learning_rate": 5.3373502426653225e-05, "loss": 0.3921, "num_tokens": 1345157130.0, "step": 2124 }, { "epoch": 0.25127113633676246, "grad_norm": 0.16514548659324646, "learning_rate": 5.3366715353389145e-05, "loss": 0.3276, "num_tokens": 1345788742.0, "step": 2125 }, { "epoch": 0.2513893815773915, "grad_norm": 0.15629707276821136, "learning_rate": 5.335992529284933e-05, "loss": 0.3693, "num_tokens": 1346424640.0, "step": 2126 }, { "epoch": 0.2515076268180206, "grad_norm": 0.1470033824443817, "learning_rate": 5.33531322460297e-05, "loss": 0.3651, "num_tokens": 1347050079.0, "step": 2127 }, { "epoch": 0.2516258720586496, "grad_norm": 0.15820907056331635, "learning_rate": 5.334633621392665e-05, "loss": 0.3547, "num_tokens": 1347683639.0, "step": 2128 }, { "epoch": 0.2517441172992787, "grad_norm": 0.177626833319664, "learning_rate": 5.333953719753695e-05, "loss": 0.4349, "num_tokens": 1348322094.0, "step": 2129 }, { "epoch": 0.2518623625399078, "grad_norm": 0.1684924066066742, "learning_rate": 5.333273519785787e-05, "loss": 0.3618, "num_tokens": 1348957537.0, "step": 2130 }, { "epoch": 0.2519806077805368, "grad_norm": 0.1514047086238861, "learning_rate": 5.3325930215887074e-05, "loss": 0.3625, "num_tokens": 1349596973.0, "step": 2131 }, { "epoch": 0.2520988530211659, "grad_norm": 0.14272215962409973, "learning_rate": 5.331912225262267e-05, "loss": 0.3386, "num_tokens": 1350233160.0, "step": 2132 }, { "epoch": 0.25221709826179495, "grad_norm": 0.16041554510593414, "learning_rate": 5.331231130906325e-05, "loss": 0.3526, "num_tokens": 1350870883.0, "step": 2133 }, { "epoch": 0.25233534350242404, "grad_norm": 0.15311948955059052, "learning_rate": 5.330549738620776e-05, "loss": 0.3364, "num_tokens": 1351499885.0, "step": 2134 }, { "epoch": 0.25245358874305307, "grad_norm": 0.14642387628555298, "learning_rate": 5.329868048505565e-05, "loss": 0.3297, "num_tokens": 1352131223.0, "step": 2135 }, { "epoch": 0.25257183398368216, "grad_norm": 0.15875321626663208, "learning_rate": 5.3291860606606787e-05, "loss": 0.3573, "num_tokens": 1352768469.0, "step": 2136 }, { "epoch": 0.25269007922431125, "grad_norm": 0.14919845759868622, "learning_rate": 5.328503775186147e-05, "loss": 0.3295, "num_tokens": 1353395367.0, "step": 2137 }, { "epoch": 0.2528083244649403, "grad_norm": 0.1573837399482727, "learning_rate": 5.327821192182042e-05, "loss": 0.3736, "num_tokens": 1354024395.0, "step": 2138 }, { "epoch": 0.25292656970556937, "grad_norm": 0.15392784774303436, "learning_rate": 5.327138311748483e-05, "loss": 0.3826, "num_tokens": 1354657702.0, "step": 2139 }, { "epoch": 0.2530448149461984, "grad_norm": 0.16402103006839752, "learning_rate": 5.326455133985632e-05, "loss": 0.3961, "num_tokens": 1355295855.0, "step": 2140 }, { "epoch": 0.2531630601868275, "grad_norm": 0.14792178571224213, "learning_rate": 5.325771658993691e-05, "loss": 0.3583, "num_tokens": 1355913131.0, "step": 2141 }, { "epoch": 0.2532813054274565, "grad_norm": 0.1974206119775772, "learning_rate": 5.325087886872911e-05, "loss": 0.3732, "num_tokens": 1356552376.0, "step": 2142 }, { "epoch": 0.2533995506680856, "grad_norm": 0.16362109780311584, "learning_rate": 5.324403817723582e-05, "loss": 0.3851, "num_tokens": 1357190876.0, "step": 2143 }, { "epoch": 0.25351779590871465, "grad_norm": 0.15679627656936646, "learning_rate": 5.32371945164604e-05, "loss": 0.3722, "num_tokens": 1357827846.0, "step": 2144 }, { "epoch": 0.25363604114934374, "grad_norm": 0.14891614019870758, "learning_rate": 5.323034788740664e-05, "loss": 0.3469, "num_tokens": 1358460902.0, "step": 2145 }, { "epoch": 0.2537542863899728, "grad_norm": 0.13949279487133026, "learning_rate": 5.3223498291078757e-05, "loss": 0.3688, "num_tokens": 1359097940.0, "step": 2146 }, { "epoch": 0.25387253163060186, "grad_norm": 0.1506310999393463, "learning_rate": 5.321664572848142e-05, "loss": 0.3428, "num_tokens": 1359727970.0, "step": 2147 }, { "epoch": 0.25399077687123095, "grad_norm": 0.16179482638835907, "learning_rate": 5.3209790200619726e-05, "loss": 0.3658, "num_tokens": 1360363518.0, "step": 2148 }, { "epoch": 0.25410902211186, "grad_norm": 0.14653274416923523, "learning_rate": 5.320293170849921e-05, "loss": 0.3583, "num_tokens": 1360996423.0, "step": 2149 }, { "epoch": 0.2542272673524891, "grad_norm": 0.14725536108016968, "learning_rate": 5.319607025312583e-05, "loss": 0.3567, "num_tokens": 1361629975.0, "step": 2150 }, { "epoch": 0.2543455125931181, "grad_norm": 0.15229299664497375, "learning_rate": 5.318920583550598e-05, "loss": 0.3758, "num_tokens": 1362263332.0, "step": 2151 }, { "epoch": 0.2544637578337472, "grad_norm": 0.15284046530723572, "learning_rate": 5.31823384566465e-05, "loss": 0.381, "num_tokens": 1362893971.0, "step": 2152 }, { "epoch": 0.2545820030743763, "grad_norm": 0.1482195258140564, "learning_rate": 5.317546811755466e-05, "loss": 0.3848, "num_tokens": 1363530574.0, "step": 2153 }, { "epoch": 0.2547002483150053, "grad_norm": 0.14732998609542847, "learning_rate": 5.3168594819238166e-05, "loss": 0.3676, "num_tokens": 1364162896.0, "step": 2154 }, { "epoch": 0.2548184935556344, "grad_norm": 0.15356378257274628, "learning_rate": 5.316171856270515e-05, "loss": 0.3566, "num_tokens": 1364795298.0, "step": 2155 }, { "epoch": 0.25493673879626344, "grad_norm": 0.16320830583572388, "learning_rate": 5.315483934896418e-05, "loss": 0.3865, "num_tokens": 1365431336.0, "step": 2156 }, { "epoch": 0.25505498403689253, "grad_norm": 0.1412510722875595, "learning_rate": 5.314795717902426e-05, "loss": 0.3509, "num_tokens": 1366063237.0, "step": 2157 }, { "epoch": 0.25517322927752156, "grad_norm": 0.1473199427127838, "learning_rate": 5.314107205389484e-05, "loss": 0.3393, "num_tokens": 1366701239.0, "step": 2158 }, { "epoch": 0.25529147451815065, "grad_norm": 0.15776368975639343, "learning_rate": 5.3134183974585786e-05, "loss": 0.3525, "num_tokens": 1367334509.0, "step": 2159 }, { "epoch": 0.2554097197587797, "grad_norm": 0.15940934419631958, "learning_rate": 5.312729294210739e-05, "loss": 0.3845, "num_tokens": 1367970895.0, "step": 2160 }, { "epoch": 0.2555279649994088, "grad_norm": 0.156023770570755, "learning_rate": 5.3120398957470406e-05, "loss": 0.3591, "num_tokens": 1368606104.0, "step": 2161 }, { "epoch": 0.25564621024003786, "grad_norm": 0.1543560028076172, "learning_rate": 5.3113502021686e-05, "loss": 0.3624, "num_tokens": 1369242016.0, "step": 2162 }, { "epoch": 0.2557644554806669, "grad_norm": 0.1467498540878296, "learning_rate": 5.310660213576578e-05, "loss": 0.366, "num_tokens": 1369878992.0, "step": 2163 }, { "epoch": 0.255882700721296, "grad_norm": 0.14994537830352783, "learning_rate": 5.3099699300721775e-05, "loss": 0.3699, "num_tokens": 1370511489.0, "step": 2164 }, { "epoch": 0.256000945961925, "grad_norm": 0.15845350921154022, "learning_rate": 5.309279351756646e-05, "loss": 0.3779, "num_tokens": 1371142032.0, "step": 2165 }, { "epoch": 0.2561191912025541, "grad_norm": 0.13557997345924377, "learning_rate": 5.308588478731273e-05, "loss": 0.3583, "num_tokens": 1371775322.0, "step": 2166 }, { "epoch": 0.25623743644318314, "grad_norm": 0.14154909551143646, "learning_rate": 5.3078973110973924e-05, "loss": 0.3487, "num_tokens": 1372409818.0, "step": 2167 }, { "epoch": 0.25635568168381223, "grad_norm": 0.1509413868188858, "learning_rate": 5.307205848956381e-05, "loss": 0.3893, "num_tokens": 1373047668.0, "step": 2168 }, { "epoch": 0.25647392692444126, "grad_norm": 0.15367534756660461, "learning_rate": 5.306514092409659e-05, "loss": 0.3885, "num_tokens": 1373685499.0, "step": 2169 }, { "epoch": 0.25659217216507035, "grad_norm": 0.15413184463977814, "learning_rate": 5.305822041558688e-05, "loss": 0.3726, "num_tokens": 1374323849.0, "step": 2170 }, { "epoch": 0.25671041740569944, "grad_norm": 0.13913336396217346, "learning_rate": 5.305129696504974e-05, "loss": 0.3102, "num_tokens": 1374959880.0, "step": 2171 }, { "epoch": 0.2568286626463285, "grad_norm": 0.15154416859149933, "learning_rate": 5.304437057350067e-05, "loss": 0.3548, "num_tokens": 1375595575.0, "step": 2172 }, { "epoch": 0.25694690788695757, "grad_norm": 0.14348585903644562, "learning_rate": 5.3037441241955605e-05, "loss": 0.3758, "num_tokens": 1376233493.0, "step": 2173 }, { "epoch": 0.2570651531275866, "grad_norm": 0.16622044146060944, "learning_rate": 5.303050897143089e-05, "loss": 0.3636, "num_tokens": 1376867408.0, "step": 2174 }, { "epoch": 0.2571833983682157, "grad_norm": 0.13817891478538513, "learning_rate": 5.3023573762943316e-05, "loss": 0.338, "num_tokens": 1377501464.0, "step": 2175 }, { "epoch": 0.2573016436088447, "grad_norm": 0.14900870621204376, "learning_rate": 5.301663561751009e-05, "loss": 0.3494, "num_tokens": 1378107558.0, "step": 2176 }, { "epoch": 0.2574198888494738, "grad_norm": 0.15680353343486786, "learning_rate": 5.300969453614886e-05, "loss": 0.3439, "num_tokens": 1378742114.0, "step": 2177 }, { "epoch": 0.2575381340901029, "grad_norm": 0.13579994440078735, "learning_rate": 5.300275051987773e-05, "loss": 0.3309, "num_tokens": 1379380501.0, "step": 2178 }, { "epoch": 0.25765637933073193, "grad_norm": 0.14304503798484802, "learning_rate": 5.299580356971519e-05, "loss": 0.3434, "num_tokens": 1380013591.0, "step": 2179 }, { "epoch": 0.257774624571361, "grad_norm": 0.13549815118312836, "learning_rate": 5.298885368668017e-05, "loss": 0.3315, "num_tokens": 1380648061.0, "step": 2180 }, { "epoch": 0.25789286981199006, "grad_norm": 0.15500304102897644, "learning_rate": 5.298190087179206e-05, "loss": 0.3693, "num_tokens": 1381282667.0, "step": 2181 }, { "epoch": 0.25801111505261914, "grad_norm": 0.16345326602458954, "learning_rate": 5.2974945126070645e-05, "loss": 0.3828, "num_tokens": 1381921709.0, "step": 2182 }, { "epoch": 0.2581293602932482, "grad_norm": 0.1559644490480423, "learning_rate": 5.2967986450536174e-05, "loss": 0.349, "num_tokens": 1382558484.0, "step": 2183 }, { "epoch": 0.25824760553387727, "grad_norm": 0.15683728456497192, "learning_rate": 5.2961024846209274e-05, "loss": 0.3769, "num_tokens": 1383194599.0, "step": 2184 }, { "epoch": 0.2583658507745063, "grad_norm": 0.15579140186309814, "learning_rate": 5.295406031411107e-05, "loss": 0.3455, "num_tokens": 1383830599.0, "step": 2185 }, { "epoch": 0.2584840960151354, "grad_norm": 0.1503695249557495, "learning_rate": 5.294709285526306e-05, "loss": 0.3583, "num_tokens": 1384469453.0, "step": 2186 }, { "epoch": 0.2586023412557645, "grad_norm": 0.16478906571865082, "learning_rate": 5.294012247068719e-05, "loss": 0.3755, "num_tokens": 1385108151.0, "step": 2187 }, { "epoch": 0.2587205864963935, "grad_norm": 0.15313348174095154, "learning_rate": 5.293314916140585e-05, "loss": 0.3812, "num_tokens": 1385737537.0, "step": 2188 }, { "epoch": 0.2588388317370226, "grad_norm": 0.1684901863336563, "learning_rate": 5.292617292844184e-05, "loss": 0.3655, "num_tokens": 1386371978.0, "step": 2189 }, { "epoch": 0.25895707697765163, "grad_norm": 0.14332157373428345, "learning_rate": 5.291919377281839e-05, "loss": 0.314, "num_tokens": 1387006855.0, "step": 2190 }, { "epoch": 0.2590753222182807, "grad_norm": 0.1591310352087021, "learning_rate": 5.2912211695559165e-05, "loss": 0.3872, "num_tokens": 1387642922.0, "step": 2191 }, { "epoch": 0.25919356745890976, "grad_norm": 0.168354794383049, "learning_rate": 5.290522669768826e-05, "loss": 0.3547, "num_tokens": 1388273872.0, "step": 2192 }, { "epoch": 0.25931181269953885, "grad_norm": 0.13862217962741852, "learning_rate": 5.289823878023021e-05, "loss": 0.3439, "num_tokens": 1388907732.0, "step": 2193 }, { "epoch": 0.25943005794016794, "grad_norm": 0.1624598205089569, "learning_rate": 5.289124794420993e-05, "loss": 0.3878, "num_tokens": 1389539553.0, "step": 2194 }, { "epoch": 0.25954830318079697, "grad_norm": 0.16137446463108063, "learning_rate": 5.288425419065282e-05, "loss": 0.3731, "num_tokens": 1390174842.0, "step": 2195 }, { "epoch": 0.25966654842142606, "grad_norm": 0.17590227723121643, "learning_rate": 5.2877257520584686e-05, "loss": 0.3753, "num_tokens": 1390810002.0, "step": 2196 }, { "epoch": 0.2597847936620551, "grad_norm": 0.14830352365970612, "learning_rate": 5.287025793503174e-05, "loss": 0.3746, "num_tokens": 1391444325.0, "step": 2197 }, { "epoch": 0.2599030389026842, "grad_norm": 0.15205052495002747, "learning_rate": 5.286325543502067e-05, "loss": 0.3497, "num_tokens": 1392075518.0, "step": 2198 }, { "epoch": 0.2600212841433132, "grad_norm": 0.15763668715953827, "learning_rate": 5.2856250021578544e-05, "loss": 0.3887, "num_tokens": 1392708819.0, "step": 2199 }, { "epoch": 0.2601395293839423, "grad_norm": 0.16404272615909576, "learning_rate": 5.284924169573288e-05, "loss": 0.3806, "num_tokens": 1393340694.0, "step": 2200 }, { "epoch": 0.26025777462457134, "grad_norm": 0.1432538777589798, "learning_rate": 5.284223045851162e-05, "loss": 0.3625, "num_tokens": 1393973875.0, "step": 2201 }, { "epoch": 0.2603760198652004, "grad_norm": 0.158190056681633, "learning_rate": 5.283521631094315e-05, "loss": 0.3592, "num_tokens": 1394610492.0, "step": 2202 }, { "epoch": 0.2604942651058295, "grad_norm": 0.15423351526260376, "learning_rate": 5.282819925405624e-05, "loss": 0.3726, "num_tokens": 1395249777.0, "step": 2203 }, { "epoch": 0.26061251034645855, "grad_norm": 0.16164931654930115, "learning_rate": 5.2821179288880123e-05, "loss": 0.3616, "num_tokens": 1395881158.0, "step": 2204 }, { "epoch": 0.26073075558708764, "grad_norm": 0.1473531275987625, "learning_rate": 5.281415641644445e-05, "loss": 0.3752, "num_tokens": 1396510147.0, "step": 2205 }, { "epoch": 0.26084900082771667, "grad_norm": 0.15720437467098236, "learning_rate": 5.28071306377793e-05, "loss": 0.3715, "num_tokens": 1397143032.0, "step": 2206 }, { "epoch": 0.26096724606834576, "grad_norm": 0.1608814299106598, "learning_rate": 5.280010195391517e-05, "loss": 0.3791, "num_tokens": 1397778901.0, "step": 2207 }, { "epoch": 0.2610854913089748, "grad_norm": 0.15696018934249878, "learning_rate": 5.2793070365882975e-05, "loss": 0.3529, "num_tokens": 1398416648.0, "step": 2208 }, { "epoch": 0.2612037365496039, "grad_norm": 0.1388389617204666, "learning_rate": 5.2786035874714094e-05, "loss": 0.35, "num_tokens": 1399055774.0, "step": 2209 }, { "epoch": 0.2613219817902329, "grad_norm": 0.17042142152786255, "learning_rate": 5.27789984814403e-05, "loss": 0.4052, "num_tokens": 1399679014.0, "step": 2210 }, { "epoch": 0.261440227030862, "grad_norm": 0.14257775247097015, "learning_rate": 5.277195818709378e-05, "loss": 0.3659, "num_tokens": 1400310496.0, "step": 2211 }, { "epoch": 0.2615584722714911, "grad_norm": 0.16147346794605255, "learning_rate": 5.276491499270719e-05, "loss": 0.347, "num_tokens": 1400944987.0, "step": 2212 }, { "epoch": 0.2616767175121201, "grad_norm": 0.16071119904518127, "learning_rate": 5.275786889931357e-05, "loss": 0.3373, "num_tokens": 1401580390.0, "step": 2213 }, { "epoch": 0.2617949627527492, "grad_norm": 0.14823023974895477, "learning_rate": 5.27508199079464e-05, "loss": 0.3865, "num_tokens": 1402207325.0, "step": 2214 }, { "epoch": 0.26191320799337825, "grad_norm": 0.1576201617717743, "learning_rate": 5.2743768019639605e-05, "loss": 0.3682, "num_tokens": 1402846619.0, "step": 2215 }, { "epoch": 0.26203145323400734, "grad_norm": 0.1457044631242752, "learning_rate": 5.273671323542751e-05, "loss": 0.3534, "num_tokens": 1403475238.0, "step": 2216 }, { "epoch": 0.2621496984746364, "grad_norm": 0.14292076230049133, "learning_rate": 5.272965555634485e-05, "loss": 0.3306, "num_tokens": 1404109414.0, "step": 2217 }, { "epoch": 0.26226794371526546, "grad_norm": 0.17055392265319824, "learning_rate": 5.272259498342682e-05, "loss": 0.376, "num_tokens": 1404741649.0, "step": 2218 }, { "epoch": 0.26238618895589455, "grad_norm": 0.17038218677043915, "learning_rate": 5.2715531517709036e-05, "loss": 0.4005, "num_tokens": 1405372004.0, "step": 2219 }, { "epoch": 0.2625044341965236, "grad_norm": 0.15128056704998016, "learning_rate": 5.2708465160227506e-05, "loss": 0.3965, "num_tokens": 1406008107.0, "step": 2220 }, { "epoch": 0.2626226794371527, "grad_norm": 0.15639491379261017, "learning_rate": 5.270139591201871e-05, "loss": 0.3417, "num_tokens": 1406642830.0, "step": 2221 }, { "epoch": 0.2627409246777817, "grad_norm": 0.15869344770908356, "learning_rate": 5.26943237741195e-05, "loss": 0.3634, "num_tokens": 1407279706.0, "step": 2222 }, { "epoch": 0.2628591699184108, "grad_norm": 0.16059310734272003, "learning_rate": 5.26872487475672e-05, "loss": 0.3856, "num_tokens": 1407913980.0, "step": 2223 }, { "epoch": 0.26297741515903983, "grad_norm": 0.15953677892684937, "learning_rate": 5.268017083339952e-05, "loss": 0.3972, "num_tokens": 1408544218.0, "step": 2224 }, { "epoch": 0.2630956603996689, "grad_norm": 0.14649692177772522, "learning_rate": 5.267309003265462e-05, "loss": 0.3381, "num_tokens": 1409176076.0, "step": 2225 }, { "epoch": 0.26321390564029795, "grad_norm": 0.14951498806476593, "learning_rate": 5.2666006346371054e-05, "loss": 0.3368, "num_tokens": 1409808144.0, "step": 2226 }, { "epoch": 0.26333215088092704, "grad_norm": 0.16685305535793304, "learning_rate": 5.2658919775587836e-05, "loss": 0.3691, "num_tokens": 1410438046.0, "step": 2227 }, { "epoch": 0.26345039612155613, "grad_norm": 0.15579086542129517, "learning_rate": 5.265183032134437e-05, "loss": 0.364, "num_tokens": 1411070928.0, "step": 2228 }, { "epoch": 0.26356864136218516, "grad_norm": 0.16424673795700073, "learning_rate": 5.264473798468052e-05, "loss": 0.3799, "num_tokens": 1411705318.0, "step": 2229 }, { "epoch": 0.26368688660281425, "grad_norm": 0.15066514909267426, "learning_rate": 5.263764276663652e-05, "loss": 0.3483, "num_tokens": 1412339837.0, "step": 2230 }, { "epoch": 0.2638051318434433, "grad_norm": 0.14781062304973602, "learning_rate": 5.263054466825308e-05, "loss": 0.3162, "num_tokens": 1412944973.0, "step": 2231 }, { "epoch": 0.2639233770840724, "grad_norm": 0.17103751003742218, "learning_rate": 5.26234436905713e-05, "loss": 0.3676, "num_tokens": 1413570089.0, "step": 2232 }, { "epoch": 0.2640416223247014, "grad_norm": 0.14774970710277557, "learning_rate": 5.261633983463271e-05, "loss": 0.3623, "num_tokens": 1414206557.0, "step": 2233 }, { "epoch": 0.2641598675653305, "grad_norm": 0.15373104810714722, "learning_rate": 5.260923310147927e-05, "loss": 0.3567, "num_tokens": 1414839263.0, "step": 2234 }, { "epoch": 0.2642781128059596, "grad_norm": 0.15893341600894928, "learning_rate": 5.260212349215335e-05, "loss": 0.3581, "num_tokens": 1415473529.0, "step": 2235 }, { "epoch": 0.2643963580465886, "grad_norm": 0.1384173482656479, "learning_rate": 5.259501100769775e-05, "loss": 0.3411, "num_tokens": 1416110661.0, "step": 2236 }, { "epoch": 0.2645146032872177, "grad_norm": 0.17919005453586578, "learning_rate": 5.258789564915569e-05, "loss": 0.3773, "num_tokens": 1416741736.0, "step": 2237 }, { "epoch": 0.26463284852784674, "grad_norm": 0.14204750955104828, "learning_rate": 5.258077741757081e-05, "loss": 0.3303, "num_tokens": 1417367685.0, "step": 2238 }, { "epoch": 0.26475109376847583, "grad_norm": 0.15437358617782593, "learning_rate": 5.257365631398717e-05, "loss": 0.3369, "num_tokens": 1418004275.0, "step": 2239 }, { "epoch": 0.26486933900910486, "grad_norm": 0.15280762314796448, "learning_rate": 5.2566532339449267e-05, "loss": 0.3401, "num_tokens": 1418632155.0, "step": 2240 }, { "epoch": 0.26498758424973395, "grad_norm": 0.16271238029003143, "learning_rate": 5.2559405495001986e-05, "loss": 0.3791, "num_tokens": 1419269496.0, "step": 2241 }, { "epoch": 0.265105829490363, "grad_norm": 0.14687085151672363, "learning_rate": 5.2552275781690664e-05, "loss": 0.337, "num_tokens": 1419904473.0, "step": 2242 }, { "epoch": 0.2652240747309921, "grad_norm": 0.14374111592769623, "learning_rate": 5.2545143200561055e-05, "loss": 0.3378, "num_tokens": 1420538845.0, "step": 2243 }, { "epoch": 0.26534231997162117, "grad_norm": 0.14919394254684448, "learning_rate": 5.25380077526593e-05, "loss": 0.3698, "num_tokens": 1421173978.0, "step": 2244 }, { "epoch": 0.2654605652122502, "grad_norm": 0.17058208584785461, "learning_rate": 5.2530869439032014e-05, "loss": 0.3854, "num_tokens": 1421805151.0, "step": 2245 }, { "epoch": 0.2655788104528793, "grad_norm": 0.17299485206604004, "learning_rate": 5.252372826072619e-05, "loss": 0.3967, "num_tokens": 1422442066.0, "step": 2246 }, { "epoch": 0.2656970556935083, "grad_norm": 0.14907874166965485, "learning_rate": 5.251658421878927e-05, "loss": 0.3394, "num_tokens": 1423075090.0, "step": 2247 }, { "epoch": 0.2658153009341374, "grad_norm": 0.15277047455310822, "learning_rate": 5.250943731426907e-05, "loss": 0.3301, "num_tokens": 1423675926.0, "step": 2248 }, { "epoch": 0.26593354617476644, "grad_norm": 0.1593698412179947, "learning_rate": 5.25022875482139e-05, "loss": 0.3425, "num_tokens": 1424296031.0, "step": 2249 }, { "epoch": 0.26605179141539553, "grad_norm": 0.1606752872467041, "learning_rate": 5.2495134921672404e-05, "loss": 0.3634, "num_tokens": 1424930538.0, "step": 2250 }, { "epoch": 0.2661700366560246, "grad_norm": 0.17702144384384155, "learning_rate": 5.248797943569373e-05, "loss": 0.3809, "num_tokens": 1425559363.0, "step": 2251 }, { "epoch": 0.26628828189665366, "grad_norm": 0.16341924667358398, "learning_rate": 5.248082109132739e-05, "loss": 0.3777, "num_tokens": 1426194887.0, "step": 2252 }, { "epoch": 0.26640652713728274, "grad_norm": 0.16518183052539825, "learning_rate": 5.2473659889623316e-05, "loss": 0.3618, "num_tokens": 1426827620.0, "step": 2253 }, { "epoch": 0.2665247723779118, "grad_norm": 0.17266036570072174, "learning_rate": 5.2466495831631885e-05, "loss": 0.3683, "num_tokens": 1427462285.0, "step": 2254 }, { "epoch": 0.26664301761854087, "grad_norm": 0.15891999006271362, "learning_rate": 5.245932891840389e-05, "loss": 0.3697, "num_tokens": 1428098823.0, "step": 2255 }, { "epoch": 0.2667612628591699, "grad_norm": 0.1481497436761856, "learning_rate": 5.2452159150990515e-05, "loss": 0.3555, "num_tokens": 1428738293.0, "step": 2256 }, { "epoch": 0.266879508099799, "grad_norm": 0.1500621885061264, "learning_rate": 5.2444986530443384e-05, "loss": 0.3677, "num_tokens": 1429371807.0, "step": 2257 }, { "epoch": 0.266997753340428, "grad_norm": 0.15876691043376923, "learning_rate": 5.243781105781455e-05, "loss": 0.3647, "num_tokens": 1430008824.0, "step": 2258 }, { "epoch": 0.2671159985810571, "grad_norm": 0.15242648124694824, "learning_rate": 5.243063273415647e-05, "loss": 0.3759, "num_tokens": 1430648432.0, "step": 2259 }, { "epoch": 0.2672342438216862, "grad_norm": 0.14529050886631012, "learning_rate": 5.2423451560522e-05, "loss": 0.3378, "num_tokens": 1431274507.0, "step": 2260 }, { "epoch": 0.26735248906231524, "grad_norm": 0.16179488599300385, "learning_rate": 5.241626753796447e-05, "loss": 0.364, "num_tokens": 1431899378.0, "step": 2261 }, { "epoch": 0.2674707343029443, "grad_norm": 0.16901804506778717, "learning_rate": 5.2409080667537556e-05, "loss": 0.3714, "num_tokens": 1432532765.0, "step": 2262 }, { "epoch": 0.26758897954357336, "grad_norm": 0.15731197595596313, "learning_rate": 5.24018909502954e-05, "loss": 0.3602, "num_tokens": 1433167753.0, "step": 2263 }, { "epoch": 0.26770722478420245, "grad_norm": 0.14256243407726288, "learning_rate": 5.239469838729256e-05, "loss": 0.3456, "num_tokens": 1433802888.0, "step": 2264 }, { "epoch": 0.2678254700248315, "grad_norm": 0.15813036262989044, "learning_rate": 5.2387502979583986e-05, "loss": 0.3708, "num_tokens": 1434439739.0, "step": 2265 }, { "epoch": 0.26794371526546057, "grad_norm": 0.1671803742647171, "learning_rate": 5.2380304728225065e-05, "loss": 0.3754, "num_tokens": 1435075550.0, "step": 2266 }, { "epoch": 0.2680619605060896, "grad_norm": 0.14781129360198975, "learning_rate": 5.23731036342716e-05, "loss": 0.3699, "num_tokens": 1435702450.0, "step": 2267 }, { "epoch": 0.2681802057467187, "grad_norm": 0.14528228342533112, "learning_rate": 5.2365899698779817e-05, "loss": 0.3652, "num_tokens": 1436341880.0, "step": 2268 }, { "epoch": 0.2682984509873478, "grad_norm": 0.15035268664360046, "learning_rate": 5.235869292280632e-05, "loss": 0.3417, "num_tokens": 1436980764.0, "step": 2269 }, { "epoch": 0.2684166962279768, "grad_norm": 0.15088537335395813, "learning_rate": 5.2351483307408194e-05, "loss": 0.3715, "num_tokens": 1437607941.0, "step": 2270 }, { "epoch": 0.2685349414686059, "grad_norm": 0.14756181836128235, "learning_rate": 5.2344270853642865e-05, "loss": 0.3788, "num_tokens": 1438242314.0, "step": 2271 }, { "epoch": 0.26865318670923494, "grad_norm": 0.15675291419029236, "learning_rate": 5.233705556256825e-05, "loss": 0.3602, "num_tokens": 1438871363.0, "step": 2272 }, { "epoch": 0.268771431949864, "grad_norm": 0.15357424318790436, "learning_rate": 5.232983743524263e-05, "loss": 0.3567, "num_tokens": 1439499872.0, "step": 2273 }, { "epoch": 0.26888967719049306, "grad_norm": 0.1439976692199707, "learning_rate": 5.232261647272472e-05, "loss": 0.3334, "num_tokens": 1440133531.0, "step": 2274 }, { "epoch": 0.26900792243112215, "grad_norm": 0.13786187767982483, "learning_rate": 5.231539267607367e-05, "loss": 0.3317, "num_tokens": 1440768688.0, "step": 2275 }, { "epoch": 0.26912616767175124, "grad_norm": 0.16290701925754547, "learning_rate": 5.2308166046349e-05, "loss": 0.4127, "num_tokens": 1441398656.0, "step": 2276 }, { "epoch": 0.26924441291238027, "grad_norm": 0.1527061015367508, "learning_rate": 5.2300936584610686e-05, "loss": 0.3443, "num_tokens": 1442003989.0, "step": 2277 }, { "epoch": 0.26936265815300936, "grad_norm": 0.13706231117248535, "learning_rate": 5.229370429191909e-05, "loss": 0.3914, "num_tokens": 1442643326.0, "step": 2278 }, { "epoch": 0.2694809033936384, "grad_norm": 0.16317781805992126, "learning_rate": 5.228646916933503e-05, "loss": 0.3798, "num_tokens": 1443278975.0, "step": 2279 }, { "epoch": 0.2695991486342675, "grad_norm": 0.1382177323102951, "learning_rate": 5.227923121791969e-05, "loss": 0.3415, "num_tokens": 1443913604.0, "step": 2280 }, { "epoch": 0.2697173938748965, "grad_norm": 0.14264902472496033, "learning_rate": 5.22719904387347e-05, "loss": 0.3637, "num_tokens": 1444549969.0, "step": 2281 }, { "epoch": 0.2698356391155256, "grad_norm": 0.14483878016471863, "learning_rate": 5.2264746832842096e-05, "loss": 0.3579, "num_tokens": 1445189491.0, "step": 2282 }, { "epoch": 0.26995388435615464, "grad_norm": 0.1441272348165512, "learning_rate": 5.2257500401304335e-05, "loss": 0.3412, "num_tokens": 1445820831.0, "step": 2283 }, { "epoch": 0.2700721295967837, "grad_norm": 0.14249451458454132, "learning_rate": 5.225025114518428e-05, "loss": 0.3474, "num_tokens": 1446457219.0, "step": 2284 }, { "epoch": 0.2701903748374128, "grad_norm": 0.13442419469356537, "learning_rate": 5.224299906554521e-05, "loss": 0.3516, "num_tokens": 1447094737.0, "step": 2285 }, { "epoch": 0.27030862007804185, "grad_norm": 0.16069334745407104, "learning_rate": 5.223574416345081e-05, "loss": 0.3881, "num_tokens": 1447727826.0, "step": 2286 }, { "epoch": 0.27042686531867094, "grad_norm": 0.15976020693778992, "learning_rate": 5.222848643996521e-05, "loss": 0.3704, "num_tokens": 1448362348.0, "step": 2287 }, { "epoch": 0.2705451105593, "grad_norm": 0.1612728089094162, "learning_rate": 5.222122589615291e-05, "loss": 0.3814, "num_tokens": 1448998191.0, "step": 2288 }, { "epoch": 0.27066335579992906, "grad_norm": 0.15360663831233978, "learning_rate": 5.2213962533078865e-05, "loss": 0.3588, "num_tokens": 1449624381.0, "step": 2289 }, { "epoch": 0.2707816010405581, "grad_norm": 0.15359435975551605, "learning_rate": 5.220669635180842e-05, "loss": 0.369, "num_tokens": 1450260226.0, "step": 2290 }, { "epoch": 0.2708998462811872, "grad_norm": 0.15823595225811005, "learning_rate": 5.219942735340732e-05, "loss": 0.3683, "num_tokens": 1450890661.0, "step": 2291 }, { "epoch": 0.2710180915218163, "grad_norm": 0.15650156140327454, "learning_rate": 5.2192155538941764e-05, "loss": 0.35, "num_tokens": 1451526642.0, "step": 2292 }, { "epoch": 0.2711363367624453, "grad_norm": 0.16201554238796234, "learning_rate": 5.2184880909478325e-05, "loss": 0.3749, "num_tokens": 1452159607.0, "step": 2293 }, { "epoch": 0.2712545820030744, "grad_norm": 0.17001183331012726, "learning_rate": 5.217760346608402e-05, "loss": 0.3893, "num_tokens": 1452797560.0, "step": 2294 }, { "epoch": 0.27137282724370343, "grad_norm": 0.15308302640914917, "learning_rate": 5.217032320982624e-05, "loss": 0.3591, "num_tokens": 1453434624.0, "step": 2295 }, { "epoch": 0.2714910724843325, "grad_norm": 0.15287715196609497, "learning_rate": 5.2163040141772835e-05, "loss": 0.3627, "num_tokens": 1454067127.0, "step": 2296 }, { "epoch": 0.27160931772496155, "grad_norm": 0.17291271686553955, "learning_rate": 5.215575426299204e-05, "loss": 0.3784, "num_tokens": 1454700667.0, "step": 2297 }, { "epoch": 0.27172756296559064, "grad_norm": 0.16600170731544495, "learning_rate": 5.2148465574552506e-05, "loss": 0.4014, "num_tokens": 1455337380.0, "step": 2298 }, { "epoch": 0.2718458082062197, "grad_norm": 0.1472035050392151, "learning_rate": 5.214117407752329e-05, "loss": 0.3616, "num_tokens": 1455974456.0, "step": 2299 }, { "epoch": 0.27196405344684876, "grad_norm": 0.16266970336437225, "learning_rate": 5.213387977297387e-05, "loss": 0.399, "num_tokens": 1456611249.0, "step": 2300 }, { "epoch": 0.27208229868747785, "grad_norm": 0.1545649617910385, "learning_rate": 5.212658266197414e-05, "loss": 0.3518, "num_tokens": 1457243118.0, "step": 2301 }, { "epoch": 0.2722005439281069, "grad_norm": 0.1743442565202713, "learning_rate": 5.2119282745594394e-05, "loss": 0.3677, "num_tokens": 1457878878.0, "step": 2302 }, { "epoch": 0.272318789168736, "grad_norm": 0.15945695340633392, "learning_rate": 5.211198002490534e-05, "loss": 0.3502, "num_tokens": 1458506707.0, "step": 2303 }, { "epoch": 0.272437034409365, "grad_norm": 0.1667330414056778, "learning_rate": 5.210467450097811e-05, "loss": 0.3715, "num_tokens": 1459136085.0, "step": 2304 }, { "epoch": 0.2725552796499941, "grad_norm": 0.1610744595527649, "learning_rate": 5.209736617488423e-05, "loss": 0.3875, "num_tokens": 1459775346.0, "step": 2305 }, { "epoch": 0.27267352489062313, "grad_norm": 0.151181161403656, "learning_rate": 5.209005504769565e-05, "loss": 0.397, "num_tokens": 1460412291.0, "step": 2306 }, { "epoch": 0.2727917701312522, "grad_norm": 0.1508421003818512, "learning_rate": 5.208274112048472e-05, "loss": 0.3337, "num_tokens": 1461048611.0, "step": 2307 }, { "epoch": 0.2729100153718813, "grad_norm": 0.15021061897277832, "learning_rate": 5.207542439432421e-05, "loss": 0.3721, "num_tokens": 1461677729.0, "step": 2308 }, { "epoch": 0.27302826061251034, "grad_norm": 0.15476137399673462, "learning_rate": 5.206810487028728e-05, "loss": 0.3832, "num_tokens": 1462315617.0, "step": 2309 }, { "epoch": 0.27314650585313943, "grad_norm": 0.15683971345424652, "learning_rate": 5.206078254944754e-05, "loss": 0.3312, "num_tokens": 1462930982.0, "step": 2310 }, { "epoch": 0.27326475109376847, "grad_norm": 0.15003030002117157, "learning_rate": 5.205345743287899e-05, "loss": 0.3618, "num_tokens": 1463566971.0, "step": 2311 }, { "epoch": 0.27338299633439755, "grad_norm": 0.13606947660446167, "learning_rate": 5.2046129521656004e-05, "loss": 0.3413, "num_tokens": 1464202172.0, "step": 2312 }, { "epoch": 0.2735012415750266, "grad_norm": 0.1545344442129135, "learning_rate": 5.203879881685343e-05, "loss": 0.3974, "num_tokens": 1464838092.0, "step": 2313 }, { "epoch": 0.2736194868156557, "grad_norm": 0.14543376863002777, "learning_rate": 5.203146531954648e-05, "loss": 0.3558, "num_tokens": 1465477610.0, "step": 2314 }, { "epoch": 0.2737377320562847, "grad_norm": 0.15073993802070618, "learning_rate": 5.20241290308108e-05, "loss": 0.3622, "num_tokens": 1466116332.0, "step": 2315 }, { "epoch": 0.2738559772969138, "grad_norm": 0.15892696380615234, "learning_rate": 5.201678995172243e-05, "loss": 0.3903, "num_tokens": 1466749671.0, "step": 2316 }, { "epoch": 0.2739742225375429, "grad_norm": 0.16409429907798767, "learning_rate": 5.200944808335782e-05, "loss": 0.4105, "num_tokens": 1467388729.0, "step": 2317 }, { "epoch": 0.2740924677781719, "grad_norm": 0.14576315879821777, "learning_rate": 5.2002103426793844e-05, "loss": 0.3552, "num_tokens": 1468022678.0, "step": 2318 }, { "epoch": 0.274210713018801, "grad_norm": 0.16824838519096375, "learning_rate": 5.199475598310776e-05, "loss": 0.3911, "num_tokens": 1468653588.0, "step": 2319 }, { "epoch": 0.27432895825943004, "grad_norm": 0.16714206337928772, "learning_rate": 5.198740575337728e-05, "loss": 0.3915, "num_tokens": 1469292071.0, "step": 2320 }, { "epoch": 0.27444720350005913, "grad_norm": 0.1523461937904358, "learning_rate": 5.198005273868046e-05, "loss": 0.3646, "num_tokens": 1469926130.0, "step": 2321 }, { "epoch": 0.27456544874068817, "grad_norm": 0.14040854573249817, "learning_rate": 5.197269694009582e-05, "loss": 0.3426, "num_tokens": 1470565682.0, "step": 2322 }, { "epoch": 0.27468369398131726, "grad_norm": 0.15783368051052094, "learning_rate": 5.1965338358702264e-05, "loss": 0.3504, "num_tokens": 1471199661.0, "step": 2323 }, { "epoch": 0.2748019392219463, "grad_norm": 0.1497131586074829, "learning_rate": 5.195797699557911e-05, "loss": 0.3291, "num_tokens": 1471837079.0, "step": 2324 }, { "epoch": 0.2749201844625754, "grad_norm": 0.15223778784275055, "learning_rate": 5.1950612851806056e-05, "loss": 0.3668, "num_tokens": 1472470748.0, "step": 2325 }, { "epoch": 0.27503842970320447, "grad_norm": 0.1467036008834839, "learning_rate": 5.194324592846327e-05, "loss": 0.3447, "num_tokens": 1473076369.0, "step": 2326 }, { "epoch": 0.2751566749438335, "grad_norm": 0.15233638882637024, "learning_rate": 5.193587622663127e-05, "loss": 0.3575, "num_tokens": 1473713052.0, "step": 2327 }, { "epoch": 0.2752749201844626, "grad_norm": 0.14095458388328552, "learning_rate": 5.1928503747391004e-05, "loss": 0.3438, "num_tokens": 1474350921.0, "step": 2328 }, { "epoch": 0.2753931654250916, "grad_norm": 0.17240092158317566, "learning_rate": 5.192112849182383e-05, "loss": 0.4213, "num_tokens": 1474987408.0, "step": 2329 }, { "epoch": 0.2755114106657207, "grad_norm": 0.15787003934383392, "learning_rate": 5.191375046101152e-05, "loss": 0.3548, "num_tokens": 1475621528.0, "step": 2330 }, { "epoch": 0.27562965590634975, "grad_norm": 0.13558189570903778, "learning_rate": 5.190636965603622e-05, "loss": 0.3568, "num_tokens": 1476256968.0, "step": 2331 }, { "epoch": 0.27574790114697884, "grad_norm": 0.14935897290706635, "learning_rate": 5.189898607798052e-05, "loss": 0.3337, "num_tokens": 1476887629.0, "step": 2332 }, { "epoch": 0.2758661463876079, "grad_norm": 0.16723769903182983, "learning_rate": 5.1891599727927396e-05, "loss": 0.387, "num_tokens": 1477522575.0, "step": 2333 }, { "epoch": 0.27598439162823696, "grad_norm": 0.14238448441028595, "learning_rate": 5.1884210606960236e-05, "loss": 0.3506, "num_tokens": 1478158656.0, "step": 2334 }, { "epoch": 0.27610263686886605, "grad_norm": 0.150628000497818, "learning_rate": 5.187681871616284e-05, "loss": 0.3784, "num_tokens": 1478794818.0, "step": 2335 }, { "epoch": 0.2762208821094951, "grad_norm": 0.12632951140403748, "learning_rate": 5.18694240566194e-05, "loss": 0.35, "num_tokens": 1479428236.0, "step": 2336 }, { "epoch": 0.27633912735012417, "grad_norm": 0.14539533853530884, "learning_rate": 5.186202662941454e-05, "loss": 0.3698, "num_tokens": 1480061891.0, "step": 2337 }, { "epoch": 0.2764573725907532, "grad_norm": 0.16277393698692322, "learning_rate": 5.185462643563327e-05, "loss": 0.4074, "num_tokens": 1480701365.0, "step": 2338 }, { "epoch": 0.2765756178313823, "grad_norm": 0.1372813880443573, "learning_rate": 5.184722347636098e-05, "loss": 0.3481, "num_tokens": 1481336637.0, "step": 2339 }, { "epoch": 0.2766938630720113, "grad_norm": 0.15544790029525757, "learning_rate": 5.1839817752683524e-05, "loss": 0.3575, "num_tokens": 1481974800.0, "step": 2340 }, { "epoch": 0.2768121083126404, "grad_norm": 0.15033455193042755, "learning_rate": 5.1832409265687124e-05, "loss": 0.3439, "num_tokens": 1482607289.0, "step": 2341 }, { "epoch": 0.2769303535532695, "grad_norm": 0.1518167108297348, "learning_rate": 5.182499801645842e-05, "loss": 0.3777, "num_tokens": 1483243948.0, "step": 2342 }, { "epoch": 0.27704859879389854, "grad_norm": 18.004554748535156, "learning_rate": 5.181758400608444e-05, "loss": 0.8962, "num_tokens": 1483845955.0, "step": 2343 }, { "epoch": 0.2771668440345276, "grad_norm": 0.1874932199716568, "learning_rate": 5.181016723565265e-05, "loss": 0.3706, "num_tokens": 1484479960.0, "step": 2344 }, { "epoch": 0.27728508927515666, "grad_norm": 0.16853219270706177, "learning_rate": 5.180274770625088e-05, "loss": 0.3717, "num_tokens": 1485115529.0, "step": 2345 }, { "epoch": 0.27740333451578575, "grad_norm": 0.1486130803823471, "learning_rate": 5.179532541896739e-05, "loss": 0.3699, "num_tokens": 1485751394.0, "step": 2346 }, { "epoch": 0.2775215797564148, "grad_norm": 0.1473405957221985, "learning_rate": 5.178790037489085e-05, "loss": 0.3879, "num_tokens": 1486390225.0, "step": 2347 }, { "epoch": 0.27763982499704387, "grad_norm": 0.1471875160932541, "learning_rate": 5.178047257511031e-05, "loss": 0.3611, "num_tokens": 1486988102.0, "step": 2348 }, { "epoch": 0.27775807023767296, "grad_norm": 0.15154783427715302, "learning_rate": 5.177304202071526e-05, "loss": 0.3564, "num_tokens": 1487623515.0, "step": 2349 }, { "epoch": 0.277876315478302, "grad_norm": 0.16886240243911743, "learning_rate": 5.1765608712795545e-05, "loss": 0.396, "num_tokens": 1488259828.0, "step": 2350 }, { "epoch": 0.2779945607189311, "grad_norm": 0.1388898491859436, "learning_rate": 5.1758172652441444e-05, "loss": 0.3544, "num_tokens": 1488891872.0, "step": 2351 }, { "epoch": 0.2781128059595601, "grad_norm": 0.1588786095380783, "learning_rate": 5.175073384074366e-05, "loss": 0.4167, "num_tokens": 1489527101.0, "step": 2352 }, { "epoch": 0.2782310512001892, "grad_norm": 0.16156388819217682, "learning_rate": 5.174329227879326e-05, "loss": 0.3678, "num_tokens": 1490165911.0, "step": 2353 }, { "epoch": 0.27834929644081824, "grad_norm": 0.13891494274139404, "learning_rate": 5.173584796768173e-05, "loss": 0.3548, "num_tokens": 1490802923.0, "step": 2354 }, { "epoch": 0.27846754168144733, "grad_norm": 0.15996013581752777, "learning_rate": 5.1728400908500954e-05, "loss": 0.3734, "num_tokens": 1491439721.0, "step": 2355 }, { "epoch": 0.27858578692207636, "grad_norm": 0.16167289018630981, "learning_rate": 5.172095110234324e-05, "loss": 0.3575, "num_tokens": 1492078820.0, "step": 2356 }, { "epoch": 0.27870403216270545, "grad_norm": 0.16740791499614716, "learning_rate": 5.171349855030127e-05, "loss": 0.3429, "num_tokens": 1492714650.0, "step": 2357 }, { "epoch": 0.27882227740333454, "grad_norm": 0.13522467017173767, "learning_rate": 5.170604325346816e-05, "loss": 0.3203, "num_tokens": 1493341798.0, "step": 2358 }, { "epoch": 0.2789405226439636, "grad_norm": 0.16082540154457092, "learning_rate": 5.1698585212937394e-05, "loss": 0.3997, "num_tokens": 1493978267.0, "step": 2359 }, { "epoch": 0.27905876788459266, "grad_norm": 0.15238749980926514, "learning_rate": 5.169112442980289e-05, "loss": 0.3198, "num_tokens": 1494612569.0, "step": 2360 }, { "epoch": 0.2791770131252217, "grad_norm": 0.15735118091106415, "learning_rate": 5.1683660905158935e-05, "loss": 0.3797, "num_tokens": 1495250024.0, "step": 2361 }, { "epoch": 0.2792952583658508, "grad_norm": 0.1450258046388626, "learning_rate": 5.167619464010025e-05, "loss": 0.3662, "num_tokens": 1495879619.0, "step": 2362 }, { "epoch": 0.2794135036064798, "grad_norm": 0.15994147956371307, "learning_rate": 5.166872563572194e-05, "loss": 0.3953, "num_tokens": 1496509723.0, "step": 2363 }, { "epoch": 0.2795317488471089, "grad_norm": 0.17398406565189362, "learning_rate": 5.1661253893119526e-05, "loss": 0.3723, "num_tokens": 1497139086.0, "step": 2364 }, { "epoch": 0.27964999408773794, "grad_norm": 0.14153091609477997, "learning_rate": 5.165377941338892e-05, "loss": 0.3192, "num_tokens": 1497769353.0, "step": 2365 }, { "epoch": 0.27976823932836703, "grad_norm": 0.14599373936653137, "learning_rate": 5.1646302197626427e-05, "loss": 0.3275, "num_tokens": 1498406713.0, "step": 2366 }, { "epoch": 0.2798864845689961, "grad_norm": 0.14073973894119263, "learning_rate": 5.163882224692877e-05, "loss": 0.3398, "num_tokens": 1499042860.0, "step": 2367 }, { "epoch": 0.28000472980962515, "grad_norm": 0.1550605446100235, "learning_rate": 5.163133956239307e-05, "loss": 0.3276, "num_tokens": 1499673955.0, "step": 2368 }, { "epoch": 0.28012297505025424, "grad_norm": 0.1414223462343216, "learning_rate": 5.162385414511683e-05, "loss": 0.3307, "num_tokens": 1500308786.0, "step": 2369 }, { "epoch": 0.2802412202908833, "grad_norm": 0.1378117799758911, "learning_rate": 5.161636599619798e-05, "loss": 0.3276, "num_tokens": 1500943668.0, "step": 2370 }, { "epoch": 0.28035946553151236, "grad_norm": 0.14597615599632263, "learning_rate": 5.160887511673483e-05, "loss": 0.3283, "num_tokens": 1501575125.0, "step": 2371 }, { "epoch": 0.2804777107721414, "grad_norm": 0.1476171314716339, "learning_rate": 5.160138150782612e-05, "loss": 0.3973, "num_tokens": 1502205209.0, "step": 2372 }, { "epoch": 0.2805959560127705, "grad_norm": 0.1440732479095459, "learning_rate": 5.1593885170570955e-05, "loss": 0.3933, "num_tokens": 1502841700.0, "step": 2373 }, { "epoch": 0.2807142012533996, "grad_norm": 0.1449534296989441, "learning_rate": 5.158638610606887e-05, "loss": 0.3509, "num_tokens": 1503478757.0, "step": 2374 }, { "epoch": 0.2808324464940286, "grad_norm": 0.15498371422290802, "learning_rate": 5.157888431541976e-05, "loss": 0.3317, "num_tokens": 1504114367.0, "step": 2375 }, { "epoch": 0.2809506917346577, "grad_norm": 0.1510004699230194, "learning_rate": 5.157137979972396e-05, "loss": 0.3457, "num_tokens": 1504750346.0, "step": 2376 }, { "epoch": 0.28106893697528673, "grad_norm": 0.16893434524536133, "learning_rate": 5.1563872560082206e-05, "loss": 0.3647, "num_tokens": 1505373996.0, "step": 2377 }, { "epoch": 0.2811871822159158, "grad_norm": 0.16010859608650208, "learning_rate": 5.15563625975956e-05, "loss": 0.3339, "num_tokens": 1506006742.0, "step": 2378 }, { "epoch": 0.28130542745654485, "grad_norm": 0.16579493880271912, "learning_rate": 5.154884991336566e-05, "loss": 0.3525, "num_tokens": 1506642591.0, "step": 2379 }, { "epoch": 0.28142367269717394, "grad_norm": 0.16797488927841187, "learning_rate": 5.154133450849431e-05, "loss": 0.3301, "num_tokens": 1507279859.0, "step": 2380 }, { "epoch": 0.281541917937803, "grad_norm": 0.14919261634349823, "learning_rate": 5.1533816384083866e-05, "loss": 0.3467, "num_tokens": 1507913520.0, "step": 2381 }, { "epoch": 0.28166016317843207, "grad_norm": 0.17393799126148224, "learning_rate": 5.152629554123706e-05, "loss": 0.3511, "num_tokens": 1508547624.0, "step": 2382 }, { "epoch": 0.28177840841906115, "grad_norm": 0.15337307751178741, "learning_rate": 5.151877198105698e-05, "loss": 0.3779, "num_tokens": 1509186196.0, "step": 2383 }, { "epoch": 0.2818966536596902, "grad_norm": 0.14126722514629364, "learning_rate": 5.151124570464715e-05, "loss": 0.3492, "num_tokens": 1509824371.0, "step": 2384 }, { "epoch": 0.2820148989003193, "grad_norm": 0.14233171939849854, "learning_rate": 5.150371671311148e-05, "loss": 0.353, "num_tokens": 1510453337.0, "step": 2385 }, { "epoch": 0.2821331441409483, "grad_norm": 0.14706680178642273, "learning_rate": 5.149618500755429e-05, "loss": 0.3346, "num_tokens": 1511089984.0, "step": 2386 }, { "epoch": 0.2822513893815774, "grad_norm": 0.1561274528503418, "learning_rate": 5.148865058908029e-05, "loss": 0.3474, "num_tokens": 1511723104.0, "step": 2387 }, { "epoch": 0.28236963462220643, "grad_norm": 0.14485491812229156, "learning_rate": 5.148111345879456e-05, "loss": 0.358, "num_tokens": 1512361915.0, "step": 2388 }, { "epoch": 0.2824878798628355, "grad_norm": 0.16236385703086853, "learning_rate": 5.1473573617802644e-05, "loss": 0.4054, "num_tokens": 1512989309.0, "step": 2389 }, { "epoch": 0.2826061251034646, "grad_norm": 0.1574629545211792, "learning_rate": 5.1466031067210414e-05, "loss": 0.3718, "num_tokens": 1513623233.0, "step": 2390 }, { "epoch": 0.28272437034409365, "grad_norm": 0.14909283816814423, "learning_rate": 5.145848580812417e-05, "loss": 0.3706, "num_tokens": 1514257994.0, "step": 2391 }, { "epoch": 0.28284261558472273, "grad_norm": 0.15986815094947815, "learning_rate": 5.145093784165063e-05, "loss": 0.366, "num_tokens": 1514895129.0, "step": 2392 }, { "epoch": 0.28296086082535177, "grad_norm": 0.14897796511650085, "learning_rate": 5.1443387168896866e-05, "loss": 0.3292, "num_tokens": 1515533555.0, "step": 2393 }, { "epoch": 0.28307910606598086, "grad_norm": 0.1255037784576416, "learning_rate": 5.1435833790970374e-05, "loss": 0.3244, "num_tokens": 1516170546.0, "step": 2394 }, { "epoch": 0.2831973513066099, "grad_norm": 0.13740988075733185, "learning_rate": 5.1428277708979046e-05, "loss": 0.308, "num_tokens": 1516799730.0, "step": 2395 }, { "epoch": 0.283315596547239, "grad_norm": 0.14730027318000793, "learning_rate": 5.1420718924031166e-05, "loss": 0.3616, "num_tokens": 1517436780.0, "step": 2396 }, { "epoch": 0.283433841787868, "grad_norm": 0.14762364327907562, "learning_rate": 5.1413157437235406e-05, "loss": 0.4029, "num_tokens": 1518072732.0, "step": 2397 }, { "epoch": 0.2835520870284971, "grad_norm": 0.14341436326503754, "learning_rate": 5.140559324970084e-05, "loss": 0.3703, "num_tokens": 1518710901.0, "step": 2398 }, { "epoch": 0.2836703322691262, "grad_norm": 0.1452096849679947, "learning_rate": 5.139802636253696e-05, "loss": 0.3466, "num_tokens": 1519345313.0, "step": 2399 }, { "epoch": 0.2837885775097552, "grad_norm": 0.16120626032352448, "learning_rate": 5.139045677685362e-05, "loss": 0.3716, "num_tokens": 1519980264.0, "step": 2400 }, { "epoch": 0.2839068227503843, "grad_norm": 0.16466911137104034, "learning_rate": 5.1382884493761094e-05, "loss": 0.3853, "num_tokens": 1520615916.0, "step": 2401 }, { "epoch": 0.28402506799101335, "grad_norm": 0.14920389652252197, "learning_rate": 5.137530951437003e-05, "loss": 0.3857, "num_tokens": 1521252102.0, "step": 2402 }, { "epoch": 0.28414331323164244, "grad_norm": 0.13696464896202087, "learning_rate": 5.1367731839791484e-05, "loss": 0.3469, "num_tokens": 1521879862.0, "step": 2403 }, { "epoch": 0.28426155847227147, "grad_norm": 0.15004949271678925, "learning_rate": 5.136015147113691e-05, "loss": 0.3912, "num_tokens": 1522505201.0, "step": 2404 }, { "epoch": 0.28437980371290056, "grad_norm": 0.14496594667434692, "learning_rate": 5.135256840951816e-05, "loss": 0.3455, "num_tokens": 1523144696.0, "step": 2405 }, { "epoch": 0.28449804895352965, "grad_norm": 0.1522136777639389, "learning_rate": 5.1344982656047476e-05, "loss": 0.3596, "num_tokens": 1523776301.0, "step": 2406 }, { "epoch": 0.2846162941941587, "grad_norm": 0.1498071551322937, "learning_rate": 5.133739421183748e-05, "loss": 0.3648, "num_tokens": 1524411665.0, "step": 2407 }, { "epoch": 0.28473453943478777, "grad_norm": 0.1476230025291443, "learning_rate": 5.1329803078001224e-05, "loss": 0.36, "num_tokens": 1525049675.0, "step": 2408 }, { "epoch": 0.2848527846754168, "grad_norm": 0.1434587687253952, "learning_rate": 5.1322209255652114e-05, "loss": 0.3201, "num_tokens": 1525681212.0, "step": 2409 }, { "epoch": 0.2849710299160459, "grad_norm": 0.14709366858005524, "learning_rate": 5.1314612745903974e-05, "loss": 0.3522, "num_tokens": 1526316616.0, "step": 2410 }, { "epoch": 0.2850892751566749, "grad_norm": 0.14652235805988312, "learning_rate": 5.130701354987102e-05, "loss": 0.3624, "num_tokens": 1526950609.0, "step": 2411 }, { "epoch": 0.285207520397304, "grad_norm": 0.1547906994819641, "learning_rate": 5.1299411668667855e-05, "loss": 0.3826, "num_tokens": 1527587441.0, "step": 2412 }, { "epoch": 0.28532576563793305, "grad_norm": 0.15779371559619904, "learning_rate": 5.129180710340949e-05, "loss": 0.3894, "num_tokens": 1528223473.0, "step": 2413 }, { "epoch": 0.28544401087856214, "grad_norm": 0.15027301013469696, "learning_rate": 5.128419985521131e-05, "loss": 0.3707, "num_tokens": 1528862184.0, "step": 2414 }, { "epoch": 0.2855622561191912, "grad_norm": 0.14584867656230927, "learning_rate": 5.127658992518911e-05, "loss": 0.3594, "num_tokens": 1529498134.0, "step": 2415 }, { "epoch": 0.28568050135982026, "grad_norm": 0.15217836201190948, "learning_rate": 5.126897731445906e-05, "loss": 0.3691, "num_tokens": 1530132720.0, "step": 2416 }, { "epoch": 0.28579874660044935, "grad_norm": 0.15481121838092804, "learning_rate": 5.1261362024137755e-05, "loss": 0.352, "num_tokens": 1530766312.0, "step": 2417 }, { "epoch": 0.2859169918410784, "grad_norm": 0.16314752399921417, "learning_rate": 5.1253744055342145e-05, "loss": 0.3754, "num_tokens": 1531401703.0, "step": 2418 }, { "epoch": 0.28603523708170747, "grad_norm": 0.16154710948467255, "learning_rate": 5.12461234091896e-05, "loss": 0.3657, "num_tokens": 1532033358.0, "step": 2419 }, { "epoch": 0.2861534823223365, "grad_norm": 0.14500078558921814, "learning_rate": 5.123850008679787e-05, "loss": 0.3446, "num_tokens": 1532666946.0, "step": 2420 }, { "epoch": 0.2862717275629656, "grad_norm": 0.16005858778953552, "learning_rate": 5.1230874089285106e-05, "loss": 0.3665, "num_tokens": 1533305486.0, "step": 2421 }, { "epoch": 0.28638997280359463, "grad_norm": 0.16123497486114502, "learning_rate": 5.122324541776985e-05, "loss": 0.3493, "num_tokens": 1533935802.0, "step": 2422 }, { "epoch": 0.2865082180442237, "grad_norm": 0.15175017714500427, "learning_rate": 5.121561407337103e-05, "loss": 0.3788, "num_tokens": 1534537936.0, "step": 2423 }, { "epoch": 0.2866264632848528, "grad_norm": 0.16482868790626526, "learning_rate": 5.120798005720796e-05, "loss": 0.3755, "num_tokens": 1535176503.0, "step": 2424 }, { "epoch": 0.28674470852548184, "grad_norm": 0.14901557564735413, "learning_rate": 5.120034337040035e-05, "loss": 0.3177, "num_tokens": 1535809878.0, "step": 2425 }, { "epoch": 0.28686295376611093, "grad_norm": 0.14456705749034882, "learning_rate": 5.119270401406833e-05, "loss": 0.3202, "num_tokens": 1536444197.0, "step": 2426 }, { "epoch": 0.28698119900673996, "grad_norm": 0.15920798480510712, "learning_rate": 5.118506198933238e-05, "loss": 0.3461, "num_tokens": 1537077940.0, "step": 2427 }, { "epoch": 0.28709944424736905, "grad_norm": 0.14617350697517395, "learning_rate": 5.117741729731339e-05, "loss": 0.3216, "num_tokens": 1537709405.0, "step": 2428 }, { "epoch": 0.2872176894879981, "grad_norm": 0.15979531407356262, "learning_rate": 5.116976993913266e-05, "loss": 0.378, "num_tokens": 1538346163.0, "step": 2429 }, { "epoch": 0.2873359347286272, "grad_norm": 0.15407593548297882, "learning_rate": 5.116211991591183e-05, "loss": 0.4211, "num_tokens": 1538982802.0, "step": 2430 }, { "epoch": 0.28745417996925626, "grad_norm": 0.164232537150383, "learning_rate": 5.115446722877299e-05, "loss": 0.3736, "num_tokens": 1539617968.0, "step": 2431 }, { "epoch": 0.2875724252098853, "grad_norm": 0.13974468410015106, "learning_rate": 5.114681187883857e-05, "loss": 0.3313, "num_tokens": 1540248847.0, "step": 2432 }, { "epoch": 0.2876906704505144, "grad_norm": 0.1548672467470169, "learning_rate": 5.113915386723145e-05, "loss": 0.3668, "num_tokens": 1540884115.0, "step": 2433 }, { "epoch": 0.2878089156911434, "grad_norm": 0.151759073138237, "learning_rate": 5.1131493195074816e-05, "loss": 0.3346, "num_tokens": 1541519078.0, "step": 2434 }, { "epoch": 0.2879271609317725, "grad_norm": 0.1297214776277542, "learning_rate": 5.112382986349233e-05, "loss": 0.313, "num_tokens": 1542154829.0, "step": 2435 }, { "epoch": 0.28804540617240154, "grad_norm": 0.15951651334762573, "learning_rate": 5.1116163873607985e-05, "loss": 0.35, "num_tokens": 1542787615.0, "step": 2436 }, { "epoch": 0.28816365141303063, "grad_norm": 0.15006236732006073, "learning_rate": 5.11084952265462e-05, "loss": 0.3726, "num_tokens": 1543422823.0, "step": 2437 }, { "epoch": 0.28828189665365966, "grad_norm": 0.1509401947259903, "learning_rate": 5.110082392343177e-05, "loss": 0.3987, "num_tokens": 1544061139.0, "step": 2438 }, { "epoch": 0.28840014189428875, "grad_norm": 0.14976786077022552, "learning_rate": 5.1093149965389865e-05, "loss": 0.4053, "num_tokens": 1544680188.0, "step": 2439 }, { "epoch": 0.28851838713491784, "grad_norm": 0.16512008011341095, "learning_rate": 5.108547335354607e-05, "loss": 0.4282, "num_tokens": 1545317687.0, "step": 2440 }, { "epoch": 0.2886366323755469, "grad_norm": 0.14550714194774628, "learning_rate": 5.1077794089026325e-05, "loss": 0.3552, "num_tokens": 1545950953.0, "step": 2441 }, { "epoch": 0.28875487761617596, "grad_norm": 0.14643588662147522, "learning_rate": 5.1070112172957016e-05, "loss": 0.3397, "num_tokens": 1546570745.0, "step": 2442 }, { "epoch": 0.288873122856805, "grad_norm": 0.1360168755054474, "learning_rate": 5.1062427606464865e-05, "loss": 0.3369, "num_tokens": 1547208758.0, "step": 2443 }, { "epoch": 0.2889913680974341, "grad_norm": 0.1535852551460266, "learning_rate": 5.105474039067699e-05, "loss": 0.3537, "num_tokens": 1547841621.0, "step": 2444 }, { "epoch": 0.2891096133380631, "grad_norm": 0.17614653706550598, "learning_rate": 5.104705052672094e-05, "loss": 0.3878, "num_tokens": 1548474236.0, "step": 2445 }, { "epoch": 0.2892278585786922, "grad_norm": 0.14424295723438263, "learning_rate": 5.103935801572459e-05, "loss": 0.3439, "num_tokens": 1549102957.0, "step": 2446 }, { "epoch": 0.2893461038193213, "grad_norm": 0.15351137518882751, "learning_rate": 5.1031662858816244e-05, "loss": 0.3668, "num_tokens": 1549738034.0, "step": 2447 }, { "epoch": 0.28946434905995033, "grad_norm": 0.1494022011756897, "learning_rate": 5.1023965057124595e-05, "loss": 0.3477, "num_tokens": 1550367757.0, "step": 2448 }, { "epoch": 0.2895825943005794, "grad_norm": 0.1511409431695938, "learning_rate": 5.101626461177871e-05, "loss": 0.3538, "num_tokens": 1551002943.0, "step": 2449 }, { "epoch": 0.28970083954120845, "grad_norm": 0.14820496737957, "learning_rate": 5.100856152390803e-05, "loss": 0.3445, "num_tokens": 1551640916.0, "step": 2450 }, { "epoch": 0.28981908478183754, "grad_norm": 0.15256306529045105, "learning_rate": 5.1000855794642426e-05, "loss": 0.3875, "num_tokens": 1552279719.0, "step": 2451 }, { "epoch": 0.2899373300224666, "grad_norm": 0.14742372930049896, "learning_rate": 5.0993147425112114e-05, "loss": 0.3687, "num_tokens": 1552909547.0, "step": 2452 }, { "epoch": 0.29005557526309567, "grad_norm": 0.14389902353286743, "learning_rate": 5.098543641644772e-05, "loss": 0.3333, "num_tokens": 1553547378.0, "step": 2453 }, { "epoch": 0.2901738205037247, "grad_norm": 0.15277884900569916, "learning_rate": 5.097772276978026e-05, "loss": 0.3612, "num_tokens": 1554186991.0, "step": 2454 }, { "epoch": 0.2902920657443538, "grad_norm": 0.15810954570770264, "learning_rate": 5.0970006486241104e-05, "loss": 0.3875, "num_tokens": 1554823738.0, "step": 2455 }, { "epoch": 0.2904103109849829, "grad_norm": 0.15370549261569977, "learning_rate": 5.096228756696206e-05, "loss": 0.3843, "num_tokens": 1555463056.0, "step": 2456 }, { "epoch": 0.2905285562256119, "grad_norm": 0.1583654135465622, "learning_rate": 5.0954566013075284e-05, "loss": 0.3719, "num_tokens": 1556098586.0, "step": 2457 }, { "epoch": 0.290646801466241, "grad_norm": 0.14468631148338318, "learning_rate": 5.094684182571332e-05, "loss": 0.3664, "num_tokens": 1556734027.0, "step": 2458 }, { "epoch": 0.29076504670687003, "grad_norm": 0.15024395287036896, "learning_rate": 5.093911500600914e-05, "loss": 0.3669, "num_tokens": 1557369690.0, "step": 2459 }, { "epoch": 0.2908832919474991, "grad_norm": 0.15600430965423584, "learning_rate": 5.093138555509604e-05, "loss": 0.3725, "num_tokens": 1557999303.0, "step": 2460 }, { "epoch": 0.29100153718812816, "grad_norm": 0.15223824977874756, "learning_rate": 5.092365347410773e-05, "loss": 0.3738, "num_tokens": 1558634807.0, "step": 2461 }, { "epoch": 0.29111978242875725, "grad_norm": 0.1305040568113327, "learning_rate": 5.091591876417834e-05, "loss": 0.3157, "num_tokens": 1559269638.0, "step": 2462 }, { "epoch": 0.29123802766938633, "grad_norm": 0.13695046305656433, "learning_rate": 5.090818142644233e-05, "loss": 0.3352, "num_tokens": 1559902736.0, "step": 2463 }, { "epoch": 0.29135627291001537, "grad_norm": 0.1705075353384018, "learning_rate": 5.0900441462034575e-05, "loss": 0.3781, "num_tokens": 1560537476.0, "step": 2464 }, { "epoch": 0.29147451815064446, "grad_norm": 0.13505369424819946, "learning_rate": 5.089269887209032e-05, "loss": 0.3542, "num_tokens": 1561173697.0, "step": 2465 }, { "epoch": 0.2915927633912735, "grad_norm": 0.15983089804649353, "learning_rate": 5.088495365774522e-05, "loss": 0.351, "num_tokens": 1561809371.0, "step": 2466 }, { "epoch": 0.2917110086319026, "grad_norm": 0.13605132699012756, "learning_rate": 5.0877205820135296e-05, "loss": 0.351, "num_tokens": 1562448843.0, "step": 2467 }, { "epoch": 0.2918292538725316, "grad_norm": 0.1968318372964859, "learning_rate": 5.086945536039695e-05, "loss": 0.3907, "num_tokens": 1563085995.0, "step": 2468 }, { "epoch": 0.2919474991131607, "grad_norm": 0.17326131463050842, "learning_rate": 5.086170227966699e-05, "loss": 0.4424, "num_tokens": 1563717068.0, "step": 2469 }, { "epoch": 0.29206574435378974, "grad_norm": 0.14930212497711182, "learning_rate": 5.0853946579082575e-05, "loss": 0.4087, "num_tokens": 1564350110.0, "step": 2470 }, { "epoch": 0.2921839895944188, "grad_norm": 0.17055080831050873, "learning_rate": 5.084618825978128e-05, "loss": 0.382, "num_tokens": 1564985340.0, "step": 2471 }, { "epoch": 0.2923022348350479, "grad_norm": 0.14754746854305267, "learning_rate": 5.0838427322901047e-05, "loss": 0.3385, "num_tokens": 1565619951.0, "step": 2472 }, { "epoch": 0.29242048007567695, "grad_norm": 0.15629802644252777, "learning_rate": 5.083066376958022e-05, "loss": 0.3602, "num_tokens": 1566247164.0, "step": 2473 }, { "epoch": 0.29253872531630604, "grad_norm": 0.15467692911624908, "learning_rate": 5.08228976009575e-05, "loss": 0.3899, "num_tokens": 1566885564.0, "step": 2474 }, { "epoch": 0.29265697055693507, "grad_norm": 0.13130120933055878, "learning_rate": 5.0815128818171984e-05, "loss": 0.3235, "num_tokens": 1567517351.0, "step": 2475 }, { "epoch": 0.29277521579756416, "grad_norm": 0.14711728692054749, "learning_rate": 5.080735742236316e-05, "loss": 0.3417, "num_tokens": 1568152846.0, "step": 2476 }, { "epoch": 0.2928934610381932, "grad_norm": 0.15567351877689362, "learning_rate": 5.079958341467088e-05, "loss": 0.3668, "num_tokens": 1568780581.0, "step": 2477 }, { "epoch": 0.2930117062788223, "grad_norm": 0.13537286221981049, "learning_rate": 5.079180679623542e-05, "loss": 0.3437, "num_tokens": 1569415183.0, "step": 2478 }, { "epoch": 0.2931299515194513, "grad_norm": 0.1664997786283493, "learning_rate": 5.0784027568197386e-05, "loss": 0.3842, "num_tokens": 1570054656.0, "step": 2479 }, { "epoch": 0.2932481967600804, "grad_norm": 0.1475135087966919, "learning_rate": 5.07762457316978e-05, "loss": 0.3663, "num_tokens": 1570688875.0, "step": 2480 }, { "epoch": 0.2933664420007095, "grad_norm": 0.14164982736110687, "learning_rate": 5.076846128787805e-05, "loss": 0.3312, "num_tokens": 1571324827.0, "step": 2481 }, { "epoch": 0.2934846872413385, "grad_norm": 0.14938534796237946, "learning_rate": 5.076067423787993e-05, "loss": 0.3651, "num_tokens": 1571958663.0, "step": 2482 }, { "epoch": 0.2936029324819676, "grad_norm": 0.137350395321846, "learning_rate": 5.0752884582845596e-05, "loss": 0.3727, "num_tokens": 1572588508.0, "step": 2483 }, { "epoch": 0.29372117772259665, "grad_norm": 0.14978715777397156, "learning_rate": 5.074509232391757e-05, "loss": 0.348, "num_tokens": 1573222363.0, "step": 2484 }, { "epoch": 0.29383942296322574, "grad_norm": 0.16208483278751373, "learning_rate": 5.0737297462238806e-05, "loss": 0.3755, "num_tokens": 1573857267.0, "step": 2485 }, { "epoch": 0.29395766820385477, "grad_norm": 0.13174913823604584, "learning_rate": 5.07294999989526e-05, "loss": 0.3371, "num_tokens": 1574489833.0, "step": 2486 }, { "epoch": 0.29407591344448386, "grad_norm": 0.14728987216949463, "learning_rate": 5.072169993520265e-05, "loss": 0.3464, "num_tokens": 1575127826.0, "step": 2487 }, { "epoch": 0.29419415868511295, "grad_norm": 0.15298466384410858, "learning_rate": 5.0713897272133005e-05, "loss": 0.3923, "num_tokens": 1575764942.0, "step": 2488 }, { "epoch": 0.294312403925742, "grad_norm": 0.12261069566011429, "learning_rate": 5.070609201088812e-05, "loss": 0.3318, "num_tokens": 1576396971.0, "step": 2489 }, { "epoch": 0.2944306491663711, "grad_norm": 0.1509537398815155, "learning_rate": 5.069828415261284e-05, "loss": 0.4, "num_tokens": 1577032933.0, "step": 2490 }, { "epoch": 0.2945488944070001, "grad_norm": 0.1331872195005417, "learning_rate": 5.069047369845237e-05, "loss": 0.3686, "num_tokens": 1577670215.0, "step": 2491 }, { "epoch": 0.2946671396476292, "grad_norm": 0.14146214723587036, "learning_rate": 5.06826606495523e-05, "loss": 0.3533, "num_tokens": 1578304041.0, "step": 2492 }, { "epoch": 0.29478538488825823, "grad_norm": 0.12835468351840973, "learning_rate": 5.067484500705861e-05, "loss": 0.3378, "num_tokens": 1578942969.0, "step": 2493 }, { "epoch": 0.2949036301288873, "grad_norm": 0.14581221342086792, "learning_rate": 5.066702677211766e-05, "loss": 0.3776, "num_tokens": 1579580392.0, "step": 2494 }, { "epoch": 0.29502187536951635, "grad_norm": 0.15271341800689697, "learning_rate": 5.065920594587618e-05, "loss": 0.3536, "num_tokens": 1580212819.0, "step": 2495 }, { "epoch": 0.29514012061014544, "grad_norm": 0.14915582537651062, "learning_rate": 5.0651382529481276e-05, "loss": 0.3451, "num_tokens": 1580842095.0, "step": 2496 }, { "epoch": 0.29525836585077453, "grad_norm": 0.1648779660463333, "learning_rate": 5.064355652408046e-05, "loss": 0.376, "num_tokens": 1581477926.0, "step": 2497 }, { "epoch": 0.29537661109140356, "grad_norm": 0.13914352655410767, "learning_rate": 5.0635727930821576e-05, "loss": 0.3572, "num_tokens": 1582109535.0, "step": 2498 }, { "epoch": 0.29549485633203265, "grad_norm": 0.1509476751089096, "learning_rate": 5.062789675085292e-05, "loss": 0.3687, "num_tokens": 1582745020.0, "step": 2499 }, { "epoch": 0.2956131015726617, "grad_norm": 0.14042633771896362, "learning_rate": 5.062006298532309e-05, "loss": 0.3548, "num_tokens": 1583382664.0, "step": 2500 }, { "epoch": 0.2957313468132908, "grad_norm": 0.13550545275211334, "learning_rate": 5.0612226635381126e-05, "loss": 0.3486, "num_tokens": 1584021873.0, "step": 2501 }, { "epoch": 0.2958495920539198, "grad_norm": 0.16540901362895966, "learning_rate": 5.06043877021764e-05, "loss": 0.3998, "num_tokens": 1584661560.0, "step": 2502 }, { "epoch": 0.2959678372945489, "grad_norm": 21.212095260620117, "learning_rate": 5.0596546186858695e-05, "loss": 0.8826, "num_tokens": 1585262729.0, "step": 2503 }, { "epoch": 0.296086082535178, "grad_norm": 0.16181381046772003, "learning_rate": 5.058870209057815e-05, "loss": 0.3758, "num_tokens": 1585884360.0, "step": 2504 }, { "epoch": 0.296204327775807, "grad_norm": 0.16291457414627075, "learning_rate": 5.05808554144853e-05, "loss": 0.4061, "num_tokens": 1586513378.0, "step": 2505 }, { "epoch": 0.2963225730164361, "grad_norm": 0.13988372683525085, "learning_rate": 5.057300615973105e-05, "loss": 0.3453, "num_tokens": 1587146261.0, "step": 2506 }, { "epoch": 0.29644081825706514, "grad_norm": 0.14150689542293549, "learning_rate": 5.0565154327466694e-05, "loss": 0.3558, "num_tokens": 1587784709.0, "step": 2507 }, { "epoch": 0.29655906349769423, "grad_norm": 0.14061804115772247, "learning_rate": 5.0557299918843874e-05, "loss": 0.3423, "num_tokens": 1588417863.0, "step": 2508 }, { "epoch": 0.29667730873832326, "grad_norm": 0.15579354763031006, "learning_rate": 5.054944293501465e-05, "loss": 0.3792, "num_tokens": 1589052471.0, "step": 2509 }, { "epoch": 0.29679555397895235, "grad_norm": 0.14252778887748718, "learning_rate": 5.0541583377131436e-05, "loss": 0.3821, "num_tokens": 1589685366.0, "step": 2510 }, { "epoch": 0.2969137992195814, "grad_norm": 0.14577676355838776, "learning_rate": 5.053372124634702e-05, "loss": 0.3409, "num_tokens": 1590322671.0, "step": 2511 }, { "epoch": 0.2970320444602105, "grad_norm": 0.14242297410964966, "learning_rate": 5.052585654381458e-05, "loss": 0.3328, "num_tokens": 1590953678.0, "step": 2512 }, { "epoch": 0.29715028970083956, "grad_norm": 0.14217747747898102, "learning_rate": 5.051798927068767e-05, "loss": 0.3577, "num_tokens": 1591588343.0, "step": 2513 }, { "epoch": 0.2972685349414686, "grad_norm": 0.13957667350769043, "learning_rate": 5.051011942812021e-05, "loss": 0.3677, "num_tokens": 1592222393.0, "step": 2514 }, { "epoch": 0.2973867801820977, "grad_norm": 0.16819806396961212, "learning_rate": 5.050224701726651e-05, "loss": 0.3888, "num_tokens": 1592836737.0, "step": 2515 }, { "epoch": 0.2975050254227267, "grad_norm": 0.16252745687961578, "learning_rate": 5.049437203928125e-05, "loss": 0.3856, "num_tokens": 1593468479.0, "step": 2516 }, { "epoch": 0.2976232706633558, "grad_norm": 0.13640791177749634, "learning_rate": 5.0486494495319486e-05, "loss": 0.329, "num_tokens": 1594103240.0, "step": 2517 }, { "epoch": 0.29774151590398484, "grad_norm": 0.15332572162151337, "learning_rate": 5.0478614386536655e-05, "loss": 0.3173, "num_tokens": 1594736479.0, "step": 2518 }, { "epoch": 0.29785976114461393, "grad_norm": 0.14375394582748413, "learning_rate": 5.047073171408857e-05, "loss": 0.3538, "num_tokens": 1595369844.0, "step": 2519 }, { "epoch": 0.297978006385243, "grad_norm": 2.3390374183654785, "learning_rate": 5.04628464791314e-05, "loss": 0.4221, "num_tokens": 1595969594.0, "step": 2520 }, { "epoch": 0.29809625162587206, "grad_norm": 0.18925605714321136, "learning_rate": 5.0454958682821735e-05, "loss": 0.3694, "num_tokens": 1596607685.0, "step": 2521 }, { "epoch": 0.29821449686650114, "grad_norm": 0.15425294637680054, "learning_rate": 5.0447068326316495e-05, "loss": 0.3522, "num_tokens": 1597246441.0, "step": 2522 }, { "epoch": 0.2983327421071302, "grad_norm": 0.1728714108467102, "learning_rate": 5.043917541077299e-05, "loss": 0.3791, "num_tokens": 1597882201.0, "step": 2523 }, { "epoch": 0.29845098734775927, "grad_norm": 0.16465453803539276, "learning_rate": 5.043127993734892e-05, "loss": 0.3644, "num_tokens": 1598512865.0, "step": 2524 }, { "epoch": 0.2985692325883883, "grad_norm": 0.14727315306663513, "learning_rate": 5.042338190720234e-05, "loss": 0.3488, "num_tokens": 1599149984.0, "step": 2525 }, { "epoch": 0.2986874778290174, "grad_norm": 0.15509533882141113, "learning_rate": 5.0415481321491694e-05, "loss": 0.3472, "num_tokens": 1599780457.0, "step": 2526 }, { "epoch": 0.2988057230696464, "grad_norm": 0.1475566178560257, "learning_rate": 5.04075781813758e-05, "loss": 0.3611, "num_tokens": 1600414611.0, "step": 2527 }, { "epoch": 0.2989239683102755, "grad_norm": 0.15511438250541687, "learning_rate": 5.039967248801383e-05, "loss": 0.3741, "num_tokens": 1601051962.0, "step": 2528 }, { "epoch": 0.2990422135509046, "grad_norm": 0.14940109848976135, "learning_rate": 5.039176424256536e-05, "loss": 0.3625, "num_tokens": 1601682372.0, "step": 2529 }, { "epoch": 0.29916045879153363, "grad_norm": 0.1553540974855423, "learning_rate": 5.0383853446190335e-05, "loss": 0.3831, "num_tokens": 1602307670.0, "step": 2530 }, { "epoch": 0.2992787040321627, "grad_norm": 0.14420151710510254, "learning_rate": 5.037594010004905e-05, "loss": 0.3751, "num_tokens": 1602943700.0, "step": 2531 }, { "epoch": 0.29939694927279176, "grad_norm": 0.14781242609024048, "learning_rate": 5.036802420530219e-05, "loss": 0.3648, "num_tokens": 1603576967.0, "step": 2532 }, { "epoch": 0.29951519451342085, "grad_norm": 0.1485464870929718, "learning_rate": 5.0360105763110826e-05, "loss": 0.3477, "num_tokens": 1604215712.0, "step": 2533 }, { "epoch": 0.2996334397540499, "grad_norm": 0.15036864578723907, "learning_rate": 5.0352184774636376e-05, "loss": 0.3466, "num_tokens": 1604816424.0, "step": 2534 }, { "epoch": 0.29975168499467897, "grad_norm": 0.15015023946762085, "learning_rate": 5.034426124104066e-05, "loss": 0.3455, "num_tokens": 1605422776.0, "step": 2535 }, { "epoch": 0.299869930235308, "grad_norm": 0.14370419085025787, "learning_rate": 5.033633516348586e-05, "loss": 0.3523, "num_tokens": 1606055073.0, "step": 2536 }, { "epoch": 0.2999881754759371, "grad_norm": 0.17869606614112854, "learning_rate": 5.0328406543134514e-05, "loss": 0.3659, "num_tokens": 1606691741.0, "step": 2537 }, { "epoch": 0.3001064207165662, "grad_norm": 0.25986751914024353, "learning_rate": 5.0320475381149565e-05, "loss": 0.3751, "num_tokens": 1607324599.0, "step": 2538 }, { "epoch": 0.3002246659571952, "grad_norm": 0.1488002985715866, "learning_rate": 5.0312541678694285e-05, "loss": 0.3253, "num_tokens": 1607960797.0, "step": 2539 }, { "epoch": 0.3003429111978243, "grad_norm": 0.16260503232479095, "learning_rate": 5.030460543693238e-05, "loss": 0.375, "num_tokens": 1608569909.0, "step": 2540 }, { "epoch": 0.30046115643845334, "grad_norm": 0.14690275490283966, "learning_rate": 5.029666665702786e-05, "loss": 0.3723, "num_tokens": 1609206721.0, "step": 2541 }, { "epoch": 0.3005794016790824, "grad_norm": 0.15154686570167542, "learning_rate": 5.0288725340145155e-05, "loss": 0.3719, "num_tokens": 1609836379.0, "step": 2542 }, { "epoch": 0.30069764691971146, "grad_norm": 0.15408851206302643, "learning_rate": 5.028078148744906e-05, "loss": 0.3501, "num_tokens": 1610467723.0, "step": 2543 }, { "epoch": 0.30081589216034055, "grad_norm": 0.1527935266494751, "learning_rate": 5.0272835100104725e-05, "loss": 0.3724, "num_tokens": 1611102219.0, "step": 2544 }, { "epoch": 0.30093413740096964, "grad_norm": 0.16145366430282593, "learning_rate": 5.026488617927768e-05, "loss": 0.3619, "num_tokens": 1611737217.0, "step": 2545 }, { "epoch": 0.30105238264159867, "grad_norm": 0.15530750155448914, "learning_rate": 5.025693472613385e-05, "loss": 0.3401, "num_tokens": 1612372064.0, "step": 2546 }, { "epoch": 0.30117062788222776, "grad_norm": 0.13368497788906097, "learning_rate": 5.0248980741839476e-05, "loss": 0.3587, "num_tokens": 1613007358.0, "step": 2547 }, { "epoch": 0.3012888731228568, "grad_norm": 0.16102302074432373, "learning_rate": 5.024102422756123e-05, "loss": 0.3575, "num_tokens": 1613639636.0, "step": 2548 }, { "epoch": 0.3014071183634859, "grad_norm": 0.14257802069187164, "learning_rate": 5.023306518446611e-05, "loss": 0.3418, "num_tokens": 1614272364.0, "step": 2549 }, { "epoch": 0.3015253636041149, "grad_norm": 0.1522272378206253, "learning_rate": 5.0225103613721526e-05, "loss": 0.3588, "num_tokens": 1614901916.0, "step": 2550 }, { "epoch": 0.301643608844744, "grad_norm": 0.13765737414360046, "learning_rate": 5.021713951649522e-05, "loss": 0.3376, "num_tokens": 1615538690.0, "step": 2551 }, { "epoch": 0.30176185408537304, "grad_norm": 0.15675680339336395, "learning_rate": 5.0209172893955324e-05, "loss": 0.3669, "num_tokens": 1616173042.0, "step": 2552 }, { "epoch": 0.3018800993260021, "grad_norm": 0.17081394791603088, "learning_rate": 5.020120374727034e-05, "loss": 0.4107, "num_tokens": 1616809845.0, "step": 2553 }, { "epoch": 0.3019983445666312, "grad_norm": 0.15046051144599915, "learning_rate": 5.019323207760913e-05, "loss": 0.3468, "num_tokens": 1617446340.0, "step": 2554 }, { "epoch": 0.30211658980726025, "grad_norm": 0.16037890315055847, "learning_rate": 5.0185257886140944e-05, "loss": 0.3643, "num_tokens": 1618075970.0, "step": 2555 }, { "epoch": 0.30223483504788934, "grad_norm": 0.1587369292974472, "learning_rate": 5.017728117403539e-05, "loss": 0.3698, "num_tokens": 1618702541.0, "step": 2556 }, { "epoch": 0.30235308028851837, "grad_norm": 0.14202697575092316, "learning_rate": 5.0169301942462454e-05, "loss": 0.3578, "num_tokens": 1619339972.0, "step": 2557 }, { "epoch": 0.30247132552914746, "grad_norm": 0.14736437797546387, "learning_rate": 5.016132019259246e-05, "loss": 0.3697, "num_tokens": 1619975087.0, "step": 2558 }, { "epoch": 0.3025895707697765, "grad_norm": 0.1841895580291748, "learning_rate": 5.0153335925596157e-05, "loss": 0.3762, "num_tokens": 1620598247.0, "step": 2559 }, { "epoch": 0.3027078160104056, "grad_norm": 0.1433762162923813, "learning_rate": 5.01453491426446e-05, "loss": 0.371, "num_tokens": 1621233754.0, "step": 2560 }, { "epoch": 0.3028260612510347, "grad_norm": 0.15632355213165283, "learning_rate": 5.0137359844909287e-05, "loss": 0.3972, "num_tokens": 1621865701.0, "step": 2561 }, { "epoch": 0.3029443064916637, "grad_norm": 0.15138910710811615, "learning_rate": 5.012936803356201e-05, "loss": 0.3746, "num_tokens": 1622502297.0, "step": 2562 }, { "epoch": 0.3030625517322928, "grad_norm": 0.15566207468509674, "learning_rate": 5.012137370977497e-05, "loss": 0.3849, "num_tokens": 1623133709.0, "step": 2563 }, { "epoch": 0.30318079697292183, "grad_norm": 0.13833820819854736, "learning_rate": 5.011337687472073e-05, "loss": 0.3793, "num_tokens": 1623765687.0, "step": 2564 }, { "epoch": 0.3032990422135509, "grad_norm": 0.14578305184841156, "learning_rate": 5.010537752957223e-05, "loss": 0.3732, "num_tokens": 1624390373.0, "step": 2565 }, { "epoch": 0.30341728745417995, "grad_norm": 0.14520291984081268, "learning_rate": 5.009737567550276e-05, "loss": 0.3561, "num_tokens": 1625025113.0, "step": 2566 }, { "epoch": 0.30353553269480904, "grad_norm": 0.14845344424247742, "learning_rate": 5.0089371313686e-05, "loss": 0.3527, "num_tokens": 1625657956.0, "step": 2567 }, { "epoch": 0.3036537779354381, "grad_norm": 0.15107372403144836, "learning_rate": 5.0081364445295974e-05, "loss": 0.3441, "num_tokens": 1626290658.0, "step": 2568 }, { "epoch": 0.30377202317606716, "grad_norm": 0.15273070335388184, "learning_rate": 5.007335507150708e-05, "loss": 0.3755, "num_tokens": 1626927675.0, "step": 2569 }, { "epoch": 0.30389026841669625, "grad_norm": 0.14277704060077667, "learning_rate": 5.006534319349409e-05, "loss": 0.3611, "num_tokens": 1627561965.0, "step": 2570 }, { "epoch": 0.3040085136573253, "grad_norm": 0.14692258834838867, "learning_rate": 5.005732881243216e-05, "loss": 0.3134, "num_tokens": 1628198126.0, "step": 2571 }, { "epoch": 0.3041267588979544, "grad_norm": 0.13913483917713165, "learning_rate": 5.0049311929496774e-05, "loss": 0.34, "num_tokens": 1628832165.0, "step": 2572 }, { "epoch": 0.3042450041385834, "grad_norm": 0.15258818864822388, "learning_rate": 5.0041292545863806e-05, "loss": 0.3619, "num_tokens": 1629466351.0, "step": 2573 }, { "epoch": 0.3043632493792125, "grad_norm": 0.155850350856781, "learning_rate": 5.0033270662709497e-05, "loss": 0.4, "num_tokens": 1630103503.0, "step": 2574 }, { "epoch": 0.30448149461984153, "grad_norm": 0.15866555273532867, "learning_rate": 5.002524628121046e-05, "loss": 0.3848, "num_tokens": 1630741258.0, "step": 2575 }, { "epoch": 0.3045997398604706, "grad_norm": 0.15099690854549408, "learning_rate": 5.001721940254365e-05, "loss": 0.3464, "num_tokens": 1631378278.0, "step": 2576 }, { "epoch": 0.30471798510109965, "grad_norm": 0.15565212070941925, "learning_rate": 5.000919002788642e-05, "loss": 0.3347, "num_tokens": 1632012029.0, "step": 2577 }, { "epoch": 0.30483623034172874, "grad_norm": 0.145799919962883, "learning_rate": 5.000115815841647e-05, "loss": 0.3275, "num_tokens": 1632649835.0, "step": 2578 }, { "epoch": 0.30495447558235783, "grad_norm": 0.15756964683532715, "learning_rate": 4.999312379531186e-05, "loss": 0.3716, "num_tokens": 1633279626.0, "step": 2579 }, { "epoch": 0.30507272082298686, "grad_norm": 0.15340521931648254, "learning_rate": 4.998508693975103e-05, "loss": 0.3423, "num_tokens": 1633913718.0, "step": 2580 }, { "epoch": 0.30519096606361595, "grad_norm": 0.14894331991672516, "learning_rate": 4.99770475929128e-05, "loss": 0.3331, "num_tokens": 1634549519.0, "step": 2581 }, { "epoch": 0.305309211304245, "grad_norm": 0.1372758448123932, "learning_rate": 4.9969005755976305e-05, "loss": 0.3423, "num_tokens": 1635181293.0, "step": 2582 }, { "epoch": 0.3054274565448741, "grad_norm": 0.15055830776691437, "learning_rate": 4.996096143012109e-05, "loss": 0.3645, "num_tokens": 1635817022.0, "step": 2583 }, { "epoch": 0.3055457017855031, "grad_norm": 0.16187329590320587, "learning_rate": 4.995291461652706e-05, "loss": 0.3718, "num_tokens": 1636452163.0, "step": 2584 }, { "epoch": 0.3056639470261322, "grad_norm": 0.1524498462677002, "learning_rate": 4.994486531637446e-05, "loss": 0.3844, "num_tokens": 1637091122.0, "step": 2585 }, { "epoch": 0.3057821922667613, "grad_norm": 0.16135941445827484, "learning_rate": 4.993681353084393e-05, "loss": 0.3877, "num_tokens": 1637730453.0, "step": 2586 }, { "epoch": 0.3059004375073903, "grad_norm": 0.13814179599285126, "learning_rate": 4.992875926111647e-05, "loss": 0.3147, "num_tokens": 1638367703.0, "step": 2587 }, { "epoch": 0.3060186827480194, "grad_norm": 0.1667226254940033, "learning_rate": 4.992070250837341e-05, "loss": 0.4004, "num_tokens": 1638999767.0, "step": 2588 }, { "epoch": 0.30613692798864844, "grad_norm": 0.15473672747612, "learning_rate": 4.9912643273796486e-05, "loss": 0.3804, "num_tokens": 1639639175.0, "step": 2589 }, { "epoch": 0.30625517322927753, "grad_norm": 0.15862344205379486, "learning_rate": 4.990458155856777e-05, "loss": 0.3615, "num_tokens": 1640268937.0, "step": 2590 }, { "epoch": 0.30637341846990657, "grad_norm": 0.15074317157268524, "learning_rate": 4.989651736386972e-05, "loss": 0.3975, "num_tokens": 1640905144.0, "step": 2591 }, { "epoch": 0.30649166371053566, "grad_norm": 0.15299466252326965, "learning_rate": 4.988845069088513e-05, "loss": 0.3701, "num_tokens": 1641535980.0, "step": 2592 }, { "epoch": 0.3066099089511647, "grad_norm": 0.1560005098581314, "learning_rate": 4.9880381540797203e-05, "loss": 0.3266, "num_tokens": 1642164823.0, "step": 2593 }, { "epoch": 0.3067281541917938, "grad_norm": 0.1413908302783966, "learning_rate": 4.987230991478946e-05, "loss": 0.3484, "num_tokens": 1642792813.0, "step": 2594 }, { "epoch": 0.30684639943242287, "grad_norm": 0.12512947618961334, "learning_rate": 4.9864235814045796e-05, "loss": 0.3181, "num_tokens": 1643418555.0, "step": 2595 }, { "epoch": 0.3069646446730519, "grad_norm": 0.15284454822540283, "learning_rate": 4.985615923975049e-05, "loss": 0.3632, "num_tokens": 1644034211.0, "step": 2596 }, { "epoch": 0.307082889913681, "grad_norm": 0.15455694496631622, "learning_rate": 4.9848080193088153e-05, "loss": 0.3445, "num_tokens": 1644670523.0, "step": 2597 }, { "epoch": 0.30720113515431, "grad_norm": 0.16075168550014496, "learning_rate": 4.983999867524379e-05, "loss": 0.371, "num_tokens": 1645306242.0, "step": 2598 }, { "epoch": 0.3073193803949391, "grad_norm": 0.1409207284450531, "learning_rate": 4.9831914687402736e-05, "loss": 0.3188, "num_tokens": 1645938779.0, "step": 2599 }, { "epoch": 0.30743762563556815, "grad_norm": 0.155086949467659, "learning_rate": 4.9823828230750707e-05, "loss": 0.3785, "num_tokens": 1646575837.0, "step": 2600 }, { "epoch": 0.30755587087619723, "grad_norm": 0.1414051055908203, "learning_rate": 4.98157393064738e-05, "loss": 0.3241, "num_tokens": 1647211588.0, "step": 2601 }, { "epoch": 0.3076741161168263, "grad_norm": 0.16080044209957123, "learning_rate": 4.980764791575843e-05, "loss": 0.345, "num_tokens": 1647847594.0, "step": 2602 }, { "epoch": 0.30779236135745536, "grad_norm": 0.15084360539913177, "learning_rate": 4.97995540597914e-05, "loss": 0.3792, "num_tokens": 1648480650.0, "step": 2603 }, { "epoch": 0.30791060659808445, "grad_norm": 0.1429891735315323, "learning_rate": 4.979145773975989e-05, "loss": 0.3434, "num_tokens": 1649117682.0, "step": 2604 }, { "epoch": 0.3080288518387135, "grad_norm": 0.14498206973075867, "learning_rate": 4.978335895685141e-05, "loss": 0.3062, "num_tokens": 1649751752.0, "step": 2605 }, { "epoch": 0.30814709707934257, "grad_norm": 0.14570236206054688, "learning_rate": 4.977525771225383e-05, "loss": 0.3571, "num_tokens": 1650391077.0, "step": 2606 }, { "epoch": 0.3082653423199716, "grad_norm": 0.1405676007270813, "learning_rate": 4.9767154007155424e-05, "loss": 0.3458, "num_tokens": 1651025851.0, "step": 2607 }, { "epoch": 0.3083835875606007, "grad_norm": 0.14592914283275604, "learning_rate": 4.975904784274478e-05, "loss": 0.3461, "num_tokens": 1651663895.0, "step": 2608 }, { "epoch": 0.3085018328012297, "grad_norm": 0.14680138230323792, "learning_rate": 4.9750939220210865e-05, "loss": 0.3456, "num_tokens": 1652297334.0, "step": 2609 }, { "epoch": 0.3086200780418588, "grad_norm": 0.17038288712501526, "learning_rate": 4.974282814074301e-05, "loss": 0.3712, "num_tokens": 1652930319.0, "step": 2610 }, { "epoch": 0.3087383232824879, "grad_norm": 0.15522988140583038, "learning_rate": 4.97347146055309e-05, "loss": 0.3524, "num_tokens": 1653558106.0, "step": 2611 }, { "epoch": 0.30885656852311694, "grad_norm": 0.13919782638549805, "learning_rate": 4.972659861576459e-05, "loss": 0.2988, "num_tokens": 1654190622.0, "step": 2612 }, { "epoch": 0.308974813763746, "grad_norm": 0.1609845906496048, "learning_rate": 4.9718480172634477e-05, "loss": 0.3614, "num_tokens": 1654804468.0, "step": 2613 }, { "epoch": 0.30909305900437506, "grad_norm": 0.15366476774215698, "learning_rate": 4.971035927733133e-05, "loss": 0.3457, "num_tokens": 1655436872.0, "step": 2614 }, { "epoch": 0.30921130424500415, "grad_norm": 0.1801452338695526, "learning_rate": 4.970223593104629e-05, "loss": 0.3834, "num_tokens": 1656073073.0, "step": 2615 }, { "epoch": 0.3093295494856332, "grad_norm": 0.1387845128774643, "learning_rate": 4.969411013497084e-05, "loss": 0.3412, "num_tokens": 1656704728.0, "step": 2616 }, { "epoch": 0.30944779472626227, "grad_norm": 0.15643592178821564, "learning_rate": 4.9685981890296815e-05, "loss": 0.3418, "num_tokens": 1657332271.0, "step": 2617 }, { "epoch": 0.30956603996689136, "grad_norm": 0.164729505777359, "learning_rate": 4.967785119821643e-05, "loss": 0.3644, "num_tokens": 1657948359.0, "step": 2618 }, { "epoch": 0.3096842852075204, "grad_norm": 0.1720496416091919, "learning_rate": 4.9669718059922234e-05, "loss": 0.422, "num_tokens": 1658582924.0, "step": 2619 }, { "epoch": 0.3098025304481495, "grad_norm": 0.15229572355747223, "learning_rate": 4.9661582476607174e-05, "loss": 0.3715, "num_tokens": 1659208653.0, "step": 2620 }, { "epoch": 0.3099207756887785, "grad_norm": 0.14217643439769745, "learning_rate": 4.965344444946452e-05, "loss": 0.3196, "num_tokens": 1659845271.0, "step": 2621 }, { "epoch": 0.3100390209294076, "grad_norm": 0.19868801534175873, "learning_rate": 4.964530397968791e-05, "loss": 0.3707, "num_tokens": 1660468585.0, "step": 2622 }, { "epoch": 0.31015726617003664, "grad_norm": 0.1407567262649536, "learning_rate": 4.9637161068471345e-05, "loss": 0.3466, "num_tokens": 1661101485.0, "step": 2623 }, { "epoch": 0.3102755114106657, "grad_norm": 0.13727466762065887, "learning_rate": 4.962901571700917e-05, "loss": 0.3305, "num_tokens": 1661735662.0, "step": 2624 }, { "epoch": 0.31039375665129476, "grad_norm": 0.1891823559999466, "learning_rate": 4.9620867926496124e-05, "loss": 0.4198, "num_tokens": 1662373879.0, "step": 2625 }, { "epoch": 0.31051200189192385, "grad_norm": 0.1461276113986969, "learning_rate": 4.9612717698127266e-05, "loss": 0.3673, "num_tokens": 1663008427.0, "step": 2626 }, { "epoch": 0.31063024713255294, "grad_norm": 0.15043039619922638, "learning_rate": 4.960456503309802e-05, "loss": 0.3665, "num_tokens": 1663638900.0, "step": 2627 }, { "epoch": 0.310748492373182, "grad_norm": 0.14737606048583984, "learning_rate": 4.959640993260419e-05, "loss": 0.3198, "num_tokens": 1664272324.0, "step": 2628 }, { "epoch": 0.31086673761381106, "grad_norm": 0.16243639588356018, "learning_rate": 4.9588252397841905e-05, "loss": 0.4113, "num_tokens": 1664904228.0, "step": 2629 }, { "epoch": 0.3109849828544401, "grad_norm": 0.14403705298900604, "learning_rate": 4.958009243000766e-05, "loss": 0.3531, "num_tokens": 1665537753.0, "step": 2630 }, { "epoch": 0.3111032280950692, "grad_norm": 0.15176835656166077, "learning_rate": 4.957193003029834e-05, "loss": 0.3782, "num_tokens": 1666174169.0, "step": 2631 }, { "epoch": 0.3112214733356982, "grad_norm": 0.1425517350435257, "learning_rate": 4.956376519991114e-05, "loss": 0.3637, "num_tokens": 1666803754.0, "step": 2632 }, { "epoch": 0.3113397185763273, "grad_norm": 0.14878658950328827, "learning_rate": 4.955559794004363e-05, "loss": 0.3425, "num_tokens": 1667438241.0, "step": 2633 }, { "epoch": 0.31145796381695634, "grad_norm": 0.15044325590133667, "learning_rate": 4.954742825189375e-05, "loss": 0.3401, "num_tokens": 1668076450.0, "step": 2634 }, { "epoch": 0.31157620905758543, "grad_norm": 0.1502782702445984, "learning_rate": 4.953925613665977e-05, "loss": 0.371, "num_tokens": 1668712082.0, "step": 2635 }, { "epoch": 0.3116944542982145, "grad_norm": 0.165771022439003, "learning_rate": 4.953108159554035e-05, "loss": 0.3942, "num_tokens": 1669348399.0, "step": 2636 }, { "epoch": 0.31181269953884355, "grad_norm": 0.13961359858512878, "learning_rate": 4.952290462973447e-05, "loss": 0.3459, "num_tokens": 1669984836.0, "step": 2637 }, { "epoch": 0.31193094477947264, "grad_norm": 0.12863527238368988, "learning_rate": 4.9514725240441494e-05, "loss": 0.3134, "num_tokens": 1670619936.0, "step": 2638 }, { "epoch": 0.3120491900201017, "grad_norm": 0.16211073100566864, "learning_rate": 4.950654342886111e-05, "loss": 0.3641, "num_tokens": 1671255448.0, "step": 2639 }, { "epoch": 0.31216743526073076, "grad_norm": 0.1444818377494812, "learning_rate": 4.949835919619339e-05, "loss": 0.3784, "num_tokens": 1671888642.0, "step": 2640 }, { "epoch": 0.3122856805013598, "grad_norm": 0.14098957180976868, "learning_rate": 4.9490172543638755e-05, "loss": 0.3551, "num_tokens": 1672528375.0, "step": 2641 }, { "epoch": 0.3124039257419889, "grad_norm": 0.15444675087928772, "learning_rate": 4.948198347239798e-05, "loss": 0.4019, "num_tokens": 1673167791.0, "step": 2642 }, { "epoch": 0.312522170982618, "grad_norm": 0.14311270415782928, "learning_rate": 4.947379198367218e-05, "loss": 0.3428, "num_tokens": 1673807365.0, "step": 2643 }, { "epoch": 0.312640416223247, "grad_norm": 0.1459292769432068, "learning_rate": 4.946559807866284e-05, "loss": 0.3771, "num_tokens": 1674442513.0, "step": 2644 }, { "epoch": 0.3127586614638761, "grad_norm": 0.14473164081573486, "learning_rate": 4.9457401758571816e-05, "loss": 0.3607, "num_tokens": 1675075724.0, "step": 2645 }, { "epoch": 0.31287690670450513, "grad_norm": 0.1428631842136383, "learning_rate": 4.9449203024601275e-05, "loss": 0.3659, "num_tokens": 1675710105.0, "step": 2646 }, { "epoch": 0.3129951519451342, "grad_norm": 0.13685475289821625, "learning_rate": 4.944100187795376e-05, "loss": 0.351, "num_tokens": 1676349650.0, "step": 2647 }, { "epoch": 0.31311339718576325, "grad_norm": 0.14980390667915344, "learning_rate": 4.943279831983219e-05, "loss": 0.3503, "num_tokens": 1676989186.0, "step": 2648 }, { "epoch": 0.31323164242639234, "grad_norm": 0.14672040939331055, "learning_rate": 4.942459235143979e-05, "loss": 0.3934, "num_tokens": 1677628500.0, "step": 2649 }, { "epoch": 0.3133498876670214, "grad_norm": 0.1321474313735962, "learning_rate": 4.941638397398019e-05, "loss": 0.3224, "num_tokens": 1678243011.0, "step": 2650 }, { "epoch": 0.31346813290765047, "grad_norm": 0.1491018384695053, "learning_rate": 4.9408173188657336e-05, "loss": 0.3715, "num_tokens": 1678877133.0, "step": 2651 }, { "epoch": 0.31358637814827955, "grad_norm": 0.14962702989578247, "learning_rate": 4.939995999667555e-05, "loss": 0.3606, "num_tokens": 1679511112.0, "step": 2652 }, { "epoch": 0.3137046233889086, "grad_norm": 0.1505243182182312, "learning_rate": 4.939174439923949e-05, "loss": 0.4011, "num_tokens": 1680144682.0, "step": 2653 }, { "epoch": 0.3138228686295377, "grad_norm": 0.15766404569149017, "learning_rate": 4.938352639755417e-05, "loss": 0.366, "num_tokens": 1680769049.0, "step": 2654 }, { "epoch": 0.3139411138701667, "grad_norm": 0.1566963940858841, "learning_rate": 4.937530599282496e-05, "loss": 0.3712, "num_tokens": 1681408352.0, "step": 2655 }, { "epoch": 0.3140593591107958, "grad_norm": 0.14953123033046722, "learning_rate": 4.936708318625758e-05, "loss": 0.3626, "num_tokens": 1682038448.0, "step": 2656 }, { "epoch": 0.31417760435142483, "grad_norm": 0.15264447033405304, "learning_rate": 4.935885797905813e-05, "loss": 0.3712, "num_tokens": 1682676961.0, "step": 2657 }, { "epoch": 0.3142958495920539, "grad_norm": 0.15155024826526642, "learning_rate": 4.935063037243301e-05, "loss": 0.3609, "num_tokens": 1683310802.0, "step": 2658 }, { "epoch": 0.314414094832683, "grad_norm": 0.14461688697338104, "learning_rate": 4.934240036758901e-05, "loss": 0.3678, "num_tokens": 1683948414.0, "step": 2659 }, { "epoch": 0.31453234007331204, "grad_norm": 0.14900623261928558, "learning_rate": 4.933416796573327e-05, "loss": 0.386, "num_tokens": 1684582469.0, "step": 2660 }, { "epoch": 0.31465058531394113, "grad_norm": 0.1332385540008545, "learning_rate": 4.932593316807325e-05, "loss": 0.3676, "num_tokens": 1685219110.0, "step": 2661 }, { "epoch": 0.31476883055457017, "grad_norm": 0.13717584311962128, "learning_rate": 4.9317695975816796e-05, "loss": 0.3311, "num_tokens": 1685852803.0, "step": 2662 }, { "epoch": 0.31488707579519926, "grad_norm": 0.156549334526062, "learning_rate": 4.93094563901721e-05, "loss": 0.3386, "num_tokens": 1686488656.0, "step": 2663 }, { "epoch": 0.3150053210358283, "grad_norm": 0.1654921919107437, "learning_rate": 4.930121441234769e-05, "loss": 0.3545, "num_tokens": 1687123786.0, "step": 2664 }, { "epoch": 0.3151235662764574, "grad_norm": 0.16102896630764008, "learning_rate": 4.929297004355246e-05, "loss": 0.3742, "num_tokens": 1687756907.0, "step": 2665 }, { "epoch": 0.3152418115170864, "grad_norm": 0.1564297080039978, "learning_rate": 4.928472328499564e-05, "loss": 0.3353, "num_tokens": 1688393115.0, "step": 2666 }, { "epoch": 0.3153600567577155, "grad_norm": 0.14145350456237793, "learning_rate": 4.927647413788682e-05, "loss": 0.364, "num_tokens": 1689028874.0, "step": 2667 }, { "epoch": 0.3154783019983446, "grad_norm": 0.15700456500053406, "learning_rate": 4.926822260343594e-05, "loss": 0.389, "num_tokens": 1689668481.0, "step": 2668 }, { "epoch": 0.3155965472389736, "grad_norm": 0.13130880892276764, "learning_rate": 4.925996868285328e-05, "loss": 0.3411, "num_tokens": 1690307233.0, "step": 2669 }, { "epoch": 0.3157147924796027, "grad_norm": 0.14191026985645294, "learning_rate": 4.92517123773495e-05, "loss": 0.3493, "num_tokens": 1690936000.0, "step": 2670 }, { "epoch": 0.31583303772023175, "grad_norm": 0.14893792569637299, "learning_rate": 4.924345368813557e-05, "loss": 0.3813, "num_tokens": 1691574884.0, "step": 2671 }, { "epoch": 0.31595128296086084, "grad_norm": 0.14371034502983093, "learning_rate": 4.9235192616422844e-05, "loss": 0.3313, "num_tokens": 1692206801.0, "step": 2672 }, { "epoch": 0.31606952820148987, "grad_norm": 0.1361967921257019, "learning_rate": 4.9226929163423e-05, "loss": 0.3558, "num_tokens": 1692843392.0, "step": 2673 }, { "epoch": 0.31618777344211896, "grad_norm": 0.15345615148544312, "learning_rate": 4.921866333034806e-05, "loss": 0.4102, "num_tokens": 1693481747.0, "step": 2674 }, { "epoch": 0.31630601868274805, "grad_norm": 0.13692708313465118, "learning_rate": 4.921039511841044e-05, "loss": 0.3772, "num_tokens": 1694120880.0, "step": 2675 }, { "epoch": 0.3164242639233771, "grad_norm": 0.1612626612186432, "learning_rate": 4.920212452882286e-05, "loss": 0.3644, "num_tokens": 1694755717.0, "step": 2676 }, { "epoch": 0.31654250916400617, "grad_norm": 0.15381447970867157, "learning_rate": 4.91938515627984e-05, "loss": 0.373, "num_tokens": 1695389815.0, "step": 2677 }, { "epoch": 0.3166607544046352, "grad_norm": 0.13971692323684692, "learning_rate": 4.918557622155049e-05, "loss": 0.3453, "num_tokens": 1696025843.0, "step": 2678 }, { "epoch": 0.3167789996452643, "grad_norm": 0.14777106046676636, "learning_rate": 4.917729850629293e-05, "loss": 0.3667, "num_tokens": 1696659035.0, "step": 2679 }, { "epoch": 0.3168972448858933, "grad_norm": 0.14608249068260193, "learning_rate": 4.916901841823983e-05, "loss": 0.3483, "num_tokens": 1697289201.0, "step": 2680 }, { "epoch": 0.3170154901265224, "grad_norm": 0.13603992760181427, "learning_rate": 4.916073595860567e-05, "loss": 0.3407, "num_tokens": 1697925113.0, "step": 2681 }, { "epoch": 0.31713373536715145, "grad_norm": 0.1392383873462677, "learning_rate": 4.915245112860529e-05, "loss": 0.3506, "num_tokens": 1698558395.0, "step": 2682 }, { "epoch": 0.31725198060778054, "grad_norm": 0.1463528275489807, "learning_rate": 4.914416392945384e-05, "loss": 0.3759, "num_tokens": 1699196086.0, "step": 2683 }, { "epoch": 0.3173702258484096, "grad_norm": 0.12495151162147522, "learning_rate": 4.9135874362366855e-05, "loss": 0.3076, "num_tokens": 1699829880.0, "step": 2684 }, { "epoch": 0.31748847108903866, "grad_norm": 0.1504184752702713, "learning_rate": 4.91275824285602e-05, "loss": 0.3921, "num_tokens": 1700459160.0, "step": 2685 }, { "epoch": 0.31760671632966775, "grad_norm": 0.13627371191978455, "learning_rate": 4.9119288129250075e-05, "loss": 0.3444, "num_tokens": 1701090510.0, "step": 2686 }, { "epoch": 0.3177249615702968, "grad_norm": 0.14987996220588684, "learning_rate": 4.911099146565306e-05, "loss": 0.3779, "num_tokens": 1701726174.0, "step": 2687 }, { "epoch": 0.31784320681092587, "grad_norm": 0.1593654900789261, "learning_rate": 4.9102692438986065e-05, "loss": 0.3909, "num_tokens": 1702361400.0, "step": 2688 }, { "epoch": 0.3179614520515549, "grad_norm": 0.14411930739879608, "learning_rate": 4.909439105046633e-05, "loss": 0.3781, "num_tokens": 1702993806.0, "step": 2689 }, { "epoch": 0.318079697292184, "grad_norm": 0.13609649240970612, "learning_rate": 4.9086087301311454e-05, "loss": 0.3217, "num_tokens": 1703632945.0, "step": 2690 }, { "epoch": 0.318197942532813, "grad_norm": 0.16053102910518646, "learning_rate": 4.907778119273941e-05, "loss": 0.3564, "num_tokens": 1704265862.0, "step": 2691 }, { "epoch": 0.3183161877734421, "grad_norm": 0.13469050824642181, "learning_rate": 4.9069472725968465e-05, "loss": 0.3537, "num_tokens": 1704866810.0, "step": 2692 }, { "epoch": 0.3184344330140712, "grad_norm": 0.1446019858121872, "learning_rate": 4.906116190221726e-05, "loss": 0.3567, "num_tokens": 1705501981.0, "step": 2693 }, { "epoch": 0.31855267825470024, "grad_norm": 0.1566362977027893, "learning_rate": 4.90528487227048e-05, "loss": 0.3675, "num_tokens": 1706139459.0, "step": 2694 }, { "epoch": 0.31867092349532933, "grad_norm": 0.1379963606595993, "learning_rate": 4.90445331886504e-05, "loss": 0.3309, "num_tokens": 1706776278.0, "step": 2695 }, { "epoch": 0.31878916873595836, "grad_norm": 0.14326104521751404, "learning_rate": 4.903621530127373e-05, "loss": 0.345, "num_tokens": 1707408737.0, "step": 2696 }, { "epoch": 0.31890741397658745, "grad_norm": 0.16234040260314941, "learning_rate": 4.902789506179483e-05, "loss": 0.4067, "num_tokens": 1708040485.0, "step": 2697 }, { "epoch": 0.3190256592172165, "grad_norm": 0.15496356785297394, "learning_rate": 4.901957247143406e-05, "loss": 0.3813, "num_tokens": 1708667258.0, "step": 2698 }, { "epoch": 0.3191439044578456, "grad_norm": 0.15392260253429413, "learning_rate": 4.9011247531412125e-05, "loss": 0.3927, "num_tokens": 1709306576.0, "step": 2699 }, { "epoch": 0.31926214969847466, "grad_norm": 0.1625681221485138, "learning_rate": 4.9002920242950075e-05, "loss": 0.3726, "num_tokens": 1709931524.0, "step": 2700 }, { "epoch": 0.3193803949391037, "grad_norm": 0.1310642659664154, "learning_rate": 4.8994590607269334e-05, "loss": 0.3359, "num_tokens": 1710563229.0, "step": 2701 }, { "epoch": 0.3194986401797328, "grad_norm": 0.16521522402763367, "learning_rate": 4.898625862559162e-05, "loss": 0.376, "num_tokens": 1711197349.0, "step": 2702 }, { "epoch": 0.3196168854203618, "grad_norm": 0.14266753196716309, "learning_rate": 4.897792429913903e-05, "loss": 0.3146, "num_tokens": 1711832054.0, "step": 2703 }, { "epoch": 0.3197351306609909, "grad_norm": 0.14586713910102844, "learning_rate": 4.8969587629134003e-05, "loss": 0.3669, "num_tokens": 1712466588.0, "step": 2704 }, { "epoch": 0.31985337590161994, "grad_norm": 0.141366645693779, "learning_rate": 4.896124861679931e-05, "loss": 0.3442, "num_tokens": 1713104055.0, "step": 2705 }, { "epoch": 0.31997162114224903, "grad_norm": 0.1449168622493744, "learning_rate": 4.895290726335806e-05, "loss": 0.3767, "num_tokens": 1713741405.0, "step": 2706 }, { "epoch": 0.32008986638287806, "grad_norm": 0.14182746410369873, "learning_rate": 4.8944563570033744e-05, "loss": 0.3666, "num_tokens": 1714376139.0, "step": 2707 }, { "epoch": 0.32020811162350715, "grad_norm": 0.1399429589509964, "learning_rate": 4.893621753805014e-05, "loss": 0.3505, "num_tokens": 1715010719.0, "step": 2708 }, { "epoch": 0.32032635686413624, "grad_norm": 0.15289928019046783, "learning_rate": 4.892786916863142e-05, "loss": 0.384, "num_tokens": 1715642575.0, "step": 2709 }, { "epoch": 0.3204446021047653, "grad_norm": 0.16236214339733124, "learning_rate": 4.891951846300205e-05, "loss": 0.3666, "num_tokens": 1716275068.0, "step": 2710 }, { "epoch": 0.32056284734539436, "grad_norm": 0.15717679262161255, "learning_rate": 4.891116542238689e-05, "loss": 0.395, "num_tokens": 1716914533.0, "step": 2711 }, { "epoch": 0.3206810925860234, "grad_norm": 0.154623880982399, "learning_rate": 4.89028100480111e-05, "loss": 0.356, "num_tokens": 1717515924.0, "step": 2712 }, { "epoch": 0.3207993378266525, "grad_norm": 0.14391647279262543, "learning_rate": 4.889445234110021e-05, "loss": 0.3376, "num_tokens": 1718149602.0, "step": 2713 }, { "epoch": 0.3209175830672815, "grad_norm": 0.15994234383106232, "learning_rate": 4.888609230288007e-05, "loss": 0.3607, "num_tokens": 1718783402.0, "step": 2714 }, { "epoch": 0.3210358283079106, "grad_norm": 0.1503729224205017, "learning_rate": 4.887772993457689e-05, "loss": 0.3667, "num_tokens": 1719417133.0, "step": 2715 }, { "epoch": 0.3211540735485397, "grad_norm": 0.14269284904003143, "learning_rate": 4.8869365237417224e-05, "loss": 0.3322, "num_tokens": 1720055957.0, "step": 2716 }, { "epoch": 0.32127231878916873, "grad_norm": 0.14641371369361877, "learning_rate": 4.8860998212627956e-05, "loss": 0.3498, "num_tokens": 1720672344.0, "step": 2717 }, { "epoch": 0.3213905640297978, "grad_norm": 0.14321842789649963, "learning_rate": 4.88526288614363e-05, "loss": 0.3414, "num_tokens": 1721301318.0, "step": 2718 }, { "epoch": 0.32150880927042685, "grad_norm": 0.14833571016788483, "learning_rate": 4.884425718506985e-05, "loss": 0.3798, "num_tokens": 1721927703.0, "step": 2719 }, { "epoch": 0.32162705451105594, "grad_norm": 0.13699258863925934, "learning_rate": 4.883588318475649e-05, "loss": 0.3388, "num_tokens": 1722565193.0, "step": 2720 }, { "epoch": 0.321745299751685, "grad_norm": 0.14043930172920227, "learning_rate": 4.882750686172448e-05, "loss": 0.3668, "num_tokens": 1723200696.0, "step": 2721 }, { "epoch": 0.32186354499231407, "grad_norm": 0.1506030261516571, "learning_rate": 4.881912821720242e-05, "loss": 0.3828, "num_tokens": 1723839772.0, "step": 2722 }, { "epoch": 0.3219817902329431, "grad_norm": 0.15427295863628387, "learning_rate": 4.8810747252419246e-05, "loss": 0.3936, "num_tokens": 1724477404.0, "step": 2723 }, { "epoch": 0.3221000354735722, "grad_norm": 0.13628952205181122, "learning_rate": 4.88023639686042e-05, "loss": 0.3419, "num_tokens": 1725115337.0, "step": 2724 }, { "epoch": 0.3222182807142013, "grad_norm": 0.14123721420764923, "learning_rate": 4.8793978366986945e-05, "loss": 0.3388, "num_tokens": 1725751875.0, "step": 2725 }, { "epoch": 0.3223365259548303, "grad_norm": 0.14520679414272308, "learning_rate": 4.8785590448797394e-05, "loss": 0.3257, "num_tokens": 1726386668.0, "step": 2726 }, { "epoch": 0.3224547711954594, "grad_norm": 0.15800966322422028, "learning_rate": 4.877720021526585e-05, "loss": 0.3593, "num_tokens": 1727024471.0, "step": 2727 }, { "epoch": 0.32257301643608843, "grad_norm": 0.15010881423950195, "learning_rate": 4.8768807667622945e-05, "loss": 0.3556, "num_tokens": 1727657575.0, "step": 2728 }, { "epoch": 0.3226912616767175, "grad_norm": 0.1497504860162735, "learning_rate": 4.876041280709967e-05, "loss": 0.3521, "num_tokens": 1728294968.0, "step": 2729 }, { "epoch": 0.32280950691734656, "grad_norm": 0.12916025519371033, "learning_rate": 4.875201563492732e-05, "loss": 0.3329, "num_tokens": 1728933844.0, "step": 2730 }, { "epoch": 0.32292775215797564, "grad_norm": 0.14728513360023499, "learning_rate": 4.8743616152337536e-05, "loss": 0.3477, "num_tokens": 1729563923.0, "step": 2731 }, { "epoch": 0.3230459973986047, "grad_norm": 0.13855090737342834, "learning_rate": 4.8735214360562314e-05, "loss": 0.3568, "num_tokens": 1730193072.0, "step": 2732 }, { "epoch": 0.32316424263923377, "grad_norm": 0.15832413733005524, "learning_rate": 4.8726810260834e-05, "loss": 0.3478, "num_tokens": 1730826376.0, "step": 2733 }, { "epoch": 0.32328248787986286, "grad_norm": 0.15312460064888, "learning_rate": 4.871840385438524e-05, "loss": 0.379, "num_tokens": 1731457578.0, "step": 2734 }, { "epoch": 0.3234007331204919, "grad_norm": 0.1423926204442978, "learning_rate": 4.870999514244903e-05, "loss": 0.3171, "num_tokens": 1732094125.0, "step": 2735 }, { "epoch": 0.323518978361121, "grad_norm": 0.1498597413301468, "learning_rate": 4.8701584126258735e-05, "loss": 0.4163, "num_tokens": 1732695672.0, "step": 2736 }, { "epoch": 0.32363722360175, "grad_norm": 0.15859666466712952, "learning_rate": 4.8693170807048026e-05, "loss": 0.3845, "num_tokens": 1733327345.0, "step": 2737 }, { "epoch": 0.3237554688423791, "grad_norm": 0.13456298410892487, "learning_rate": 4.8684755186050924e-05, "loss": 0.3403, "num_tokens": 1733965576.0, "step": 2738 }, { "epoch": 0.32387371408300814, "grad_norm": 0.1437515914440155, "learning_rate": 4.8676337264501774e-05, "loss": 0.3495, "num_tokens": 1734592394.0, "step": 2739 }, { "epoch": 0.3239919593236372, "grad_norm": 0.13156425952911377, "learning_rate": 4.86679170436353e-05, "loss": 0.3517, "num_tokens": 1735221890.0, "step": 2740 }, { "epoch": 0.3241102045642663, "grad_norm": 0.1659955382347107, "learning_rate": 4.8659494524686496e-05, "loss": 0.3969, "num_tokens": 1735857891.0, "step": 2741 }, { "epoch": 0.32422844980489535, "grad_norm": 0.15194129943847656, "learning_rate": 4.8651069708890745e-05, "loss": 0.3751, "num_tokens": 1736492956.0, "step": 2742 }, { "epoch": 0.32434669504552444, "grad_norm": 0.14212173223495483, "learning_rate": 4.864264259748376e-05, "loss": 0.39, "num_tokens": 1737125568.0, "step": 2743 }, { "epoch": 0.32446494028615347, "grad_norm": 0.15140962600708008, "learning_rate": 4.863421319170158e-05, "loss": 0.3801, "num_tokens": 1737736959.0, "step": 2744 }, { "epoch": 0.32458318552678256, "grad_norm": 0.1499944031238556, "learning_rate": 4.862578149278058e-05, "loss": 0.3613, "num_tokens": 1738364118.0, "step": 2745 }, { "epoch": 0.3247014307674116, "grad_norm": 0.1412487030029297, "learning_rate": 4.861734750195745e-05, "loss": 0.335, "num_tokens": 1739000213.0, "step": 2746 }, { "epoch": 0.3248196760080407, "grad_norm": 0.15604901313781738, "learning_rate": 4.860891122046929e-05, "loss": 0.3831, "num_tokens": 1739635841.0, "step": 2747 }, { "epoch": 0.3249379212486697, "grad_norm": 0.13909344375133514, "learning_rate": 4.860047264955344e-05, "loss": 0.3143, "num_tokens": 1740270727.0, "step": 2748 }, { "epoch": 0.3250561664892988, "grad_norm": 0.14199915528297424, "learning_rate": 4.8592031790447646e-05, "loss": 0.3181, "num_tokens": 1740910145.0, "step": 2749 }, { "epoch": 0.3251744117299279, "grad_norm": 0.14924414455890656, "learning_rate": 4.858358864438997e-05, "loss": 0.3401, "num_tokens": 1741545955.0, "step": 2750 }, { "epoch": 0.3252926569705569, "grad_norm": 0.14285029470920563, "learning_rate": 4.8575143212618795e-05, "loss": 0.3478, "num_tokens": 1742182116.0, "step": 2751 }, { "epoch": 0.325410902211186, "grad_norm": 0.149746835231781, "learning_rate": 4.856669549637285e-05, "loss": 0.3566, "num_tokens": 1742817038.0, "step": 2752 }, { "epoch": 0.32552914745181505, "grad_norm": 0.15099166333675385, "learning_rate": 4.8558245496891206e-05, "loss": 0.3305, "num_tokens": 1743447171.0, "step": 2753 }, { "epoch": 0.32564739269244414, "grad_norm": 0.14623211324214935, "learning_rate": 4.854979321541324e-05, "loss": 0.3118, "num_tokens": 1744082013.0, "step": 2754 }, { "epoch": 0.32576563793307317, "grad_norm": 0.1376602053642273, "learning_rate": 4.8541338653178716e-05, "loss": 0.375, "num_tokens": 1744718914.0, "step": 2755 }, { "epoch": 0.32588388317370226, "grad_norm": 0.1466083526611328, "learning_rate": 4.853288181142769e-05, "loss": 0.3613, "num_tokens": 1745347427.0, "step": 2756 }, { "epoch": 0.32600212841433135, "grad_norm": 0.14580032229423523, "learning_rate": 4.852442269140055e-05, "loss": 0.3606, "num_tokens": 1745987038.0, "step": 2757 }, { "epoch": 0.3261203736549604, "grad_norm": 0.1459808051586151, "learning_rate": 4.8515961294338055e-05, "loss": 0.3726, "num_tokens": 1746619830.0, "step": 2758 }, { "epoch": 0.32623861889558947, "grad_norm": 0.1493554264307022, "learning_rate": 4.8507497621481254e-05, "loss": 0.4027, "num_tokens": 1747252651.0, "step": 2759 }, { "epoch": 0.3263568641362185, "grad_norm": 0.1678636074066162, "learning_rate": 4.8499031674071566e-05, "loss": 0.3821, "num_tokens": 1747887273.0, "step": 2760 }, { "epoch": 0.3264751093768476, "grad_norm": 0.1365593522787094, "learning_rate": 4.849056345335073e-05, "loss": 0.3456, "num_tokens": 1748523868.0, "step": 2761 }, { "epoch": 0.32659335461747663, "grad_norm": 0.14750993251800537, "learning_rate": 4.84820929605608e-05, "loss": 0.3518, "num_tokens": 1749155294.0, "step": 2762 }, { "epoch": 0.3267115998581057, "grad_norm": 0.1401047259569168, "learning_rate": 4.84736201969442e-05, "loss": 0.3367, "num_tokens": 1749791054.0, "step": 2763 }, { "epoch": 0.32682984509873475, "grad_norm": 0.1554698795080185, "learning_rate": 4.8465145163743653e-05, "loss": 0.3274, "num_tokens": 1750428935.0, "step": 2764 }, { "epoch": 0.32694809033936384, "grad_norm": 0.1612074226140976, "learning_rate": 4.845666786220224e-05, "loss": 0.386, "num_tokens": 1751065445.0, "step": 2765 }, { "epoch": 0.32706633557999293, "grad_norm": 0.1391248255968094, "learning_rate": 4.844818829356336e-05, "loss": 0.3615, "num_tokens": 1751702074.0, "step": 2766 }, { "epoch": 0.32718458082062196, "grad_norm": 0.1479518860578537, "learning_rate": 4.843970645907075e-05, "loss": 0.3646, "num_tokens": 1752338344.0, "step": 2767 }, { "epoch": 0.32730282606125105, "grad_norm": 0.14084507524967194, "learning_rate": 4.8431222359968476e-05, "loss": 0.3735, "num_tokens": 1752976568.0, "step": 2768 }, { "epoch": 0.3274210713018801, "grad_norm": 0.15142183005809784, "learning_rate": 4.842273599750093e-05, "loss": 0.3435, "num_tokens": 1753603003.0, "step": 2769 }, { "epoch": 0.3275393165425092, "grad_norm": 0.15152017772197723, "learning_rate": 4.8414247372912865e-05, "loss": 0.3564, "num_tokens": 1754241935.0, "step": 2770 }, { "epoch": 0.3276575617831382, "grad_norm": 0.14922118186950684, "learning_rate": 4.8405756487449324e-05, "loss": 0.3781, "num_tokens": 1754872359.0, "step": 2771 }, { "epoch": 0.3277758070237673, "grad_norm": 0.12546834349632263, "learning_rate": 4.839726334235572e-05, "loss": 0.2964, "num_tokens": 1755511603.0, "step": 2772 }, { "epoch": 0.3278940522643964, "grad_norm": 0.15380531549453735, "learning_rate": 4.8388767938877756e-05, "loss": 0.3763, "num_tokens": 1756143176.0, "step": 2773 }, { "epoch": 0.3280122975050254, "grad_norm": 0.14676882326602936, "learning_rate": 4.838027027826151e-05, "loss": 0.3606, "num_tokens": 1756756307.0, "step": 2774 }, { "epoch": 0.3281305427456545, "grad_norm": 0.14641840755939484, "learning_rate": 4.837177036175338e-05, "loss": 0.3555, "num_tokens": 1757390678.0, "step": 2775 }, { "epoch": 0.32824878798628354, "grad_norm": 0.13358046114444733, "learning_rate": 4.8363268190600056e-05, "loss": 0.3471, "num_tokens": 1758029292.0, "step": 2776 }, { "epoch": 0.32836703322691263, "grad_norm": 0.13722774386405945, "learning_rate": 4.835476376604862e-05, "loss": 0.3651, "num_tokens": 1758663414.0, "step": 2777 }, { "epoch": 0.32848527846754166, "grad_norm": 0.14129462838172913, "learning_rate": 4.834625708934642e-05, "loss": 0.3391, "num_tokens": 1759297216.0, "step": 2778 }, { "epoch": 0.32860352370817075, "grad_norm": 0.1524050533771515, "learning_rate": 4.8337748161741207e-05, "loss": 0.3513, "num_tokens": 1759927411.0, "step": 2779 }, { "epoch": 0.3287217689487998, "grad_norm": 0.1645013988018036, "learning_rate": 4.8329236984480994e-05, "loss": 0.3733, "num_tokens": 1760561865.0, "step": 2780 }, { "epoch": 0.3288400141894289, "grad_norm": 0.15018771588802338, "learning_rate": 4.832072355881417e-05, "loss": 0.392, "num_tokens": 1761198485.0, "step": 2781 }, { "epoch": 0.32895825943005796, "grad_norm": 0.1390950083732605, "learning_rate": 4.83122078859894e-05, "loss": 0.3108, "num_tokens": 1761832996.0, "step": 2782 }, { "epoch": 0.329076504670687, "grad_norm": 0.16477707028388977, "learning_rate": 4.8303689967255775e-05, "loss": 0.3895, "num_tokens": 1762469073.0, "step": 2783 }, { "epoch": 0.3291947499113161, "grad_norm": 0.16156215965747833, "learning_rate": 4.829516980386261e-05, "loss": 0.3398, "num_tokens": 1763102453.0, "step": 2784 }, { "epoch": 0.3293129951519451, "grad_norm": 0.15173323452472687, "learning_rate": 4.8286647397059604e-05, "loss": 0.3844, "num_tokens": 1763740288.0, "step": 2785 }, { "epoch": 0.3294312403925742, "grad_norm": 0.1569770872592926, "learning_rate": 4.827812274809678e-05, "loss": 0.3815, "num_tokens": 1764368292.0, "step": 2786 }, { "epoch": 0.32954948563320324, "grad_norm": 0.14676645398139954, "learning_rate": 4.8269595858224505e-05, "loss": 0.3707, "num_tokens": 1764998177.0, "step": 2787 }, { "epoch": 0.32966773087383233, "grad_norm": 0.13235852122306824, "learning_rate": 4.826106672869341e-05, "loss": 0.3223, "num_tokens": 1765629891.0, "step": 2788 }, { "epoch": 0.32978597611446137, "grad_norm": 0.13438676297664642, "learning_rate": 4.825253536075454e-05, "loss": 0.3604, "num_tokens": 1766266964.0, "step": 2789 }, { "epoch": 0.32990422135509045, "grad_norm": 0.13858480751514435, "learning_rate": 4.824400175565922e-05, "loss": 0.356, "num_tokens": 1766904529.0, "step": 2790 }, { "epoch": 0.33002246659571954, "grad_norm": 0.14423711597919464, "learning_rate": 4.8235465914659096e-05, "loss": 0.3804, "num_tokens": 1767537573.0, "step": 2791 }, { "epoch": 0.3301407118363486, "grad_norm": 0.13321511447429657, "learning_rate": 4.822692783900617e-05, "loss": 0.3671, "num_tokens": 1768173295.0, "step": 2792 }, { "epoch": 0.33025895707697767, "grad_norm": 0.14741672575473785, "learning_rate": 4.8218387529952753e-05, "loss": 0.4, "num_tokens": 1768810003.0, "step": 2793 }, { "epoch": 0.3303772023176067, "grad_norm": 0.14085063338279724, "learning_rate": 4.82098449887515e-05, "loss": 0.3682, "num_tokens": 1769440243.0, "step": 2794 }, { "epoch": 0.3304954475582358, "grad_norm": 0.13563644886016846, "learning_rate": 4.820130021665538e-05, "loss": 0.357, "num_tokens": 1770079311.0, "step": 2795 }, { "epoch": 0.3306136927988648, "grad_norm": 0.1393500417470932, "learning_rate": 4.819275321491768e-05, "loss": 0.3555, "num_tokens": 1770715195.0, "step": 2796 }, { "epoch": 0.3307319380394939, "grad_norm": 0.159691721200943, "learning_rate": 4.818420398479204e-05, "loss": 0.341, "num_tokens": 1771349599.0, "step": 2797 }, { "epoch": 0.330850183280123, "grad_norm": 0.14084549248218536, "learning_rate": 4.817565252753241e-05, "loss": 0.3526, "num_tokens": 1771943463.0, "step": 2798 }, { "epoch": 0.33096842852075203, "grad_norm": 0.1310177445411682, "learning_rate": 4.816709884439306e-05, "loss": 0.3482, "num_tokens": 1772581089.0, "step": 2799 }, { "epoch": 0.3310866737613811, "grad_norm": 0.15061940252780914, "learning_rate": 4.81585429366286e-05, "loss": 0.3558, "num_tokens": 1773213267.0, "step": 2800 }, { "epoch": 0.33120491900201016, "grad_norm": 0.1381532847881317, "learning_rate": 4.814998480549398e-05, "loss": 0.3326, "num_tokens": 1773843997.0, "step": 2801 }, { "epoch": 0.33132316424263925, "grad_norm": 0.1474015861749649, "learning_rate": 4.814142445224445e-05, "loss": 0.3572, "num_tokens": 1774443566.0, "step": 2802 }, { "epoch": 0.3314414094832683, "grad_norm": 0.14934299886226654, "learning_rate": 4.813286187813558e-05, "loss": 0.3749, "num_tokens": 1775076095.0, "step": 2803 }, { "epoch": 0.33155965472389737, "grad_norm": 0.14532700181007385, "learning_rate": 4.8124297084423294e-05, "loss": 0.3917, "num_tokens": 1775705995.0, "step": 2804 }, { "epoch": 0.3316778999645264, "grad_norm": 0.13150621950626373, "learning_rate": 4.811573007236381e-05, "loss": 0.3546, "num_tokens": 1776340157.0, "step": 2805 }, { "epoch": 0.3317961452051555, "grad_norm": 0.13967682421207428, "learning_rate": 4.8107160843213726e-05, "loss": 0.348, "num_tokens": 1776975868.0, "step": 2806 }, { "epoch": 0.3319143904457846, "grad_norm": 0.14232183992862701, "learning_rate": 4.8098589398229895e-05, "loss": 0.356, "num_tokens": 1777609915.0, "step": 2807 }, { "epoch": 0.3320326356864136, "grad_norm": 0.13820357620716095, "learning_rate": 4.8090015738669545e-05, "loss": 0.3403, "num_tokens": 1778243872.0, "step": 2808 }, { "epoch": 0.3321508809270427, "grad_norm": 0.13904014229774475, "learning_rate": 4.808143986579021e-05, "loss": 0.3622, "num_tokens": 1778876930.0, "step": 2809 }, { "epoch": 0.33226912616767174, "grad_norm": 0.14760786294937134, "learning_rate": 4.807286178084974e-05, "loss": 0.3645, "num_tokens": 1779509817.0, "step": 2810 }, { "epoch": 0.3323873714083008, "grad_norm": 0.16146086156368256, "learning_rate": 4.806428148510634e-05, "loss": 0.3917, "num_tokens": 1780144087.0, "step": 2811 }, { "epoch": 0.33250561664892986, "grad_norm": 0.15193748474121094, "learning_rate": 4.8055698979818504e-05, "loss": 0.3657, "num_tokens": 1780778006.0, "step": 2812 }, { "epoch": 0.33262386188955895, "grad_norm": 0.13543261587619781, "learning_rate": 4.804711426624507e-05, "loss": 0.3344, "num_tokens": 1781416088.0, "step": 2813 }, { "epoch": 0.33274210713018804, "grad_norm": 0.1693994402885437, "learning_rate": 4.8038527345645196e-05, "loss": 0.3713, "num_tokens": 1782052246.0, "step": 2814 }, { "epoch": 0.33286035237081707, "grad_norm": 0.15552793443202972, "learning_rate": 4.802993821927837e-05, "loss": 0.3567, "num_tokens": 1782691473.0, "step": 2815 }, { "epoch": 0.33297859761144616, "grad_norm": 0.14168085157871246, "learning_rate": 4.8021346888404374e-05, "loss": 0.3185, "num_tokens": 1783330384.0, "step": 2816 }, { "epoch": 0.3330968428520752, "grad_norm": 0.16095520555973053, "learning_rate": 4.801275335428337e-05, "loss": 0.3409, "num_tokens": 1783965374.0, "step": 2817 }, { "epoch": 0.3332150880927043, "grad_norm": 0.15037092566490173, "learning_rate": 4.800415761817579e-05, "loss": 0.3774, "num_tokens": 1784603503.0, "step": 2818 }, { "epoch": 0.3333333333333333, "grad_norm": 0.15443269908428192, "learning_rate": 4.799555968134239e-05, "loss": 0.3544, "num_tokens": 1785238028.0, "step": 2819 }, { "epoch": 0.3334515785739624, "grad_norm": 0.15115155279636383, "learning_rate": 4.798695954504431e-05, "loss": 0.3558, "num_tokens": 1785869848.0, "step": 2820 }, { "epoch": 0.33356982381459144, "grad_norm": 0.13545335829257965, "learning_rate": 4.7978357210542945e-05, "loss": 0.375, "num_tokens": 1786501160.0, "step": 2821 }, { "epoch": 0.3336880690552205, "grad_norm": 0.15126368403434753, "learning_rate": 4.796975267910004e-05, "loss": 0.377, "num_tokens": 1787104710.0, "step": 2822 }, { "epoch": 0.3338063142958496, "grad_norm": 0.14343926310539246, "learning_rate": 4.796114595197766e-05, "loss": 0.347, "num_tokens": 1787744074.0, "step": 2823 }, { "epoch": 0.33392455953647865, "grad_norm": 0.1455092579126358, "learning_rate": 4.795253703043818e-05, "loss": 0.3853, "num_tokens": 1788378236.0, "step": 2824 }, { "epoch": 0.33404280477710774, "grad_norm": 0.15548789501190186, "learning_rate": 4.794392591574433e-05, "loss": 0.3446, "num_tokens": 1789007341.0, "step": 2825 }, { "epoch": 0.33416105001773677, "grad_norm": 0.15260563790798187, "learning_rate": 4.793531260915913e-05, "loss": 0.3851, "num_tokens": 1789638744.0, "step": 2826 }, { "epoch": 0.33427929525836586, "grad_norm": 0.15907683968544006, "learning_rate": 4.792669711194593e-05, "loss": 0.3539, "num_tokens": 1790273792.0, "step": 2827 }, { "epoch": 0.3343975404989949, "grad_norm": 0.14510215818881989, "learning_rate": 4.791807942536839e-05, "loss": 0.3476, "num_tokens": 1790909364.0, "step": 2828 }, { "epoch": 0.334515785739624, "grad_norm": 0.14346998929977417, "learning_rate": 4.7909459550690536e-05, "loss": 0.3521, "num_tokens": 1791545003.0, "step": 2829 }, { "epoch": 0.33463403098025307, "grad_norm": 0.1681375503540039, "learning_rate": 4.7900837489176653e-05, "loss": 0.3214, "num_tokens": 1792184154.0, "step": 2830 }, { "epoch": 0.3347522762208821, "grad_norm": 0.14276370406150818, "learning_rate": 4.7892213242091406e-05, "loss": 0.3358, "num_tokens": 1792815485.0, "step": 2831 }, { "epoch": 0.3348705214615112, "grad_norm": 0.1565297245979309, "learning_rate": 4.788358681069972e-05, "loss": 0.3506, "num_tokens": 1793450894.0, "step": 2832 }, { "epoch": 0.33498876670214023, "grad_norm": 0.14426593482494354, "learning_rate": 4.787495819626689e-05, "loss": 0.3953, "num_tokens": 1794090337.0, "step": 2833 }, { "epoch": 0.3351070119427693, "grad_norm": 0.17249426245689392, "learning_rate": 4.7866327400058506e-05, "loss": 0.3978, "num_tokens": 1794722209.0, "step": 2834 }, { "epoch": 0.33522525718339835, "grad_norm": 0.15101750195026398, "learning_rate": 4.7857694423340485e-05, "loss": 0.3846, "num_tokens": 1795357255.0, "step": 2835 }, { "epoch": 0.33534350242402744, "grad_norm": 0.14513811469078064, "learning_rate": 4.7849059267379065e-05, "loss": 0.3454, "num_tokens": 1795989398.0, "step": 2836 }, { "epoch": 0.3354617476646565, "grad_norm": 0.15637539327144623, "learning_rate": 4.784042193344081e-05, "loss": 0.3488, "num_tokens": 1796622060.0, "step": 2837 }, { "epoch": 0.33557999290528556, "grad_norm": 0.40818721055984497, "learning_rate": 4.783178242279259e-05, "loss": 0.4106, "num_tokens": 1797258120.0, "step": 2838 }, { "epoch": 0.33569823814591465, "grad_norm": 0.19617289304733276, "learning_rate": 4.782314073670159e-05, "loss": 0.3226, "num_tokens": 1797884875.0, "step": 2839 }, { "epoch": 0.3358164833865437, "grad_norm": 0.1506223827600479, "learning_rate": 4.781449687643534e-05, "loss": 0.3291, "num_tokens": 1798522128.0, "step": 2840 }, { "epoch": 0.3359347286271728, "grad_norm": 0.15604569017887115, "learning_rate": 4.780585084326166e-05, "loss": 0.4012, "num_tokens": 1799157252.0, "step": 2841 }, { "epoch": 0.3360529738678018, "grad_norm": 0.15093180537223816, "learning_rate": 4.779720263844872e-05, "loss": 0.3714, "num_tokens": 1799785675.0, "step": 2842 }, { "epoch": 0.3361712191084309, "grad_norm": 0.15395116806030273, "learning_rate": 4.778855226326496e-05, "loss": 0.3786, "num_tokens": 1800424932.0, "step": 2843 }, { "epoch": 0.33628946434905993, "grad_norm": 0.15151748061180115, "learning_rate": 4.77798997189792e-05, "loss": 0.3558, "num_tokens": 1801061568.0, "step": 2844 }, { "epoch": 0.336407709589689, "grad_norm": 0.1561281383037567, "learning_rate": 4.777124500686053e-05, "loss": 0.3588, "num_tokens": 1801701017.0, "step": 2845 }, { "epoch": 0.33652595483031805, "grad_norm": 0.1465723216533661, "learning_rate": 4.776258812817838e-05, "loss": 0.3713, "num_tokens": 1802333249.0, "step": 2846 }, { "epoch": 0.33664420007094714, "grad_norm": 0.14940758049488068, "learning_rate": 4.7753929084202484e-05, "loss": 0.3975, "num_tokens": 1802972590.0, "step": 2847 }, { "epoch": 0.33676244531157623, "grad_norm": 0.13902883231639862, "learning_rate": 4.774526787620292e-05, "loss": 0.3267, "num_tokens": 1803604950.0, "step": 2848 }, { "epoch": 0.33688069055220526, "grad_norm": 0.14377443492412567, "learning_rate": 4.773660450545004e-05, "loss": 0.3276, "num_tokens": 1804236690.0, "step": 2849 }, { "epoch": 0.33699893579283435, "grad_norm": 0.16051773726940155, "learning_rate": 4.7727938973214556e-05, "loss": 0.3515, "num_tokens": 1804867327.0, "step": 2850 }, { "epoch": 0.3371171810334634, "grad_norm": 0.16432417929172516, "learning_rate": 4.7719271280767485e-05, "loss": 0.3616, "num_tokens": 1805500620.0, "step": 2851 }, { "epoch": 0.3372354262740925, "grad_norm": 0.14588308334350586, "learning_rate": 4.7710601429380144e-05, "loss": 0.3615, "num_tokens": 1806133631.0, "step": 2852 }, { "epoch": 0.3373536715147215, "grad_norm": 0.1458955556154251, "learning_rate": 4.770192942032417e-05, "loss": 0.364, "num_tokens": 1806770074.0, "step": 2853 }, { "epoch": 0.3374719167553506, "grad_norm": 0.16170309484004974, "learning_rate": 4.769325525487155e-05, "loss": 0.3564, "num_tokens": 1807409537.0, "step": 2854 }, { "epoch": 0.3375901619959797, "grad_norm": 0.1301371157169342, "learning_rate": 4.7684578934294546e-05, "loss": 0.3394, "num_tokens": 1808046587.0, "step": 2855 }, { "epoch": 0.3377084072366087, "grad_norm": 0.1458226889371872, "learning_rate": 4.767590045986575e-05, "loss": 0.39, "num_tokens": 1808680179.0, "step": 2856 }, { "epoch": 0.3378266524772378, "grad_norm": 0.1311877816915512, "learning_rate": 4.7667219832858076e-05, "loss": 0.3376, "num_tokens": 1809316391.0, "step": 2857 }, { "epoch": 0.33794489771786684, "grad_norm": 0.12875714898109436, "learning_rate": 4.765853705454476e-05, "loss": 0.302, "num_tokens": 1809948986.0, "step": 2858 }, { "epoch": 0.33806314295849593, "grad_norm": 0.15500520169734955, "learning_rate": 4.7649852126199325e-05, "loss": 0.3754, "num_tokens": 1810577100.0, "step": 2859 }, { "epoch": 0.33818138819912497, "grad_norm": 0.15637916326522827, "learning_rate": 4.7641165049095634e-05, "loss": 0.3777, "num_tokens": 1811211410.0, "step": 2860 }, { "epoch": 0.33829963343975405, "grad_norm": 0.1540553718805313, "learning_rate": 4.763247582450787e-05, "loss": 0.3868, "num_tokens": 1811848333.0, "step": 2861 }, { "epoch": 0.3384178786803831, "grad_norm": 0.14837461709976196, "learning_rate": 4.76237844537105e-05, "loss": 0.3977, "num_tokens": 1812478057.0, "step": 2862 }, { "epoch": 0.3385361239210122, "grad_norm": 0.14300787448883057, "learning_rate": 4.761509093797834e-05, "loss": 0.3611, "num_tokens": 1813084620.0, "step": 2863 }, { "epoch": 0.33865436916164127, "grad_norm": 0.14363257586956024, "learning_rate": 4.76063952785865e-05, "loss": 0.3517, "num_tokens": 1813721460.0, "step": 2864 }, { "epoch": 0.3387726144022703, "grad_norm": 0.324947714805603, "learning_rate": 4.7597697476810416e-05, "loss": 0.386, "num_tokens": 1814329304.0, "step": 2865 }, { "epoch": 0.3388908596428994, "grad_norm": 0.1495596170425415, "learning_rate": 4.758899753392583e-05, "loss": 0.3238, "num_tokens": 1814966717.0, "step": 2866 }, { "epoch": 0.3390091048835284, "grad_norm": 0.16530923545360565, "learning_rate": 4.7580295451208795e-05, "loss": 0.3608, "num_tokens": 1815599043.0, "step": 2867 }, { "epoch": 0.3391273501241575, "grad_norm": 0.1510806828737259, "learning_rate": 4.757159122993569e-05, "loss": 0.3886, "num_tokens": 1816237585.0, "step": 2868 }, { "epoch": 0.33924559536478655, "grad_norm": 0.1654408574104309, "learning_rate": 4.756288487138321e-05, "loss": 0.3402, "num_tokens": 1816876037.0, "step": 2869 }, { "epoch": 0.33936384060541563, "grad_norm": 0.14748889207839966, "learning_rate": 4.755417637682834e-05, "loss": 0.3382, "num_tokens": 1817508149.0, "step": 2870 }, { "epoch": 0.3394820858460447, "grad_norm": 0.17896157503128052, "learning_rate": 4.75454657475484e-05, "loss": 0.4083, "num_tokens": 1818137499.0, "step": 2871 }, { "epoch": 0.33960033108667376, "grad_norm": 0.15122370421886444, "learning_rate": 4.753675298482101e-05, "loss": 0.3888, "num_tokens": 1818771241.0, "step": 2872 }, { "epoch": 0.33971857632730285, "grad_norm": 0.14598627388477325, "learning_rate": 4.7528038089924115e-05, "loss": 0.3367, "num_tokens": 1819395842.0, "step": 2873 }, { "epoch": 0.3398368215679319, "grad_norm": 0.14506401121616364, "learning_rate": 4.7519321064135974e-05, "loss": 0.3403, "num_tokens": 1820035357.0, "step": 2874 }, { "epoch": 0.33995506680856097, "grad_norm": 0.1349652111530304, "learning_rate": 4.751060190873514e-05, "loss": 0.3475, "num_tokens": 1820668998.0, "step": 2875 }, { "epoch": 0.34007331204919, "grad_norm": 0.16016077995300293, "learning_rate": 4.7501880625000484e-05, "loss": 0.3713, "num_tokens": 1821303202.0, "step": 2876 }, { "epoch": 0.3401915572898191, "grad_norm": 0.14127948880195618, "learning_rate": 4.749315721421123e-05, "loss": 0.3726, "num_tokens": 1821924141.0, "step": 2877 }, { "epoch": 0.3403098025304481, "grad_norm": 0.15265628695487976, "learning_rate": 4.748443167764683e-05, "loss": 0.3859, "num_tokens": 1822560446.0, "step": 2878 }, { "epoch": 0.3404280477710772, "grad_norm": 0.14217644929885864, "learning_rate": 4.7475704016587136e-05, "loss": 0.3446, "num_tokens": 1823197242.0, "step": 2879 }, { "epoch": 0.3405462930117063, "grad_norm": 0.1512807309627533, "learning_rate": 4.746697423231224e-05, "loss": 0.3712, "num_tokens": 1823836271.0, "step": 2880 }, { "epoch": 0.34066453825233534, "grad_norm": 0.14982761442661285, "learning_rate": 4.745824232610261e-05, "loss": 0.3603, "num_tokens": 1824474727.0, "step": 2881 }, { "epoch": 0.3407827834929644, "grad_norm": 0.1423172652721405, "learning_rate": 4.744950829923897e-05, "loss": 0.3423, "num_tokens": 1825105880.0, "step": 2882 }, { "epoch": 0.34090102873359346, "grad_norm": 0.14585795998573303, "learning_rate": 4.74407721530024e-05, "loss": 0.3462, "num_tokens": 1825740391.0, "step": 2883 }, { "epoch": 0.34101927397422255, "grad_norm": 0.13540300726890564, "learning_rate": 4.743203388867423e-05, "loss": 0.3281, "num_tokens": 1826370386.0, "step": 2884 }, { "epoch": 0.3411375192148516, "grad_norm": 0.13934727013111115, "learning_rate": 4.742329350753619e-05, "loss": 0.3504, "num_tokens": 1827006893.0, "step": 2885 }, { "epoch": 0.34125576445548067, "grad_norm": 0.15579134225845337, "learning_rate": 4.741455101087023e-05, "loss": 0.3771, "num_tokens": 1827637658.0, "step": 2886 }, { "epoch": 0.34137400969610976, "grad_norm": 0.14700733125209808, "learning_rate": 4.7405806399958674e-05, "loss": 0.3779, "num_tokens": 1828272786.0, "step": 2887 }, { "epoch": 0.3414922549367388, "grad_norm": 0.1470695286989212, "learning_rate": 4.739705967608411e-05, "loss": 0.3705, "num_tokens": 1828905476.0, "step": 2888 }, { "epoch": 0.3416105001773679, "grad_norm": 0.14463859796524048, "learning_rate": 4.738831084052949e-05, "loss": 0.3292, "num_tokens": 1829542302.0, "step": 2889 }, { "epoch": 0.3417287454179969, "grad_norm": 0.17020845413208008, "learning_rate": 4.737955989457802e-05, "loss": 0.3708, "num_tokens": 1830178787.0, "step": 2890 }, { "epoch": 0.341846990658626, "grad_norm": 0.15143021941184998, "learning_rate": 4.737080683951324e-05, "loss": 0.3539, "num_tokens": 1830810891.0, "step": 2891 }, { "epoch": 0.34196523589925504, "grad_norm": 0.14513170719146729, "learning_rate": 4.736205167661902e-05, "loss": 0.3503, "num_tokens": 1831436883.0, "step": 2892 }, { "epoch": 0.3420834811398841, "grad_norm": 0.17289677262306213, "learning_rate": 4.735329440717949e-05, "loss": 0.3938, "num_tokens": 1832073337.0, "step": 2893 }, { "epoch": 0.34220172638051316, "grad_norm": 0.1480056792497635, "learning_rate": 4.7344535032479136e-05, "loss": 0.3549, "num_tokens": 1832703806.0, "step": 2894 }, { "epoch": 0.34231997162114225, "grad_norm": 0.14798828959465027, "learning_rate": 4.733577355380273e-05, "loss": 0.3671, "num_tokens": 1833336901.0, "step": 2895 }, { "epoch": 0.34243821686177134, "grad_norm": 0.13571372628211975, "learning_rate": 4.7327009972435356e-05, "loss": 0.3643, "num_tokens": 1833973097.0, "step": 2896 }, { "epoch": 0.34255646210240037, "grad_norm": 0.1593737155199051, "learning_rate": 4.73182442896624e-05, "loss": 0.3466, "num_tokens": 1834607109.0, "step": 2897 }, { "epoch": 0.34267470734302946, "grad_norm": 0.15300682187080383, "learning_rate": 4.730947650676958e-05, "loss": 0.4053, "num_tokens": 1835232952.0, "step": 2898 }, { "epoch": 0.3427929525836585, "grad_norm": 0.13834519684314728, "learning_rate": 4.7300706625042894e-05, "loss": 0.3632, "num_tokens": 1835863458.0, "step": 2899 }, { "epoch": 0.3429111978242876, "grad_norm": 0.12862101197242737, "learning_rate": 4.7291934645768656e-05, "loss": 0.3292, "num_tokens": 1836495903.0, "step": 2900 }, { "epoch": 0.3430294430649166, "grad_norm": 0.14464174211025238, "learning_rate": 4.72831605702335e-05, "loss": 0.3903, "num_tokens": 1837134389.0, "step": 2901 }, { "epoch": 0.3431476883055457, "grad_norm": 0.15153537690639496, "learning_rate": 4.727438439972435e-05, "loss": 0.3498, "num_tokens": 1837762457.0, "step": 2902 }, { "epoch": 0.34326593354617474, "grad_norm": 0.1356191188097, "learning_rate": 4.726560613552845e-05, "loss": 0.3741, "num_tokens": 1838398393.0, "step": 2903 }, { "epoch": 0.34338417878680383, "grad_norm": 0.1489383578300476, "learning_rate": 4.725682577893335e-05, "loss": 0.3795, "num_tokens": 1839033027.0, "step": 2904 }, { "epoch": 0.3435024240274329, "grad_norm": 0.1578216552734375, "learning_rate": 4.7248043331226884e-05, "loss": 0.3711, "num_tokens": 1839667973.0, "step": 2905 }, { "epoch": 0.34362066926806195, "grad_norm": 0.15372967720031738, "learning_rate": 4.723925879369724e-05, "loss": 0.3499, "num_tokens": 1840279684.0, "step": 2906 }, { "epoch": 0.34373891450869104, "grad_norm": 0.1414826661348343, "learning_rate": 4.7230472167632866e-05, "loss": 0.3423, "num_tokens": 1840911990.0, "step": 2907 }, { "epoch": 0.3438571597493201, "grad_norm": 0.17094719409942627, "learning_rate": 4.722168345432255e-05, "loss": 0.3907, "num_tokens": 1841548176.0, "step": 2908 }, { "epoch": 0.34397540498994916, "grad_norm": 0.14832277595996857, "learning_rate": 4.721289265505535e-05, "loss": 0.3666, "num_tokens": 1842183835.0, "step": 2909 }, { "epoch": 0.3440936502305782, "grad_norm": 0.14011318981647491, "learning_rate": 4.720409977112068e-05, "loss": 0.3193, "num_tokens": 1842819063.0, "step": 2910 }, { "epoch": 0.3442118954712073, "grad_norm": 0.13888181746006012, "learning_rate": 4.719530480380821e-05, "loss": 0.3109, "num_tokens": 1843452364.0, "step": 2911 }, { "epoch": 0.3443301407118364, "grad_norm": 0.16240963339805603, "learning_rate": 4.718650775440792e-05, "loss": 0.3456, "num_tokens": 1844083652.0, "step": 2912 }, { "epoch": 0.3444483859524654, "grad_norm": 0.1497032195329666, "learning_rate": 4.717770862421015e-05, "loss": 0.3344, "num_tokens": 1844719281.0, "step": 2913 }, { "epoch": 0.3445666311930945, "grad_norm": 0.15329526364803314, "learning_rate": 4.7168907414505494e-05, "loss": 0.3801, "num_tokens": 1845357088.0, "step": 2914 }, { "epoch": 0.34468487643372353, "grad_norm": 0.14835692942142487, "learning_rate": 4.716010412658485e-05, "loss": 0.3487, "num_tokens": 1845992970.0, "step": 2915 }, { "epoch": 0.3448031216743526, "grad_norm": 0.16792121529579163, "learning_rate": 4.715129876173945e-05, "loss": 0.3875, "num_tokens": 1846617786.0, "step": 2916 }, { "epoch": 0.34492136691498165, "grad_norm": 0.15644723176956177, "learning_rate": 4.714249132126082e-05, "loss": 0.3423, "num_tokens": 1847244982.0, "step": 2917 }, { "epoch": 0.34503961215561074, "grad_norm": 0.1410672664642334, "learning_rate": 4.7133681806440764e-05, "loss": 0.3464, "num_tokens": 1847879407.0, "step": 2918 }, { "epoch": 0.3451578573962398, "grad_norm": 0.14493677020072937, "learning_rate": 4.712487021857143e-05, "loss": 0.3634, "num_tokens": 1848505118.0, "step": 2919 }, { "epoch": 0.34527610263686886, "grad_norm": 0.15189194679260254, "learning_rate": 4.711605655894525e-05, "loss": 0.3889, "num_tokens": 1849136589.0, "step": 2920 }, { "epoch": 0.34539434787749795, "grad_norm": 0.15531735122203827, "learning_rate": 4.710724082885496e-05, "loss": 0.3425, "num_tokens": 1849769736.0, "step": 2921 }, { "epoch": 0.345512593118127, "grad_norm": 0.1424323171377182, "learning_rate": 4.7098423029593596e-05, "loss": 0.3427, "num_tokens": 1850405648.0, "step": 2922 }, { "epoch": 0.3456308383587561, "grad_norm": 0.14800211787223816, "learning_rate": 4.708960316245451e-05, "loss": 0.3684, "num_tokens": 1851035267.0, "step": 2923 }, { "epoch": 0.3457490835993851, "grad_norm": 0.1353311985731125, "learning_rate": 4.7080781228731354e-05, "loss": 0.3235, "num_tokens": 1851668535.0, "step": 2924 }, { "epoch": 0.3458673288400142, "grad_norm": 0.14098647236824036, "learning_rate": 4.7071957229718075e-05, "loss": 0.3685, "num_tokens": 1852301873.0, "step": 2925 }, { "epoch": 0.34598557408064323, "grad_norm": 0.14755034446716309, "learning_rate": 4.706313116670893e-05, "loss": 0.3637, "num_tokens": 1852934185.0, "step": 2926 }, { "epoch": 0.3461038193212723, "grad_norm": 0.14654842019081116, "learning_rate": 4.7054303040998464e-05, "loss": 0.3511, "num_tokens": 1853566151.0, "step": 2927 }, { "epoch": 0.3462220645619014, "grad_norm": 0.1352064609527588, "learning_rate": 4.704547285388156e-05, "loss": 0.3399, "num_tokens": 1854200901.0, "step": 2928 }, { "epoch": 0.34634030980253044, "grad_norm": 0.14605306088924408, "learning_rate": 4.703664060665336e-05, "loss": 0.3342, "num_tokens": 1854836573.0, "step": 2929 }, { "epoch": 0.34645855504315953, "grad_norm": 0.14036063849925995, "learning_rate": 4.7027806300609344e-05, "loss": 0.3232, "num_tokens": 1855471194.0, "step": 2930 }, { "epoch": 0.34657680028378857, "grad_norm": 0.1484951376914978, "learning_rate": 4.701896993704527e-05, "loss": 0.3581, "num_tokens": 1856095607.0, "step": 2931 }, { "epoch": 0.34669504552441766, "grad_norm": 0.14494763314723969, "learning_rate": 4.7010131517257214e-05, "loss": 0.3438, "num_tokens": 1856729049.0, "step": 2932 }, { "epoch": 0.3468132907650467, "grad_norm": 0.14176543056964874, "learning_rate": 4.700129104254153e-05, "loss": 0.4119, "num_tokens": 1857364125.0, "step": 2933 }, { "epoch": 0.3469315360056758, "grad_norm": 0.16467148065567017, "learning_rate": 4.699244851419491e-05, "loss": 0.365, "num_tokens": 1858002062.0, "step": 2934 }, { "epoch": 0.3470497812463048, "grad_norm": 0.13335615396499634, "learning_rate": 4.6983603933514296e-05, "loss": 0.3299, "num_tokens": 1858636925.0, "step": 2935 }, { "epoch": 0.3471680264869339, "grad_norm": 0.13366688787937164, "learning_rate": 4.697475730179699e-05, "loss": 0.3376, "num_tokens": 1859268047.0, "step": 2936 }, { "epoch": 0.347286271727563, "grad_norm": 0.13884016871452332, "learning_rate": 4.696590862034055e-05, "loss": 0.3684, "num_tokens": 1859905128.0, "step": 2937 }, { "epoch": 0.347404516968192, "grad_norm": 0.15485364198684692, "learning_rate": 4.695705789044287e-05, "loss": 0.3848, "num_tokens": 1860543629.0, "step": 2938 }, { "epoch": 0.3475227622088211, "grad_norm": 0.14109674096107483, "learning_rate": 4.694820511340212e-05, "loss": 0.3354, "num_tokens": 1861183114.0, "step": 2939 }, { "epoch": 0.34764100744945015, "grad_norm": 0.14213067293167114, "learning_rate": 4.693935029051675e-05, "loss": 0.3339, "num_tokens": 1861795480.0, "step": 2940 }, { "epoch": 0.34775925269007923, "grad_norm": 0.12116250395774841, "learning_rate": 4.693049342308557e-05, "loss": 0.3057, "num_tokens": 1862433888.0, "step": 2941 }, { "epoch": 0.34787749793070827, "grad_norm": 0.1404906064271927, "learning_rate": 4.6921634512407636e-05, "loss": 0.333, "num_tokens": 1863068693.0, "step": 2942 }, { "epoch": 0.34799574317133736, "grad_norm": 0.16544196009635925, "learning_rate": 4.6912773559782325e-05, "loss": 0.3922, "num_tokens": 1863706603.0, "step": 2943 }, { "epoch": 0.3481139884119664, "grad_norm": 0.15774746239185333, "learning_rate": 4.690391056650933e-05, "loss": 0.3658, "num_tokens": 1864344213.0, "step": 2944 }, { "epoch": 0.3482322336525955, "grad_norm": 0.1595100611448288, "learning_rate": 4.68950455338886e-05, "loss": 0.3903, "num_tokens": 1864982046.0, "step": 2945 }, { "epoch": 0.34835047889322457, "grad_norm": 0.14110766351222992, "learning_rate": 4.688617846322042e-05, "loss": 0.332, "num_tokens": 1865618867.0, "step": 2946 }, { "epoch": 0.3484687241338536, "grad_norm": 0.17431697249412537, "learning_rate": 4.687730935580537e-05, "loss": 0.3776, "num_tokens": 1866253921.0, "step": 2947 }, { "epoch": 0.3485869693744827, "grad_norm": 0.14813727140426636, "learning_rate": 4.68684382129443e-05, "loss": 0.3826, "num_tokens": 1866886683.0, "step": 2948 }, { "epoch": 0.3487052146151117, "grad_norm": 0.13786371052265167, "learning_rate": 4.6859565035938394e-05, "loss": 0.3443, "num_tokens": 1867521532.0, "step": 2949 }, { "epoch": 0.3488234598557408, "grad_norm": 0.15646624565124512, "learning_rate": 4.685068982608912e-05, "loss": 0.3758, "num_tokens": 1868157789.0, "step": 2950 }, { "epoch": 0.34894170509636985, "grad_norm": 0.14616216719150543, "learning_rate": 4.684181258469825e-05, "loss": 0.324, "num_tokens": 1868788314.0, "step": 2951 }, { "epoch": 0.34905995033699894, "grad_norm": 0.14691859483718872, "learning_rate": 4.683293331306782e-05, "loss": 0.3256, "num_tokens": 1869418468.0, "step": 2952 }, { "epoch": 0.349178195577628, "grad_norm": 0.15224279463291168, "learning_rate": 4.682405201250023e-05, "loss": 0.3737, "num_tokens": 1870046311.0, "step": 2953 }, { "epoch": 0.34929644081825706, "grad_norm": 0.1775824874639511, "learning_rate": 4.681516868429811e-05, "loss": 0.3486, "num_tokens": 1870678110.0, "step": 2954 }, { "epoch": 0.34941468605888615, "grad_norm": 0.16768863797187805, "learning_rate": 4.6806283329764434e-05, "loss": 0.3793, "num_tokens": 1871313390.0, "step": 2955 }, { "epoch": 0.3495329312995152, "grad_norm": 0.15498363971710205, "learning_rate": 4.679739595020245e-05, "loss": 0.3597, "num_tokens": 1871952859.0, "step": 2956 }, { "epoch": 0.34965117654014427, "grad_norm": 0.144927516579628, "learning_rate": 4.678850654691571e-05, "loss": 0.3397, "num_tokens": 1872590080.0, "step": 2957 }, { "epoch": 0.3497694217807733, "grad_norm": 0.1701098531484604, "learning_rate": 4.6779615121208046e-05, "loss": 0.3773, "num_tokens": 1873202597.0, "step": 2958 }, { "epoch": 0.3498876670214024, "grad_norm": 0.1397876888513565, "learning_rate": 4.677072167438363e-05, "loss": 0.3649, "num_tokens": 1873838363.0, "step": 2959 }, { "epoch": 0.3500059122620314, "grad_norm": 0.15177659690380096, "learning_rate": 4.676182620774689e-05, "loss": 0.3974, "num_tokens": 1874476639.0, "step": 2960 }, { "epoch": 0.3501241575026605, "grad_norm": 0.15122610330581665, "learning_rate": 4.675292872260257e-05, "loss": 0.3653, "num_tokens": 1875108762.0, "step": 2961 }, { "epoch": 0.3502424027432896, "grad_norm": 0.13926653563976288, "learning_rate": 4.6744029220255696e-05, "loss": 0.3641, "num_tokens": 1875743254.0, "step": 2962 }, { "epoch": 0.35036064798391864, "grad_norm": 0.14569762349128723, "learning_rate": 4.673512770201159e-05, "loss": 0.3173, "num_tokens": 1876380203.0, "step": 2963 }, { "epoch": 0.3504788932245477, "grad_norm": 0.15503188967704773, "learning_rate": 4.672622416917589e-05, "loss": 0.3605, "num_tokens": 1877016580.0, "step": 2964 }, { "epoch": 0.35059713846517676, "grad_norm": 0.12961065769195557, "learning_rate": 4.67173186230545e-05, "loss": 0.327, "num_tokens": 1877642103.0, "step": 2965 }, { "epoch": 0.35071538370580585, "grad_norm": 0.13387249410152435, "learning_rate": 4.670841106495366e-05, "loss": 0.343, "num_tokens": 1878276993.0, "step": 2966 }, { "epoch": 0.3508336289464349, "grad_norm": 0.1597411036491394, "learning_rate": 4.6699501496179864e-05, "loss": 0.3416, "num_tokens": 1878911903.0, "step": 2967 }, { "epoch": 0.350951874187064, "grad_norm": 0.15502092242240906, "learning_rate": 4.669058991803992e-05, "loss": 0.3795, "num_tokens": 1879545904.0, "step": 2968 }, { "epoch": 0.35107011942769306, "grad_norm": 0.14265687763690948, "learning_rate": 4.668167633184094e-05, "loss": 0.3699, "num_tokens": 1880179811.0, "step": 2969 }, { "epoch": 0.3511883646683221, "grad_norm": 0.14740785956382751, "learning_rate": 4.6672760738890295e-05, "loss": 0.3355, "num_tokens": 1880778705.0, "step": 2970 }, { "epoch": 0.3513066099089512, "grad_norm": 0.14324913918972015, "learning_rate": 4.6663843140495684e-05, "loss": 0.3361, "num_tokens": 1881417786.0, "step": 2971 }, { "epoch": 0.3514248551495802, "grad_norm": 0.14436262845993042, "learning_rate": 4.66549235379651e-05, "loss": 0.3646, "num_tokens": 1882053095.0, "step": 2972 }, { "epoch": 0.3515431003902093, "grad_norm": 0.1565273255109787, "learning_rate": 4.664600193260682e-05, "loss": 0.3854, "num_tokens": 1882691167.0, "step": 2973 }, { "epoch": 0.35166134563083834, "grad_norm": 0.1429137885570526, "learning_rate": 4.66370783257294e-05, "loss": 0.3502, "num_tokens": 1883329201.0, "step": 2974 }, { "epoch": 0.35177959087146743, "grad_norm": 0.14288212358951569, "learning_rate": 4.662815271864173e-05, "loss": 0.3618, "num_tokens": 1883964587.0, "step": 2975 }, { "epoch": 0.35189783611209646, "grad_norm": 0.14582833647727966, "learning_rate": 4.661922511265293e-05, "loss": 0.3623, "num_tokens": 1884597895.0, "step": 2976 }, { "epoch": 0.35201608135272555, "grad_norm": 0.13543705642223358, "learning_rate": 4.6610295509072496e-05, "loss": 0.3265, "num_tokens": 1885231801.0, "step": 2977 }, { "epoch": 0.35213432659335464, "grad_norm": 0.13940003514289856, "learning_rate": 4.6601363909210135e-05, "loss": 0.3375, "num_tokens": 1885868393.0, "step": 2978 }, { "epoch": 0.3522525718339837, "grad_norm": 0.16117246448993683, "learning_rate": 4.659243031437591e-05, "loss": 0.3593, "num_tokens": 1886507299.0, "step": 2979 }, { "epoch": 0.35237081707461276, "grad_norm": 0.15542960166931152, "learning_rate": 4.658349472588012e-05, "loss": 0.3804, "num_tokens": 1887144758.0, "step": 2980 }, { "epoch": 0.3524890623152418, "grad_norm": 0.13096381723880768, "learning_rate": 4.657455714503343e-05, "loss": 0.358, "num_tokens": 1887781943.0, "step": 2981 }, { "epoch": 0.3526073075558709, "grad_norm": 5.022675037384033, "learning_rate": 4.6565617573146714e-05, "loss": 0.5078, "num_tokens": 1888388545.0, "step": 2982 }, { "epoch": 0.3527255527964999, "grad_norm": 0.1822778284549713, "learning_rate": 4.6556676011531204e-05, "loss": 0.3425, "num_tokens": 1889024661.0, "step": 2983 }, { "epoch": 0.352843798037129, "grad_norm": 0.1752808690071106, "learning_rate": 4.654773246149839e-05, "loss": 0.3731, "num_tokens": 1889653811.0, "step": 2984 }, { "epoch": 0.3529620432777581, "grad_norm": 0.1552751362323761, "learning_rate": 4.653878692436006e-05, "loss": 0.3635, "num_tokens": 1890293509.0, "step": 2985 }, { "epoch": 0.35308028851838713, "grad_norm": 0.17543655633926392, "learning_rate": 4.652983940142829e-05, "loss": 0.4077, "num_tokens": 1890931709.0, "step": 2986 }, { "epoch": 0.3531985337590162, "grad_norm": 0.17120769619941711, "learning_rate": 4.6520889894015466e-05, "loss": 0.3378, "num_tokens": 1891569184.0, "step": 2987 }, { "epoch": 0.35331677899964525, "grad_norm": 0.15140977501869202, "learning_rate": 4.651193840343424e-05, "loss": 0.3756, "num_tokens": 1892208173.0, "step": 2988 }, { "epoch": 0.35343502424027434, "grad_norm": 0.1509585827589035, "learning_rate": 4.6502984930997576e-05, "loss": 0.3157, "num_tokens": 1892847753.0, "step": 2989 }, { "epoch": 0.3535532694809034, "grad_norm": 0.16436928510665894, "learning_rate": 4.649402947801871e-05, "loss": 0.3826, "num_tokens": 1893484180.0, "step": 2990 }, { "epoch": 0.35367151472153247, "grad_norm": 0.15517498552799225, "learning_rate": 4.6485072045811184e-05, "loss": 0.3487, "num_tokens": 1894116400.0, "step": 2991 }, { "epoch": 0.3537897599621615, "grad_norm": 0.1478368490934372, "learning_rate": 4.647611263568882e-05, "loss": 0.3572, "num_tokens": 1894750565.0, "step": 2992 }, { "epoch": 0.3539080052027906, "grad_norm": 0.1622321605682373, "learning_rate": 4.646715124896573e-05, "loss": 0.3964, "num_tokens": 1895389903.0, "step": 2993 }, { "epoch": 0.3540262504434197, "grad_norm": 0.13104596734046936, "learning_rate": 4.645818788695634e-05, "loss": 0.3304, "num_tokens": 1896024458.0, "step": 2994 }, { "epoch": 0.3541444956840487, "grad_norm": 0.1360713392496109, "learning_rate": 4.6449222550975324e-05, "loss": 0.3001, "num_tokens": 1896658113.0, "step": 2995 }, { "epoch": 0.3542627409246778, "grad_norm": 0.14954175055027008, "learning_rate": 4.644025524233768e-05, "loss": 0.3505, "num_tokens": 1897291291.0, "step": 2996 }, { "epoch": 0.35438098616530683, "grad_norm": 0.15244640409946442, "learning_rate": 4.643128596235867e-05, "loss": 0.3505, "num_tokens": 1897921257.0, "step": 2997 }, { "epoch": 0.3544992314059359, "grad_norm": 0.14502538740634918, "learning_rate": 4.642231471235388e-05, "loss": 0.3489, "num_tokens": 1898559302.0, "step": 2998 }, { "epoch": 0.35461747664656496, "grad_norm": 0.15096543729305267, "learning_rate": 4.641334149363913e-05, "loss": 0.3768, "num_tokens": 1899192496.0, "step": 2999 }, { "epoch": 0.35473572188719404, "grad_norm": 0.1489545702934265, "learning_rate": 4.640436630753059e-05, "loss": 0.3553, "num_tokens": 1899825571.0, "step": 3000 }, { "epoch": 0.3548539671278231, "grad_norm": 0.14257052540779114, "learning_rate": 4.6395389155344666e-05, "loss": 0.3429, "num_tokens": 1900461962.0, "step": 3001 }, { "epoch": 0.35497221236845217, "grad_norm": 0.13992848992347717, "learning_rate": 4.638641003839811e-05, "loss": 0.3417, "num_tokens": 1901097890.0, "step": 3002 }, { "epoch": 0.35509045760908126, "grad_norm": 0.129328191280365, "learning_rate": 4.6377428958007895e-05, "loss": 0.3548, "num_tokens": 1901726729.0, "step": 3003 }, { "epoch": 0.3552087028497103, "grad_norm": 0.14039000868797302, "learning_rate": 4.6368445915491334e-05, "loss": 0.3677, "num_tokens": 1902360636.0, "step": 3004 }, { "epoch": 0.3553269480903394, "grad_norm": 0.16004405915737152, "learning_rate": 4.6359460912166006e-05, "loss": 0.4086, "num_tokens": 1903000236.0, "step": 3005 }, { "epoch": 0.3554451933309684, "grad_norm": 0.1515074372291565, "learning_rate": 4.635047394934978e-05, "loss": 0.3631, "num_tokens": 1903602913.0, "step": 3006 }, { "epoch": 0.3555634385715975, "grad_norm": 0.1359093189239502, "learning_rate": 4.634148502836081e-05, "loss": 0.3518, "num_tokens": 1904238895.0, "step": 3007 }, { "epoch": 0.35568168381222653, "grad_norm": 0.13987977802753448, "learning_rate": 4.6332494150517546e-05, "loss": 0.3393, "num_tokens": 1904871706.0, "step": 3008 }, { "epoch": 0.3557999290528556, "grad_norm": 0.14529003202915192, "learning_rate": 4.632350131713871e-05, "loss": 0.3206, "num_tokens": 1905508065.0, "step": 3009 }, { "epoch": 0.3559181742934847, "grad_norm": 0.15460041165351868, "learning_rate": 4.631450652954333e-05, "loss": 0.3713, "num_tokens": 1906138650.0, "step": 3010 }, { "epoch": 0.35603641953411375, "grad_norm": 0.13388456404209137, "learning_rate": 4.630550978905071e-05, "loss": 0.3134, "num_tokens": 1906774559.0, "step": 3011 }, { "epoch": 0.35615466477474284, "grad_norm": 0.1348342001438141, "learning_rate": 4.6296511096980444e-05, "loss": 0.3858, "num_tokens": 1907410110.0, "step": 3012 }, { "epoch": 0.35627291001537187, "grad_norm": 0.1365630328655243, "learning_rate": 4.628751045465241e-05, "loss": 0.3326, "num_tokens": 1908046143.0, "step": 3013 }, { "epoch": 0.35639115525600096, "grad_norm": 0.14720433950424194, "learning_rate": 4.6278507863386755e-05, "loss": 0.3303, "num_tokens": 1908678118.0, "step": 3014 }, { "epoch": 0.35650940049663, "grad_norm": 0.1367522031068802, "learning_rate": 4.626950332450394e-05, "loss": 0.3523, "num_tokens": 1909317706.0, "step": 3015 }, { "epoch": 0.3566276457372591, "grad_norm": 0.14324848353862762, "learning_rate": 4.626049683932471e-05, "loss": 0.3285, "num_tokens": 1909948250.0, "step": 3016 }, { "epoch": 0.3567458909778881, "grad_norm": 0.13236810266971588, "learning_rate": 4.625148840917008e-05, "loss": 0.3246, "num_tokens": 1910584187.0, "step": 3017 }, { "epoch": 0.3568641362185172, "grad_norm": 0.14364567399024963, "learning_rate": 4.6242478035361345e-05, "loss": 0.3654, "num_tokens": 1911215960.0, "step": 3018 }, { "epoch": 0.3569823814591463, "grad_norm": 0.1326141208410263, "learning_rate": 4.623346571922012e-05, "loss": 0.3101, "num_tokens": 1911851965.0, "step": 3019 }, { "epoch": 0.3571006266997753, "grad_norm": 0.15471681952476501, "learning_rate": 4.622445146206825e-05, "loss": 0.3555, "num_tokens": 1912491258.0, "step": 3020 }, { "epoch": 0.3572188719404044, "grad_norm": 0.1277155727148056, "learning_rate": 4.621543526522792e-05, "loss": 0.3582, "num_tokens": 1913124265.0, "step": 3021 }, { "epoch": 0.35733711718103345, "grad_norm": 0.13481053709983826, "learning_rate": 4.620641713002156e-05, "loss": 0.3342, "num_tokens": 1913762405.0, "step": 3022 }, { "epoch": 0.35745536242166254, "grad_norm": 0.14956584572792053, "learning_rate": 4.619739705777191e-05, "loss": 0.3428, "num_tokens": 1914393031.0, "step": 3023 }, { "epoch": 0.35757360766229157, "grad_norm": 0.15483330190181732, "learning_rate": 4.618837504980197e-05, "loss": 0.3867, "num_tokens": 1915024153.0, "step": 3024 }, { "epoch": 0.35769185290292066, "grad_norm": 0.1438196748495102, "learning_rate": 4.617935110743506e-05, "loss": 0.3387, "num_tokens": 1915662487.0, "step": 3025 }, { "epoch": 0.35781009814354975, "grad_norm": 0.15091918408870697, "learning_rate": 4.617032523199475e-05, "loss": 0.3591, "num_tokens": 1916297791.0, "step": 3026 }, { "epoch": 0.3579283433841788, "grad_norm": 0.12921683490276337, "learning_rate": 4.61612974248049e-05, "loss": 0.3438, "num_tokens": 1916930698.0, "step": 3027 }, { "epoch": 0.35804658862480787, "grad_norm": 0.13933952152729034, "learning_rate": 4.6152267687189656e-05, "loss": 0.3487, "num_tokens": 1917568686.0, "step": 3028 }, { "epoch": 0.3581648338654369, "grad_norm": 0.14768871665000916, "learning_rate": 4.614323602047345e-05, "loss": 0.3708, "num_tokens": 1918204679.0, "step": 3029 }, { "epoch": 0.358283079106066, "grad_norm": 0.145917609333992, "learning_rate": 4.613420242598102e-05, "loss": 0.3727, "num_tokens": 1918836572.0, "step": 3030 }, { "epoch": 0.358401324346695, "grad_norm": 0.14621174335479736, "learning_rate": 4.612516690503733e-05, "loss": 0.3621, "num_tokens": 1919471247.0, "step": 3031 }, { "epoch": 0.3585195695873241, "grad_norm": 0.14420290291309357, "learning_rate": 4.6116129458967686e-05, "loss": 0.3253, "num_tokens": 1920108986.0, "step": 3032 }, { "epoch": 0.35863781482795315, "grad_norm": 0.1370910257101059, "learning_rate": 4.610709008909764e-05, "loss": 0.3268, "num_tokens": 1920745157.0, "step": 3033 }, { "epoch": 0.35875606006858224, "grad_norm": 0.13778086006641388, "learning_rate": 4.609804879675303e-05, "loss": 0.3859, "num_tokens": 1921383616.0, "step": 3034 }, { "epoch": 0.35887430530921133, "grad_norm": 0.14417847990989685, "learning_rate": 4.608900558325997e-05, "loss": 0.3613, "num_tokens": 1922021463.0, "step": 3035 }, { "epoch": 0.35899255054984036, "grad_norm": 0.13720254600048065, "learning_rate": 4.60799604499449e-05, "loss": 0.3397, "num_tokens": 1922655329.0, "step": 3036 }, { "epoch": 0.35911079579046945, "grad_norm": 0.12765276432037354, "learning_rate": 4.607091339813448e-05, "loss": 0.3436, "num_tokens": 1923289727.0, "step": 3037 }, { "epoch": 0.3592290410310985, "grad_norm": 0.13553263247013092, "learning_rate": 4.6061864429155716e-05, "loss": 0.3105, "num_tokens": 1923912557.0, "step": 3038 }, { "epoch": 0.3593472862717276, "grad_norm": 0.13943645358085632, "learning_rate": 4.605281354433583e-05, "loss": 0.3213, "num_tokens": 1924547922.0, "step": 3039 }, { "epoch": 0.3594655315123566, "grad_norm": 0.13963927328586578, "learning_rate": 4.604376074500235e-05, "loss": 0.3414, "num_tokens": 1925182411.0, "step": 3040 }, { "epoch": 0.3595837767529857, "grad_norm": 0.14027321338653564, "learning_rate": 4.603470603248312e-05, "loss": 0.3357, "num_tokens": 1925811021.0, "step": 3041 }, { "epoch": 0.3597020219936148, "grad_norm": 0.14088119566440582, "learning_rate": 4.6025649408106216e-05, "loss": 0.3405, "num_tokens": 1926439054.0, "step": 3042 }, { "epoch": 0.3598202672342438, "grad_norm": 0.13346663117408752, "learning_rate": 4.601659087320002e-05, "loss": 0.3067, "num_tokens": 1927075163.0, "step": 3043 }, { "epoch": 0.3599385124748729, "grad_norm": 0.14444924890995026, "learning_rate": 4.600753042909317e-05, "loss": 0.3486, "num_tokens": 1927709473.0, "step": 3044 }, { "epoch": 0.36005675771550194, "grad_norm": 0.1457834094762802, "learning_rate": 4.599846807711462e-05, "loss": 0.3848, "num_tokens": 1928345191.0, "step": 3045 }, { "epoch": 0.36017500295613103, "grad_norm": 0.14781084656715393, "learning_rate": 4.598940381859358e-05, "loss": 0.3652, "num_tokens": 1928982706.0, "step": 3046 }, { "epoch": 0.36029324819676006, "grad_norm": 0.14243356883525848, "learning_rate": 4.598033765485955e-05, "loss": 0.3233, "num_tokens": 1929609349.0, "step": 3047 }, { "epoch": 0.36041149343738915, "grad_norm": 0.1420990526676178, "learning_rate": 4.597126958724228e-05, "loss": 0.3577, "num_tokens": 1930243514.0, "step": 3048 }, { "epoch": 0.3605297386780182, "grad_norm": 0.1330944299697876, "learning_rate": 4.5962199617071846e-05, "loss": 0.3483, "num_tokens": 1930879858.0, "step": 3049 }, { "epoch": 0.3606479839186473, "grad_norm": 0.14003290235996246, "learning_rate": 4.595312774567858e-05, "loss": 0.3471, "num_tokens": 1931515694.0, "step": 3050 }, { "epoch": 0.36076622915927636, "grad_norm": 0.14909254014492035, "learning_rate": 4.594405397439308e-05, "loss": 0.3468, "num_tokens": 1932151122.0, "step": 3051 }, { "epoch": 0.3608844743999054, "grad_norm": 0.1307414472103119, "learning_rate": 4.593497830454626e-05, "loss": 0.3209, "num_tokens": 1932784008.0, "step": 3052 }, { "epoch": 0.3610027196405345, "grad_norm": 0.13539423048496246, "learning_rate": 4.592590073746926e-05, "loss": 0.3539, "num_tokens": 1933416429.0, "step": 3053 }, { "epoch": 0.3611209648811635, "grad_norm": 0.13415098190307617, "learning_rate": 4.591682127449354e-05, "loss": 0.3458, "num_tokens": 1934045138.0, "step": 3054 }, { "epoch": 0.3612392101217926, "grad_norm": 0.1338696926832199, "learning_rate": 4.590773991695082e-05, "loss": 0.319, "num_tokens": 1934677478.0, "step": 3055 }, { "epoch": 0.36135745536242164, "grad_norm": 0.1363179087638855, "learning_rate": 4.5898656666173125e-05, "loss": 0.3862, "num_tokens": 1935316849.0, "step": 3056 }, { "epoch": 0.36147570060305073, "grad_norm": 0.14124137163162231, "learning_rate": 4.58895715234927e-05, "loss": 0.3728, "num_tokens": 1935946418.0, "step": 3057 }, { "epoch": 0.36159394584367976, "grad_norm": 0.1407114863395691, "learning_rate": 4.588048449024213e-05, "loss": 0.3359, "num_tokens": 1936576766.0, "step": 3058 }, { "epoch": 0.36171219108430885, "grad_norm": 0.14824119210243225, "learning_rate": 4.587139556775424e-05, "loss": 0.3982, "num_tokens": 1937215841.0, "step": 3059 }, { "epoch": 0.36183043632493794, "grad_norm": 0.15375636518001556, "learning_rate": 4.586230475736214e-05, "loss": 0.4075, "num_tokens": 1937855321.0, "step": 3060 }, { "epoch": 0.361948681565567, "grad_norm": 0.14733020961284637, "learning_rate": 4.585321206039922e-05, "loss": 0.3589, "num_tokens": 1938492338.0, "step": 3061 }, { "epoch": 0.36206692680619607, "grad_norm": 0.14334341883659363, "learning_rate": 4.584411747819915e-05, "loss": 0.3433, "num_tokens": 1939125939.0, "step": 3062 }, { "epoch": 0.3621851720468251, "grad_norm": 0.143397718667984, "learning_rate": 4.583502101209587e-05, "loss": 0.3253, "num_tokens": 1939762225.0, "step": 3063 }, { "epoch": 0.3623034172874542, "grad_norm": 0.1525832712650299, "learning_rate": 4.58259226634236e-05, "loss": 0.3678, "num_tokens": 1940395464.0, "step": 3064 }, { "epoch": 0.3624216625280832, "grad_norm": 0.16342173516750336, "learning_rate": 4.581682243351682e-05, "loss": 0.3587, "num_tokens": 1941029660.0, "step": 3065 }, { "epoch": 0.3625399077687123, "grad_norm": 0.14047962427139282, "learning_rate": 4.580772032371033e-05, "loss": 0.3864, "num_tokens": 1941661364.0, "step": 3066 }, { "epoch": 0.3626581530093414, "grad_norm": 0.14622509479522705, "learning_rate": 4.579861633533916e-05, "loss": 0.3566, "num_tokens": 1942296171.0, "step": 3067 }, { "epoch": 0.36277639824997043, "grad_norm": 0.1328878253698349, "learning_rate": 4.578951046973862e-05, "loss": 0.2863, "num_tokens": 1942922085.0, "step": 3068 }, { "epoch": 0.3628946434905995, "grad_norm": 0.15963684022426605, "learning_rate": 4.578040272824433e-05, "loss": 0.3692, "num_tokens": 1943556452.0, "step": 3069 }, { "epoch": 0.36301288873122856, "grad_norm": 0.14300723373889923, "learning_rate": 4.577129311219215e-05, "loss": 0.3519, "num_tokens": 1944192623.0, "step": 3070 }, { "epoch": 0.36313113397185764, "grad_norm": 0.13560111820697784, "learning_rate": 4.576218162291824e-05, "loss": 0.3384, "num_tokens": 1944822411.0, "step": 3071 }, { "epoch": 0.3632493792124867, "grad_norm": 0.14824742078781128, "learning_rate": 4.5753068261759e-05, "loss": 0.3307, "num_tokens": 1945453062.0, "step": 3072 }, { "epoch": 0.36336762445311577, "grad_norm": 0.15556548535823822, "learning_rate": 4.5743953030051136e-05, "loss": 0.3561, "num_tokens": 1946087665.0, "step": 3073 }, { "epoch": 0.3634858696937448, "grad_norm": 0.1568261831998825, "learning_rate": 4.5734835929131626e-05, "loss": 0.3823, "num_tokens": 1946722711.0, "step": 3074 }, { "epoch": 0.3636041149343739, "grad_norm": 0.15707528591156006, "learning_rate": 4.572571696033771e-05, "loss": 0.364, "num_tokens": 1947353301.0, "step": 3075 }, { "epoch": 0.363722360175003, "grad_norm": 0.1434689611196518, "learning_rate": 4.5716596125006916e-05, "loss": 0.3525, "num_tokens": 1947992860.0, "step": 3076 }, { "epoch": 0.363840605415632, "grad_norm": 0.15434996783733368, "learning_rate": 4.570747342447703e-05, "loss": 0.358, "num_tokens": 1948624737.0, "step": 3077 }, { "epoch": 0.3639588506562611, "grad_norm": 0.1441783457994461, "learning_rate": 4.569834886008611e-05, "loss": 0.3329, "num_tokens": 1949256655.0, "step": 3078 }, { "epoch": 0.36407709589689013, "grad_norm": 0.1457502543926239, "learning_rate": 4.568922243317252e-05, "loss": 0.3366, "num_tokens": 1949886274.0, "step": 3079 }, { "epoch": 0.3641953411375192, "grad_norm": 0.13884122669696808, "learning_rate": 4.5680094145074836e-05, "loss": 0.3472, "num_tokens": 1950519175.0, "step": 3080 }, { "epoch": 0.36431358637814826, "grad_norm": 0.1439608782529831, "learning_rate": 4.567096399713197e-05, "loss": 0.333, "num_tokens": 1951154366.0, "step": 3081 }, { "epoch": 0.36443183161877735, "grad_norm": 0.17237386107444763, "learning_rate": 4.566183199068308e-05, "loss": 0.3739, "num_tokens": 1951790463.0, "step": 3082 }, { "epoch": 0.36455007685940644, "grad_norm": 0.14367932081222534, "learning_rate": 4.5652698127067606e-05, "loss": 0.351, "num_tokens": 1952423797.0, "step": 3083 }, { "epoch": 0.36466832210003547, "grad_norm": 0.1440470665693283, "learning_rate": 4.564356240762523e-05, "loss": 0.3707, "num_tokens": 1953057342.0, "step": 3084 }, { "epoch": 0.36478656734066456, "grad_norm": 0.1541077047586441, "learning_rate": 4.5634424833695936e-05, "loss": 0.3856, "num_tokens": 1953661405.0, "step": 3085 }, { "epoch": 0.3649048125812936, "grad_norm": 0.141135111451149, "learning_rate": 4.5625285406619986e-05, "loss": 0.3704, "num_tokens": 1954296565.0, "step": 3086 }, { "epoch": 0.3650230578219227, "grad_norm": 0.4759691655635834, "learning_rate": 4.561614412773787e-05, "loss": 0.3939, "num_tokens": 1954904487.0, "step": 3087 }, { "epoch": 0.3651413030625517, "grad_norm": 0.17372970283031464, "learning_rate": 4.560700099839041e-05, "loss": 0.3515, "num_tokens": 1955539961.0, "step": 3088 }, { "epoch": 0.3652595483031808, "grad_norm": 0.19676460325717926, "learning_rate": 4.559785601991865e-05, "loss": 0.3672, "num_tokens": 1956169957.0, "step": 3089 }, { "epoch": 0.36537779354380984, "grad_norm": 0.18098878860473633, "learning_rate": 4.558870919366394e-05, "loss": 0.3753, "num_tokens": 1956804766.0, "step": 3090 }, { "epoch": 0.3654960387844389, "grad_norm": 0.1548042893409729, "learning_rate": 4.557956052096787e-05, "loss": 0.3843, "num_tokens": 1957435253.0, "step": 3091 }, { "epoch": 0.365614284025068, "grad_norm": 0.16227959096431732, "learning_rate": 4.557041000317233e-05, "loss": 0.3557, "num_tokens": 1958066249.0, "step": 3092 }, { "epoch": 0.36573252926569705, "grad_norm": 0.1697051078081131, "learning_rate": 4.556125764161945e-05, "loss": 0.3406, "num_tokens": 1958694553.0, "step": 3093 }, { "epoch": 0.36585077450632614, "grad_norm": 0.1481878012418747, "learning_rate": 4.555210343765166e-05, "loss": 0.3816, "num_tokens": 1959330437.0, "step": 3094 }, { "epoch": 0.36596901974695517, "grad_norm": 0.1488538235425949, "learning_rate": 4.5542947392611646e-05, "loss": 0.3609, "num_tokens": 1959941659.0, "step": 3095 }, { "epoch": 0.36608726498758426, "grad_norm": 0.14593948423862457, "learning_rate": 4.5533789507842355e-05, "loss": 0.3762, "num_tokens": 1960564982.0, "step": 3096 }, { "epoch": 0.3662055102282133, "grad_norm": 0.1727575808763504, "learning_rate": 4.552462978468704e-05, "loss": 0.3569, "num_tokens": 1961195553.0, "step": 3097 }, { "epoch": 0.3663237554688424, "grad_norm": 0.14052973687648773, "learning_rate": 4.551546822448916e-05, "loss": 0.396, "num_tokens": 1961827585.0, "step": 3098 }, { "epoch": 0.36644200070947147, "grad_norm": 0.14641112089157104, "learning_rate": 4.550630482859251e-05, "loss": 0.348, "num_tokens": 1962465602.0, "step": 3099 }, { "epoch": 0.3665602459501005, "grad_norm": 0.16688528656959534, "learning_rate": 4.549713959834112e-05, "loss": 0.3873, "num_tokens": 1963101911.0, "step": 3100 }, { "epoch": 0.3666784911907296, "grad_norm": 0.14957702159881592, "learning_rate": 4.548797253507929e-05, "loss": 0.3427, "num_tokens": 1963741352.0, "step": 3101 }, { "epoch": 0.3667967364313586, "grad_norm": 0.13828708231449127, "learning_rate": 4.54788036401516e-05, "loss": 0.3361, "num_tokens": 1964370737.0, "step": 3102 }, { "epoch": 0.3669149816719877, "grad_norm": 0.15188737213611603, "learning_rate": 4.546963291490288e-05, "loss": 0.3469, "num_tokens": 1965006869.0, "step": 3103 }, { "epoch": 0.36703322691261675, "grad_norm": 0.15303142368793488, "learning_rate": 4.5460460360678244e-05, "loss": 0.3666, "num_tokens": 1965636765.0, "step": 3104 }, { "epoch": 0.36715147215324584, "grad_norm": 0.15506453812122345, "learning_rate": 4.545128597882309e-05, "loss": 0.3955, "num_tokens": 1966271773.0, "step": 3105 }, { "epoch": 0.3672697173938749, "grad_norm": 0.13604062795639038, "learning_rate": 4.5442109770683045e-05, "loss": 0.3347, "num_tokens": 1966905539.0, "step": 3106 }, { "epoch": 0.36738796263450396, "grad_norm": 0.15197230875492096, "learning_rate": 4.543293173760402e-05, "loss": 0.338, "num_tokens": 1967536103.0, "step": 3107 }, { "epoch": 0.36750620787513305, "grad_norm": 0.1436569094657898, "learning_rate": 4.542375188093221e-05, "loss": 0.3736, "num_tokens": 1968172905.0, "step": 3108 }, { "epoch": 0.3676244531157621, "grad_norm": 0.13623261451721191, "learning_rate": 4.541457020201406e-05, "loss": 0.3506, "num_tokens": 1968802415.0, "step": 3109 }, { "epoch": 0.3677426983563912, "grad_norm": 0.14317893981933594, "learning_rate": 4.5405386702196296e-05, "loss": 0.3728, "num_tokens": 1969439353.0, "step": 3110 }, { "epoch": 0.3678609435970202, "grad_norm": 0.14719252288341522, "learning_rate": 4.5396201382825885e-05, "loss": 0.3474, "num_tokens": 1970068449.0, "step": 3111 }, { "epoch": 0.3679791888376493, "grad_norm": 0.15667855739593506, "learning_rate": 4.538701424525009e-05, "loss": 0.342, "num_tokens": 1970700883.0, "step": 3112 }, { "epoch": 0.36809743407827833, "grad_norm": 0.15193264186382294, "learning_rate": 4.5377825290816436e-05, "loss": 0.3602, "num_tokens": 1971334487.0, "step": 3113 }, { "epoch": 0.3682156793189074, "grad_norm": 0.1358126848936081, "learning_rate": 4.536863452087268e-05, "loss": 0.3062, "num_tokens": 1971970040.0, "step": 3114 }, { "epoch": 0.36833392455953645, "grad_norm": 0.1372714787721634, "learning_rate": 4.535944193676691e-05, "loss": 0.3651, "num_tokens": 1972603710.0, "step": 3115 }, { "epoch": 0.36845216980016554, "grad_norm": 0.14431990683078766, "learning_rate": 4.5350247539847416e-05, "loss": 0.3777, "num_tokens": 1973237977.0, "step": 3116 }, { "epoch": 0.36857041504079463, "grad_norm": 0.1342397779226303, "learning_rate": 4.534105133146279e-05, "loss": 0.337, "num_tokens": 1973867002.0, "step": 3117 }, { "epoch": 0.36868866028142366, "grad_norm": 0.12561605870723724, "learning_rate": 4.533185331296187e-05, "loss": 0.3151, "num_tokens": 1974499908.0, "step": 3118 }, { "epoch": 0.36880690552205275, "grad_norm": 0.1400623619556427, "learning_rate": 4.532265348569379e-05, "loss": 0.3563, "num_tokens": 1975139234.0, "step": 3119 }, { "epoch": 0.3689251507626818, "grad_norm": 0.1319972276687622, "learning_rate": 4.5313451851007915e-05, "loss": 0.3199, "num_tokens": 1975775980.0, "step": 3120 }, { "epoch": 0.3690433960033109, "grad_norm": 0.13709381222724915, "learning_rate": 4.5304248410253886e-05, "loss": 0.3557, "num_tokens": 1976390437.0, "step": 3121 }, { "epoch": 0.3691616412439399, "grad_norm": 0.13652950525283813, "learning_rate": 4.5295043164781624e-05, "loss": 0.3848, "num_tokens": 1977024836.0, "step": 3122 }, { "epoch": 0.369279886484569, "grad_norm": 0.1422300934791565, "learning_rate": 4.5285836115941285e-05, "loss": 0.36, "num_tokens": 1977664116.0, "step": 3123 }, { "epoch": 0.3693981317251981, "grad_norm": 0.138983353972435, "learning_rate": 4.527662726508333e-05, "loss": 0.3196, "num_tokens": 1978276237.0, "step": 3124 }, { "epoch": 0.3695163769658271, "grad_norm": 0.15864978730678558, "learning_rate": 4.526741661355845e-05, "loss": 0.3417, "num_tokens": 1978911566.0, "step": 3125 }, { "epoch": 0.3696346222064562, "grad_norm": 0.14756576716899872, "learning_rate": 4.5258204162717615e-05, "loss": 0.3565, "num_tokens": 1979550601.0, "step": 3126 }, { "epoch": 0.36975286744708524, "grad_norm": 0.1474129557609558, "learning_rate": 4.524898991391205e-05, "loss": 0.3695, "num_tokens": 1980180988.0, "step": 3127 }, { "epoch": 0.36987111268771433, "grad_norm": 0.15585395693778992, "learning_rate": 4.523977386849325e-05, "loss": 0.3696, "num_tokens": 1980813561.0, "step": 3128 }, { "epoch": 0.36998935792834337, "grad_norm": 0.14471499621868134, "learning_rate": 4.5230556027812984e-05, "loss": 0.3611, "num_tokens": 1981451194.0, "step": 3129 }, { "epoch": 0.37010760316897245, "grad_norm": 0.142787367105484, "learning_rate": 4.5221336393223254e-05, "loss": 0.3215, "num_tokens": 1982088392.0, "step": 3130 }, { "epoch": 0.3702258484096015, "grad_norm": 0.14640016853809357, "learning_rate": 4.521211496607636e-05, "loss": 0.3618, "num_tokens": 1982725390.0, "step": 3131 }, { "epoch": 0.3703440936502306, "grad_norm": 0.1316845566034317, "learning_rate": 4.520289174772484e-05, "loss": 0.3246, "num_tokens": 1983354155.0, "step": 3132 }, { "epoch": 0.37046233889085967, "grad_norm": 0.1320035606622696, "learning_rate": 4.5193666739521516e-05, "loss": 0.3367, "num_tokens": 1983993695.0, "step": 3133 }, { "epoch": 0.3705805841314887, "grad_norm": 0.15815205872058868, "learning_rate": 4.518443994281945e-05, "loss": 0.3801, "num_tokens": 1984621335.0, "step": 3134 }, { "epoch": 0.3706988293721178, "grad_norm": 0.12797923386096954, "learning_rate": 4.517521135897198e-05, "loss": 0.3226, "num_tokens": 1985246598.0, "step": 3135 }, { "epoch": 0.3708170746127468, "grad_norm": 0.1462465524673462, "learning_rate": 4.516598098933269e-05, "loss": 0.3489, "num_tokens": 1985875935.0, "step": 3136 }, { "epoch": 0.3709353198533759, "grad_norm": 0.1372070461511612, "learning_rate": 4.515674883525547e-05, "loss": 0.3659, "num_tokens": 1986512312.0, "step": 3137 }, { "epoch": 0.37105356509400494, "grad_norm": 0.17180359363555908, "learning_rate": 4.514751489809442e-05, "loss": 0.3871, "num_tokens": 1987139184.0, "step": 3138 }, { "epoch": 0.37117181033463403, "grad_norm": 0.14752544462680817, "learning_rate": 4.513827917920392e-05, "loss": 0.355, "num_tokens": 1987774125.0, "step": 3139 }, { "epoch": 0.3712900555752631, "grad_norm": 0.14177976548671722, "learning_rate": 4.5129041679938615e-05, "loss": 0.3654, "num_tokens": 1988406644.0, "step": 3140 }, { "epoch": 0.37140830081589216, "grad_norm": 0.13812536001205444, "learning_rate": 4.511980240165342e-05, "loss": 0.3568, "num_tokens": 1989041164.0, "step": 3141 }, { "epoch": 0.37152654605652125, "grad_norm": 0.1623033732175827, "learning_rate": 4.511056134570349e-05, "loss": 0.4223, "num_tokens": 1989674516.0, "step": 3142 }, { "epoch": 0.3716447912971503, "grad_norm": 0.13929659128189087, "learning_rate": 4.510131851344426e-05, "loss": 0.4156, "num_tokens": 1990314090.0, "step": 3143 }, { "epoch": 0.37176303653777937, "grad_norm": 0.14265066385269165, "learning_rate": 4.5092073906231396e-05, "loss": 0.3528, "num_tokens": 1990946283.0, "step": 3144 }, { "epoch": 0.3718812817784084, "grad_norm": 0.14816825091838837, "learning_rate": 4.508282752542088e-05, "loss": 0.3719, "num_tokens": 1991585720.0, "step": 3145 }, { "epoch": 0.3719995270190375, "grad_norm": 0.1360781192779541, "learning_rate": 4.507357937236888e-05, "loss": 0.34, "num_tokens": 1992217952.0, "step": 3146 }, { "epoch": 0.3721177722596665, "grad_norm": 0.14584355056285858, "learning_rate": 4.50643294484319e-05, "loss": 0.3748, "num_tokens": 1992850728.0, "step": 3147 }, { "epoch": 0.3722360175002956, "grad_norm": 0.15152835845947266, "learning_rate": 4.5055077754966645e-05, "loss": 0.3505, "num_tokens": 1993485275.0, "step": 3148 }, { "epoch": 0.3723542627409247, "grad_norm": 0.15190981328487396, "learning_rate": 4.504582429333011e-05, "loss": 0.3672, "num_tokens": 1994120404.0, "step": 3149 }, { "epoch": 0.37247250798155374, "grad_norm": 0.1420281082391739, "learning_rate": 4.5036569064879536e-05, "loss": 0.3046, "num_tokens": 1994750502.0, "step": 3150 }, { "epoch": 0.3725907532221828, "grad_norm": 0.15613354742527008, "learning_rate": 4.5027312070972424e-05, "loss": 0.3621, "num_tokens": 1995384949.0, "step": 3151 }, { "epoch": 0.37270899846281186, "grad_norm": 0.13568255305290222, "learning_rate": 4.5018053312966544e-05, "loss": 0.3072, "num_tokens": 1996012350.0, "step": 3152 }, { "epoch": 0.37282724370344095, "grad_norm": 0.1707930564880371, "learning_rate": 4.500879279221993e-05, "loss": 0.4279, "num_tokens": 1996642776.0, "step": 3153 }, { "epoch": 0.37294548894407, "grad_norm": 0.15666420757770538, "learning_rate": 4.499953051009085e-05, "loss": 0.3826, "num_tokens": 1997275747.0, "step": 3154 }, { "epoch": 0.37306373418469907, "grad_norm": 0.1399688422679901, "learning_rate": 4.499026646793783e-05, "loss": 0.3647, "num_tokens": 1997915055.0, "step": 3155 }, { "epoch": 0.3731819794253281, "grad_norm": 0.14312644302845, "learning_rate": 4.4981000667119695e-05, "loss": 0.3511, "num_tokens": 1998549173.0, "step": 3156 }, { "epoch": 0.3733002246659572, "grad_norm": 0.15440721809864044, "learning_rate": 4.4971733108995494e-05, "loss": 0.3329, "num_tokens": 1999177595.0, "step": 3157 }, { "epoch": 0.3734184699065863, "grad_norm": 0.1519797444343567, "learning_rate": 4.496246379492453e-05, "loss": 0.3625, "num_tokens": 1999814333.0, "step": 3158 }, { "epoch": 0.3735367151472153, "grad_norm": 0.1643160879611969, "learning_rate": 4.495319272626638e-05, "loss": 0.3634, "num_tokens": 2000451588.0, "step": 3159 }, { "epoch": 0.3736549603878444, "grad_norm": 0.15239298343658447, "learning_rate": 4.494391990438087e-05, "loss": 0.3466, "num_tokens": 2001086210.0, "step": 3160 }, { "epoch": 0.37377320562847344, "grad_norm": 0.1673404574394226, "learning_rate": 4.493464533062809e-05, "loss": 0.3365, "num_tokens": 2001721530.0, "step": 3161 }, { "epoch": 0.3738914508691025, "grad_norm": 0.16178713738918304, "learning_rate": 4.4925369006368384e-05, "loss": 0.3737, "num_tokens": 2002359168.0, "step": 3162 }, { "epoch": 0.37400969610973156, "grad_norm": 0.16030916571617126, "learning_rate": 4.491609093296235e-05, "loss": 0.36, "num_tokens": 2002995288.0, "step": 3163 }, { "epoch": 0.37412794135036065, "grad_norm": 0.15121066570281982, "learning_rate": 4.4906811111770834e-05, "loss": 0.3492, "num_tokens": 2003627771.0, "step": 3164 }, { "epoch": 0.37424618659098974, "grad_norm": 0.14657223224639893, "learning_rate": 4.489752954415496e-05, "loss": 0.353, "num_tokens": 2004255405.0, "step": 3165 }, { "epoch": 0.37436443183161877, "grad_norm": 0.1609494984149933, "learning_rate": 4.488824623147608e-05, "loss": 0.3639, "num_tokens": 2004885473.0, "step": 3166 }, { "epoch": 0.37448267707224786, "grad_norm": 0.1323973834514618, "learning_rate": 4.487896117509584e-05, "loss": 0.3318, "num_tokens": 2005524388.0, "step": 3167 }, { "epoch": 0.3746009223128769, "grad_norm": 0.15087586641311646, "learning_rate": 4.4869674376376126e-05, "loss": 0.3257, "num_tokens": 2006161329.0, "step": 3168 }, { "epoch": 0.374719167553506, "grad_norm": 0.13209275901317596, "learning_rate": 4.4860385836679034e-05, "loss": 0.3127, "num_tokens": 2006800621.0, "step": 3169 }, { "epoch": 0.374837412794135, "grad_norm": 0.1574212610721588, "learning_rate": 4.4851095557366986e-05, "loss": 0.3581, "num_tokens": 2007439746.0, "step": 3170 }, { "epoch": 0.3749556580347641, "grad_norm": 0.15698032081127167, "learning_rate": 4.484180353980263e-05, "loss": 0.39, "num_tokens": 2008071413.0, "step": 3171 }, { "epoch": 0.37507390327539314, "grad_norm": 0.15396735072135925, "learning_rate": 4.483250978534884e-05, "loss": 0.3856, "num_tokens": 2008702719.0, "step": 3172 }, { "epoch": 0.37519214851602223, "grad_norm": 0.13286608457565308, "learning_rate": 4.482321429536881e-05, "loss": 0.3347, "num_tokens": 2009336623.0, "step": 3173 }, { "epoch": 0.3753103937566513, "grad_norm": 0.13988083600997925, "learning_rate": 4.481391707122592e-05, "loss": 0.3001, "num_tokens": 2009963954.0, "step": 3174 }, { "epoch": 0.37542863899728035, "grad_norm": 0.15818370878696442, "learning_rate": 4.4804618114283844e-05, "loss": 0.3996, "num_tokens": 2010595012.0, "step": 3175 }, { "epoch": 0.37554688423790944, "grad_norm": 0.15121035277843475, "learning_rate": 4.479531742590651e-05, "loss": 0.357, "num_tokens": 2011229903.0, "step": 3176 }, { "epoch": 0.3756651294785385, "grad_norm": 0.15069480240345, "learning_rate": 4.478601500745807e-05, "loss": 0.3344, "num_tokens": 2011863878.0, "step": 3177 }, { "epoch": 0.37578337471916756, "grad_norm": 0.15182387828826904, "learning_rate": 4.477671086030298e-05, "loss": 0.3639, "num_tokens": 2012502881.0, "step": 3178 }, { "epoch": 0.3759016199597966, "grad_norm": 0.13847431540489197, "learning_rate": 4.47674049858059e-05, "loss": 0.3506, "num_tokens": 2013135381.0, "step": 3179 }, { "epoch": 0.3760198652004257, "grad_norm": 0.15944963693618774, "learning_rate": 4.4758097385331753e-05, "loss": 0.3653, "num_tokens": 2013772057.0, "step": 3180 }, { "epoch": 0.3761381104410548, "grad_norm": 0.14767403900623322, "learning_rate": 4.474878806024576e-05, "loss": 0.3751, "num_tokens": 2014401968.0, "step": 3181 }, { "epoch": 0.3762563556816838, "grad_norm": 0.1535082459449768, "learning_rate": 4.4739477011913327e-05, "loss": 0.3739, "num_tokens": 2015037997.0, "step": 3182 }, { "epoch": 0.3763746009223129, "grad_norm": 0.1382431983947754, "learning_rate": 4.473016424170017e-05, "loss": 0.3438, "num_tokens": 2015666659.0, "step": 3183 }, { "epoch": 0.37649284616294193, "grad_norm": 0.13137619197368622, "learning_rate": 4.4720849750972216e-05, "loss": 0.3274, "num_tokens": 2016303525.0, "step": 3184 }, { "epoch": 0.376611091403571, "grad_norm": 0.13711418211460114, "learning_rate": 4.4711533541095676e-05, "loss": 0.3367, "num_tokens": 2016937352.0, "step": 3185 }, { "epoch": 0.37672933664420005, "grad_norm": 0.14446519315242767, "learning_rate": 4.470221561343699e-05, "loss": 0.3507, "num_tokens": 2017568721.0, "step": 3186 }, { "epoch": 0.37684758188482914, "grad_norm": 0.1342419981956482, "learning_rate": 4.469289596936286e-05, "loss": 0.335, "num_tokens": 2018204251.0, "step": 3187 }, { "epoch": 0.3769658271254582, "grad_norm": 0.1402573138475418, "learning_rate": 4.4683574610240254e-05, "loss": 0.3629, "num_tokens": 2018835137.0, "step": 3188 }, { "epoch": 0.37708407236608726, "grad_norm": 0.13641111552715302, "learning_rate": 4.467425153743636e-05, "loss": 0.3098, "num_tokens": 2019468024.0, "step": 3189 }, { "epoch": 0.37720231760671635, "grad_norm": 0.1371421366930008, "learning_rate": 4.4664926752318636e-05, "loss": 0.3087, "num_tokens": 2020105434.0, "step": 3190 }, { "epoch": 0.3773205628473454, "grad_norm": 0.13854768872261047, "learning_rate": 4.46556002562548e-05, "loss": 0.3488, "num_tokens": 2020737842.0, "step": 3191 }, { "epoch": 0.3774388080879745, "grad_norm": 0.13680675625801086, "learning_rate": 4.46462720506128e-05, "loss": 0.3521, "num_tokens": 2021374372.0, "step": 3192 }, { "epoch": 0.3775570533286035, "grad_norm": 0.1483931839466095, "learning_rate": 4.463694213676084e-05, "loss": 0.3347, "num_tokens": 2022005483.0, "step": 3193 }, { "epoch": 0.3776752985692326, "grad_norm": 0.1556905210018158, "learning_rate": 4.462761051606739e-05, "loss": 0.3419, "num_tokens": 2022644607.0, "step": 3194 }, { "epoch": 0.37779354380986163, "grad_norm": 0.14121071994304657, "learning_rate": 4.4618277189901165e-05, "loss": 0.3145, "num_tokens": 2023279570.0, "step": 3195 }, { "epoch": 0.3779117890504907, "grad_norm": 0.13246171176433563, "learning_rate": 4.460894215963112e-05, "loss": 0.3458, "num_tokens": 2023914462.0, "step": 3196 }, { "epoch": 0.3780300342911198, "grad_norm": 0.14581669867038727, "learning_rate": 4.459960542662645e-05, "loss": 0.3284, "num_tokens": 2024552404.0, "step": 3197 }, { "epoch": 0.37814827953174884, "grad_norm": 0.1547105461359024, "learning_rate": 4.459026699225664e-05, "loss": 0.3251, "num_tokens": 2025161342.0, "step": 3198 }, { "epoch": 0.37826652477237793, "grad_norm": 0.15897813439369202, "learning_rate": 4.458092685789138e-05, "loss": 0.3577, "num_tokens": 2025798144.0, "step": 3199 }, { "epoch": 0.37838477001300697, "grad_norm": 0.17431743443012238, "learning_rate": 4.4571585024900625e-05, "loss": 0.3991, "num_tokens": 2026430655.0, "step": 3200 }, { "epoch": 0.37850301525363605, "grad_norm": 0.14480547606945038, "learning_rate": 4.45622414946546e-05, "loss": 0.3386, "num_tokens": 2027062975.0, "step": 3201 }, { "epoch": 0.3786212604942651, "grad_norm": 0.1322716623544693, "learning_rate": 4.455289626852376e-05, "loss": 0.3292, "num_tokens": 2027702259.0, "step": 3202 }, { "epoch": 0.3787395057348942, "grad_norm": 0.16326038539409637, "learning_rate": 4.4543549347878795e-05, "loss": 0.3575, "num_tokens": 2028338237.0, "step": 3203 }, { "epoch": 0.3788577509755232, "grad_norm": 0.14294201135635376, "learning_rate": 4.4534200734090677e-05, "loss": 0.3327, "num_tokens": 2028975439.0, "step": 3204 }, { "epoch": 0.3789759962161523, "grad_norm": 0.13339757919311523, "learning_rate": 4.45248504285306e-05, "loss": 0.3466, "num_tokens": 2029605785.0, "step": 3205 }, { "epoch": 0.3790942414567814, "grad_norm": 0.14395444095134735, "learning_rate": 4.451549843257001e-05, "loss": 0.3533, "num_tokens": 2030242829.0, "step": 3206 }, { "epoch": 0.3792124866974104, "grad_norm": 0.14896628260612488, "learning_rate": 4.450614474758061e-05, "loss": 0.3497, "num_tokens": 2030877494.0, "step": 3207 }, { "epoch": 0.3793307319380395, "grad_norm": 0.1497618556022644, "learning_rate": 4.4496789374934344e-05, "loss": 0.327, "num_tokens": 2031516965.0, "step": 3208 }, { "epoch": 0.37944897717866855, "grad_norm": 0.13994483649730682, "learning_rate": 4.448743231600341e-05, "loss": 0.3431, "num_tokens": 2032147730.0, "step": 3209 }, { "epoch": 0.37956722241929763, "grad_norm": 0.13224202394485474, "learning_rate": 4.447807357216024e-05, "loss": 0.3444, "num_tokens": 2032784897.0, "step": 3210 }, { "epoch": 0.37968546765992667, "grad_norm": 0.15263359248638153, "learning_rate": 4.446871314477755e-05, "loss": 0.3631, "num_tokens": 2033420593.0, "step": 3211 }, { "epoch": 0.37980371290055576, "grad_norm": 0.12897028028964996, "learning_rate": 4.4459351035228225e-05, "loss": 0.3206, "num_tokens": 2034056716.0, "step": 3212 }, { "epoch": 0.3799219581411848, "grad_norm": 0.12641620635986328, "learning_rate": 4.44499872448855e-05, "loss": 0.3152, "num_tokens": 2034687588.0, "step": 3213 }, { "epoch": 0.3800402033818139, "grad_norm": 0.13546869158744812, "learning_rate": 4.444062177512276e-05, "loss": 0.3347, "num_tokens": 2035322495.0, "step": 3214 }, { "epoch": 0.38015844862244297, "grad_norm": 0.1388711929321289, "learning_rate": 4.44312546273137e-05, "loss": 0.3409, "num_tokens": 2035958858.0, "step": 3215 }, { "epoch": 0.380276693863072, "grad_norm": 0.14150738716125488, "learning_rate": 4.442188580283225e-05, "loss": 0.3409, "num_tokens": 2036591381.0, "step": 3216 }, { "epoch": 0.3803949391037011, "grad_norm": 0.1340700089931488, "learning_rate": 4.441251530305257e-05, "loss": 0.3318, "num_tokens": 2037223233.0, "step": 3217 }, { "epoch": 0.3805131843443301, "grad_norm": 0.12814196944236755, "learning_rate": 4.440314312934905e-05, "loss": 0.3519, "num_tokens": 2037862855.0, "step": 3218 }, { "epoch": 0.3806314295849592, "grad_norm": 0.1399819254875183, "learning_rate": 4.439376928309638e-05, "loss": 0.3904, "num_tokens": 2038500929.0, "step": 3219 }, { "epoch": 0.38074967482558825, "grad_norm": 0.14670564234256744, "learning_rate": 4.4384393765669445e-05, "loss": 0.3621, "num_tokens": 2039136055.0, "step": 3220 }, { "epoch": 0.38086792006621734, "grad_norm": 0.133676677942276, "learning_rate": 4.43750165784434e-05, "loss": 0.3348, "num_tokens": 2039774536.0, "step": 3221 }, { "epoch": 0.3809861653068464, "grad_norm": 0.1309509575366974, "learning_rate": 4.436563772279364e-05, "loss": 0.3481, "num_tokens": 2040408154.0, "step": 3222 }, { "epoch": 0.38110441054747546, "grad_norm": 0.14204125106334686, "learning_rate": 4.435625720009579e-05, "loss": 0.3184, "num_tokens": 2041042536.0, "step": 3223 }, { "epoch": 0.38122265578810455, "grad_norm": 0.15329287946224213, "learning_rate": 4.434687501172574e-05, "loss": 0.3709, "num_tokens": 2041676687.0, "step": 3224 }, { "epoch": 0.3813409010287336, "grad_norm": 0.1237092837691307, "learning_rate": 4.4337491159059624e-05, "loss": 0.3313, "num_tokens": 2042311049.0, "step": 3225 }, { "epoch": 0.38145914626936267, "grad_norm": 0.14611393213272095, "learning_rate": 4.4328105643473814e-05, "loss": 0.3582, "num_tokens": 2042939072.0, "step": 3226 }, { "epoch": 0.3815773915099917, "grad_norm": 0.15261508524417877, "learning_rate": 4.4318718466344915e-05, "loss": 0.3651, "num_tokens": 2043576549.0, "step": 3227 }, { "epoch": 0.3816956367506208, "grad_norm": 0.14137732982635498, "learning_rate": 4.430932962904979e-05, "loss": 0.3591, "num_tokens": 2044208051.0, "step": 3228 }, { "epoch": 0.3818138819912498, "grad_norm": 0.14625613391399384, "learning_rate": 4.4299939132965524e-05, "loss": 0.3578, "num_tokens": 2044844963.0, "step": 3229 }, { "epoch": 0.3819321272318789, "grad_norm": 0.13889676332473755, "learning_rate": 4.4290546979469494e-05, "loss": 0.3575, "num_tokens": 2045481839.0, "step": 3230 }, { "epoch": 0.382050372472508, "grad_norm": 0.13176602125167847, "learning_rate": 4.428115316993927e-05, "loss": 0.3248, "num_tokens": 2046112414.0, "step": 3231 }, { "epoch": 0.38216861771313704, "grad_norm": 0.14437636733055115, "learning_rate": 4.427175770575269e-05, "loss": 0.3584, "num_tokens": 2046746144.0, "step": 3232 }, { "epoch": 0.3822868629537661, "grad_norm": 0.13305190205574036, "learning_rate": 4.426236058828781e-05, "loss": 0.3434, "num_tokens": 2047378902.0, "step": 3233 }, { "epoch": 0.38240510819439516, "grad_norm": 0.1332342028617859, "learning_rate": 4.425296181892297e-05, "loss": 0.3238, "num_tokens": 2048010438.0, "step": 3234 }, { "epoch": 0.38252335343502425, "grad_norm": 0.1428399682044983, "learning_rate": 4.424356139903673e-05, "loss": 0.3666, "num_tokens": 2048644016.0, "step": 3235 }, { "epoch": 0.3826415986756533, "grad_norm": 0.13443148136138916, "learning_rate": 4.423415933000787e-05, "loss": 0.3673, "num_tokens": 2049276589.0, "step": 3236 }, { "epoch": 0.38275984391628237, "grad_norm": 0.13801340758800507, "learning_rate": 4.422475561321544e-05, "loss": 0.3735, "num_tokens": 2049902664.0, "step": 3237 }, { "epoch": 0.38287808915691146, "grad_norm": 0.13289180397987366, "learning_rate": 4.421535025003873e-05, "loss": 0.3523, "num_tokens": 2050536747.0, "step": 3238 }, { "epoch": 0.3829963343975405, "grad_norm": 0.14388155937194824, "learning_rate": 4.420594324185727e-05, "loss": 0.3994, "num_tokens": 2051170512.0, "step": 3239 }, { "epoch": 0.3831145796381696, "grad_norm": 0.13111808896064758, "learning_rate": 4.419653459005082e-05, "loss": 0.3671, "num_tokens": 2051808061.0, "step": 3240 }, { "epoch": 0.3832328248787986, "grad_norm": 0.13376735150814056, "learning_rate": 4.418712429599939e-05, "loss": 0.3446, "num_tokens": 2052422943.0, "step": 3241 }, { "epoch": 0.3833510701194277, "grad_norm": 0.12778927385807037, "learning_rate": 4.417771236108323e-05, "loss": 0.3187, "num_tokens": 2053058136.0, "step": 3242 }, { "epoch": 0.38346931536005674, "grad_norm": 0.14296415448188782, "learning_rate": 4.416829878668283e-05, "loss": 0.3538, "num_tokens": 2053697613.0, "step": 3243 }, { "epoch": 0.38358756060068583, "grad_norm": 0.12667781114578247, "learning_rate": 4.4158883574178903e-05, "loss": 0.2916, "num_tokens": 2054334802.0, "step": 3244 }, { "epoch": 0.38370580584131486, "grad_norm": 0.13792550563812256, "learning_rate": 4.414946672495245e-05, "loss": 0.3508, "num_tokens": 2054969701.0, "step": 3245 }, { "epoch": 0.38382405108194395, "grad_norm": 0.14156068861484528, "learning_rate": 4.414004824038466e-05, "loss": 0.3699, "num_tokens": 2055607328.0, "step": 3246 }, { "epoch": 0.38394229632257304, "grad_norm": 0.13127292692661285, "learning_rate": 4.4130628121857e-05, "loss": 0.3374, "num_tokens": 2056243559.0, "step": 3247 }, { "epoch": 0.3840605415632021, "grad_norm": 0.13138099014759064, "learning_rate": 4.412120637075115e-05, "loss": 0.3266, "num_tokens": 2056879351.0, "step": 3248 }, { "epoch": 0.38417878680383116, "grad_norm": 0.1471109539270401, "learning_rate": 4.411178298844903e-05, "loss": 0.3865, "num_tokens": 2057514803.0, "step": 3249 }, { "epoch": 0.3842970320444602, "grad_norm": 0.12418897449970245, "learning_rate": 4.410235797633283e-05, "loss": 0.3207, "num_tokens": 2058153839.0, "step": 3250 }, { "epoch": 0.3844152772850893, "grad_norm": 0.14121244847774506, "learning_rate": 4.4092931335784945e-05, "loss": 0.3388, "num_tokens": 2058793269.0, "step": 3251 }, { "epoch": 0.3845335225257183, "grad_norm": 0.14821939170360565, "learning_rate": 4.408350306818802e-05, "loss": 0.393, "num_tokens": 2059432650.0, "step": 3252 }, { "epoch": 0.3846517677663474, "grad_norm": 0.13934026658535004, "learning_rate": 4.4074073174924946e-05, "loss": 0.3417, "num_tokens": 2060065724.0, "step": 3253 }, { "epoch": 0.3847700130069765, "grad_norm": 0.12703485786914825, "learning_rate": 4.406464165737886e-05, "loss": 0.3326, "num_tokens": 2060701946.0, "step": 3254 }, { "epoch": 0.38488825824760553, "grad_norm": 0.15751640498638153, "learning_rate": 4.4055208516933094e-05, "loss": 0.3854, "num_tokens": 2061340077.0, "step": 3255 }, { "epoch": 0.3850065034882346, "grad_norm": 0.1422753483057022, "learning_rate": 4.404577375497128e-05, "loss": 0.3649, "num_tokens": 2061969210.0, "step": 3256 }, { "epoch": 0.38512474872886365, "grad_norm": 0.14205151796340942, "learning_rate": 4.403633737287723e-05, "loss": 0.3556, "num_tokens": 2062608544.0, "step": 3257 }, { "epoch": 0.38524299396949274, "grad_norm": 0.1491408348083496, "learning_rate": 4.4026899372035025e-05, "loss": 0.3429, "num_tokens": 2063241550.0, "step": 3258 }, { "epoch": 0.3853612392101218, "grad_norm": 0.1517544388771057, "learning_rate": 4.4017459753828983e-05, "loss": 0.3687, "num_tokens": 2063875181.0, "step": 3259 }, { "epoch": 0.38547948445075086, "grad_norm": 0.162079855799675, "learning_rate": 4.400801851964367e-05, "loss": 0.4029, "num_tokens": 2064507028.0, "step": 3260 }, { "epoch": 0.3855977296913799, "grad_norm": 0.1333954781293869, "learning_rate": 4.399857567086385e-05, "loss": 0.3229, "num_tokens": 2065144924.0, "step": 3261 }, { "epoch": 0.385715974932009, "grad_norm": 0.14905177056789398, "learning_rate": 4.398913120887455e-05, "loss": 0.3356, "num_tokens": 2065784119.0, "step": 3262 }, { "epoch": 0.3858342201726381, "grad_norm": 0.13505235314369202, "learning_rate": 4.3979685135061044e-05, "loss": 0.307, "num_tokens": 2066416504.0, "step": 3263 }, { "epoch": 0.3859524654132671, "grad_norm": 0.13235312700271606, "learning_rate": 4.397023745080881e-05, "loss": 0.3359, "num_tokens": 2067053759.0, "step": 3264 }, { "epoch": 0.3860707106538962, "grad_norm": 0.1367298662662506, "learning_rate": 4.39607881575036e-05, "loss": 0.3537, "num_tokens": 2067688426.0, "step": 3265 }, { "epoch": 0.38618895589452523, "grad_norm": 0.13472133874893188, "learning_rate": 4.395133725653138e-05, "loss": 0.3655, "num_tokens": 2068322370.0, "step": 3266 }, { "epoch": 0.3863072011351543, "grad_norm": 0.13642257452011108, "learning_rate": 4.3941884749278335e-05, "loss": 0.3247, "num_tokens": 2068957138.0, "step": 3267 }, { "epoch": 0.38642544637578335, "grad_norm": 0.13336193561553955, "learning_rate": 4.3932430637130934e-05, "loss": 0.3481, "num_tokens": 2069587603.0, "step": 3268 }, { "epoch": 0.38654369161641244, "grad_norm": 0.15060018002986908, "learning_rate": 4.392297492147585e-05, "loss": 0.3616, "num_tokens": 2070202495.0, "step": 3269 }, { "epoch": 0.3866619368570415, "grad_norm": 0.1417251080274582, "learning_rate": 4.391351760369997e-05, "loss": 0.3624, "num_tokens": 2070837195.0, "step": 3270 }, { "epoch": 0.38678018209767057, "grad_norm": 0.13693004846572876, "learning_rate": 4.390405868519046e-05, "loss": 0.3476, "num_tokens": 2071453033.0, "step": 3271 }, { "epoch": 0.38689842733829966, "grad_norm": 0.14650927484035492, "learning_rate": 4.3894598167334706e-05, "loss": 0.3278, "num_tokens": 2072080960.0, "step": 3272 }, { "epoch": 0.3870166725789287, "grad_norm": 0.12225014716386795, "learning_rate": 4.38851360515203e-05, "loss": 0.3132, "num_tokens": 2072703947.0, "step": 3273 }, { "epoch": 0.3871349178195578, "grad_norm": 0.13498365879058838, "learning_rate": 4.387567233913512e-05, "loss": 0.3448, "num_tokens": 2073336452.0, "step": 3274 }, { "epoch": 0.3872531630601868, "grad_norm": 0.14618520438671112, "learning_rate": 4.386620703156723e-05, "loss": 0.3353, "num_tokens": 2073970462.0, "step": 3275 }, { "epoch": 0.3873714083008159, "grad_norm": 0.14478199183940887, "learning_rate": 4.3856740130204955e-05, "loss": 0.3784, "num_tokens": 2074606040.0, "step": 3276 }, { "epoch": 0.38748965354144493, "grad_norm": 0.15410687029361725, "learning_rate": 4.384727163643686e-05, "loss": 0.342, "num_tokens": 2075243833.0, "step": 3277 }, { "epoch": 0.387607898782074, "grad_norm": 0.13237904012203217, "learning_rate": 4.3837801551651706e-05, "loss": 0.3448, "num_tokens": 2075879044.0, "step": 3278 }, { "epoch": 0.3877261440227031, "grad_norm": 0.15394236147403717, "learning_rate": 4.382832987723852e-05, "loss": 0.3824, "num_tokens": 2076478312.0, "step": 3279 }, { "epoch": 0.38784438926333215, "grad_norm": 0.14793676137924194, "learning_rate": 4.381885661458657e-05, "loss": 0.3348, "num_tokens": 2077115296.0, "step": 3280 }, { "epoch": 0.38796263450396123, "grad_norm": 0.14533644914627075, "learning_rate": 4.3809381765085324e-05, "loss": 0.3312, "num_tokens": 2077746176.0, "step": 3281 }, { "epoch": 0.38808087974459027, "grad_norm": 0.15562286972999573, "learning_rate": 4.37999053301245e-05, "loss": 0.3758, "num_tokens": 2078385428.0, "step": 3282 }, { "epoch": 0.38819912498521936, "grad_norm": 0.14454635977745056, "learning_rate": 4.379042731109406e-05, "loss": 0.3605, "num_tokens": 2079020073.0, "step": 3283 }, { "epoch": 0.3883173702258484, "grad_norm": 0.12940354645252228, "learning_rate": 4.3780947709384186e-05, "loss": 0.337, "num_tokens": 2079657858.0, "step": 3284 }, { "epoch": 0.3884356154664775, "grad_norm": 0.1356223225593567, "learning_rate": 4.377146652638528e-05, "loss": 0.3732, "num_tokens": 2080296478.0, "step": 3285 }, { "epoch": 0.3885538607071065, "grad_norm": 0.14293581247329712, "learning_rate": 4.3761983763488e-05, "loss": 0.38, "num_tokens": 2080926525.0, "step": 3286 }, { "epoch": 0.3886721059477356, "grad_norm": 0.1544078141450882, "learning_rate": 4.375249942208323e-05, "loss": 0.3508, "num_tokens": 2081564301.0, "step": 3287 }, { "epoch": 0.3887903511883647, "grad_norm": 0.14788632094860077, "learning_rate": 4.374301350356205e-05, "loss": 0.3436, "num_tokens": 2082185217.0, "step": 3288 }, { "epoch": 0.3889085964289937, "grad_norm": 0.14682015776634216, "learning_rate": 4.373352600931583e-05, "loss": 0.3801, "num_tokens": 2082823628.0, "step": 3289 }, { "epoch": 0.3890268416696228, "grad_norm": 0.13958820700645447, "learning_rate": 4.3724036940736145e-05, "loss": 0.3407, "num_tokens": 2083457665.0, "step": 3290 }, { "epoch": 0.38914508691025185, "grad_norm": 0.13521619141101837, "learning_rate": 4.3714546299214784e-05, "loss": 0.3284, "num_tokens": 2084093557.0, "step": 3291 }, { "epoch": 0.38926333215088094, "grad_norm": 0.15325507521629333, "learning_rate": 4.370505408614378e-05, "loss": 0.3388, "num_tokens": 2084729456.0, "step": 3292 }, { "epoch": 0.38938157739150997, "grad_norm": 0.13952074944972992, "learning_rate": 4.369556030291541e-05, "loss": 0.3376, "num_tokens": 2085367808.0, "step": 3293 }, { "epoch": 0.38949982263213906, "grad_norm": 0.15270081162452698, "learning_rate": 4.3686064950922145e-05, "loss": 0.3743, "num_tokens": 2086002692.0, "step": 3294 }, { "epoch": 0.38961806787276815, "grad_norm": 0.13789314031600952, "learning_rate": 4.367656803155675e-05, "loss": 0.3331, "num_tokens": 2086626005.0, "step": 3295 }, { "epoch": 0.3897363131133972, "grad_norm": 0.1582498997449875, "learning_rate": 4.3667069546212146e-05, "loss": 0.3884, "num_tokens": 2087262436.0, "step": 3296 }, { "epoch": 0.38985455835402627, "grad_norm": 0.148856058716774, "learning_rate": 4.365756949628152e-05, "loss": 0.3669, "num_tokens": 2087897557.0, "step": 3297 }, { "epoch": 0.3899728035946553, "grad_norm": 0.15220125019550323, "learning_rate": 4.36480678831583e-05, "loss": 0.3447, "num_tokens": 2088536210.0, "step": 3298 }, { "epoch": 0.3900910488352844, "grad_norm": 0.1491822749376297, "learning_rate": 4.3638564708236144e-05, "loss": 0.3411, "num_tokens": 2089163820.0, "step": 3299 }, { "epoch": 0.3902092940759134, "grad_norm": 0.13953432440757751, "learning_rate": 4.362905997290889e-05, "loss": 0.327, "num_tokens": 2089798678.0, "step": 3300 }, { "epoch": 0.3903275393165425, "grad_norm": 0.1499200165271759, "learning_rate": 4.361955367857065e-05, "loss": 0.3466, "num_tokens": 2090430979.0, "step": 3301 }, { "epoch": 0.39044578455717155, "grad_norm": 0.1654845029115677, "learning_rate": 4.361004582661575e-05, "loss": 0.3734, "num_tokens": 2091066026.0, "step": 3302 }, { "epoch": 0.39056402979780064, "grad_norm": 0.15039396286010742, "learning_rate": 4.360053641843877e-05, "loss": 0.4122, "num_tokens": 2091700753.0, "step": 3303 }, { "epoch": 0.3906822750384297, "grad_norm": 0.1418847292661667, "learning_rate": 4.3591025455434476e-05, "loss": 0.3781, "num_tokens": 2092320791.0, "step": 3304 }, { "epoch": 0.39080052027905876, "grad_norm": 0.13507872819900513, "learning_rate": 4.35815129389979e-05, "loss": 0.3508, "num_tokens": 2092958592.0, "step": 3305 }, { "epoch": 0.39091876551968785, "grad_norm": 0.14857891201972961, "learning_rate": 4.3571998870524264e-05, "loss": 0.356, "num_tokens": 2093593471.0, "step": 3306 }, { "epoch": 0.3910370107603169, "grad_norm": 0.13823114335536957, "learning_rate": 4.3562483251409064e-05, "loss": 0.3242, "num_tokens": 2094228493.0, "step": 3307 }, { "epoch": 0.391155256000946, "grad_norm": 0.1461930274963379, "learning_rate": 4.355296608304797e-05, "loss": 0.3378, "num_tokens": 2094865460.0, "step": 3308 }, { "epoch": 0.391273501241575, "grad_norm": 0.14494043588638306, "learning_rate": 4.354344736683691e-05, "loss": 0.363, "num_tokens": 2095498887.0, "step": 3309 }, { "epoch": 0.3913917464822041, "grad_norm": 0.1376677006483078, "learning_rate": 4.3533927104172056e-05, "loss": 0.3685, "num_tokens": 2096135422.0, "step": 3310 }, { "epoch": 0.39150999172283313, "grad_norm": 0.16569623351097107, "learning_rate": 4.352440529644979e-05, "loss": 0.4153, "num_tokens": 2096769791.0, "step": 3311 }, { "epoch": 0.3916282369634622, "grad_norm": 0.13675329089164734, "learning_rate": 4.3514881945066684e-05, "loss": 0.3487, "num_tokens": 2097406558.0, "step": 3312 }, { "epoch": 0.3917464822040913, "grad_norm": 0.13461631536483765, "learning_rate": 4.350535705141959e-05, "loss": 0.3665, "num_tokens": 2098044308.0, "step": 3313 }, { "epoch": 0.39186472744472034, "grad_norm": 0.1416393369436264, "learning_rate": 4.3495830616905574e-05, "loss": 0.3697, "num_tokens": 2098680995.0, "step": 3314 }, { "epoch": 0.39198297268534943, "grad_norm": 0.1701831817626953, "learning_rate": 4.3486302642921917e-05, "loss": 0.416, "num_tokens": 2099319094.0, "step": 3315 }, { "epoch": 0.39210121792597846, "grad_norm": 0.12384967505931854, "learning_rate": 4.347677313086611e-05, "loss": 0.3175, "num_tokens": 2099952649.0, "step": 3316 }, { "epoch": 0.39221946316660755, "grad_norm": 0.16025914251804352, "learning_rate": 4.346724208213591e-05, "loss": 0.3665, "num_tokens": 2100589835.0, "step": 3317 }, { "epoch": 0.3923377084072366, "grad_norm": 0.147787943482399, "learning_rate": 4.345770949812927e-05, "loss": 0.3471, "num_tokens": 2101222452.0, "step": 3318 }, { "epoch": 0.3924559536478657, "grad_norm": 0.14082087576389313, "learning_rate": 4.3448175380244366e-05, "loss": 0.3938, "num_tokens": 2101856364.0, "step": 3319 }, { "epoch": 0.39257419888849476, "grad_norm": 0.14730657637119293, "learning_rate": 4.343863972987964e-05, "loss": 0.3577, "num_tokens": 2102482912.0, "step": 3320 }, { "epoch": 0.3926924441291238, "grad_norm": 0.13763591647148132, "learning_rate": 4.342910254843369e-05, "loss": 0.3467, "num_tokens": 2103118205.0, "step": 3321 }, { "epoch": 0.3928106893697529, "grad_norm": 0.14087674021720886, "learning_rate": 4.341956383730539e-05, "loss": 0.3265, "num_tokens": 2103750495.0, "step": 3322 }, { "epoch": 0.3929289346103819, "grad_norm": 0.1520490050315857, "learning_rate": 4.3410023597893834e-05, "loss": 0.3561, "num_tokens": 2104381178.0, "step": 3323 }, { "epoch": 0.393047179851011, "grad_norm": 0.13256071507930756, "learning_rate": 4.340048183159833e-05, "loss": 0.3187, "num_tokens": 2105017735.0, "step": 3324 }, { "epoch": 0.39316542509164004, "grad_norm": 0.13749298453330994, "learning_rate": 4.339093853981839e-05, "loss": 0.3529, "num_tokens": 2105655241.0, "step": 3325 }, { "epoch": 0.39328367033226913, "grad_norm": 0.15091228485107422, "learning_rate": 4.33813937239538e-05, "loss": 0.3715, "num_tokens": 2106289558.0, "step": 3326 }, { "epoch": 0.39340191557289816, "grad_norm": 0.1507575809955597, "learning_rate": 4.3371847385404516e-05, "loss": 0.3831, "num_tokens": 2106928122.0, "step": 3327 }, { "epoch": 0.39352016081352725, "grad_norm": 0.14752578735351562, "learning_rate": 4.3362299525570764e-05, "loss": 0.3717, "num_tokens": 2107560615.0, "step": 3328 }, { "epoch": 0.39363840605415634, "grad_norm": 0.13505983352661133, "learning_rate": 4.3352750145852954e-05, "loss": 0.3596, "num_tokens": 2108199455.0, "step": 3329 }, { "epoch": 0.3937566512947854, "grad_norm": 0.13878434896469116, "learning_rate": 4.3343199247651725e-05, "loss": 0.3282, "num_tokens": 2108836521.0, "step": 3330 }, { "epoch": 0.39387489653541446, "grad_norm": 0.12713849544525146, "learning_rate": 4.333364683236798e-05, "loss": 0.2831, "num_tokens": 2109451315.0, "step": 3331 }, { "epoch": 0.3939931417760435, "grad_norm": 0.16113591194152832, "learning_rate": 4.332409290140279e-05, "loss": 0.3889, "num_tokens": 2110089177.0, "step": 3332 }, { "epoch": 0.3941113870166726, "grad_norm": 0.1551305651664734, "learning_rate": 4.3314537456157486e-05, "loss": 0.3568, "num_tokens": 2110717680.0, "step": 3333 }, { "epoch": 0.3942296322573016, "grad_norm": 0.1341850310564041, "learning_rate": 4.33049804980336e-05, "loss": 0.3124, "num_tokens": 2111351244.0, "step": 3334 }, { "epoch": 0.3943478774979307, "grad_norm": 0.13533611595630646, "learning_rate": 4.3295422028432896e-05, "loss": 0.3073, "num_tokens": 2111986787.0, "step": 3335 }, { "epoch": 0.3944661227385598, "grad_norm": 0.16275104880332947, "learning_rate": 4.328586204875735e-05, "loss": 0.3619, "num_tokens": 2112626351.0, "step": 3336 }, { "epoch": 0.39458436797918883, "grad_norm": 0.15065814554691315, "learning_rate": 4.327630056040917e-05, "loss": 0.3328, "num_tokens": 2113251347.0, "step": 3337 }, { "epoch": 0.3947026132198179, "grad_norm": 0.14862404763698578, "learning_rate": 4.326673756479078e-05, "loss": 0.3826, "num_tokens": 2113883836.0, "step": 3338 }, { "epoch": 0.39482085846044696, "grad_norm": 0.1399395614862442, "learning_rate": 4.325717306330484e-05, "loss": 0.3192, "num_tokens": 2114496590.0, "step": 3339 }, { "epoch": 0.39493910370107604, "grad_norm": 0.15671762824058533, "learning_rate": 4.324760705735421e-05, "loss": 0.3623, "num_tokens": 2115130164.0, "step": 3340 }, { "epoch": 0.3950573489417051, "grad_norm": 0.13929963111877441, "learning_rate": 4.3238039548341966e-05, "loss": 0.3398, "num_tokens": 2115765903.0, "step": 3341 }, { "epoch": 0.39517559418233417, "grad_norm": 0.13438592851161957, "learning_rate": 4.3228470537671415e-05, "loss": 0.3367, "num_tokens": 2116397924.0, "step": 3342 }, { "epoch": 0.3952938394229632, "grad_norm": 0.14639262855052948, "learning_rate": 4.3218900026746116e-05, "loss": 0.3672, "num_tokens": 2117031763.0, "step": 3343 }, { "epoch": 0.3954120846635923, "grad_norm": 0.15291863679885864, "learning_rate": 4.320932801696979e-05, "loss": 0.3786, "num_tokens": 2117667748.0, "step": 3344 }, { "epoch": 0.3955303299042214, "grad_norm": 0.13712741434574127, "learning_rate": 4.319975450974641e-05, "loss": 0.3652, "num_tokens": 2118299710.0, "step": 3345 }, { "epoch": 0.3956485751448504, "grad_norm": 0.13572336733341217, "learning_rate": 4.3190179506480174e-05, "loss": 0.3521, "num_tokens": 2118936363.0, "step": 3346 }, { "epoch": 0.3957668203854795, "grad_norm": 0.14559650421142578, "learning_rate": 4.318060300857548e-05, "loss": 0.3627, "num_tokens": 2119562260.0, "step": 3347 }, { "epoch": 0.39588506562610853, "grad_norm": 0.1322968304157257, "learning_rate": 4.317102501743697e-05, "loss": 0.3106, "num_tokens": 2120199601.0, "step": 3348 }, { "epoch": 0.3960033108667376, "grad_norm": 0.14723928272724152, "learning_rate": 4.316144553446947e-05, "loss": 0.377, "num_tokens": 2120833856.0, "step": 3349 }, { "epoch": 0.39612155610736666, "grad_norm": 0.13437145948410034, "learning_rate": 4.315186456107806e-05, "loss": 0.3586, "num_tokens": 2121470625.0, "step": 3350 }, { "epoch": 0.39623980134799575, "grad_norm": 0.14519385993480682, "learning_rate": 4.3142282098668015e-05, "loss": 0.3251, "num_tokens": 2122106502.0, "step": 3351 }, { "epoch": 0.39635804658862483, "grad_norm": 0.13479699194431305, "learning_rate": 4.313269814864484e-05, "loss": 0.3315, "num_tokens": 2122738473.0, "step": 3352 }, { "epoch": 0.39647629182925387, "grad_norm": 0.14099228382110596, "learning_rate": 4.312311271241426e-05, "loss": 0.3452, "num_tokens": 2123377527.0, "step": 3353 }, { "epoch": 0.39659453706988296, "grad_norm": 0.1405223160982132, "learning_rate": 4.31135257913822e-05, "loss": 0.3181, "num_tokens": 2124005511.0, "step": 3354 }, { "epoch": 0.396712782310512, "grad_norm": 0.1536581963300705, "learning_rate": 4.3103937386954825e-05, "loss": 0.3242, "num_tokens": 2124642636.0, "step": 3355 }, { "epoch": 0.3968310275511411, "grad_norm": 0.1298004537820816, "learning_rate": 4.3094347500538515e-05, "loss": 0.3102, "num_tokens": 2125281435.0, "step": 3356 }, { "epoch": 0.3969492727917701, "grad_norm": 0.15071485936641693, "learning_rate": 4.3084756133539844e-05, "loss": 0.3741, "num_tokens": 2125919293.0, "step": 3357 }, { "epoch": 0.3970675180323992, "grad_norm": 0.13205087184906006, "learning_rate": 4.307516328736564e-05, "loss": 0.3177, "num_tokens": 2126555827.0, "step": 3358 }, { "epoch": 0.39718576327302824, "grad_norm": 0.14192543923854828, "learning_rate": 4.3065568963422907e-05, "loss": 0.339, "num_tokens": 2127188856.0, "step": 3359 }, { "epoch": 0.3973040085136573, "grad_norm": 0.1466962695121765, "learning_rate": 4.30559731631189e-05, "loss": 0.351, "num_tokens": 2127824256.0, "step": 3360 }, { "epoch": 0.3974222537542864, "grad_norm": 0.13337838649749756, "learning_rate": 4.3046375887861074e-05, "loss": 0.3417, "num_tokens": 2128459495.0, "step": 3361 }, { "epoch": 0.39754049899491545, "grad_norm": 0.1481332778930664, "learning_rate": 4.303677713905711e-05, "loss": 0.3603, "num_tokens": 2129087492.0, "step": 3362 }, { "epoch": 0.39765874423554454, "grad_norm": 0.13609378039836884, "learning_rate": 4.302717691811489e-05, "loss": 0.3618, "num_tokens": 2129726387.0, "step": 3363 }, { "epoch": 0.39777698947617357, "grad_norm": 0.12124049663543701, "learning_rate": 4.3017575226442527e-05, "loss": 0.3076, "num_tokens": 2130365240.0, "step": 3364 }, { "epoch": 0.39789523471680266, "grad_norm": 0.1475883573293686, "learning_rate": 4.300797206544833e-05, "loss": 0.3676, "num_tokens": 2130987000.0, "step": 3365 }, { "epoch": 0.3980134799574317, "grad_norm": 0.12400784343481064, "learning_rate": 4.299836743654085e-05, "loss": 0.332, "num_tokens": 2131624699.0, "step": 3366 }, { "epoch": 0.3981317251980608, "grad_norm": 0.12216700613498688, "learning_rate": 4.2988761341128836e-05, "loss": 0.3279, "num_tokens": 2132255196.0, "step": 3367 }, { "epoch": 0.3982499704386898, "grad_norm": 0.13942980766296387, "learning_rate": 4.2979153780621256e-05, "loss": 0.3261, "num_tokens": 2132890193.0, "step": 3368 }, { "epoch": 0.3983682156793189, "grad_norm": 0.15407603979110718, "learning_rate": 4.29695447564273e-05, "loss": 0.3514, "num_tokens": 2133524321.0, "step": 3369 }, { "epoch": 0.398486460919948, "grad_norm": 0.12186837941408157, "learning_rate": 4.2959934269956354e-05, "loss": 0.3444, "num_tokens": 2134162646.0, "step": 3370 }, { "epoch": 0.398604706160577, "grad_norm": 0.13027767837047577, "learning_rate": 4.2950322322618046e-05, "loss": 0.3444, "num_tokens": 2134798081.0, "step": 3371 }, { "epoch": 0.3987229514012061, "grad_norm": 0.1405530720949173, "learning_rate": 4.294070891582218e-05, "loss": 0.3508, "num_tokens": 2135429363.0, "step": 3372 }, { "epoch": 0.39884119664183515, "grad_norm": 0.13839198648929596, "learning_rate": 4.293109405097882e-05, "loss": 0.3472, "num_tokens": 2136064458.0, "step": 3373 }, { "epoch": 0.39895944188246424, "grad_norm": 0.1407589167356491, "learning_rate": 4.292147772949819e-05, "loss": 0.3373, "num_tokens": 2136696971.0, "step": 3374 }, { "epoch": 0.39907768712309327, "grad_norm": 0.14099185168743134, "learning_rate": 4.29118599527908e-05, "loss": 0.3655, "num_tokens": 2137331565.0, "step": 3375 }, { "epoch": 0.39919593236372236, "grad_norm": 0.14842982590198517, "learning_rate": 4.29022407222673e-05, "loss": 0.4025, "num_tokens": 2137967736.0, "step": 3376 }, { "epoch": 0.39931417760435145, "grad_norm": 0.14206545054912567, "learning_rate": 4.28926200393386e-05, "loss": 0.3572, "num_tokens": 2138601285.0, "step": 3377 }, { "epoch": 0.3994324228449805, "grad_norm": 0.13284285366535187, "learning_rate": 4.288299790541579e-05, "loss": 0.3481, "num_tokens": 2139237290.0, "step": 3378 }, { "epoch": 0.3995506680856096, "grad_norm": 0.1438857465982437, "learning_rate": 4.2873374321910214e-05, "loss": 0.3789, "num_tokens": 2139858165.0, "step": 3379 }, { "epoch": 0.3996689133262386, "grad_norm": 0.13993903994560242, "learning_rate": 4.2863749290233395e-05, "loss": 0.3377, "num_tokens": 2140497341.0, "step": 3380 }, { "epoch": 0.3997871585668677, "grad_norm": 0.1496640294790268, "learning_rate": 4.2854122811797075e-05, "loss": 0.3535, "num_tokens": 2141129675.0, "step": 3381 }, { "epoch": 0.39990540380749673, "grad_norm": 0.1428624540567398, "learning_rate": 4.2844494888013224e-05, "loss": 0.379, "num_tokens": 2141763747.0, "step": 3382 }, { "epoch": 0.4000236490481258, "grad_norm": 0.14122195541858673, "learning_rate": 4.283486552029399e-05, "loss": 0.3271, "num_tokens": 2142395673.0, "step": 3383 }, { "epoch": 0.40014189428875485, "grad_norm": 0.15827998518943787, "learning_rate": 4.282523471005177e-05, "loss": 0.3549, "num_tokens": 2143034729.0, "step": 3384 }, { "epoch": 0.40026013952938394, "grad_norm": 0.15658505260944366, "learning_rate": 4.281560245869916e-05, "loss": 0.4031, "num_tokens": 2143665651.0, "step": 3385 }, { "epoch": 0.40037838477001303, "grad_norm": 0.13721495866775513, "learning_rate": 4.280596876764896e-05, "loss": 0.3898, "num_tokens": 2144304199.0, "step": 3386 }, { "epoch": 0.40049663001064206, "grad_norm": 0.14921781420707703, "learning_rate": 4.279633363831419e-05, "loss": 0.343, "num_tokens": 2144941070.0, "step": 3387 }, { "epoch": 0.40061487525127115, "grad_norm": 0.17930439114570618, "learning_rate": 4.2786697072108064e-05, "loss": 0.3696, "num_tokens": 2145577644.0, "step": 3388 }, { "epoch": 0.4007331204919002, "grad_norm": 0.12927046418190002, "learning_rate": 4.2777059070444026e-05, "loss": 0.324, "num_tokens": 2146211737.0, "step": 3389 }, { "epoch": 0.4008513657325293, "grad_norm": 0.15112674236297607, "learning_rate": 4.276741963473573e-05, "loss": 0.3495, "num_tokens": 2146843473.0, "step": 3390 }, { "epoch": 0.4009696109731583, "grad_norm": 0.1612502783536911, "learning_rate": 4.275777876639703e-05, "loss": 0.3524, "num_tokens": 2147474116.0, "step": 3391 }, { "epoch": 0.4010878562137874, "grad_norm": 0.1364365816116333, "learning_rate": 4.2748136466842e-05, "loss": 0.339, "num_tokens": 2148110000.0, "step": 3392 }, { "epoch": 0.4012061014544165, "grad_norm": 0.1490819901227951, "learning_rate": 4.273849273748491e-05, "loss": 0.3849, "num_tokens": 2148745048.0, "step": 3393 }, { "epoch": 0.4013243466950455, "grad_norm": 0.1368882954120636, "learning_rate": 4.2728847579740254e-05, "loss": 0.3365, "num_tokens": 2149384037.0, "step": 3394 }, { "epoch": 0.4014425919356746, "grad_norm": 0.14275877177715302, "learning_rate": 4.271920099502272e-05, "loss": 0.3325, "num_tokens": 2150017441.0, "step": 3395 }, { "epoch": 0.40156083717630364, "grad_norm": 0.15244430303573608, "learning_rate": 4.2709552984747234e-05, "loss": 0.3604, "num_tokens": 2150651722.0, "step": 3396 }, { "epoch": 0.40167908241693273, "grad_norm": 0.15143625438213348, "learning_rate": 4.269990355032889e-05, "loss": 0.3825, "num_tokens": 2151282272.0, "step": 3397 }, { "epoch": 0.40179732765756176, "grad_norm": 0.14236131310462952, "learning_rate": 4.269025269318302e-05, "loss": 0.3739, "num_tokens": 2151921921.0, "step": 3398 }, { "epoch": 0.40191557289819085, "grad_norm": 0.14090564846992493, "learning_rate": 4.268060041472518e-05, "loss": 0.3788, "num_tokens": 2152556557.0, "step": 3399 }, { "epoch": 0.4020338181388199, "grad_norm": 0.16698665916919708, "learning_rate": 4.267094671637108e-05, "loss": 0.3703, "num_tokens": 2153191618.0, "step": 3400 }, { "epoch": 0.402152063379449, "grad_norm": 0.14072498679161072, "learning_rate": 4.266129159953669e-05, "loss": 0.3763, "num_tokens": 2153826609.0, "step": 3401 }, { "epoch": 0.40227030862007807, "grad_norm": 0.12962941825389862, "learning_rate": 4.265163506563815e-05, "loss": 0.3371, "num_tokens": 2154466079.0, "step": 3402 }, { "epoch": 0.4023885538607071, "grad_norm": 0.15119469165802002, "learning_rate": 4.2641977116091855e-05, "loss": 0.3657, "num_tokens": 2155103105.0, "step": 3403 }, { "epoch": 0.4025067991013362, "grad_norm": 0.1494438797235489, "learning_rate": 4.263231775231436e-05, "loss": 0.3548, "num_tokens": 2155740567.0, "step": 3404 }, { "epoch": 0.4026250443419652, "grad_norm": 0.2128211408853531, "learning_rate": 4.262265697572244e-05, "loss": 0.3498, "num_tokens": 2156375393.0, "step": 3405 }, { "epoch": 0.4027432895825943, "grad_norm": 0.13440506160259247, "learning_rate": 4.26129947877331e-05, "loss": 0.3521, "num_tokens": 2157000606.0, "step": 3406 }, { "epoch": 0.40286153482322334, "grad_norm": 0.1273060292005539, "learning_rate": 4.2603331189763526e-05, "loss": 0.3197, "num_tokens": 2157640204.0, "step": 3407 }, { "epoch": 0.40297978006385243, "grad_norm": 0.14308364689350128, "learning_rate": 4.259366618323112e-05, "loss": 0.3409, "num_tokens": 2158275820.0, "step": 3408 }, { "epoch": 0.4030980253044815, "grad_norm": 0.13796725869178772, "learning_rate": 4.258399976955349e-05, "loss": 0.3469, "num_tokens": 2158907962.0, "step": 3409 }, { "epoch": 0.40321627054511056, "grad_norm": 0.15192459523677826, "learning_rate": 4.257433195014846e-05, "loss": 0.3671, "num_tokens": 2159534079.0, "step": 3410 }, { "epoch": 0.40333451578573964, "grad_norm": 0.13239039480686188, "learning_rate": 4.256466272643405e-05, "loss": 0.3945, "num_tokens": 2160173546.0, "step": 3411 }, { "epoch": 0.4034527610263687, "grad_norm": 0.1365455836057663, "learning_rate": 4.255499209982848e-05, "loss": 0.3361, "num_tokens": 2160806509.0, "step": 3412 }, { "epoch": 0.40357100626699777, "grad_norm": 0.13477906584739685, "learning_rate": 4.254532007175019e-05, "loss": 0.3434, "num_tokens": 2161441000.0, "step": 3413 }, { "epoch": 0.4036892515076268, "grad_norm": 0.14106205105781555, "learning_rate": 4.253564664361782e-05, "loss": 0.3837, "num_tokens": 2162072451.0, "step": 3414 }, { "epoch": 0.4038074967482559, "grad_norm": 0.13717487454414368, "learning_rate": 4.25259718168502e-05, "loss": 0.3762, "num_tokens": 2162704670.0, "step": 3415 }, { "epoch": 0.4039257419888849, "grad_norm": 0.13567288219928741, "learning_rate": 4.2516295592866405e-05, "loss": 0.3784, "num_tokens": 2163343540.0, "step": 3416 }, { "epoch": 0.404043987229514, "grad_norm": 0.15043222904205322, "learning_rate": 4.250661797308565e-05, "loss": 0.3772, "num_tokens": 2163979826.0, "step": 3417 }, { "epoch": 0.4041622324701431, "grad_norm": 0.15035143494606018, "learning_rate": 4.249693895892744e-05, "loss": 0.3879, "num_tokens": 2164613951.0, "step": 3418 }, { "epoch": 0.40428047771077213, "grad_norm": 0.12976054847240448, "learning_rate": 4.248725855181141e-05, "loss": 0.371, "num_tokens": 2165248364.0, "step": 3419 }, { "epoch": 0.4043987229514012, "grad_norm": 0.13284435868263245, "learning_rate": 4.247757675315745e-05, "loss": 0.3579, "num_tokens": 2165861579.0, "step": 3420 }, { "epoch": 0.40451696819203026, "grad_norm": 0.129765123128891, "learning_rate": 4.2467893564385596e-05, "loss": 0.3396, "num_tokens": 2166499730.0, "step": 3421 }, { "epoch": 0.40463521343265935, "grad_norm": 0.13528721034526825, "learning_rate": 4.245820898691615e-05, "loss": 0.3181, "num_tokens": 2167134924.0, "step": 3422 }, { "epoch": 0.4047534586732884, "grad_norm": 0.14317132532596588, "learning_rate": 4.244852302216959e-05, "loss": 0.3613, "num_tokens": 2167768659.0, "step": 3423 }, { "epoch": 0.40487170391391747, "grad_norm": 0.14541728794574738, "learning_rate": 4.243883567156659e-05, "loss": 0.3523, "num_tokens": 2168405781.0, "step": 3424 }, { "epoch": 0.4049899491545465, "grad_norm": 0.13753165304660797, "learning_rate": 4.242914693652805e-05, "loss": 0.3312, "num_tokens": 2169038136.0, "step": 3425 }, { "epoch": 0.4051081943951756, "grad_norm": 0.1390046924352646, "learning_rate": 4.2419456818475044e-05, "loss": 0.3587, "num_tokens": 2169671942.0, "step": 3426 }, { "epoch": 0.4052264396358047, "grad_norm": 0.13174661993980408, "learning_rate": 4.240976531882888e-05, "loss": 0.3371, "num_tokens": 2170306342.0, "step": 3427 }, { "epoch": 0.4053446848764337, "grad_norm": 0.13509501516819, "learning_rate": 4.2400072439011035e-05, "loss": 0.3242, "num_tokens": 2170941892.0, "step": 3428 }, { "epoch": 0.4054629301170628, "grad_norm": 0.11782538890838623, "learning_rate": 4.2390378180443225e-05, "loss": 0.3109, "num_tokens": 2171578000.0, "step": 3429 }, { "epoch": 0.40558117535769184, "grad_norm": 0.14273057878017426, "learning_rate": 4.2380682544547334e-05, "loss": 0.3505, "num_tokens": 2172216631.0, "step": 3430 }, { "epoch": 0.4056994205983209, "grad_norm": 0.12517856061458588, "learning_rate": 4.237098553274546e-05, "loss": 0.2995, "num_tokens": 2172855096.0, "step": 3431 }, { "epoch": 0.40581766583894996, "grad_norm": 0.13439908623695374, "learning_rate": 4.2361287146459926e-05, "loss": 0.3268, "num_tokens": 2173484268.0, "step": 3432 }, { "epoch": 0.40593591107957905, "grad_norm": 0.1292080134153366, "learning_rate": 4.2351587387113234e-05, "loss": 0.3574, "num_tokens": 2174120413.0, "step": 3433 }, { "epoch": 0.40605415632020814, "grad_norm": 0.1257181316614151, "learning_rate": 4.234188625612808e-05, "loss": 0.3333, "num_tokens": 2174744993.0, "step": 3434 }, { "epoch": 0.40617240156083717, "grad_norm": 0.13289429247379303, "learning_rate": 4.233218375492737e-05, "loss": 0.2974, "num_tokens": 2175381175.0, "step": 3435 }, { "epoch": 0.40629064680146626, "grad_norm": 0.16252991557121277, "learning_rate": 4.2322479884934225e-05, "loss": 0.404, "num_tokens": 2176000322.0, "step": 3436 }, { "epoch": 0.4064088920420953, "grad_norm": 0.12013671547174454, "learning_rate": 4.231277464757194e-05, "loss": 0.3409, "num_tokens": 2176631990.0, "step": 3437 }, { "epoch": 0.4065271372827244, "grad_norm": 0.1394612342119217, "learning_rate": 4.230306804426403e-05, "loss": 0.3516, "num_tokens": 2177264618.0, "step": 3438 }, { "epoch": 0.4066453825233534, "grad_norm": 0.13867303729057312, "learning_rate": 4.229336007643421e-05, "loss": 0.3305, "num_tokens": 2177896983.0, "step": 3439 }, { "epoch": 0.4067636277639825, "grad_norm": 0.1321728378534317, "learning_rate": 4.2283650745506385e-05, "loss": 0.3332, "num_tokens": 2178535400.0, "step": 3440 }, { "epoch": 0.40688187300461154, "grad_norm": 0.14185450971126556, "learning_rate": 4.227394005290468e-05, "loss": 0.3678, "num_tokens": 2179168670.0, "step": 3441 }, { "epoch": 0.4070001182452406, "grad_norm": 0.14697973430156708, "learning_rate": 4.226422800005338e-05, "loss": 0.393, "num_tokens": 2179798760.0, "step": 3442 }, { "epoch": 0.4071183634858697, "grad_norm": 0.13403856754302979, "learning_rate": 4.225451458837701e-05, "loss": 0.3365, "num_tokens": 2180428748.0, "step": 3443 }, { "epoch": 0.40723660872649875, "grad_norm": 0.13773559033870697, "learning_rate": 4.224479981930028e-05, "loss": 0.3675, "num_tokens": 2181059601.0, "step": 3444 }, { "epoch": 0.40735485396712784, "grad_norm": 0.15010493993759155, "learning_rate": 4.223508369424809e-05, "loss": 0.4132, "num_tokens": 2181696143.0, "step": 3445 }, { "epoch": 0.4074730992077569, "grad_norm": 0.1381932944059372, "learning_rate": 4.2225366214645546e-05, "loss": 0.3641, "num_tokens": 2182324681.0, "step": 3446 }, { "epoch": 0.40759134444838596, "grad_norm": 0.1343626081943512, "learning_rate": 4.221564738191796e-05, "loss": 0.3449, "num_tokens": 2182956767.0, "step": 3447 }, { "epoch": 0.407709589689015, "grad_norm": 0.12731459736824036, "learning_rate": 4.220592719749085e-05, "loss": 0.3096, "num_tokens": 2183584224.0, "step": 3448 }, { "epoch": 0.4078278349296441, "grad_norm": 0.1450502872467041, "learning_rate": 4.219620566278988e-05, "loss": 0.3495, "num_tokens": 2184212168.0, "step": 3449 }, { "epoch": 0.4079460801702732, "grad_norm": 0.14873746037483215, "learning_rate": 4.218648277924099e-05, "loss": 0.3315, "num_tokens": 2184838285.0, "step": 3450 }, { "epoch": 0.4080643254109022, "grad_norm": 0.1380709707736969, "learning_rate": 4.217675854827025e-05, "loss": 0.3617, "num_tokens": 2185472481.0, "step": 3451 }, { "epoch": 0.4081825706515313, "grad_norm": 0.1693057268857956, "learning_rate": 4.2167032971303975e-05, "loss": 0.4166, "num_tokens": 2186111034.0, "step": 3452 }, { "epoch": 0.40830081589216033, "grad_norm": 0.13844488561153412, "learning_rate": 4.2157306049768634e-05, "loss": 0.3602, "num_tokens": 2186747812.0, "step": 3453 }, { "epoch": 0.4084190611327894, "grad_norm": 0.14750848710536957, "learning_rate": 4.2147577785090946e-05, "loss": 0.3713, "num_tokens": 2187384442.0, "step": 3454 }, { "epoch": 0.40853730637341845, "grad_norm": 0.14645788073539734, "learning_rate": 4.213784817869778e-05, "loss": 0.3488, "num_tokens": 2188023596.0, "step": 3455 }, { "epoch": 0.40865555161404754, "grad_norm": 0.13848792016506195, "learning_rate": 4.212811723201622e-05, "loss": 0.3807, "num_tokens": 2188659277.0, "step": 3456 }, { "epoch": 0.4087737968546766, "grad_norm": 0.13324366509914398, "learning_rate": 4.211838494647356e-05, "loss": 0.3225, "num_tokens": 2189290737.0, "step": 3457 }, { "epoch": 0.40889204209530566, "grad_norm": 0.14144651591777802, "learning_rate": 4.210865132349727e-05, "loss": 0.3712, "num_tokens": 2189927078.0, "step": 3458 }, { "epoch": 0.40901028733593475, "grad_norm": 0.1303066611289978, "learning_rate": 4.209891636451503e-05, "loss": 0.3158, "num_tokens": 2190563049.0, "step": 3459 }, { "epoch": 0.4091285325765638, "grad_norm": 0.13276982307434082, "learning_rate": 4.208918007095469e-05, "loss": 0.3317, "num_tokens": 2191198126.0, "step": 3460 }, { "epoch": 0.4092467778171929, "grad_norm": 0.14122037589550018, "learning_rate": 4.207944244424432e-05, "loss": 0.3401, "num_tokens": 2191833498.0, "step": 3461 }, { "epoch": 0.4093650230578219, "grad_norm": 0.12154078483581543, "learning_rate": 4.20697034858122e-05, "loss": 0.3145, "num_tokens": 2192469571.0, "step": 3462 }, { "epoch": 0.409483268298451, "grad_norm": 0.12884880602359772, "learning_rate": 4.205996319708677e-05, "loss": 0.3571, "num_tokens": 2193106780.0, "step": 3463 }, { "epoch": 0.40960151353908003, "grad_norm": 0.1337531954050064, "learning_rate": 4.205022157949669e-05, "loss": 0.3297, "num_tokens": 2193742669.0, "step": 3464 }, { "epoch": 0.4097197587797091, "grad_norm": 0.1416357308626175, "learning_rate": 4.20404786344708e-05, "loss": 0.333, "num_tokens": 2194379382.0, "step": 3465 }, { "epoch": 0.4098380040203382, "grad_norm": 0.14301729202270508, "learning_rate": 4.203073436343814e-05, "loss": 0.3526, "num_tokens": 2195014593.0, "step": 3466 }, { "epoch": 0.40995624926096724, "grad_norm": 0.12617382407188416, "learning_rate": 4.202098876782793e-05, "loss": 0.3287, "num_tokens": 2195635912.0, "step": 3467 }, { "epoch": 0.41007449450159633, "grad_norm": 0.138656347990036, "learning_rate": 4.201124184906963e-05, "loss": 0.3716, "num_tokens": 2196269814.0, "step": 3468 }, { "epoch": 0.41019273974222537, "grad_norm": 0.1390095204114914, "learning_rate": 4.200149360859285e-05, "loss": 0.3538, "num_tokens": 2196906432.0, "step": 3469 }, { "epoch": 0.41031098498285445, "grad_norm": 0.12725627422332764, "learning_rate": 4.19917440478274e-05, "loss": 0.3578, "num_tokens": 2197542143.0, "step": 3470 }, { "epoch": 0.4104292302234835, "grad_norm": 0.14754311740398407, "learning_rate": 4.198199316820331e-05, "loss": 0.3232, "num_tokens": 2198173215.0, "step": 3471 }, { "epoch": 0.4105474754641126, "grad_norm": 0.12785778939723969, "learning_rate": 4.1972240971150746e-05, "loss": 0.3458, "num_tokens": 2198812172.0, "step": 3472 }, { "epoch": 0.4106657207047416, "grad_norm": 0.14642608165740967, "learning_rate": 4.1962487458100146e-05, "loss": 0.3143, "num_tokens": 2199448827.0, "step": 3473 }, { "epoch": 0.4107839659453707, "grad_norm": 0.1291627436876297, "learning_rate": 4.1952732630482086e-05, "loss": 0.3475, "num_tokens": 2200085938.0, "step": 3474 }, { "epoch": 0.4109022111859998, "grad_norm": 0.13556760549545288, "learning_rate": 4.194297648972735e-05, "loss": 0.3438, "num_tokens": 2200724966.0, "step": 3475 }, { "epoch": 0.4110204564266288, "grad_norm": 0.1371416300535202, "learning_rate": 4.193321903726691e-05, "loss": 0.3738, "num_tokens": 2201359154.0, "step": 3476 }, { "epoch": 0.4111387016672579, "grad_norm": 0.14816583693027496, "learning_rate": 4.192346027453193e-05, "loss": 0.3842, "num_tokens": 2201989624.0, "step": 3477 }, { "epoch": 0.41125694690788694, "grad_norm": 0.14073611795902252, "learning_rate": 4.19137002029538e-05, "loss": 0.378, "num_tokens": 2202625507.0, "step": 3478 }, { "epoch": 0.41137519214851603, "grad_norm": 0.13431158661842346, "learning_rate": 4.190393882396403e-05, "loss": 0.3257, "num_tokens": 2203258191.0, "step": 3479 }, { "epoch": 0.41149343738914507, "grad_norm": 0.1322443038225174, "learning_rate": 4.18941761389944e-05, "loss": 0.3562, "num_tokens": 2203892809.0, "step": 3480 }, { "epoch": 0.41161168262977416, "grad_norm": 0.13481245934963226, "learning_rate": 4.188441214947682e-05, "loss": 0.3576, "num_tokens": 2204528665.0, "step": 3481 }, { "epoch": 0.4117299278704032, "grad_norm": 0.12985217571258545, "learning_rate": 4.187464685684343e-05, "loss": 0.3095, "num_tokens": 2205163136.0, "step": 3482 }, { "epoch": 0.4118481731110323, "grad_norm": 0.13570787012577057, "learning_rate": 4.186488026252656e-05, "loss": 0.3544, "num_tokens": 2205797849.0, "step": 3483 }, { "epoch": 0.41196641835166137, "grad_norm": 0.1337215155363083, "learning_rate": 4.18551123679587e-05, "loss": 0.3333, "num_tokens": 2206432266.0, "step": 3484 }, { "epoch": 0.4120846635922904, "grad_norm": 0.12432444840669632, "learning_rate": 4.184534317457256e-05, "loss": 0.34, "num_tokens": 2207063957.0, "step": 3485 }, { "epoch": 0.4122029088329195, "grad_norm": 0.13912075757980347, "learning_rate": 4.183557268380101e-05, "loss": 0.3474, "num_tokens": 2207699416.0, "step": 3486 }, { "epoch": 0.4123211540735485, "grad_norm": 0.13913863897323608, "learning_rate": 4.182580089707716e-05, "loss": 0.317, "num_tokens": 2208327931.0, "step": 3487 }, { "epoch": 0.4124393993141776, "grad_norm": 0.1396571844816208, "learning_rate": 4.1816027815834265e-05, "loss": 0.3441, "num_tokens": 2208938920.0, "step": 3488 }, { "epoch": 0.41255764455480665, "grad_norm": 0.14105142652988434, "learning_rate": 4.18062534415058e-05, "loss": 0.3611, "num_tokens": 2209574493.0, "step": 3489 }, { "epoch": 0.41267588979543574, "grad_norm": 0.14303766191005707, "learning_rate": 4.17964777755254e-05, "loss": 0.3876, "num_tokens": 2210209015.0, "step": 3490 }, { "epoch": 0.4127941350360648, "grad_norm": 0.14390628039836884, "learning_rate": 4.17867008193269e-05, "loss": 0.4019, "num_tokens": 2210847121.0, "step": 3491 }, { "epoch": 0.41291238027669386, "grad_norm": 0.13578028976917267, "learning_rate": 4.177692257434435e-05, "loss": 0.3431, "num_tokens": 2211483344.0, "step": 3492 }, { "epoch": 0.41303062551732295, "grad_norm": 0.13363789021968842, "learning_rate": 4.176714304201196e-05, "loss": 0.3325, "num_tokens": 2212118929.0, "step": 3493 }, { "epoch": 0.413148870757952, "grad_norm": 0.15023590624332428, "learning_rate": 4.1757362223764126e-05, "loss": 0.3531, "num_tokens": 2212752197.0, "step": 3494 }, { "epoch": 0.41326711599858107, "grad_norm": 0.1398572474718094, "learning_rate": 4.1747580121035464e-05, "loss": 0.3451, "num_tokens": 2213377541.0, "step": 3495 }, { "epoch": 0.4133853612392101, "grad_norm": 0.14035215973854065, "learning_rate": 4.173779673526073e-05, "loss": 0.3388, "num_tokens": 2214014893.0, "step": 3496 }, { "epoch": 0.4135036064798392, "grad_norm": 0.13972388207912445, "learning_rate": 4.1728012067874916e-05, "loss": 0.3292, "num_tokens": 2214652548.0, "step": 3497 }, { "epoch": 0.4136218517204682, "grad_norm": 0.15340398252010345, "learning_rate": 4.1718226120313184e-05, "loss": 0.4025, "num_tokens": 2215285552.0, "step": 3498 }, { "epoch": 0.4137400969610973, "grad_norm": 0.14221403002738953, "learning_rate": 4.170843889401088e-05, "loss": 0.361, "num_tokens": 2215918625.0, "step": 3499 }, { "epoch": 0.4138583422017264, "grad_norm": 0.14471471309661865, "learning_rate": 4.1698650390403534e-05, "loss": 0.3463, "num_tokens": 2216552912.0, "step": 3500 }, { "epoch": 0.41397658744235544, "grad_norm": 0.13499827682971954, "learning_rate": 4.168886061092687e-05, "loss": 0.3294, "num_tokens": 2217188927.0, "step": 3501 }, { "epoch": 0.4140948326829845, "grad_norm": 0.15284371376037598, "learning_rate": 4.1679069557016795e-05, "loss": 0.3488, "num_tokens": 2217824759.0, "step": 3502 }, { "epoch": 0.41421307792361356, "grad_norm": 0.15843680500984192, "learning_rate": 4.166927723010941e-05, "loss": 0.3927, "num_tokens": 2218461136.0, "step": 3503 }, { "epoch": 0.41433132316424265, "grad_norm": 0.1357354372739792, "learning_rate": 4.165948363164099e-05, "loss": 0.3251, "num_tokens": 2219087890.0, "step": 3504 }, { "epoch": 0.4144495684048717, "grad_norm": 0.13933934271335602, "learning_rate": 4.1649688763048034e-05, "loss": 0.3437, "num_tokens": 2219722715.0, "step": 3505 }, { "epoch": 0.41456781364550077, "grad_norm": 0.18172003328800201, "learning_rate": 4.1639892625767164e-05, "loss": 0.3956, "num_tokens": 2220358590.0, "step": 3506 }, { "epoch": 0.41468605888612986, "grad_norm": 0.1404004991054535, "learning_rate": 4.163009522123524e-05, "loss": 0.3408, "num_tokens": 2220991910.0, "step": 3507 }, { "epoch": 0.4148043041267589, "grad_norm": 0.14927715063095093, "learning_rate": 4.162029655088929e-05, "loss": 0.4178, "num_tokens": 2221625991.0, "step": 3508 }, { "epoch": 0.414922549367388, "grad_norm": 0.15280909836292267, "learning_rate": 4.161049661616653e-05, "loss": 0.3702, "num_tokens": 2222263148.0, "step": 3509 }, { "epoch": 0.415040794608017, "grad_norm": 0.1429884284734726, "learning_rate": 4.160069541850435e-05, "loss": 0.3904, "num_tokens": 2222902559.0, "step": 3510 }, { "epoch": 0.4151590398486461, "grad_norm": 0.14460358023643494, "learning_rate": 4.1590892959340334e-05, "loss": 0.3556, "num_tokens": 2223541562.0, "step": 3511 }, { "epoch": 0.41527728508927514, "grad_norm": 0.1349032074213028, "learning_rate": 4.158108924011227e-05, "loss": 0.3458, "num_tokens": 2224174124.0, "step": 3512 }, { "epoch": 0.41539553032990423, "grad_norm": 0.1488046795129776, "learning_rate": 4.157128426225809e-05, "loss": 0.3973, "num_tokens": 2224809283.0, "step": 3513 }, { "epoch": 0.41551377557053326, "grad_norm": 0.1328907310962677, "learning_rate": 4.156147802721595e-05, "loss": 0.3554, "num_tokens": 2225448482.0, "step": 3514 }, { "epoch": 0.41563202081116235, "grad_norm": 0.13789057731628418, "learning_rate": 4.155167053642415e-05, "loss": 0.3792, "num_tokens": 2226087755.0, "step": 3515 }, { "epoch": 0.41575026605179144, "grad_norm": 0.1453256458044052, "learning_rate": 4.154186179132124e-05, "loss": 0.3613, "num_tokens": 2226723304.0, "step": 3516 }, { "epoch": 0.4158685112924205, "grad_norm": 0.1442626267671585, "learning_rate": 4.153205179334586e-05, "loss": 0.3632, "num_tokens": 2227361932.0, "step": 3517 }, { "epoch": 0.41598675653304956, "grad_norm": 0.14274926483631134, "learning_rate": 4.152224054393694e-05, "loss": 0.3459, "num_tokens": 2227993476.0, "step": 3518 }, { "epoch": 0.4161050017736786, "grad_norm": 0.1480005979537964, "learning_rate": 4.151242804453349e-05, "loss": 0.3778, "num_tokens": 2228632371.0, "step": 3519 }, { "epoch": 0.4162232470143077, "grad_norm": 0.13541501760482788, "learning_rate": 4.1502614296574786e-05, "loss": 0.3521, "num_tokens": 2229265302.0, "step": 3520 }, { "epoch": 0.4163414922549367, "grad_norm": 0.1324397176504135, "learning_rate": 4.149279930150023e-05, "loss": 0.3243, "num_tokens": 2229895975.0, "step": 3521 }, { "epoch": 0.4164597374955658, "grad_norm": 0.15050506591796875, "learning_rate": 4.148298306074945e-05, "loss": 0.3626, "num_tokens": 2230527213.0, "step": 3522 }, { "epoch": 0.41657798273619484, "grad_norm": 0.1415170431137085, "learning_rate": 4.147316557576223e-05, "loss": 0.3578, "num_tokens": 2231144081.0, "step": 3523 }, { "epoch": 0.41669622797682393, "grad_norm": 0.14626160264015198, "learning_rate": 4.146334684797854e-05, "loss": 0.3321, "num_tokens": 2231777564.0, "step": 3524 }, { "epoch": 0.416814473217453, "grad_norm": 0.1472187489271164, "learning_rate": 4.1453526878838536e-05, "loss": 0.3694, "num_tokens": 2232404756.0, "step": 3525 }, { "epoch": 0.41693271845808205, "grad_norm": 0.13897337019443512, "learning_rate": 4.144370566978256e-05, "loss": 0.3447, "num_tokens": 2233034043.0, "step": 3526 }, { "epoch": 0.41705096369871114, "grad_norm": 0.13316428661346436, "learning_rate": 4.143388322225113e-05, "loss": 0.3398, "num_tokens": 2233668002.0, "step": 3527 }, { "epoch": 0.4171692089393402, "grad_norm": 0.12577737867832184, "learning_rate": 4.142405953768495e-05, "loss": 0.3239, "num_tokens": 2234298969.0, "step": 3528 }, { "epoch": 0.41728745417996926, "grad_norm": 0.13456016778945923, "learning_rate": 4.1414234617524905e-05, "loss": 0.3115, "num_tokens": 2234936895.0, "step": 3529 }, { "epoch": 0.4174056994205983, "grad_norm": 0.14213527739048004, "learning_rate": 4.140440846321206e-05, "loss": 0.3712, "num_tokens": 2235570755.0, "step": 3530 }, { "epoch": 0.4175239446612274, "grad_norm": 0.1337743103504181, "learning_rate": 4.1394581076187645e-05, "loss": 0.3353, "num_tokens": 2236201340.0, "step": 3531 }, { "epoch": 0.4176421899018565, "grad_norm": 0.1295437216758728, "learning_rate": 4.138475245789309e-05, "loss": 0.3403, "num_tokens": 2236834612.0, "step": 3532 }, { "epoch": 0.4177604351424855, "grad_norm": 0.14450348913669586, "learning_rate": 4.137492260977002e-05, "loss": 0.3713, "num_tokens": 2237471633.0, "step": 3533 }, { "epoch": 0.4178786803831146, "grad_norm": 0.14644913375377655, "learning_rate": 4.136509153326021e-05, "loss": 0.3537, "num_tokens": 2238101451.0, "step": 3534 }, { "epoch": 0.41799692562374363, "grad_norm": 0.13834308087825775, "learning_rate": 4.1355259229805624e-05, "loss": 0.337, "num_tokens": 2238738264.0, "step": 3535 }, { "epoch": 0.4181151708643727, "grad_norm": 0.13049569725990295, "learning_rate": 4.134542570084842e-05, "loss": 0.3444, "num_tokens": 2239374658.0, "step": 3536 }, { "epoch": 0.41823341610500175, "grad_norm": 0.1442946195602417, "learning_rate": 4.1335590947830915e-05, "loss": 0.3387, "num_tokens": 2240011143.0, "step": 3537 }, { "epoch": 0.41835166134563084, "grad_norm": 0.13925263285636902, "learning_rate": 4.1325754972195615e-05, "loss": 0.3498, "num_tokens": 2240648246.0, "step": 3538 }, { "epoch": 0.4184699065862599, "grad_norm": 0.13880211114883423, "learning_rate": 4.131591777538521e-05, "loss": 0.3218, "num_tokens": 2241284612.0, "step": 3539 }, { "epoch": 0.41858815182688897, "grad_norm": 0.13409270346164703, "learning_rate": 4.1306079358842564e-05, "loss": 0.2928, "num_tokens": 2241919024.0, "step": 3540 }, { "epoch": 0.41870639706751805, "grad_norm": 0.1246449202299118, "learning_rate": 4.1296239724010715e-05, "loss": 0.309, "num_tokens": 2242555197.0, "step": 3541 }, { "epoch": 0.4188246423081471, "grad_norm": 0.13481466472148895, "learning_rate": 4.128639887233291e-05, "loss": 0.3283, "num_tokens": 2243186217.0, "step": 3542 }, { "epoch": 0.4189428875487762, "grad_norm": 0.14230911433696747, "learning_rate": 4.1276556805252515e-05, "loss": 0.35, "num_tokens": 2243825320.0, "step": 3543 }, { "epoch": 0.4190611327894052, "grad_norm": 0.1405811458826065, "learning_rate": 4.126671352421313e-05, "loss": 0.352, "num_tokens": 2244460046.0, "step": 3544 }, { "epoch": 0.4191793780300343, "grad_norm": 0.14206711947917938, "learning_rate": 4.125686903065852e-05, "loss": 0.3713, "num_tokens": 2245090392.0, "step": 3545 }, { "epoch": 0.41929762327066333, "grad_norm": 0.1412377655506134, "learning_rate": 4.1247023326032586e-05, "loss": 0.373, "num_tokens": 2245726932.0, "step": 3546 }, { "epoch": 0.4194158685112924, "grad_norm": 0.14591875672340393, "learning_rate": 4.1237176411779484e-05, "loss": 0.3434, "num_tokens": 2246359632.0, "step": 3547 }, { "epoch": 0.4195341137519215, "grad_norm": 0.14009763300418854, "learning_rate": 4.1227328289343474e-05, "loss": 0.327, "num_tokens": 2246991182.0, "step": 3548 }, { "epoch": 0.41965235899255054, "grad_norm": 0.1243114173412323, "learning_rate": 4.121747896016904e-05, "loss": 0.3488, "num_tokens": 2247623833.0, "step": 3549 }, { "epoch": 0.41977060423317963, "grad_norm": 0.13717667758464813, "learning_rate": 4.120762842570081e-05, "loss": 0.3781, "num_tokens": 2248253623.0, "step": 3550 }, { "epoch": 0.41988884947380867, "grad_norm": 0.14726492762565613, "learning_rate": 4.1197776687383616e-05, "loss": 0.3821, "num_tokens": 2248890891.0, "step": 3551 }, { "epoch": 0.42000709471443776, "grad_norm": 0.13831256330013275, "learning_rate": 4.118792374666246e-05, "loss": 0.3961, "num_tokens": 2249529228.0, "step": 3552 }, { "epoch": 0.4201253399550668, "grad_norm": 0.1316251903772354, "learning_rate": 4.11780696049825e-05, "loss": 0.3585, "num_tokens": 2250167505.0, "step": 3553 }, { "epoch": 0.4202435851956959, "grad_norm": 0.1327817142009735, "learning_rate": 4.116821426378911e-05, "loss": 0.31, "num_tokens": 2250799510.0, "step": 3554 }, { "epoch": 0.4203618304363249, "grad_norm": 0.14416764676570892, "learning_rate": 4.115835772452779e-05, "loss": 0.3228, "num_tokens": 2251430574.0, "step": 3555 }, { "epoch": 0.420480075676954, "grad_norm": 0.14826732873916626, "learning_rate": 4.114849998864426e-05, "loss": 0.3483, "num_tokens": 2252066427.0, "step": 3556 }, { "epoch": 0.4205983209175831, "grad_norm": 0.14095957577228546, "learning_rate": 4.1138641057584395e-05, "loss": 0.3669, "num_tokens": 2252701522.0, "step": 3557 }, { "epoch": 0.4207165661582121, "grad_norm": 0.14075306057929993, "learning_rate": 4.1128780932794244e-05, "loss": 0.3442, "num_tokens": 2253339071.0, "step": 3558 }, { "epoch": 0.4208348113988412, "grad_norm": 0.14920516312122345, "learning_rate": 4.111891961572004e-05, "loss": 0.3596, "num_tokens": 2253977361.0, "step": 3559 }, { "epoch": 0.42095305663947025, "grad_norm": 0.1394956111907959, "learning_rate": 4.110905710780817e-05, "loss": 0.3469, "num_tokens": 2254614019.0, "step": 3560 }, { "epoch": 0.42107130188009934, "grad_norm": 0.15075185894966125, "learning_rate": 4.1099193410505236e-05, "loss": 0.3514, "num_tokens": 2255247680.0, "step": 3561 }, { "epoch": 0.42118954712072837, "grad_norm": 0.13829858601093292, "learning_rate": 4.108932852525797e-05, "loss": 0.3057, "num_tokens": 2255880763.0, "step": 3562 }, { "epoch": 0.42130779236135746, "grad_norm": 0.13777676224708557, "learning_rate": 4.107946245351332e-05, "loss": 0.3045, "num_tokens": 2256511772.0, "step": 3563 }, { "epoch": 0.42142603760198655, "grad_norm": 0.12840887904167175, "learning_rate": 4.106959519671835e-05, "loss": 0.3268, "num_tokens": 2257150798.0, "step": 3564 }, { "epoch": 0.4215442828426156, "grad_norm": 0.12945044040679932, "learning_rate": 4.105972675632037e-05, "loss": 0.332, "num_tokens": 2257781042.0, "step": 3565 }, { "epoch": 0.42166252808324467, "grad_norm": 0.142812117934227, "learning_rate": 4.104985713376681e-05, "loss": 0.356, "num_tokens": 2258419402.0, "step": 3566 }, { "epoch": 0.4217807733238737, "grad_norm": 0.14188243448734283, "learning_rate": 4.1039986330505285e-05, "loss": 0.3685, "num_tokens": 2259054299.0, "step": 3567 }, { "epoch": 0.4218990185645028, "grad_norm": 0.12117645889520645, "learning_rate": 4.10301143479836e-05, "loss": 0.3504, "num_tokens": 2259689222.0, "step": 3568 }, { "epoch": 0.4220172638051318, "grad_norm": 0.1342163383960724, "learning_rate": 4.1020241187649724e-05, "loss": 0.3373, "num_tokens": 2260320882.0, "step": 3569 }, { "epoch": 0.4221355090457609, "grad_norm": 0.1391115039587021, "learning_rate": 4.10103668509518e-05, "loss": 0.3561, "num_tokens": 2260951804.0, "step": 3570 }, { "epoch": 0.42225375428638995, "grad_norm": 0.12266550213098526, "learning_rate": 4.100049133933813e-05, "loss": 0.3374, "num_tokens": 2261585562.0, "step": 3571 }, { "epoch": 0.42237199952701904, "grad_norm": 0.15118497610092163, "learning_rate": 4.099061465425719e-05, "loss": 0.3549, "num_tokens": 2262217767.0, "step": 3572 }, { "epoch": 0.4224902447676481, "grad_norm": 0.1277872771024704, "learning_rate": 4.098073679715767e-05, "loss": 0.3207, "num_tokens": 2262854659.0, "step": 3573 }, { "epoch": 0.42260849000827716, "grad_norm": 0.13296069204807281, "learning_rate": 4.097085776948836e-05, "loss": 0.3304, "num_tokens": 2263487482.0, "step": 3574 }, { "epoch": 0.42272673524890625, "grad_norm": 0.1394803673028946, "learning_rate": 4.0960977572698275e-05, "loss": 0.3651, "num_tokens": 2264124850.0, "step": 3575 }, { "epoch": 0.4228449804895353, "grad_norm": 0.13460341095924377, "learning_rate": 4.095109620823661e-05, "loss": 0.3704, "num_tokens": 2264763680.0, "step": 3576 }, { "epoch": 0.42296322573016437, "grad_norm": 0.1488243192434311, "learning_rate": 4.094121367755268e-05, "loss": 0.3524, "num_tokens": 2265394443.0, "step": 3577 }, { "epoch": 0.4230814709707934, "grad_norm": 0.13621701300144196, "learning_rate": 4.0931329982096e-05, "loss": 0.3481, "num_tokens": 2266030659.0, "step": 3578 }, { "epoch": 0.4231997162114225, "grad_norm": 0.1369139850139618, "learning_rate": 4.092144512331627e-05, "loss": 0.3195, "num_tokens": 2266667341.0, "step": 3579 }, { "epoch": 0.4233179614520515, "grad_norm": 0.15101273357868195, "learning_rate": 4.091155910266334e-05, "loss": 0.3523, "num_tokens": 2267300227.0, "step": 3580 }, { "epoch": 0.4234362066926806, "grad_norm": 0.14834271371364594, "learning_rate": 4.090167192158723e-05, "loss": 0.3536, "num_tokens": 2267924972.0, "step": 3581 }, { "epoch": 0.4235544519333097, "grad_norm": 0.13599784672260284, "learning_rate": 4.0891783581538134e-05, "loss": 0.3359, "num_tokens": 2268554994.0, "step": 3582 }, { "epoch": 0.42367269717393874, "grad_norm": 0.13034145534038544, "learning_rate": 4.088189408396644e-05, "loss": 0.3292, "num_tokens": 2269159674.0, "step": 3583 }, { "epoch": 0.42379094241456783, "grad_norm": 0.15738891065120697, "learning_rate": 4.087200343032266e-05, "loss": 0.3486, "num_tokens": 2269798880.0, "step": 3584 }, { "epoch": 0.42390918765519686, "grad_norm": 0.1462690532207489, "learning_rate": 4.0862111622057506e-05, "loss": 0.3476, "num_tokens": 2270434462.0, "step": 3585 }, { "epoch": 0.42402743289582595, "grad_norm": 0.13451218605041504, "learning_rate": 4.085221866062187e-05, "loss": 0.3201, "num_tokens": 2271068069.0, "step": 3586 }, { "epoch": 0.424145678136455, "grad_norm": 0.1357169896364212, "learning_rate": 4.0842324547466775e-05, "loss": 0.36, "num_tokens": 2271695590.0, "step": 3587 }, { "epoch": 0.4242639233770841, "grad_norm": 0.16140080988407135, "learning_rate": 4.083242928404343e-05, "loss": 0.3745, "num_tokens": 2272333305.0, "step": 3588 }, { "epoch": 0.42438216861771316, "grad_norm": 0.14847791194915771, "learning_rate": 4.082253287180324e-05, "loss": 0.3558, "num_tokens": 2272963542.0, "step": 3589 }, { "epoch": 0.4245004138583422, "grad_norm": 0.14778472483158112, "learning_rate": 4.081263531219775e-05, "loss": 0.3469, "num_tokens": 2273599771.0, "step": 3590 }, { "epoch": 0.4246186590989713, "grad_norm": 0.14160634577274323, "learning_rate": 4.0802736606678655e-05, "loss": 0.3369, "num_tokens": 2274232040.0, "step": 3591 }, { "epoch": 0.4247369043396003, "grad_norm": 0.1434057354927063, "learning_rate": 4.0792836756697865e-05, "loss": 0.3497, "num_tokens": 2274868638.0, "step": 3592 }, { "epoch": 0.4248551495802294, "grad_norm": 0.14648908376693726, "learning_rate": 4.078293576370743e-05, "loss": 0.3255, "num_tokens": 2275507623.0, "step": 3593 }, { "epoch": 0.42497339482085844, "grad_norm": 0.14244601130485535, "learning_rate": 4.077303362915957e-05, "loss": 0.3723, "num_tokens": 2276141932.0, "step": 3594 }, { "epoch": 0.42509164006148753, "grad_norm": 0.13676977157592773, "learning_rate": 4.076313035450668e-05, "loss": 0.3384, "num_tokens": 2276780574.0, "step": 3595 }, { "epoch": 0.42520988530211656, "grad_norm": 0.13994984328746796, "learning_rate": 4.0753225941201296e-05, "loss": 0.3611, "num_tokens": 2277420277.0, "step": 3596 }, { "epoch": 0.42532813054274565, "grad_norm": 0.13869185745716095, "learning_rate": 4.0743320390696185e-05, "loss": 0.3573, "num_tokens": 2278057732.0, "step": 3597 }, { "epoch": 0.42544637578337474, "grad_norm": 0.1414029598236084, "learning_rate": 4.073341370444419e-05, "loss": 0.3466, "num_tokens": 2278687927.0, "step": 3598 }, { "epoch": 0.4255646210240038, "grad_norm": 0.15840329229831696, "learning_rate": 4.0723505883898394e-05, "loss": 0.3421, "num_tokens": 2279326904.0, "step": 3599 }, { "epoch": 0.42568286626463286, "grad_norm": 0.1386568546295166, "learning_rate": 4.0713596930512013e-05, "loss": 0.3804, "num_tokens": 2279959835.0, "step": 3600 }, { "epoch": 0.4258011115052619, "grad_norm": 0.13879238069057465, "learning_rate": 4.070368684573845e-05, "loss": 0.3359, "num_tokens": 2280556367.0, "step": 3601 }, { "epoch": 0.425919356745891, "grad_norm": 0.14438959956169128, "learning_rate": 4.069377563103123e-05, "loss": 0.3506, "num_tokens": 2281193555.0, "step": 3602 }, { "epoch": 0.42603760198652, "grad_norm": 0.14985381066799164, "learning_rate": 4.06838632878441e-05, "loss": 0.3251, "num_tokens": 2281829200.0, "step": 3603 }, { "epoch": 0.4261558472271491, "grad_norm": 0.1475895196199417, "learning_rate": 4.0673949817630954e-05, "loss": 0.3264, "num_tokens": 2282440267.0, "step": 3604 }, { "epoch": 0.4262740924677782, "grad_norm": 0.14302781224250793, "learning_rate": 4.066403522184582e-05, "loss": 0.3892, "num_tokens": 2283076943.0, "step": 3605 }, { "epoch": 0.42639233770840723, "grad_norm": 0.14409200847148895, "learning_rate": 4.0654119501942935e-05, "loss": 0.3407, "num_tokens": 2283703251.0, "step": 3606 }, { "epoch": 0.4265105829490363, "grad_norm": 0.134486123919487, "learning_rate": 4.064420265937666e-05, "loss": 0.328, "num_tokens": 2284338657.0, "step": 3607 }, { "epoch": 0.42662882818966535, "grad_norm": 0.13404302299022675, "learning_rate": 4.063428469560157e-05, "loss": 0.3362, "num_tokens": 2284954338.0, "step": 3608 }, { "epoch": 0.42674707343029444, "grad_norm": 0.12987260520458221, "learning_rate": 4.0624365612072345e-05, "loss": 0.3337, "num_tokens": 2285592298.0, "step": 3609 }, { "epoch": 0.4268653186709235, "grad_norm": 0.14308194816112518, "learning_rate": 4.06144454102439e-05, "loss": 0.3781, "num_tokens": 2286226591.0, "step": 3610 }, { "epoch": 0.42698356391155257, "grad_norm": 0.14435669779777527, "learning_rate": 4.0604524091571234e-05, "loss": 0.3638, "num_tokens": 2286863302.0, "step": 3611 }, { "epoch": 0.4271018091521816, "grad_norm": 0.13909107446670532, "learning_rate": 4.059460165750957e-05, "loss": 0.3768, "num_tokens": 2287501056.0, "step": 3612 }, { "epoch": 0.4272200543928107, "grad_norm": 0.13298842310905457, "learning_rate": 4.0584678109514277e-05, "loss": 0.3563, "num_tokens": 2288136364.0, "step": 3613 }, { "epoch": 0.4273382996334398, "grad_norm": 0.1327318698167801, "learning_rate": 4.0574753449040884e-05, "loss": 0.3307, "num_tokens": 2288772896.0, "step": 3614 }, { "epoch": 0.4274565448740688, "grad_norm": 0.13182306289672852, "learning_rate": 4.056482767754508e-05, "loss": 0.2908, "num_tokens": 2289411411.0, "step": 3615 }, { "epoch": 0.4275747901146979, "grad_norm": 0.1437261998653412, "learning_rate": 4.0554900796482726e-05, "loss": 0.3494, "num_tokens": 2290046175.0, "step": 3616 }, { "epoch": 0.42769303535532693, "grad_norm": 0.12972258031368256, "learning_rate": 4.0544972807309835e-05, "loss": 0.3373, "num_tokens": 2290684134.0, "step": 3617 }, { "epoch": 0.427811280595956, "grad_norm": 0.13290509581565857, "learning_rate": 4.053504371148259e-05, "loss": 0.3615, "num_tokens": 2291322695.0, "step": 3618 }, { "epoch": 0.42792952583658506, "grad_norm": 0.12998007237911224, "learning_rate": 4.052511351045734e-05, "loss": 0.3486, "num_tokens": 2291960792.0, "step": 3619 }, { "epoch": 0.42804777107721415, "grad_norm": 0.14278936386108398, "learning_rate": 4.05151822056906e-05, "loss": 0.3649, "num_tokens": 2292590958.0, "step": 3620 }, { "epoch": 0.42816601631784323, "grad_norm": 0.15515398979187012, "learning_rate": 4.0505249798639015e-05, "loss": 0.3558, "num_tokens": 2293227503.0, "step": 3621 }, { "epoch": 0.42828426155847227, "grad_norm": 0.13955023884773254, "learning_rate": 4.049531629075944e-05, "loss": 0.3708, "num_tokens": 2293865029.0, "step": 3622 }, { "epoch": 0.42840250679910136, "grad_norm": 0.14134563505649567, "learning_rate": 4.0485381683508844e-05, "loss": 0.3651, "num_tokens": 2294502731.0, "step": 3623 }, { "epoch": 0.4285207520397304, "grad_norm": 0.13107165694236755, "learning_rate": 4.0475445978344384e-05, "loss": 0.3527, "num_tokens": 2295137526.0, "step": 3624 }, { "epoch": 0.4286389972803595, "grad_norm": 0.13638298213481903, "learning_rate": 4.046550917672339e-05, "loss": 0.3337, "num_tokens": 2295772721.0, "step": 3625 }, { "epoch": 0.4287572425209885, "grad_norm": 0.13598746061325073, "learning_rate": 4.045557128010331e-05, "loss": 0.3204, "num_tokens": 2296409132.0, "step": 3626 }, { "epoch": 0.4288754877616176, "grad_norm": 0.14987851679325104, "learning_rate": 4.0445632289941816e-05, "loss": 0.3798, "num_tokens": 2297040961.0, "step": 3627 }, { "epoch": 0.42899373300224664, "grad_norm": 0.13103213906288147, "learning_rate": 4.043569220769667e-05, "loss": 0.3245, "num_tokens": 2297677637.0, "step": 3628 }, { "epoch": 0.4291119782428757, "grad_norm": 0.1303481012582779, "learning_rate": 4.042575103482584e-05, "loss": 0.3141, "num_tokens": 2298314192.0, "step": 3629 }, { "epoch": 0.4292302234835048, "grad_norm": 0.12986503541469574, "learning_rate": 4.041580877278744e-05, "loss": 0.3339, "num_tokens": 2298951233.0, "step": 3630 }, { "epoch": 0.42934846872413385, "grad_norm": 0.14641867578029633, "learning_rate": 4.040586542303975e-05, "loss": 0.3766, "num_tokens": 2299587497.0, "step": 3631 }, { "epoch": 0.42946671396476294, "grad_norm": 0.13781370222568512, "learning_rate": 4.039592098704119e-05, "loss": 0.3496, "num_tokens": 2300219440.0, "step": 3632 }, { "epoch": 0.42958495920539197, "grad_norm": 0.14167317748069763, "learning_rate": 4.038597546625038e-05, "loss": 0.4058, "num_tokens": 2300853547.0, "step": 3633 }, { "epoch": 0.42970320444602106, "grad_norm": 0.16290704905986786, "learning_rate": 4.037602886212605e-05, "loss": 0.3434, "num_tokens": 2301493072.0, "step": 3634 }, { "epoch": 0.4298214496866501, "grad_norm": 0.1428958624601364, "learning_rate": 4.0366081176127126e-05, "loss": 0.3545, "num_tokens": 2302130933.0, "step": 3635 }, { "epoch": 0.4299396949272792, "grad_norm": 0.12742577493190765, "learning_rate": 4.035613240971267e-05, "loss": 0.3393, "num_tokens": 2302763240.0, "step": 3636 }, { "epoch": 0.4300579401679082, "grad_norm": 0.15048520267009735, "learning_rate": 4.034618256434191e-05, "loss": 0.3644, "num_tokens": 2303399918.0, "step": 3637 }, { "epoch": 0.4301761854085373, "grad_norm": 0.15182586014270782, "learning_rate": 4.0336231641474246e-05, "loss": 0.3639, "num_tokens": 2304035438.0, "step": 3638 }, { "epoch": 0.4302944306491664, "grad_norm": 0.13624447584152222, "learning_rate": 4.032627964256922e-05, "loss": 0.3279, "num_tokens": 2304664911.0, "step": 3639 }, { "epoch": 0.4304126758897954, "grad_norm": 0.14359533786773682, "learning_rate": 4.0316326569086516e-05, "loss": 0.3518, "num_tokens": 2305297585.0, "step": 3640 }, { "epoch": 0.4305309211304245, "grad_norm": 0.14261773228645325, "learning_rate": 4.0306372422486015e-05, "loss": 0.3453, "num_tokens": 2305932739.0, "step": 3641 }, { "epoch": 0.43064916637105355, "grad_norm": 0.12279647588729858, "learning_rate": 4.029641720422774e-05, "loss": 0.3237, "num_tokens": 2306566419.0, "step": 3642 }, { "epoch": 0.43076741161168264, "grad_norm": 0.1445988565683365, "learning_rate": 4.028646091577185e-05, "loss": 0.3221, "num_tokens": 2307203006.0, "step": 3643 }, { "epoch": 0.43088565685231167, "grad_norm": 0.13652189075946808, "learning_rate": 4.02765035585787e-05, "loss": 0.3362, "num_tokens": 2307839119.0, "step": 3644 }, { "epoch": 0.43100390209294076, "grad_norm": 0.13874706625938416, "learning_rate": 4.026654513410874e-05, "loss": 0.3421, "num_tokens": 2308472150.0, "step": 3645 }, { "epoch": 0.43112214733356985, "grad_norm": 0.14106476306915283, "learning_rate": 4.025658564382266e-05, "loss": 0.3461, "num_tokens": 2309105308.0, "step": 3646 }, { "epoch": 0.4312403925741989, "grad_norm": 0.14267238974571228, "learning_rate": 4.024662508918123e-05, "loss": 0.3517, "num_tokens": 2309739344.0, "step": 3647 }, { "epoch": 0.43135863781482797, "grad_norm": 0.13789893686771393, "learning_rate": 4.023666347164543e-05, "loss": 0.3445, "num_tokens": 2310378292.0, "step": 3648 }, { "epoch": 0.431476883055457, "grad_norm": 0.13295263051986694, "learning_rate": 4.022670079267636e-05, "loss": 0.3453, "num_tokens": 2311017024.0, "step": 3649 }, { "epoch": 0.4315951282960861, "grad_norm": 0.16273227334022522, "learning_rate": 4.02167370537353e-05, "loss": 0.3706, "num_tokens": 2311649983.0, "step": 3650 }, { "epoch": 0.43171337353671513, "grad_norm": 0.14043472707271576, "learning_rate": 4.020677225628367e-05, "loss": 0.3792, "num_tokens": 2312283102.0, "step": 3651 }, { "epoch": 0.4318316187773442, "grad_norm": 0.13916391134262085, "learning_rate": 4.0196806401783056e-05, "loss": 0.3408, "num_tokens": 2312917092.0, "step": 3652 }, { "epoch": 0.43194986401797325, "grad_norm": 0.15619944036006927, "learning_rate": 4.0186839491695185e-05, "loss": 0.3895, "num_tokens": 2313550665.0, "step": 3653 }, { "epoch": 0.43206810925860234, "grad_norm": 0.12683139741420746, "learning_rate": 4.017687152748195e-05, "loss": 0.3467, "num_tokens": 2314185629.0, "step": 3654 }, { "epoch": 0.43218635449923143, "grad_norm": 0.1304204910993576, "learning_rate": 4.016690251060539e-05, "loss": 0.3279, "num_tokens": 2314824951.0, "step": 3655 }, { "epoch": 0.43230459973986046, "grad_norm": 0.1653451770544052, "learning_rate": 4.0156932442527724e-05, "loss": 0.3708, "num_tokens": 2315462082.0, "step": 3656 }, { "epoch": 0.43242284498048955, "grad_norm": 0.14071759581565857, "learning_rate": 4.01469613247113e-05, "loss": 0.3441, "num_tokens": 2316096621.0, "step": 3657 }, { "epoch": 0.4325410902211186, "grad_norm": 0.12921833992004395, "learning_rate": 4.0136989158618604e-05, "loss": 0.3171, "num_tokens": 2316727777.0, "step": 3658 }, { "epoch": 0.4326593354617477, "grad_norm": 0.14423497021198273, "learning_rate": 4.0127015945712324e-05, "loss": 0.3212, "num_tokens": 2317361850.0, "step": 3659 }, { "epoch": 0.4327775807023767, "grad_norm": 0.1323917657136917, "learning_rate": 4.0117041687455255e-05, "loss": 0.3529, "num_tokens": 2317992354.0, "step": 3660 }, { "epoch": 0.4328958259430058, "grad_norm": 0.1421685665845871, "learning_rate": 4.010706638531038e-05, "loss": 0.3444, "num_tokens": 2318622067.0, "step": 3661 }, { "epoch": 0.4330140711836349, "grad_norm": 0.12939439713954926, "learning_rate": 4.009709004074082e-05, "loss": 0.3476, "num_tokens": 2319255026.0, "step": 3662 }, { "epoch": 0.4331323164242639, "grad_norm": 0.14184452593326569, "learning_rate": 4.008711265520984e-05, "loss": 0.3545, "num_tokens": 2319886644.0, "step": 3663 }, { "epoch": 0.433250561664893, "grad_norm": 0.15800067782402039, "learning_rate": 4.0077134230180874e-05, "loss": 0.38, "num_tokens": 2320524023.0, "step": 3664 }, { "epoch": 0.43336880690552204, "grad_norm": 0.1473662406206131, "learning_rate": 4.00671547671175e-05, "loss": 0.3697, "num_tokens": 2321126690.0, "step": 3665 }, { "epoch": 0.43348705214615113, "grad_norm": 0.14693768322467804, "learning_rate": 4.0057174267483446e-05, "loss": 0.3799, "num_tokens": 2321762477.0, "step": 3666 }, { "epoch": 0.43360529738678016, "grad_norm": 0.13887563347816467, "learning_rate": 4.004719273274259e-05, "loss": 0.3356, "num_tokens": 2322392145.0, "step": 3667 }, { "epoch": 0.43372354262740925, "grad_norm": 0.14002162218093872, "learning_rate": 4.003721016435899e-05, "loss": 0.3248, "num_tokens": 2323030322.0, "step": 3668 }, { "epoch": 0.4338417878680383, "grad_norm": 0.13347502052783966, "learning_rate": 4.002722656379681e-05, "loss": 0.3558, "num_tokens": 2323663235.0, "step": 3669 }, { "epoch": 0.4339600331086674, "grad_norm": 0.13328097760677338, "learning_rate": 4.001724193252041e-05, "loss": 0.3372, "num_tokens": 2324298014.0, "step": 3670 }, { "epoch": 0.43407827834929646, "grad_norm": 0.13161614537239075, "learning_rate": 4.000725627199426e-05, "loss": 0.3516, "num_tokens": 2324928623.0, "step": 3671 }, { "epoch": 0.4341965235899255, "grad_norm": 0.1345854550600052, "learning_rate": 3.999726958368301e-05, "loss": 0.303, "num_tokens": 2325562650.0, "step": 3672 }, { "epoch": 0.4343147688305546, "grad_norm": 0.1332838088274002, "learning_rate": 3.9987281869051444e-05, "loss": 0.3404, "num_tokens": 2326201425.0, "step": 3673 }, { "epoch": 0.4344330140711836, "grad_norm": 0.15061452984809875, "learning_rate": 3.997729312956451e-05, "loss": 0.3726, "num_tokens": 2326834732.0, "step": 3674 }, { "epoch": 0.4345512593118127, "grad_norm": 0.14141757786273956, "learning_rate": 3.9967303366687306e-05, "loss": 0.3615, "num_tokens": 2327463970.0, "step": 3675 }, { "epoch": 0.43466950455244174, "grad_norm": 0.12484433501958847, "learning_rate": 3.9957312581885076e-05, "loss": 0.3068, "num_tokens": 2328097134.0, "step": 3676 }, { "epoch": 0.43478774979307083, "grad_norm": 0.13645046949386597, "learning_rate": 3.9947320776623196e-05, "loss": 0.3444, "num_tokens": 2328730879.0, "step": 3677 }, { "epoch": 0.43490599503369987, "grad_norm": 0.13252942264080048, "learning_rate": 3.993732795236722e-05, "loss": 0.3373, "num_tokens": 2329365723.0, "step": 3678 }, { "epoch": 0.43502424027432895, "grad_norm": 0.1494184285402298, "learning_rate": 3.9927334110582834e-05, "loss": 0.3274, "num_tokens": 2330002645.0, "step": 3679 }, { "epoch": 0.43514248551495804, "grad_norm": 0.14465829730033875, "learning_rate": 3.991733925273589e-05, "loss": 0.3506, "num_tokens": 2330638440.0, "step": 3680 }, { "epoch": 0.4352607307555871, "grad_norm": 0.12188902497291565, "learning_rate": 3.990734338029236e-05, "loss": 0.3308, "num_tokens": 2331275552.0, "step": 3681 }, { "epoch": 0.43537897599621617, "grad_norm": 0.13742615282535553, "learning_rate": 3.9897346494718396e-05, "loss": 0.3563, "num_tokens": 2331910178.0, "step": 3682 }, { "epoch": 0.4354972212368452, "grad_norm": 0.13084475696086884, "learning_rate": 3.9887348597480275e-05, "loss": 0.327, "num_tokens": 2332542405.0, "step": 3683 }, { "epoch": 0.4356154664774743, "grad_norm": 0.13292467594146729, "learning_rate": 3.987734969004445e-05, "loss": 0.3095, "num_tokens": 2333180430.0, "step": 3684 }, { "epoch": 0.4357337117181033, "grad_norm": 0.14613351225852966, "learning_rate": 3.986734977387748e-05, "loss": 0.3337, "num_tokens": 2333810278.0, "step": 3685 }, { "epoch": 0.4358519569587324, "grad_norm": 0.14497588574886322, "learning_rate": 3.985734885044612e-05, "loss": 0.3478, "num_tokens": 2334442668.0, "step": 3686 }, { "epoch": 0.4359702021993615, "grad_norm": 0.13977459073066711, "learning_rate": 3.984734692121724e-05, "loss": 0.3361, "num_tokens": 2335043091.0, "step": 3687 }, { "epoch": 0.43608844743999053, "grad_norm": 0.15250833332538605, "learning_rate": 3.9837343987657855e-05, "loss": 0.3547, "num_tokens": 2335673646.0, "step": 3688 }, { "epoch": 0.4362066926806196, "grad_norm": 0.13480375707149506, "learning_rate": 3.982734005123516e-05, "loss": 0.32, "num_tokens": 2336304167.0, "step": 3689 }, { "epoch": 0.43632493792124866, "grad_norm": 0.1282462775707245, "learning_rate": 3.981733511341646e-05, "loss": 0.2997, "num_tokens": 2336934258.0, "step": 3690 }, { "epoch": 0.43644318316187775, "grad_norm": 0.15213635563850403, "learning_rate": 3.9807329175669246e-05, "loss": 0.3716, "num_tokens": 2337569689.0, "step": 3691 }, { "epoch": 0.4365614284025068, "grad_norm": 0.14018255472183228, "learning_rate": 3.9797322239461106e-05, "loss": 0.3259, "num_tokens": 2338198977.0, "step": 3692 }, { "epoch": 0.43667967364313587, "grad_norm": 0.14038754999637604, "learning_rate": 3.9787314306259815e-05, "loss": 0.3503, "num_tokens": 2338833002.0, "step": 3693 }, { "epoch": 0.4367979188837649, "grad_norm": 0.1425144076347351, "learning_rate": 3.9777305377533274e-05, "loss": 0.3568, "num_tokens": 2339450864.0, "step": 3694 }, { "epoch": 0.436916164124394, "grad_norm": 0.1523418426513672, "learning_rate": 3.976729545474955e-05, "loss": 0.3669, "num_tokens": 2340079258.0, "step": 3695 }, { "epoch": 0.4370344093650231, "grad_norm": 0.1393364518880844, "learning_rate": 3.975728453937683e-05, "loss": 0.3177, "num_tokens": 2340710685.0, "step": 3696 }, { "epoch": 0.4371526546056521, "grad_norm": 0.15047705173492432, "learning_rate": 3.974727263288346e-05, "loss": 0.3393, "num_tokens": 2341346367.0, "step": 3697 }, { "epoch": 0.4372708998462812, "grad_norm": 0.1364244967699051, "learning_rate": 3.9737259736737925e-05, "loss": 0.3257, "num_tokens": 2341980889.0, "step": 3698 }, { "epoch": 0.43738914508691024, "grad_norm": 0.1398073434829712, "learning_rate": 3.972724585240888e-05, "loss": 0.3393, "num_tokens": 2342613183.0, "step": 3699 }, { "epoch": 0.4375073903275393, "grad_norm": 0.14322346448898315, "learning_rate": 3.971723098136508e-05, "loss": 0.3407, "num_tokens": 2343252717.0, "step": 3700 }, { "epoch": 0.43762563556816836, "grad_norm": 0.15253926813602448, "learning_rate": 3.970721512507547e-05, "loss": 0.3803, "num_tokens": 2343884108.0, "step": 3701 }, { "epoch": 0.43774388080879745, "grad_norm": 0.13107231259346008, "learning_rate": 3.969719828500913e-05, "loss": 0.3279, "num_tokens": 2344521246.0, "step": 3702 }, { "epoch": 0.43786212604942654, "grad_norm": 0.14524638652801514, "learning_rate": 3.968718046263524e-05, "loss": 0.3325, "num_tokens": 2345149198.0, "step": 3703 }, { "epoch": 0.43798037129005557, "grad_norm": 0.14423659443855286, "learning_rate": 3.967716165942317e-05, "loss": 0.3528, "num_tokens": 2345772591.0, "step": 3704 }, { "epoch": 0.43809861653068466, "grad_norm": 0.1340705007314682, "learning_rate": 3.966714187684244e-05, "loss": 0.3182, "num_tokens": 2346407832.0, "step": 3705 }, { "epoch": 0.4382168617713137, "grad_norm": 0.14724567532539368, "learning_rate": 3.9657121116362684e-05, "loss": 0.3361, "num_tokens": 2347034897.0, "step": 3706 }, { "epoch": 0.4383351070119428, "grad_norm": 0.14199334383010864, "learning_rate": 3.964709937945368e-05, "loss": 0.3498, "num_tokens": 2347672758.0, "step": 3707 }, { "epoch": 0.4384533522525718, "grad_norm": 0.1375984251499176, "learning_rate": 3.963707666758538e-05, "loss": 0.3395, "num_tokens": 2348310212.0, "step": 3708 }, { "epoch": 0.4385715974932009, "grad_norm": 0.13796629011631012, "learning_rate": 3.962705298222784e-05, "loss": 0.3821, "num_tokens": 2348946396.0, "step": 3709 }, { "epoch": 0.43868984273382994, "grad_norm": 0.12877051532268524, "learning_rate": 3.96170283248513e-05, "loss": 0.3426, "num_tokens": 2349584737.0, "step": 3710 }, { "epoch": 0.438808087974459, "grad_norm": 0.13451899588108063, "learning_rate": 3.96070026969261e-05, "loss": 0.3578, "num_tokens": 2350216166.0, "step": 3711 }, { "epoch": 0.4389263332150881, "grad_norm": 0.13340266048908234, "learning_rate": 3.959697609992275e-05, "loss": 0.3694, "num_tokens": 2350853161.0, "step": 3712 }, { "epoch": 0.43904457845571715, "grad_norm": 0.1505095213651657, "learning_rate": 3.95869485353119e-05, "loss": 0.3759, "num_tokens": 2351489807.0, "step": 3713 }, { "epoch": 0.43916282369634624, "grad_norm": 0.1442621648311615, "learning_rate": 3.9576920004564334e-05, "loss": 0.3428, "num_tokens": 2352129202.0, "step": 3714 }, { "epoch": 0.43928106893697527, "grad_norm": 0.1380746215581894, "learning_rate": 3.956689050915097e-05, "loss": 0.3704, "num_tokens": 2352763771.0, "step": 3715 }, { "epoch": 0.43939931417760436, "grad_norm": 0.14075051248073578, "learning_rate": 3.95568600505429e-05, "loss": 0.352, "num_tokens": 2353394001.0, "step": 3716 }, { "epoch": 0.4395175594182334, "grad_norm": 0.14557555317878723, "learning_rate": 3.954682863021132e-05, "loss": 0.3404, "num_tokens": 2354026499.0, "step": 3717 }, { "epoch": 0.4396358046588625, "grad_norm": 0.14910592138767242, "learning_rate": 3.953679624962759e-05, "loss": 0.3705, "num_tokens": 2354658430.0, "step": 3718 }, { "epoch": 0.4397540498994916, "grad_norm": 0.1509704887866974, "learning_rate": 3.952676291026319e-05, "loss": 0.4087, "num_tokens": 2355289904.0, "step": 3719 }, { "epoch": 0.4398722951401206, "grad_norm": 0.12589006125926971, "learning_rate": 3.951672861358976e-05, "loss": 0.3046, "num_tokens": 2355906300.0, "step": 3720 }, { "epoch": 0.4399905403807497, "grad_norm": 0.13697707653045654, "learning_rate": 3.950669336107909e-05, "loss": 0.3425, "num_tokens": 2356538436.0, "step": 3721 }, { "epoch": 0.44010878562137873, "grad_norm": 0.1480291783809662, "learning_rate": 3.949665715420308e-05, "loss": 0.3734, "num_tokens": 2357170442.0, "step": 3722 }, { "epoch": 0.4402270308620078, "grad_norm": 0.14728876948356628, "learning_rate": 3.948661999443379e-05, "loss": 0.3829, "num_tokens": 2357803544.0, "step": 3723 }, { "epoch": 0.44034527610263685, "grad_norm": 0.12507696449756622, "learning_rate": 3.9476581883243405e-05, "loss": 0.3277, "num_tokens": 2358441228.0, "step": 3724 }, { "epoch": 0.44046352134326594, "grad_norm": 0.15164604783058167, "learning_rate": 3.946654282210427e-05, "loss": 0.3974, "num_tokens": 2359075199.0, "step": 3725 }, { "epoch": 0.440581766583895, "grad_norm": 0.12342245876789093, "learning_rate": 3.945650281248885e-05, "loss": 0.3005, "num_tokens": 2359712001.0, "step": 3726 }, { "epoch": 0.44070001182452406, "grad_norm": 0.13745999336242676, "learning_rate": 3.944646185586977e-05, "loss": 0.3563, "num_tokens": 2360349048.0, "step": 3727 }, { "epoch": 0.44081825706515315, "grad_norm": 0.14663036167621613, "learning_rate": 3.943641995371976e-05, "loss": 0.3784, "num_tokens": 2360982230.0, "step": 3728 }, { "epoch": 0.4409365023057822, "grad_norm": 0.14280691742897034, "learning_rate": 3.942637710751173e-05, "loss": 0.323, "num_tokens": 2361611367.0, "step": 3729 }, { "epoch": 0.4410547475464113, "grad_norm": 0.14438486099243164, "learning_rate": 3.9416333318718706e-05, "loss": 0.3656, "num_tokens": 2362250832.0, "step": 3730 }, { "epoch": 0.4411729927870403, "grad_norm": 0.13002169132232666, "learning_rate": 3.9406288588813845e-05, "loss": 0.3107, "num_tokens": 2362879630.0, "step": 3731 }, { "epoch": 0.4412912380276694, "grad_norm": 0.13181143999099731, "learning_rate": 3.9396242919270454e-05, "loss": 0.314, "num_tokens": 2363516522.0, "step": 3732 }, { "epoch": 0.44140948326829843, "grad_norm": 0.14150695502758026, "learning_rate": 3.9386196311561976e-05, "loss": 0.3791, "num_tokens": 2364149911.0, "step": 3733 }, { "epoch": 0.4415277285089275, "grad_norm": 0.12908640503883362, "learning_rate": 3.937614876716199e-05, "loss": 0.3382, "num_tokens": 2364752645.0, "step": 3734 }, { "epoch": 0.44164597374955655, "grad_norm": 0.15224741399288177, "learning_rate": 3.9366100287544215e-05, "loss": 0.369, "num_tokens": 2365380404.0, "step": 3735 }, { "epoch": 0.44176421899018564, "grad_norm": 0.1275174915790558, "learning_rate": 3.935605087418252e-05, "loss": 0.3409, "num_tokens": 2366015226.0, "step": 3736 }, { "epoch": 0.44188246423081473, "grad_norm": 0.1429666429758072, "learning_rate": 3.9346000528550876e-05, "loss": 0.3309, "num_tokens": 2366648339.0, "step": 3737 }, { "epoch": 0.44200070947144376, "grad_norm": 0.1406528204679489, "learning_rate": 3.933594925212343e-05, "loss": 0.3465, "num_tokens": 2367287689.0, "step": 3738 }, { "epoch": 0.44211895471207285, "grad_norm": 0.13658319413661957, "learning_rate": 3.932589704637442e-05, "loss": 0.3846, "num_tokens": 2367921236.0, "step": 3739 }, { "epoch": 0.4422371999527019, "grad_norm": 0.1432870477437973, "learning_rate": 3.931584391277827e-05, "loss": 0.3552, "num_tokens": 2368560343.0, "step": 3740 }, { "epoch": 0.442355445193331, "grad_norm": 0.12824226915836334, "learning_rate": 3.9305789852809504e-05, "loss": 0.3221, "num_tokens": 2369192849.0, "step": 3741 }, { "epoch": 0.44247369043396, "grad_norm": 0.1461295187473297, "learning_rate": 3.929573486794281e-05, "loss": 0.3328, "num_tokens": 2369818255.0, "step": 3742 }, { "epoch": 0.4425919356745891, "grad_norm": 0.14038154482841492, "learning_rate": 3.928567895965299e-05, "loss": 0.3643, "num_tokens": 2370450052.0, "step": 3743 }, { "epoch": 0.4427101809152182, "grad_norm": 0.1331811547279358, "learning_rate": 3.927562212941499e-05, "loss": 0.3156, "num_tokens": 2371079628.0, "step": 3744 }, { "epoch": 0.4428284261558472, "grad_norm": 0.1411089450120926, "learning_rate": 3.926556437870388e-05, "loss": 0.3514, "num_tokens": 2371713864.0, "step": 3745 }, { "epoch": 0.4429466713964763, "grad_norm": 0.1484958529472351, "learning_rate": 3.925550570899488e-05, "loss": 0.3669, "num_tokens": 2372315746.0, "step": 3746 }, { "epoch": 0.44306491663710534, "grad_norm": 0.138286754488945, "learning_rate": 3.924544612176334e-05, "loss": 0.3242, "num_tokens": 2372955189.0, "step": 3747 }, { "epoch": 0.44318316187773443, "grad_norm": 0.13551412522792816, "learning_rate": 3.923538561848475e-05, "loss": 0.317, "num_tokens": 2373593388.0, "step": 3748 }, { "epoch": 0.44330140711836347, "grad_norm": 0.15042339265346527, "learning_rate": 3.922532420063471e-05, "loss": 0.3436, "num_tokens": 2374229559.0, "step": 3749 }, { "epoch": 0.44341965235899256, "grad_norm": 0.14970549941062927, "learning_rate": 3.9215261869689e-05, "loss": 0.3555, "num_tokens": 2374863874.0, "step": 3750 }, { "epoch": 0.4435378975996216, "grad_norm": 0.1378648430109024, "learning_rate": 3.920519862712349e-05, "loss": 0.3451, "num_tokens": 2375503003.0, "step": 3751 }, { "epoch": 0.4436561428402507, "grad_norm": 0.14662185311317444, "learning_rate": 3.9195134474414195e-05, "loss": 0.362, "num_tokens": 2376140745.0, "step": 3752 }, { "epoch": 0.44377438808087977, "grad_norm": 0.13942405581474304, "learning_rate": 3.918506941303728e-05, "loss": 0.3225, "num_tokens": 2376758437.0, "step": 3753 }, { "epoch": 0.4438926333215088, "grad_norm": 0.14500845968723297, "learning_rate": 3.9175003444469025e-05, "loss": 0.35, "num_tokens": 2377395405.0, "step": 3754 }, { "epoch": 0.4440108785621379, "grad_norm": 0.14367496967315674, "learning_rate": 3.916493657018586e-05, "loss": 0.3501, "num_tokens": 2378013045.0, "step": 3755 }, { "epoch": 0.4441291238027669, "grad_norm": 0.13987870514392853, "learning_rate": 3.915486879166431e-05, "loss": 0.3473, "num_tokens": 2378650604.0, "step": 3756 }, { "epoch": 0.444247369043396, "grad_norm": 0.13734471797943115, "learning_rate": 3.91448001103811e-05, "loss": 0.353, "num_tokens": 2379288196.0, "step": 3757 }, { "epoch": 0.44436561428402505, "grad_norm": 0.11899980157613754, "learning_rate": 3.9134730527813015e-05, "loss": 0.3257, "num_tokens": 2379922414.0, "step": 3758 }, { "epoch": 0.44448385952465413, "grad_norm": 0.1532949060201645, "learning_rate": 3.912466004543702e-05, "loss": 0.4127, "num_tokens": 2380555969.0, "step": 3759 }, { "epoch": 0.4446021047652832, "grad_norm": 0.13468709588050842, "learning_rate": 3.91145886647302e-05, "loss": 0.3497, "num_tokens": 2381187017.0, "step": 3760 }, { "epoch": 0.44472035000591226, "grad_norm": 0.12531614303588867, "learning_rate": 3.910451638716976e-05, "loss": 0.3432, "num_tokens": 2381818694.0, "step": 3761 }, { "epoch": 0.44483859524654135, "grad_norm": 0.12386646121740341, "learning_rate": 3.9094443214233044e-05, "loss": 0.3218, "num_tokens": 2382449111.0, "step": 3762 }, { "epoch": 0.4449568404871704, "grad_norm": 0.1307145208120346, "learning_rate": 3.9084369147397546e-05, "loss": 0.3395, "num_tokens": 2383082206.0, "step": 3763 }, { "epoch": 0.44507508572779947, "grad_norm": 0.13185256719589233, "learning_rate": 3.907429418814084e-05, "loss": 0.2828, "num_tokens": 2383714233.0, "step": 3764 }, { "epoch": 0.4451933309684285, "grad_norm": 0.14038564264774323, "learning_rate": 3.906421833794071e-05, "loss": 0.3414, "num_tokens": 2384309826.0, "step": 3765 }, { "epoch": 0.4453115762090576, "grad_norm": 0.14294637739658356, "learning_rate": 3.9054141598274985e-05, "loss": 0.3355, "num_tokens": 2384944186.0, "step": 3766 }, { "epoch": 0.4454298214496866, "grad_norm": 0.13880331814289093, "learning_rate": 3.9044063970621675e-05, "loss": 0.3351, "num_tokens": 2385579327.0, "step": 3767 }, { "epoch": 0.4455480666903157, "grad_norm": 0.13934610784053802, "learning_rate": 3.9033985456458925e-05, "loss": 0.3434, "num_tokens": 2386218066.0, "step": 3768 }, { "epoch": 0.4456663119309448, "grad_norm": 0.14990878105163574, "learning_rate": 3.9023906057264984e-05, "loss": 0.4016, "num_tokens": 2386853845.0, "step": 3769 }, { "epoch": 0.44578455717157384, "grad_norm": 0.13110403716564178, "learning_rate": 3.9013825774518244e-05, "loss": 0.3669, "num_tokens": 2387490242.0, "step": 3770 }, { "epoch": 0.4459028024122029, "grad_norm": 0.13142332434654236, "learning_rate": 3.9003744609697223e-05, "loss": 0.3383, "num_tokens": 2388127184.0, "step": 3771 }, { "epoch": 0.44602104765283196, "grad_norm": 0.13439467549324036, "learning_rate": 3.899366256428057e-05, "loss": 0.3236, "num_tokens": 2388763084.0, "step": 3772 }, { "epoch": 0.44613929289346105, "grad_norm": 0.13600732386112213, "learning_rate": 3.8983579639747065e-05, "loss": 0.3635, "num_tokens": 2389401501.0, "step": 3773 }, { "epoch": 0.4462575381340901, "grad_norm": 0.1340043693780899, "learning_rate": 3.897349583757561e-05, "loss": 0.3434, "num_tokens": 2390039162.0, "step": 3774 }, { "epoch": 0.44637578337471917, "grad_norm": 0.12954182922840118, "learning_rate": 3.896341115924525e-05, "loss": 0.3727, "num_tokens": 2390677609.0, "step": 3775 }, { "epoch": 0.44649402861534826, "grad_norm": 0.13317452371120453, "learning_rate": 3.8953325606235135e-05, "loss": 0.3623, "num_tokens": 2391311638.0, "step": 3776 }, { "epoch": 0.4466122738559773, "grad_norm": 0.137006938457489, "learning_rate": 3.894323918002457e-05, "loss": 0.3246, "num_tokens": 2391951322.0, "step": 3777 }, { "epoch": 0.4467305190966064, "grad_norm": 0.15118049085140228, "learning_rate": 3.893315188209297e-05, "loss": 0.3729, "num_tokens": 2392586485.0, "step": 3778 }, { "epoch": 0.4468487643372354, "grad_norm": 0.13379117846488953, "learning_rate": 3.892306371391988e-05, "loss": 0.3331, "num_tokens": 2393220671.0, "step": 3779 }, { "epoch": 0.4469670095778645, "grad_norm": 0.13679690659046173, "learning_rate": 3.891297467698499e-05, "loss": 0.3574, "num_tokens": 2393858091.0, "step": 3780 }, { "epoch": 0.44708525481849354, "grad_norm": 0.12221894413232803, "learning_rate": 3.890288477276809e-05, "loss": 0.2921, "num_tokens": 2394495743.0, "step": 3781 }, { "epoch": 0.4472035000591226, "grad_norm": 0.13254757225513458, "learning_rate": 3.889279400274911e-05, "loss": 0.344, "num_tokens": 2395128090.0, "step": 3782 }, { "epoch": 0.44732174529975166, "grad_norm": 0.14514589309692383, "learning_rate": 3.888270236840811e-05, "loss": 0.3605, "num_tokens": 2395759031.0, "step": 3783 }, { "epoch": 0.44743999054038075, "grad_norm": 0.14841583371162415, "learning_rate": 3.887260987122529e-05, "loss": 0.3864, "num_tokens": 2396396895.0, "step": 3784 }, { "epoch": 0.44755823578100984, "grad_norm": 0.128132626414299, "learning_rate": 3.8862516512680935e-05, "loss": 0.3441, "num_tokens": 2397032323.0, "step": 3785 }, { "epoch": 0.4476764810216389, "grad_norm": 0.13456888496875763, "learning_rate": 3.885242229425549e-05, "loss": 0.3271, "num_tokens": 2397671737.0, "step": 3786 }, { "epoch": 0.44779472626226796, "grad_norm": 0.143964022397995, "learning_rate": 3.884232721742954e-05, "loss": 0.3701, "num_tokens": 2398302094.0, "step": 3787 }, { "epoch": 0.447912971502897, "grad_norm": 0.14143337309360504, "learning_rate": 3.8832231283683744e-05, "loss": 0.3541, "num_tokens": 2398929837.0, "step": 3788 }, { "epoch": 0.4480312167435261, "grad_norm": 0.1339057832956314, "learning_rate": 3.882213449449895e-05, "loss": 0.3433, "num_tokens": 2399566611.0, "step": 3789 }, { "epoch": 0.4481494619841551, "grad_norm": 0.1379936784505844, "learning_rate": 3.8812036851356046e-05, "loss": 0.3668, "num_tokens": 2400204015.0, "step": 3790 }, { "epoch": 0.4482677072247842, "grad_norm": 0.13963399827480316, "learning_rate": 3.880193835573615e-05, "loss": 0.3712, "num_tokens": 2400834780.0, "step": 3791 }, { "epoch": 0.44838595246541324, "grad_norm": 0.127555713057518, "learning_rate": 3.8791839009120434e-05, "loss": 0.3322, "num_tokens": 2401468857.0, "step": 3792 }, { "epoch": 0.44850419770604233, "grad_norm": 0.14377307891845703, "learning_rate": 3.878173881299021e-05, "loss": 0.3693, "num_tokens": 2402073375.0, "step": 3793 }, { "epoch": 0.4486224429466714, "grad_norm": 0.14683173596858978, "learning_rate": 3.8771637768826925e-05, "loss": 0.3362, "num_tokens": 2402707846.0, "step": 3794 }, { "epoch": 0.44874068818730045, "grad_norm": 0.13279475271701813, "learning_rate": 3.876153587811214e-05, "loss": 0.3314, "num_tokens": 2403337286.0, "step": 3795 }, { "epoch": 0.44885893342792954, "grad_norm": 0.14349013566970825, "learning_rate": 3.875143314232754e-05, "loss": 0.3679, "num_tokens": 2403962516.0, "step": 3796 }, { "epoch": 0.4489771786685586, "grad_norm": 0.13521108031272888, "learning_rate": 3.8741329562954936e-05, "loss": 0.3504, "num_tokens": 2404596683.0, "step": 3797 }, { "epoch": 0.44909542390918766, "grad_norm": 0.13424549996852875, "learning_rate": 3.8731225141476276e-05, "loss": 0.3498, "num_tokens": 2405233515.0, "step": 3798 }, { "epoch": 0.4492136691498167, "grad_norm": 0.13512404263019562, "learning_rate": 3.8721119879373624e-05, "loss": 0.3463, "num_tokens": 2405860619.0, "step": 3799 }, { "epoch": 0.4493319143904458, "grad_norm": 0.13411173224449158, "learning_rate": 3.871101377812914e-05, "loss": 0.3453, "num_tokens": 2406495947.0, "step": 3800 }, { "epoch": 0.4494501596310749, "grad_norm": 0.12391994893550873, "learning_rate": 3.8700906839225144e-05, "loss": 0.3356, "num_tokens": 2407135288.0, "step": 3801 }, { "epoch": 0.4495684048717039, "grad_norm": 0.1350843459367752, "learning_rate": 3.869079906414406e-05, "loss": 0.3314, "num_tokens": 2407766576.0, "step": 3802 }, { "epoch": 0.449686650112333, "grad_norm": 0.13177691400051117, "learning_rate": 3.868069045436845e-05, "loss": 0.3481, "num_tokens": 2408379753.0, "step": 3803 }, { "epoch": 0.44980489535296203, "grad_norm": 3.1713204383850098, "learning_rate": 3.867058101138098e-05, "loss": 0.4961, "num_tokens": 2408983505.0, "step": 3804 }, { "epoch": 0.4499231405935911, "grad_norm": 0.14535090327262878, "learning_rate": 3.866047073666445e-05, "loss": 0.3446, "num_tokens": 2409610994.0, "step": 3805 }, { "epoch": 0.45004138583422015, "grad_norm": 0.13809144496917725, "learning_rate": 3.865035963170178e-05, "loss": 0.3245, "num_tokens": 2410242783.0, "step": 3806 }, { "epoch": 0.45015963107484924, "grad_norm": 0.13312749564647675, "learning_rate": 3.8640247697976004e-05, "loss": 0.3365, "num_tokens": 2410879014.0, "step": 3807 }, { "epoch": 0.4502778763154783, "grad_norm": 0.34955543279647827, "learning_rate": 3.86301349369703e-05, "loss": 0.3399, "num_tokens": 2411486885.0, "step": 3808 }, { "epoch": 0.45039612155610736, "grad_norm": 0.17560644447803497, "learning_rate": 3.862002135016792e-05, "loss": 0.3152, "num_tokens": 2412117323.0, "step": 3809 }, { "epoch": 0.45051436679673645, "grad_norm": 0.19448815286159515, "learning_rate": 3.86099069390523e-05, "loss": 0.3417, "num_tokens": 2412754754.0, "step": 3810 }, { "epoch": 0.4506326120373655, "grad_norm": 0.19309182465076447, "learning_rate": 3.859979170510694e-05, "loss": 0.3787, "num_tokens": 2413392187.0, "step": 3811 }, { "epoch": 0.4507508572779946, "grad_norm": 0.13861624896526337, "learning_rate": 3.85896756498155e-05, "loss": 0.3238, "num_tokens": 2414019497.0, "step": 3812 }, { "epoch": 0.4508691025186236, "grad_norm": 0.15748220682144165, "learning_rate": 3.857955877466175e-05, "loss": 0.319, "num_tokens": 2414654639.0, "step": 3813 }, { "epoch": 0.4509873477592527, "grad_norm": 0.19903945922851562, "learning_rate": 3.856944108112958e-05, "loss": 0.327, "num_tokens": 2415284332.0, "step": 3814 }, { "epoch": 0.45110559299988173, "grad_norm": 0.15651720762252808, "learning_rate": 3.855932257070297e-05, "loss": 0.328, "num_tokens": 2415911193.0, "step": 3815 }, { "epoch": 0.4512238382405108, "grad_norm": 0.13788564503192902, "learning_rate": 3.8549203244866066e-05, "loss": 0.3289, "num_tokens": 2416536717.0, "step": 3816 }, { "epoch": 0.4513420834811399, "grad_norm": 0.1800713837146759, "learning_rate": 3.853908310510312e-05, "loss": 0.3817, "num_tokens": 2417168578.0, "step": 3817 }, { "epoch": 0.45146032872176894, "grad_norm": 0.17437736690044403, "learning_rate": 3.8528962152898486e-05, "loss": 0.3698, "num_tokens": 2417800178.0, "step": 3818 }, { "epoch": 0.45157857396239803, "grad_norm": 0.1600605994462967, "learning_rate": 3.851884038973665e-05, "loss": 0.337, "num_tokens": 2418427225.0, "step": 3819 }, { "epoch": 0.45169681920302707, "grad_norm": 0.15050017833709717, "learning_rate": 3.850871781710222e-05, "loss": 0.3379, "num_tokens": 2419065948.0, "step": 3820 }, { "epoch": 0.45181506444365616, "grad_norm": 0.1534409075975418, "learning_rate": 3.8498594436479914e-05, "loss": 0.3815, "num_tokens": 2419703840.0, "step": 3821 }, { "epoch": 0.4519333096842852, "grad_norm": 0.15687526762485504, "learning_rate": 3.8488470249354574e-05, "loss": 0.3497, "num_tokens": 2420340358.0, "step": 3822 }, { "epoch": 0.4520515549249143, "grad_norm": 0.16108126938343048, "learning_rate": 3.847834525721116e-05, "loss": 0.3543, "num_tokens": 2420959869.0, "step": 3823 }, { "epoch": 0.4521698001655433, "grad_norm": 0.13574698567390442, "learning_rate": 3.846821946153475e-05, "loss": 0.3714, "num_tokens": 2421596121.0, "step": 3824 }, { "epoch": 0.4522880454061724, "grad_norm": 0.1346721351146698, "learning_rate": 3.8458092863810544e-05, "loss": 0.3288, "num_tokens": 2422230754.0, "step": 3825 }, { "epoch": 0.4524062906468015, "grad_norm": 0.13744673132896423, "learning_rate": 3.844796546552383e-05, "loss": 0.3669, "num_tokens": 2422862317.0, "step": 3826 }, { "epoch": 0.4525245358874305, "grad_norm": 0.1350315362215042, "learning_rate": 3.8437837268160075e-05, "loss": 0.342, "num_tokens": 2423499148.0, "step": 3827 }, { "epoch": 0.4526427811280596, "grad_norm": 0.14041239023208618, "learning_rate": 3.84277082732048e-05, "loss": 0.343, "num_tokens": 2424128461.0, "step": 3828 }, { "epoch": 0.45276102636868865, "grad_norm": 0.1326526701450348, "learning_rate": 3.841757848214367e-05, "loss": 0.3534, "num_tokens": 2424763055.0, "step": 3829 }, { "epoch": 0.45287927160931774, "grad_norm": 0.11860594898462296, "learning_rate": 3.840744789646248e-05, "loss": 0.3147, "num_tokens": 2425391705.0, "step": 3830 }, { "epoch": 0.45299751684994677, "grad_norm": 0.13567069172859192, "learning_rate": 3.839731651764712e-05, "loss": 0.3561, "num_tokens": 2426024401.0, "step": 3831 }, { "epoch": 0.45311576209057586, "grad_norm": 0.1393301784992218, "learning_rate": 3.8387184347183606e-05, "loss": 0.3566, "num_tokens": 2426663668.0, "step": 3832 }, { "epoch": 0.45323400733120495, "grad_norm": 0.13577182590961456, "learning_rate": 3.837705138655807e-05, "loss": 0.3365, "num_tokens": 2427302396.0, "step": 3833 }, { "epoch": 0.453352252571834, "grad_norm": 0.1289210468530655, "learning_rate": 3.836691763725674e-05, "loss": 0.3289, "num_tokens": 2427935840.0, "step": 3834 }, { "epoch": 0.45347049781246307, "grad_norm": 0.12742289900779724, "learning_rate": 3.8356783100766e-05, "loss": 0.3223, "num_tokens": 2428541918.0, "step": 3835 }, { "epoch": 0.4535887430530921, "grad_norm": 0.1354181468486786, "learning_rate": 3.834664777857232e-05, "loss": 0.3712, "num_tokens": 2429171501.0, "step": 3836 }, { "epoch": 0.4537069882937212, "grad_norm": 0.13311196863651276, "learning_rate": 3.8336511672162295e-05, "loss": 0.35, "num_tokens": 2429808592.0, "step": 3837 }, { "epoch": 0.4538252335343502, "grad_norm": 0.13524937629699707, "learning_rate": 3.832637478302262e-05, "loss": 0.35, "num_tokens": 2430442616.0, "step": 3838 }, { "epoch": 0.4539434787749793, "grad_norm": 0.14774532616138458, "learning_rate": 3.831623711264013e-05, "loss": 0.3563, "num_tokens": 2431077308.0, "step": 3839 }, { "epoch": 0.45406172401560835, "grad_norm": 0.14101000130176544, "learning_rate": 3.8306098662501755e-05, "loss": 0.369, "num_tokens": 2431709719.0, "step": 3840 }, { "epoch": 0.45417996925623744, "grad_norm": 0.14233753085136414, "learning_rate": 3.829595943409455e-05, "loss": 0.3876, "num_tokens": 2432341887.0, "step": 3841 }, { "epoch": 0.4542982144968665, "grad_norm": 0.12345699965953827, "learning_rate": 3.828581942890568e-05, "loss": 0.361, "num_tokens": 2432980829.0, "step": 3842 }, { "epoch": 0.45441645973749556, "grad_norm": 0.14217312633991241, "learning_rate": 3.827567864842242e-05, "loss": 0.2985, "num_tokens": 2433602548.0, "step": 3843 }, { "epoch": 0.45453470497812465, "grad_norm": 0.15398955345153809, "learning_rate": 3.826553709413217e-05, "loss": 0.3833, "num_tokens": 2434241562.0, "step": 3844 }, { "epoch": 0.4546529502187537, "grad_norm": 0.14918404817581177, "learning_rate": 3.825539476752243e-05, "loss": 0.355, "num_tokens": 2434874290.0, "step": 3845 }, { "epoch": 0.45477119545938277, "grad_norm": 0.12870965898036957, "learning_rate": 3.8245251670080816e-05, "loss": 0.324, "num_tokens": 2435507536.0, "step": 3846 }, { "epoch": 0.4548894407000118, "grad_norm": 0.13504089415073395, "learning_rate": 3.823510780329507e-05, "loss": 0.3173, "num_tokens": 2436142610.0, "step": 3847 }, { "epoch": 0.4550076859406409, "grad_norm": 0.15330632030963898, "learning_rate": 3.8224963168653035e-05, "loss": 0.3804, "num_tokens": 2436774773.0, "step": 3848 }, { "epoch": 0.4551259311812699, "grad_norm": 0.13112595677375793, "learning_rate": 3.821481776764266e-05, "loss": 0.3344, "num_tokens": 2437414354.0, "step": 3849 }, { "epoch": 0.455244176421899, "grad_norm": 0.1262529343366623, "learning_rate": 3.820467160175204e-05, "loss": 0.3543, "num_tokens": 2438046582.0, "step": 3850 }, { "epoch": 0.4553624216625281, "grad_norm": 0.12683576345443726, "learning_rate": 3.819452467246934e-05, "loss": 0.3329, "num_tokens": 2438664845.0, "step": 3851 }, { "epoch": 0.45548066690315714, "grad_norm": 0.3226027488708496, "learning_rate": 3.8184376981282846e-05, "loss": 0.3609, "num_tokens": 2439276808.0, "step": 3852 }, { "epoch": 0.4555989121437862, "grad_norm": 0.17608359456062317, "learning_rate": 3.817422852968098e-05, "loss": 0.3239, "num_tokens": 2439909936.0, "step": 3853 }, { "epoch": 0.45571715738441526, "grad_norm": 0.20098738372325897, "learning_rate": 3.816407931915225e-05, "loss": 0.3423, "num_tokens": 2440548984.0, "step": 3854 }, { "epoch": 0.45583540262504435, "grad_norm": 0.1953142285346985, "learning_rate": 3.815392935118529e-05, "loss": 0.3701, "num_tokens": 2441180414.0, "step": 3855 }, { "epoch": 0.4559536478656734, "grad_norm": 0.16032487154006958, "learning_rate": 3.814377862726884e-05, "loss": 0.3568, "num_tokens": 2441818014.0, "step": 3856 }, { "epoch": 0.4560718931063025, "grad_norm": 0.14344067871570587, "learning_rate": 3.8133627148891754e-05, "loss": 0.3203, "num_tokens": 2442449749.0, "step": 3857 }, { "epoch": 0.45619013834693156, "grad_norm": 0.16603632271289825, "learning_rate": 3.8123474917542996e-05, "loss": 0.3622, "num_tokens": 2443083571.0, "step": 3858 }, { "epoch": 0.4563083835875606, "grad_norm": 0.16413836181163788, "learning_rate": 3.8113321934711636e-05, "loss": 0.338, "num_tokens": 2443718467.0, "step": 3859 }, { "epoch": 0.4564266288281897, "grad_norm": 0.14462654292583466, "learning_rate": 3.810316820188685e-05, "loss": 0.3101, "num_tokens": 2444350647.0, "step": 3860 }, { "epoch": 0.4565448740688187, "grad_norm": 0.15708740055561066, "learning_rate": 3.809301372055793e-05, "loss": 0.3618, "num_tokens": 2444986139.0, "step": 3861 }, { "epoch": 0.4566631193094478, "grad_norm": 0.17443260550498962, "learning_rate": 3.808285849221429e-05, "loss": 0.3596, "num_tokens": 2445622283.0, "step": 3862 }, { "epoch": 0.45678136455007684, "grad_norm": 0.16800348460674286, "learning_rate": 3.807270251834544e-05, "loss": 0.3983, "num_tokens": 2446261307.0, "step": 3863 }, { "epoch": 0.45689960979070593, "grad_norm": 0.15120601654052734, "learning_rate": 3.8062545800441e-05, "loss": 0.3516, "num_tokens": 2446890181.0, "step": 3864 }, { "epoch": 0.45701785503133496, "grad_norm": 0.16092827916145325, "learning_rate": 3.8052388339990686e-05, "loss": 0.3654, "num_tokens": 2447527190.0, "step": 3865 }, { "epoch": 0.45713610027196405, "grad_norm": 0.1545756757259369, "learning_rate": 3.804223013848436e-05, "loss": 0.3716, "num_tokens": 2448159871.0, "step": 3866 }, { "epoch": 0.45725434551259314, "grad_norm": 0.13537544012069702, "learning_rate": 3.803207119741195e-05, "loss": 0.3066, "num_tokens": 2448788420.0, "step": 3867 }, { "epoch": 0.4573725907532222, "grad_norm": 0.14240007102489471, "learning_rate": 3.802191151826353e-05, "loss": 0.3646, "num_tokens": 2449420333.0, "step": 3868 }, { "epoch": 0.45749083599385126, "grad_norm": 0.1382836401462555, "learning_rate": 3.801175110252925e-05, "loss": 0.3368, "num_tokens": 2450059212.0, "step": 3869 }, { "epoch": 0.4576090812344803, "grad_norm": 0.1388619840145111, "learning_rate": 3.8001589951699394e-05, "loss": 0.3336, "num_tokens": 2450695071.0, "step": 3870 }, { "epoch": 0.4577273264751094, "grad_norm": 0.13486970961093903, "learning_rate": 3.799142806726434e-05, "loss": 0.34, "num_tokens": 2451327869.0, "step": 3871 }, { "epoch": 0.4578455717157384, "grad_norm": 0.13937507569789886, "learning_rate": 3.798126545071457e-05, "loss": 0.3452, "num_tokens": 2451956887.0, "step": 3872 }, { "epoch": 0.4579638169563675, "grad_norm": 0.15091826021671295, "learning_rate": 3.797110210354069e-05, "loss": 0.3583, "num_tokens": 2452593921.0, "step": 3873 }, { "epoch": 0.4580820621969966, "grad_norm": 0.12756749987602234, "learning_rate": 3.79609380272334e-05, "loss": 0.3399, "num_tokens": 2453230893.0, "step": 3874 }, { "epoch": 0.45820030743762563, "grad_norm": 0.12464489787817001, "learning_rate": 3.79507732232835e-05, "loss": 0.3303, "num_tokens": 2453866085.0, "step": 3875 }, { "epoch": 0.4583185526782547, "grad_norm": 0.13437677919864655, "learning_rate": 3.7940607693181914e-05, "loss": 0.3712, "num_tokens": 2454502637.0, "step": 3876 }, { "epoch": 0.45843679791888375, "grad_norm": 0.13985957205295563, "learning_rate": 3.793044143841966e-05, "loss": 0.3194, "num_tokens": 2455140031.0, "step": 3877 }, { "epoch": 0.45855504315951284, "grad_norm": 0.14011012017726898, "learning_rate": 3.7920274460487875e-05, "loss": 0.3235, "num_tokens": 2455779530.0, "step": 3878 }, { "epoch": 0.4586732884001419, "grad_norm": 0.13614895939826965, "learning_rate": 3.79101067608778e-05, "loss": 0.3447, "num_tokens": 2456409693.0, "step": 3879 }, { "epoch": 0.45879153364077097, "grad_norm": 0.14478668570518494, "learning_rate": 3.789993834108076e-05, "loss": 0.3724, "num_tokens": 2457042036.0, "step": 3880 }, { "epoch": 0.4589097788814, "grad_norm": 0.130126953125, "learning_rate": 3.788976920258821e-05, "loss": 0.3404, "num_tokens": 2457674589.0, "step": 3881 }, { "epoch": 0.4590280241220291, "grad_norm": 0.12941338121891022, "learning_rate": 3.7879599346891704e-05, "loss": 0.309, "num_tokens": 2458311365.0, "step": 3882 }, { "epoch": 0.4591462693626582, "grad_norm": 0.1443038433790207, "learning_rate": 3.786942877548289e-05, "loss": 0.3483, "num_tokens": 2458946392.0, "step": 3883 }, { "epoch": 0.4592645146032872, "grad_norm": 0.154031440615654, "learning_rate": 3.785925748985354e-05, "loss": 0.3557, "num_tokens": 2459584726.0, "step": 3884 }, { "epoch": 0.4593827598439163, "grad_norm": 0.13177894055843353, "learning_rate": 3.7849085491495525e-05, "loss": 0.3243, "num_tokens": 2460223551.0, "step": 3885 }, { "epoch": 0.45950100508454533, "grad_norm": 0.13324731588363647, "learning_rate": 3.783891278190081e-05, "loss": 0.3386, "num_tokens": 2460859557.0, "step": 3886 }, { "epoch": 0.4596192503251744, "grad_norm": 0.127590611577034, "learning_rate": 3.782873936256147e-05, "loss": 0.3202, "num_tokens": 2461492664.0, "step": 3887 }, { "epoch": 0.45973749556580346, "grad_norm": 0.14338769018650055, "learning_rate": 3.781856523496968e-05, "loss": 0.3329, "num_tokens": 2462130769.0, "step": 3888 }, { "epoch": 0.45985574080643254, "grad_norm": 0.1259353905916214, "learning_rate": 3.780839040061774e-05, "loss": 0.3524, "num_tokens": 2462766314.0, "step": 3889 }, { "epoch": 0.4599739860470616, "grad_norm": 0.1342475563287735, "learning_rate": 3.779821486099802e-05, "loss": 0.3578, "num_tokens": 2463397071.0, "step": 3890 }, { "epoch": 0.46009223128769067, "grad_norm": 0.13999657332897186, "learning_rate": 3.778803861760303e-05, "loss": 0.3392, "num_tokens": 2464011222.0, "step": 3891 }, { "epoch": 0.46021047652831976, "grad_norm": 0.13120925426483154, "learning_rate": 3.777786167192534e-05, "loss": 0.3308, "num_tokens": 2464647601.0, "step": 3892 }, { "epoch": 0.4603287217689488, "grad_norm": 0.12994001805782318, "learning_rate": 3.7767684025457674e-05, "loss": 0.3318, "num_tokens": 2465279577.0, "step": 3893 }, { "epoch": 0.4604469670095779, "grad_norm": 0.12647908926010132, "learning_rate": 3.775750567969281e-05, "loss": 0.3118, "num_tokens": 2465916308.0, "step": 3894 }, { "epoch": 0.4605652122502069, "grad_norm": 0.14714443683624268, "learning_rate": 3.774732663612366e-05, "loss": 0.3885, "num_tokens": 2466555751.0, "step": 3895 }, { "epoch": 0.460683457490836, "grad_norm": 0.12896093726158142, "learning_rate": 3.773714689624325e-05, "loss": 0.3313, "num_tokens": 2467192589.0, "step": 3896 }, { "epoch": 0.46080170273146503, "grad_norm": 0.14541277289390564, "learning_rate": 3.772696646154464e-05, "loss": 0.3742, "num_tokens": 2467830518.0, "step": 3897 }, { "epoch": 0.4609199479720941, "grad_norm": 0.1371169537305832, "learning_rate": 3.7716785333521076e-05, "loss": 0.3338, "num_tokens": 2468464380.0, "step": 3898 }, { "epoch": 0.4610381932127232, "grad_norm": 0.12842649221420288, "learning_rate": 3.770660351366586e-05, "loss": 0.3297, "num_tokens": 2469102567.0, "step": 3899 }, { "epoch": 0.46115643845335225, "grad_norm": 0.13351000845432281, "learning_rate": 3.769642100347241e-05, "loss": 0.3269, "num_tokens": 2469735811.0, "step": 3900 }, { "epoch": 0.46127468369398134, "grad_norm": 0.14200951159000397, "learning_rate": 3.768623780443422e-05, "loss": 0.3583, "num_tokens": 2470370227.0, "step": 3901 }, { "epoch": 0.46139292893461037, "grad_norm": 0.14399529993534088, "learning_rate": 3.767605391804493e-05, "loss": 0.3689, "num_tokens": 2471005928.0, "step": 3902 }, { "epoch": 0.46151117417523946, "grad_norm": 0.14468246698379517, "learning_rate": 3.766586934579823e-05, "loss": 0.3636, "num_tokens": 2471644703.0, "step": 3903 }, { "epoch": 0.4616294194158685, "grad_norm": 0.14660610258579254, "learning_rate": 3.765568408918796e-05, "loss": 0.3761, "num_tokens": 2472281089.0, "step": 3904 }, { "epoch": 0.4617476646564976, "grad_norm": 0.15228256583213806, "learning_rate": 3.764549814970801e-05, "loss": 0.3851, "num_tokens": 2472917516.0, "step": 3905 }, { "epoch": 0.4618659098971266, "grad_norm": 0.14706744253635406, "learning_rate": 3.763531152885242e-05, "loss": 0.3689, "num_tokens": 2473556057.0, "step": 3906 }, { "epoch": 0.4619841551377557, "grad_norm": 0.14073427021503448, "learning_rate": 3.7625124228115296e-05, "loss": 0.333, "num_tokens": 2474192138.0, "step": 3907 }, { "epoch": 0.4621024003783848, "grad_norm": 0.1423603743314743, "learning_rate": 3.7614936248990854e-05, "loss": 0.3301, "num_tokens": 2474827958.0, "step": 3908 }, { "epoch": 0.4622206456190138, "grad_norm": 0.13918080925941467, "learning_rate": 3.760474759297342e-05, "loss": 0.3554, "num_tokens": 2475464829.0, "step": 3909 }, { "epoch": 0.4623388908596429, "grad_norm": 0.14071124792099, "learning_rate": 3.75945582615574e-05, "loss": 0.3545, "num_tokens": 2476099455.0, "step": 3910 }, { "epoch": 0.46245713610027195, "grad_norm": 0.13127675652503967, "learning_rate": 3.7584368256237304e-05, "loss": 0.3292, "num_tokens": 2476722637.0, "step": 3911 }, { "epoch": 0.46257538134090104, "grad_norm": 0.13140974938869476, "learning_rate": 3.757417757850774e-05, "loss": 0.3315, "num_tokens": 2477356408.0, "step": 3912 }, { "epoch": 0.46269362658153007, "grad_norm": 0.13180401921272278, "learning_rate": 3.756398622986345e-05, "loss": 0.2983, "num_tokens": 2477995317.0, "step": 3913 }, { "epoch": 0.46281187182215916, "grad_norm": 0.13292314112186432, "learning_rate": 3.755379421179921e-05, "loss": 0.326, "num_tokens": 2478629616.0, "step": 3914 }, { "epoch": 0.46293011706278825, "grad_norm": 0.1323765069246292, "learning_rate": 3.7543601525809946e-05, "loss": 0.3625, "num_tokens": 2479268195.0, "step": 3915 }, { "epoch": 0.4630483623034173, "grad_norm": 0.1451491266489029, "learning_rate": 3.753340817339066e-05, "loss": 0.3695, "num_tokens": 2479906992.0, "step": 3916 }, { "epoch": 0.46316660754404637, "grad_norm": 0.14067569375038147, "learning_rate": 3.752321415603646e-05, "loss": 0.3752, "num_tokens": 2480542924.0, "step": 3917 }, { "epoch": 0.4632848527846754, "grad_norm": 0.11893464624881744, "learning_rate": 3.751301947524253e-05, "loss": 0.3707, "num_tokens": 2481177450.0, "step": 3918 }, { "epoch": 0.4634030980253045, "grad_norm": 0.1424737572669983, "learning_rate": 3.7502824132504185e-05, "loss": 0.3509, "num_tokens": 2481809610.0, "step": 3919 }, { "epoch": 0.4635213432659335, "grad_norm": 0.13269728422164917, "learning_rate": 3.749262812931682e-05, "loss": 0.3334, "num_tokens": 2482443542.0, "step": 3920 }, { "epoch": 0.4636395885065626, "grad_norm": 0.130513995885849, "learning_rate": 3.748243146717592e-05, "loss": 0.3204, "num_tokens": 2483077406.0, "step": 3921 }, { "epoch": 0.46375783374719165, "grad_norm": 0.14589394629001617, "learning_rate": 3.747223414757709e-05, "loss": 0.3553, "num_tokens": 2483714847.0, "step": 3922 }, { "epoch": 0.46387607898782074, "grad_norm": 0.1450115442276001, "learning_rate": 3.7462036172016e-05, "loss": 0.4009, "num_tokens": 2484349109.0, "step": 3923 }, { "epoch": 0.46399432422844983, "grad_norm": 0.14208939671516418, "learning_rate": 3.745183754198844e-05, "loss": 0.3372, "num_tokens": 2484981374.0, "step": 3924 }, { "epoch": 0.46411256946907886, "grad_norm": 0.15408356487751007, "learning_rate": 3.744163825899028e-05, "loss": 0.3569, "num_tokens": 2485616846.0, "step": 3925 }, { "epoch": 0.46423081470970795, "grad_norm": 0.13549917936325073, "learning_rate": 3.743143832451749e-05, "loss": 0.3651, "num_tokens": 2486231698.0, "step": 3926 }, { "epoch": 0.464349059950337, "grad_norm": 0.1388550102710724, "learning_rate": 3.742123774006616e-05, "loss": 0.3311, "num_tokens": 2486867243.0, "step": 3927 }, { "epoch": 0.4644673051909661, "grad_norm": 0.1412866860628128, "learning_rate": 3.741103650713243e-05, "loss": 0.307, "num_tokens": 2487497669.0, "step": 3928 }, { "epoch": 0.4645855504315951, "grad_norm": 0.1308547407388687, "learning_rate": 3.740083462721258e-05, "loss": 0.3377, "num_tokens": 2488136999.0, "step": 3929 }, { "epoch": 0.4647037956722242, "grad_norm": 0.15298253297805786, "learning_rate": 3.739063210180297e-05, "loss": 0.3644, "num_tokens": 2488771499.0, "step": 3930 }, { "epoch": 0.4648220409128533, "grad_norm": 0.15428221225738525, "learning_rate": 3.738042893240002e-05, "loss": 0.3939, "num_tokens": 2489403775.0, "step": 3931 }, { "epoch": 0.4649402861534823, "grad_norm": 0.1231127679347992, "learning_rate": 3.7370225120500295e-05, "loss": 0.3265, "num_tokens": 2490039366.0, "step": 3932 }, { "epoch": 0.4650585313941114, "grad_norm": 0.12526163458824158, "learning_rate": 3.736002066760042e-05, "loss": 0.3117, "num_tokens": 2490673385.0, "step": 3933 }, { "epoch": 0.46517677663474044, "grad_norm": 0.14405515789985657, "learning_rate": 3.734981557519714e-05, "loss": 0.3727, "num_tokens": 2491305543.0, "step": 3934 }, { "epoch": 0.46529502187536953, "grad_norm": 0.13125772774219513, "learning_rate": 3.7339609844787275e-05, "loss": 0.3568, "num_tokens": 2491943175.0, "step": 3935 }, { "epoch": 0.46541326711599856, "grad_norm": 0.13898950815200806, "learning_rate": 3.732940347786774e-05, "loss": 0.3539, "num_tokens": 2492577407.0, "step": 3936 }, { "epoch": 0.46553151235662765, "grad_norm": 0.13669686019420624, "learning_rate": 3.7319196475935564e-05, "loss": 0.3538, "num_tokens": 2493216012.0, "step": 3937 }, { "epoch": 0.4656497575972567, "grad_norm": 0.1439264565706253, "learning_rate": 3.7308988840487834e-05, "loss": 0.3444, "num_tokens": 2493851704.0, "step": 3938 }, { "epoch": 0.4657680028378858, "grad_norm": 0.13166779279708862, "learning_rate": 3.7298780573021754e-05, "loss": 0.331, "num_tokens": 2494488586.0, "step": 3939 }, { "epoch": 0.46588624807851486, "grad_norm": 0.1400129199028015, "learning_rate": 3.728857167503463e-05, "loss": 0.3401, "num_tokens": 2495119816.0, "step": 3940 }, { "epoch": 0.4660044933191439, "grad_norm": 0.12894032895565033, "learning_rate": 3.7278362148023807e-05, "loss": 0.3179, "num_tokens": 2495754033.0, "step": 3941 }, { "epoch": 0.466122738559773, "grad_norm": 0.14074555039405823, "learning_rate": 3.726815199348681e-05, "loss": 0.3368, "num_tokens": 2496389995.0, "step": 3942 }, { "epoch": 0.466240983800402, "grad_norm": 0.12544149160385132, "learning_rate": 3.725794121292118e-05, "loss": 0.3361, "num_tokens": 2497026459.0, "step": 3943 }, { "epoch": 0.4663592290410311, "grad_norm": 0.13784654438495636, "learning_rate": 3.724772980782458e-05, "loss": 0.3659, "num_tokens": 2497659282.0, "step": 3944 }, { "epoch": 0.46647747428166014, "grad_norm": 0.14545594155788422, "learning_rate": 3.723751777969476e-05, "loss": 0.3502, "num_tokens": 2498292474.0, "step": 3945 }, { "epoch": 0.46659571952228923, "grad_norm": 0.14407359063625336, "learning_rate": 3.7227305130029575e-05, "loss": 0.3732, "num_tokens": 2498928528.0, "step": 3946 }, { "epoch": 0.46671396476291827, "grad_norm": 0.13314013183116913, "learning_rate": 3.7217091860326955e-05, "loss": 0.344, "num_tokens": 2499563728.0, "step": 3947 }, { "epoch": 0.46683221000354735, "grad_norm": 0.16116072237491608, "learning_rate": 3.720687797208491e-05, "loss": 0.3802, "num_tokens": 2500198258.0, "step": 3948 }, { "epoch": 0.46695045524417644, "grad_norm": 0.12206191569566727, "learning_rate": 3.719666346680157e-05, "loss": 0.3096, "num_tokens": 2500834617.0, "step": 3949 }, { "epoch": 0.4670687004848055, "grad_norm": 0.1402384638786316, "learning_rate": 3.718644834597513e-05, "loss": 0.3591, "num_tokens": 2501470140.0, "step": 3950 }, { "epoch": 0.46718694572543457, "grad_norm": 0.14176394045352936, "learning_rate": 3.717623261110392e-05, "loss": 0.3188, "num_tokens": 2502108027.0, "step": 3951 }, { "epoch": 0.4673051909660636, "grad_norm": 0.1415473222732544, "learning_rate": 3.716601626368628e-05, "loss": 0.3519, "num_tokens": 2502737724.0, "step": 3952 }, { "epoch": 0.4674234362066927, "grad_norm": 0.1485365480184555, "learning_rate": 3.7155799305220724e-05, "loss": 0.3718, "num_tokens": 2503372089.0, "step": 3953 }, { "epoch": 0.4675416814473217, "grad_norm": 0.14031894505023956, "learning_rate": 3.71455817372058e-05, "loss": 0.3604, "num_tokens": 2504008985.0, "step": 3954 }, { "epoch": 0.4676599266879508, "grad_norm": 0.13367871940135956, "learning_rate": 3.713536356114015e-05, "loss": 0.3558, "num_tokens": 2504645097.0, "step": 3955 }, { "epoch": 0.4677781719285799, "grad_norm": 0.12945802509784698, "learning_rate": 3.712514477852255e-05, "loss": 0.3023, "num_tokens": 2505280294.0, "step": 3956 }, { "epoch": 0.46789641716920893, "grad_norm": 0.1667196899652481, "learning_rate": 3.711492539085183e-05, "loss": 0.3704, "num_tokens": 2505915505.0, "step": 3957 }, { "epoch": 0.468014662409838, "grad_norm": 0.14336946606636047, "learning_rate": 3.710470539962688e-05, "loss": 0.326, "num_tokens": 2506553542.0, "step": 3958 }, { "epoch": 0.46813290765046706, "grad_norm": 0.1279696822166443, "learning_rate": 3.709448480634675e-05, "loss": 0.3143, "num_tokens": 2507190355.0, "step": 3959 }, { "epoch": 0.46825115289109615, "grad_norm": 0.1330834925174713, "learning_rate": 3.7084263612510516e-05, "loss": 0.3255, "num_tokens": 2507824870.0, "step": 3960 }, { "epoch": 0.4683693981317252, "grad_norm": 0.15358354151248932, "learning_rate": 3.707404181961737e-05, "loss": 0.3862, "num_tokens": 2508458153.0, "step": 3961 }, { "epoch": 0.46848764337235427, "grad_norm": 0.1538732498884201, "learning_rate": 3.706381942916659e-05, "loss": 0.3232, "num_tokens": 2509083824.0, "step": 3962 }, { "epoch": 0.4686058886129833, "grad_norm": 0.1258312463760376, "learning_rate": 3.705359644265753e-05, "loss": 0.3444, "num_tokens": 2509720777.0, "step": 3963 }, { "epoch": 0.4687241338536124, "grad_norm": 0.1512129306793213, "learning_rate": 3.7043372861589664e-05, "loss": 0.3745, "num_tokens": 2510351606.0, "step": 3964 }, { "epoch": 0.4688423790942415, "grad_norm": 0.14860308170318604, "learning_rate": 3.70331486874625e-05, "loss": 0.3829, "num_tokens": 2510990965.0, "step": 3965 }, { "epoch": 0.4689606243348705, "grad_norm": 0.14315414428710938, "learning_rate": 3.7022923921775694e-05, "loss": 0.383, "num_tokens": 2511629777.0, "step": 3966 }, { "epoch": 0.4690788695754996, "grad_norm": 0.12718065083026886, "learning_rate": 3.7012698566028934e-05, "loss": 0.3266, "num_tokens": 2512267834.0, "step": 3967 }, { "epoch": 0.46919711481612864, "grad_norm": 0.1359032839536667, "learning_rate": 3.7002472621722014e-05, "loss": 0.3342, "num_tokens": 2512901810.0, "step": 3968 }, { "epoch": 0.4693153600567577, "grad_norm": 0.13408690690994263, "learning_rate": 3.699224609035482e-05, "loss": 0.3663, "num_tokens": 2513533372.0, "step": 3969 }, { "epoch": 0.46943360529738676, "grad_norm": 0.12954385578632355, "learning_rate": 3.698201897342736e-05, "loss": 0.3319, "num_tokens": 2514153234.0, "step": 3970 }, { "epoch": 0.46955185053801585, "grad_norm": 0.12623503804206848, "learning_rate": 3.6971791272439636e-05, "loss": 0.3176, "num_tokens": 2514782799.0, "step": 3971 }, { "epoch": 0.46967009577864494, "grad_norm": 0.12511765956878662, "learning_rate": 3.6961562988891835e-05, "loss": 0.335, "num_tokens": 2515416817.0, "step": 3972 }, { "epoch": 0.46978834101927397, "grad_norm": 0.1250404268503189, "learning_rate": 3.695133412428415e-05, "loss": 0.321, "num_tokens": 2516053356.0, "step": 3973 }, { "epoch": 0.46990658625990306, "grad_norm": 0.12472764402627945, "learning_rate": 3.6941104680116915e-05, "loss": 0.3274, "num_tokens": 2516688333.0, "step": 3974 }, { "epoch": 0.4700248315005321, "grad_norm": 0.12167119979858398, "learning_rate": 3.693087465789051e-05, "loss": 0.2891, "num_tokens": 2517315112.0, "step": 3975 }, { "epoch": 0.4701430767411612, "grad_norm": 0.13805116713047028, "learning_rate": 3.692064405910543e-05, "loss": 0.3553, "num_tokens": 2517947681.0, "step": 3976 }, { "epoch": 0.4702613219817902, "grad_norm": 0.14652645587921143, "learning_rate": 3.691041288526224e-05, "loss": 0.3295, "num_tokens": 2518579991.0, "step": 3977 }, { "epoch": 0.4703795672224193, "grad_norm": 0.1324182152748108, "learning_rate": 3.690018113786159e-05, "loss": 0.3423, "num_tokens": 2519219313.0, "step": 3978 }, { "epoch": 0.47049781246304834, "grad_norm": 0.1377650499343872, "learning_rate": 3.688994881840423e-05, "loss": 0.3554, "num_tokens": 2519856617.0, "step": 3979 }, { "epoch": 0.4706160577036774, "grad_norm": 0.1367582380771637, "learning_rate": 3.687971592839095e-05, "loss": 0.3323, "num_tokens": 2520493476.0, "step": 3980 }, { "epoch": 0.4707343029443065, "grad_norm": 0.12978622317314148, "learning_rate": 3.6869482469322674e-05, "loss": 0.3168, "num_tokens": 2521124777.0, "step": 3981 }, { "epoch": 0.47085254818493555, "grad_norm": 0.14385199546813965, "learning_rate": 3.6859248442700387e-05, "loss": 0.36, "num_tokens": 2521759871.0, "step": 3982 }, { "epoch": 0.47097079342556464, "grad_norm": 0.13228943943977356, "learning_rate": 3.684901385002515e-05, "loss": 0.3331, "num_tokens": 2522396852.0, "step": 3983 }, { "epoch": 0.47108903866619367, "grad_norm": 0.15396688878536224, "learning_rate": 3.683877869279811e-05, "loss": 0.3864, "num_tokens": 2523023261.0, "step": 3984 }, { "epoch": 0.47120728390682276, "grad_norm": 0.13691699504852295, "learning_rate": 3.6828542972520536e-05, "loss": 0.3584, "num_tokens": 2523655255.0, "step": 3985 }, { "epoch": 0.4713255291474518, "grad_norm": 0.14494481682777405, "learning_rate": 3.681830669069371e-05, "loss": 0.3586, "num_tokens": 2524281931.0, "step": 3986 }, { "epoch": 0.4714437743880809, "grad_norm": 0.14103366434574127, "learning_rate": 3.680806984881904e-05, "loss": 0.3375, "num_tokens": 2524917961.0, "step": 3987 }, { "epoch": 0.47156201962870997, "grad_norm": 0.14578574895858765, "learning_rate": 3.679783244839802e-05, "loss": 0.3387, "num_tokens": 2525544793.0, "step": 3988 }, { "epoch": 0.471680264869339, "grad_norm": 0.14774754643440247, "learning_rate": 3.6787594490932216e-05, "loss": 0.3373, "num_tokens": 2526178582.0, "step": 3989 }, { "epoch": 0.4717985101099681, "grad_norm": 0.13990414142608643, "learning_rate": 3.677735597792325e-05, "loss": 0.3659, "num_tokens": 2526815018.0, "step": 3990 }, { "epoch": 0.47191675535059713, "grad_norm": 0.14428508281707764, "learning_rate": 3.676711691087288e-05, "loss": 0.3408, "num_tokens": 2527453000.0, "step": 3991 }, { "epoch": 0.4720350005912262, "grad_norm": 0.13579270243644714, "learning_rate": 3.675687729128289e-05, "loss": 0.3511, "num_tokens": 2528089823.0, "step": 3992 }, { "epoch": 0.47215324583185525, "grad_norm": 0.1417362540960312, "learning_rate": 3.67466371206552e-05, "loss": 0.3608, "num_tokens": 2528716059.0, "step": 3993 }, { "epoch": 0.47227149107248434, "grad_norm": 0.14577245712280273, "learning_rate": 3.673639640049175e-05, "loss": 0.3516, "num_tokens": 2529350356.0, "step": 3994 }, { "epoch": 0.4723897363131134, "grad_norm": 0.14460138976573944, "learning_rate": 3.67261551322946e-05, "loss": 0.3412, "num_tokens": 2529984252.0, "step": 3995 }, { "epoch": 0.47250798155374246, "grad_norm": 0.13139726221561432, "learning_rate": 3.671591331756589e-05, "loss": 0.317, "num_tokens": 2530619091.0, "step": 3996 }, { "epoch": 0.47262622679437155, "grad_norm": 0.15259845554828644, "learning_rate": 3.670567095780783e-05, "loss": 0.3448, "num_tokens": 2531257109.0, "step": 3997 }, { "epoch": 0.4727444720350006, "grad_norm": 0.13954246044158936, "learning_rate": 3.66954280545227e-05, "loss": 0.3563, "num_tokens": 2531895711.0, "step": 3998 }, { "epoch": 0.4728627172756297, "grad_norm": 0.13835002481937408, "learning_rate": 3.668518460921289e-05, "loss": 0.3347, "num_tokens": 2532524301.0, "step": 3999 }, { "epoch": 0.4729809625162587, "grad_norm": 0.1319245845079422, "learning_rate": 3.6674940623380856e-05, "loss": 0.3588, "num_tokens": 2533159435.0, "step": 4000 }, { "epoch": 0.4730992077568878, "grad_norm": 0.1335378736257553, "learning_rate": 3.66646960985291e-05, "loss": 0.3379, "num_tokens": 2533793585.0, "step": 4001 }, { "epoch": 0.47321745299751683, "grad_norm": 0.1339777708053589, "learning_rate": 3.665445103616025e-05, "loss": 0.3242, "num_tokens": 2534427344.0, "step": 4002 }, { "epoch": 0.4733356982381459, "grad_norm": 0.14589923620224, "learning_rate": 3.664420543777699e-05, "loss": 0.332, "num_tokens": 2535062461.0, "step": 4003 }, { "epoch": 0.47345394347877495, "grad_norm": 0.14028775691986084, "learning_rate": 3.663395930488209e-05, "loss": 0.319, "num_tokens": 2535687414.0, "step": 4004 }, { "epoch": 0.47357218871940404, "grad_norm": 0.1348424255847931, "learning_rate": 3.6623712638978385e-05, "loss": 0.3134, "num_tokens": 2536322903.0, "step": 4005 }, { "epoch": 0.47369043396003313, "grad_norm": 0.1581929475069046, "learning_rate": 3.661346544156882e-05, "loss": 0.3389, "num_tokens": 2536958674.0, "step": 4006 }, { "epoch": 0.47380867920066216, "grad_norm": 0.12370273470878601, "learning_rate": 3.660321771415637e-05, "loss": 0.3011, "num_tokens": 2537598312.0, "step": 4007 }, { "epoch": 0.47392692444129125, "grad_norm": 0.14595982432365417, "learning_rate": 3.659296945824414e-05, "loss": 0.3174, "num_tokens": 2538205886.0, "step": 4008 }, { "epoch": 0.4740451696819203, "grad_norm": 0.14253956079483032, "learning_rate": 3.658272067533527e-05, "loss": 0.3385, "num_tokens": 2538842977.0, "step": 4009 }, { "epoch": 0.4741634149225494, "grad_norm": 0.14842188358306885, "learning_rate": 3.6572471366932996e-05, "loss": 0.3476, "num_tokens": 2539477150.0, "step": 4010 }, { "epoch": 0.4742816601631784, "grad_norm": 0.13529092073440552, "learning_rate": 3.6562221534540644e-05, "loss": 0.3601, "num_tokens": 2540111685.0, "step": 4011 }, { "epoch": 0.4743999054038075, "grad_norm": 0.1342865377664566, "learning_rate": 3.6551971179661576e-05, "loss": 0.3357, "num_tokens": 2540740587.0, "step": 4012 }, { "epoch": 0.4745181506444366, "grad_norm": 0.1356489211320877, "learning_rate": 3.6541720303799274e-05, "loss": 0.3035, "num_tokens": 2541365552.0, "step": 4013 }, { "epoch": 0.4746363958850656, "grad_norm": 0.13288074731826782, "learning_rate": 3.653146890845727e-05, "loss": 0.3568, "num_tokens": 2541996490.0, "step": 4014 }, { "epoch": 0.4747546411256947, "grad_norm": 0.13309447467327118, "learning_rate": 3.65212169951392e-05, "loss": 0.3337, "num_tokens": 2542629624.0, "step": 4015 }, { "epoch": 0.47487288636632374, "grad_norm": 0.15447112917900085, "learning_rate": 3.651096456534873e-05, "loss": 0.3587, "num_tokens": 2543264493.0, "step": 4016 }, { "epoch": 0.47499113160695283, "grad_norm": 0.13232676684856415, "learning_rate": 3.650071162058966e-05, "loss": 0.3001, "num_tokens": 2543898945.0, "step": 4017 }, { "epoch": 0.47510937684758187, "grad_norm": 0.12445862591266632, "learning_rate": 3.64904581623658e-05, "loss": 0.3099, "num_tokens": 2544536259.0, "step": 4018 }, { "epoch": 0.47522762208821095, "grad_norm": 0.13123227655887604, "learning_rate": 3.64802041921811e-05, "loss": 0.3026, "num_tokens": 2545170615.0, "step": 4019 }, { "epoch": 0.47534586732884, "grad_norm": 0.1369551569223404, "learning_rate": 3.6469949711539526e-05, "loss": 0.3358, "num_tokens": 2545804758.0, "step": 4020 }, { "epoch": 0.4754641125694691, "grad_norm": 0.12536725401878357, "learning_rate": 3.645969472194518e-05, "loss": 0.3092, "num_tokens": 2546440065.0, "step": 4021 }, { "epoch": 0.47558235781009817, "grad_norm": 0.1265394389629364, "learning_rate": 3.644943922490218e-05, "loss": 0.3099, "num_tokens": 2547069801.0, "step": 4022 }, { "epoch": 0.4757006030507272, "grad_norm": 0.13847008347511292, "learning_rate": 3.643918322191477e-05, "loss": 0.3486, "num_tokens": 2547706340.0, "step": 4023 }, { "epoch": 0.4758188482913563, "grad_norm": 0.13712120056152344, "learning_rate": 3.642892671448721e-05, "loss": 0.3504, "num_tokens": 2548341121.0, "step": 4024 }, { "epoch": 0.4759370935319853, "grad_norm": 0.12925811111927032, "learning_rate": 3.64186697041239e-05, "loss": 0.3599, "num_tokens": 2548980616.0, "step": 4025 }, { "epoch": 0.4760553387726144, "grad_norm": 0.14022938907146454, "learning_rate": 3.6408412192329254e-05, "loss": 0.3671, "num_tokens": 2549616188.0, "step": 4026 }, { "epoch": 0.47617358401324344, "grad_norm": 0.1300198882818222, "learning_rate": 3.63981541806078e-05, "loss": 0.3272, "num_tokens": 2550249997.0, "step": 4027 }, { "epoch": 0.47629182925387253, "grad_norm": 0.15320922434329987, "learning_rate": 3.6387895670464136e-05, "loss": 0.3754, "num_tokens": 2550886630.0, "step": 4028 }, { "epoch": 0.4764100744945016, "grad_norm": 0.1333877295255661, "learning_rate": 3.637763666340289e-05, "loss": 0.3521, "num_tokens": 2551520738.0, "step": 4029 }, { "epoch": 0.47652831973513066, "grad_norm": 0.12951117753982544, "learning_rate": 3.6367377160928844e-05, "loss": 0.3801, "num_tokens": 2552154513.0, "step": 4030 }, { "epoch": 0.47664656497575975, "grad_norm": 0.13349071145057678, "learning_rate": 3.635711716454676e-05, "loss": 0.2938, "num_tokens": 2552778419.0, "step": 4031 }, { "epoch": 0.4767648102163888, "grad_norm": 0.1404813677072525, "learning_rate": 3.6346856675761546e-05, "loss": 0.334, "num_tokens": 2553415472.0, "step": 4032 }, { "epoch": 0.47688305545701787, "grad_norm": 0.14191339910030365, "learning_rate": 3.633659569607813e-05, "loss": 0.3885, "num_tokens": 2554049216.0, "step": 4033 }, { "epoch": 0.4770013006976469, "grad_norm": 0.12913140654563904, "learning_rate": 3.632633422700155e-05, "loss": 0.3207, "num_tokens": 2554685648.0, "step": 4034 }, { "epoch": 0.477119545938276, "grad_norm": 0.1449916958808899, "learning_rate": 3.631607227003689e-05, "loss": 0.3893, "num_tokens": 2555320822.0, "step": 4035 }, { "epoch": 0.477237791178905, "grad_norm": 0.1506611704826355, "learning_rate": 3.630580982668933e-05, "loss": 0.3454, "num_tokens": 2555960451.0, "step": 4036 }, { "epoch": 0.4773560364195341, "grad_norm": 0.12548136711120605, "learning_rate": 3.6295546898464094e-05, "loss": 0.3239, "num_tokens": 2556593623.0, "step": 4037 }, { "epoch": 0.4774742816601632, "grad_norm": 0.14082060754299164, "learning_rate": 3.628528348686651e-05, "loss": 0.3447, "num_tokens": 2557230182.0, "step": 4038 }, { "epoch": 0.47759252690079224, "grad_norm": 0.12074068188667297, "learning_rate": 3.6275019593401934e-05, "loss": 0.3037, "num_tokens": 2557860805.0, "step": 4039 }, { "epoch": 0.4777107721414213, "grad_norm": 0.12529721856117249, "learning_rate": 3.626475521957584e-05, "loss": 0.3314, "num_tokens": 2558500396.0, "step": 4040 }, { "epoch": 0.47782901738205036, "grad_norm": 0.14694544672966003, "learning_rate": 3.625449036689372e-05, "loss": 0.379, "num_tokens": 2559133053.0, "step": 4041 }, { "epoch": 0.47794726262267945, "grad_norm": 0.14133751392364502, "learning_rate": 3.6244225036861196e-05, "loss": 0.3622, "num_tokens": 2559769083.0, "step": 4042 }, { "epoch": 0.4780655078633085, "grad_norm": 0.13683554530143738, "learning_rate": 3.623395923098391e-05, "loss": 0.332, "num_tokens": 2560408458.0, "step": 4043 }, { "epoch": 0.47818375310393757, "grad_norm": 0.1589665412902832, "learning_rate": 3.62236929507676e-05, "loss": 0.3896, "num_tokens": 2561043707.0, "step": 4044 }, { "epoch": 0.4783019983445666, "grad_norm": 0.13758757710456848, "learning_rate": 3.621342619771807e-05, "loss": 0.3595, "num_tokens": 2561645098.0, "step": 4045 }, { "epoch": 0.4784202435851957, "grad_norm": 0.13879168033599854, "learning_rate": 3.620315897334116e-05, "loss": 0.3607, "num_tokens": 2562280416.0, "step": 4046 }, { "epoch": 0.4785384888258248, "grad_norm": 0.14484867453575134, "learning_rate": 3.6192891279142865e-05, "loss": 0.3517, "num_tokens": 2562915290.0, "step": 4047 }, { "epoch": 0.4786567340664538, "grad_norm": 0.1298568695783615, "learning_rate": 3.618262311662913e-05, "loss": 0.3605, "num_tokens": 2563551144.0, "step": 4048 }, { "epoch": 0.4787749793070829, "grad_norm": 0.14369560778141022, "learning_rate": 3.6172354487306085e-05, "loss": 0.3565, "num_tokens": 2564186846.0, "step": 4049 }, { "epoch": 0.47889322454771194, "grad_norm": 0.1552843451499939, "learning_rate": 3.616208539267984e-05, "loss": 0.3639, "num_tokens": 2564805451.0, "step": 4050 }, { "epoch": 0.479011469788341, "grad_norm": 0.13371916115283966, "learning_rate": 3.615181583425664e-05, "loss": 0.3304, "num_tokens": 2565440344.0, "step": 4051 }, { "epoch": 0.47912971502897006, "grad_norm": 0.1426142156124115, "learning_rate": 3.614154581354272e-05, "loss": 0.3666, "num_tokens": 2566078400.0, "step": 4052 }, { "epoch": 0.47924796026959915, "grad_norm": 0.13460007309913635, "learning_rate": 3.613127533204448e-05, "loss": 0.3265, "num_tokens": 2566712809.0, "step": 4053 }, { "epoch": 0.47936620551022824, "grad_norm": 0.1395997405052185, "learning_rate": 3.6121004391268296e-05, "loss": 0.3662, "num_tokens": 2567348699.0, "step": 4054 }, { "epoch": 0.47948445075085727, "grad_norm": 0.1332845389842987, "learning_rate": 3.611073299272068e-05, "loss": 0.3525, "num_tokens": 2567985012.0, "step": 4055 }, { "epoch": 0.47960269599148636, "grad_norm": 0.144358292222023, "learning_rate": 3.610046113790817e-05, "loss": 0.3706, "num_tokens": 2568618240.0, "step": 4056 }, { "epoch": 0.4797209412321154, "grad_norm": 0.12824246287345886, "learning_rate": 3.6090188828337374e-05, "loss": 0.343, "num_tokens": 2569257691.0, "step": 4057 }, { "epoch": 0.4798391864727445, "grad_norm": 0.14124761521816254, "learning_rate": 3.607991606551501e-05, "loss": 0.3502, "num_tokens": 2569893152.0, "step": 4058 }, { "epoch": 0.4799574317133735, "grad_norm": 0.13194508850574493, "learning_rate": 3.606964285094779e-05, "loss": 0.3221, "num_tokens": 2570530940.0, "step": 4059 }, { "epoch": 0.4800756769540026, "grad_norm": 0.13558435440063477, "learning_rate": 3.605936918614256e-05, "loss": 0.3431, "num_tokens": 2571166988.0, "step": 4060 }, { "epoch": 0.48019392219463164, "grad_norm": 0.1462765485048294, "learning_rate": 3.6049095072606194e-05, "loss": 0.2993, "num_tokens": 2571795752.0, "step": 4061 }, { "epoch": 0.48031216743526073, "grad_norm": 0.14017203450202942, "learning_rate": 3.6038820511845645e-05, "loss": 0.3593, "num_tokens": 2572429784.0, "step": 4062 }, { "epoch": 0.4804304126758898, "grad_norm": 0.1368127465248108, "learning_rate": 3.6028545505367916e-05, "loss": 0.3516, "num_tokens": 2573069049.0, "step": 4063 }, { "epoch": 0.48054865791651885, "grad_norm": 0.12677626311779022, "learning_rate": 3.6018270054680095e-05, "loss": 0.321, "num_tokens": 2573702809.0, "step": 4064 }, { "epoch": 0.48066690315714794, "grad_norm": 0.14179334044456482, "learning_rate": 3.600799416128933e-05, "loss": 0.3334, "num_tokens": 2574330086.0, "step": 4065 }, { "epoch": 0.480785148397777, "grad_norm": 0.144611194729805, "learning_rate": 3.599771782670284e-05, "loss": 0.34, "num_tokens": 2574961266.0, "step": 4066 }, { "epoch": 0.48090339363840606, "grad_norm": 0.1331501603126526, "learning_rate": 3.5987441052427876e-05, "loss": 0.3529, "num_tokens": 2575596333.0, "step": 4067 }, { "epoch": 0.4810216388790351, "grad_norm": 0.13987374305725098, "learning_rate": 3.59771638399718e-05, "loss": 0.3825, "num_tokens": 2576233164.0, "step": 4068 }, { "epoch": 0.4811398841196642, "grad_norm": 0.11577088385820389, "learning_rate": 3.596688619084199e-05, "loss": 0.3479, "num_tokens": 2576868072.0, "step": 4069 }, { "epoch": 0.4812581293602933, "grad_norm": 0.14202016592025757, "learning_rate": 3.595660810654596e-05, "loss": 0.376, "num_tokens": 2577506253.0, "step": 4070 }, { "epoch": 0.4813763746009223, "grad_norm": 0.1251603364944458, "learning_rate": 3.594632958859119e-05, "loss": 0.3377, "num_tokens": 2578139515.0, "step": 4071 }, { "epoch": 0.4814946198415514, "grad_norm": 0.12858018279075623, "learning_rate": 3.593605063848531e-05, "loss": 0.3317, "num_tokens": 2578746642.0, "step": 4072 }, { "epoch": 0.48161286508218043, "grad_norm": 0.12275779992341995, "learning_rate": 3.592577125773596e-05, "loss": 0.3281, "num_tokens": 2579382938.0, "step": 4073 }, { "epoch": 0.4817311103228095, "grad_norm": 0.12818209826946259, "learning_rate": 3.591549144785087e-05, "loss": 0.3342, "num_tokens": 2580020595.0, "step": 4074 }, { "epoch": 0.48184935556343855, "grad_norm": 0.12177938967943192, "learning_rate": 3.590521121033784e-05, "loss": 0.3256, "num_tokens": 2580656452.0, "step": 4075 }, { "epoch": 0.48196760080406764, "grad_norm": 0.13082227110862732, "learning_rate": 3.5894930546704686e-05, "loss": 0.3311, "num_tokens": 2581292082.0, "step": 4076 }, { "epoch": 0.4820858460446967, "grad_norm": 0.14053499698638916, "learning_rate": 3.588464945845934e-05, "loss": 0.3701, "num_tokens": 2581928385.0, "step": 4077 }, { "epoch": 0.48220409128532576, "grad_norm": 0.1290092170238495, "learning_rate": 3.587436794710976e-05, "loss": 0.3593, "num_tokens": 2582566013.0, "step": 4078 }, { "epoch": 0.48232233652595485, "grad_norm": 0.14229103922843933, "learning_rate": 3.5864086014164e-05, "loss": 0.3075, "num_tokens": 2583202316.0, "step": 4079 }, { "epoch": 0.4824405817665839, "grad_norm": 0.14083987474441528, "learning_rate": 3.585380366113014e-05, "loss": 0.3458, "num_tokens": 2583838844.0, "step": 4080 }, { "epoch": 0.482558827007213, "grad_norm": 0.15189461410045624, "learning_rate": 3.584352088951635e-05, "loss": 0.3805, "num_tokens": 2584471150.0, "step": 4081 }, { "epoch": 0.482677072247842, "grad_norm": 0.14112773537635803, "learning_rate": 3.5833237700830834e-05, "loss": 0.4071, "num_tokens": 2585107307.0, "step": 4082 }, { "epoch": 0.4827953174884711, "grad_norm": 0.1300394982099533, "learning_rate": 3.5822954096581895e-05, "loss": 0.3365, "num_tokens": 2585746547.0, "step": 4083 }, { "epoch": 0.48291356272910013, "grad_norm": 0.14232979714870453, "learning_rate": 3.581267007827784e-05, "loss": 0.3344, "num_tokens": 2586376034.0, "step": 4084 }, { "epoch": 0.4830318079697292, "grad_norm": 0.13368478417396545, "learning_rate": 3.5802385647427106e-05, "loss": 0.366, "num_tokens": 2587008062.0, "step": 4085 }, { "epoch": 0.4831500532103583, "grad_norm": 0.1463761180639267, "learning_rate": 3.579210080553814e-05, "loss": 0.3801, "num_tokens": 2587640405.0, "step": 4086 }, { "epoch": 0.48326829845098734, "grad_norm": 0.13374623656272888, "learning_rate": 3.5781815554119465e-05, "loss": 0.3218, "num_tokens": 2588269775.0, "step": 4087 }, { "epoch": 0.48338654369161643, "grad_norm": 0.15855486690998077, "learning_rate": 3.5771529894679676e-05, "loss": 0.3735, "num_tokens": 2588902626.0, "step": 4088 }, { "epoch": 0.48350478893224547, "grad_norm": 0.13875475525856018, "learning_rate": 3.576124382872739e-05, "loss": 0.3732, "num_tokens": 2589537779.0, "step": 4089 }, { "epoch": 0.48362303417287456, "grad_norm": 0.1613033562898636, "learning_rate": 3.575095735777134e-05, "loss": 0.3435, "num_tokens": 2590155068.0, "step": 4090 }, { "epoch": 0.4837412794135036, "grad_norm": 0.1565161943435669, "learning_rate": 3.5740670483320265e-05, "loss": 0.3448, "num_tokens": 2590787574.0, "step": 4091 }, { "epoch": 0.4838595246541327, "grad_norm": 0.13141120970249176, "learning_rate": 3.5730383206882986e-05, "loss": 0.343, "num_tokens": 2591418746.0, "step": 4092 }, { "epoch": 0.4839777698947617, "grad_norm": 0.15110965073108673, "learning_rate": 3.57200955299684e-05, "loss": 0.3438, "num_tokens": 2592047930.0, "step": 4093 }, { "epoch": 0.4840960151353908, "grad_norm": 0.13205847144126892, "learning_rate": 3.570980745408544e-05, "loss": 0.3422, "num_tokens": 2592684539.0, "step": 4094 }, { "epoch": 0.4842142603760199, "grad_norm": 0.15809045732021332, "learning_rate": 3.56995189807431e-05, "loss": 0.355, "num_tokens": 2593307254.0, "step": 4095 }, { "epoch": 0.4843325056166489, "grad_norm": 0.14061684906482697, "learning_rate": 3.568923011145043e-05, "loss": 0.3422, "num_tokens": 2593944228.0, "step": 4096 }, { "epoch": 0.484450750857278, "grad_norm": 0.1374458521604538, "learning_rate": 3.567894084771656e-05, "loss": 0.3801, "num_tokens": 2594579688.0, "step": 4097 }, { "epoch": 0.48456899609790705, "grad_norm": 0.13814577460289001, "learning_rate": 3.5668651191050645e-05, "loss": 0.3497, "num_tokens": 2595195707.0, "step": 4098 }, { "epoch": 0.48468724133853613, "grad_norm": 0.14346368610858917, "learning_rate": 3.565836114296192e-05, "loss": 0.3923, "num_tokens": 2595830222.0, "step": 4099 }, { "epoch": 0.48480548657916517, "grad_norm": 0.11451078951358795, "learning_rate": 3.564807070495967e-05, "loss": 0.2963, "num_tokens": 2596462946.0, "step": 4100 }, { "epoch": 0.48492373181979426, "grad_norm": 0.1214311495423317, "learning_rate": 3.563777987855325e-05, "loss": 0.3117, "num_tokens": 2597098001.0, "step": 4101 }, { "epoch": 0.4850419770604233, "grad_norm": 0.1432051658630371, "learning_rate": 3.562748866525205e-05, "loss": 0.3342, "num_tokens": 2597731993.0, "step": 4102 }, { "epoch": 0.4851602223010524, "grad_norm": 0.13262422382831573, "learning_rate": 3.561719706656553e-05, "loss": 0.3557, "num_tokens": 2598368752.0, "step": 4103 }, { "epoch": 0.48527846754168147, "grad_norm": 0.13325107097625732, "learning_rate": 3.560690508400319e-05, "loss": 0.3431, "num_tokens": 2598998893.0, "step": 4104 }, { "epoch": 0.4853967127823105, "grad_norm": 0.11775943636894226, "learning_rate": 3.559661271907464e-05, "loss": 0.3193, "num_tokens": 2599636891.0, "step": 4105 }, { "epoch": 0.4855149580229396, "grad_norm": 0.1461496651172638, "learning_rate": 3.5586319973289456e-05, "loss": 0.3693, "num_tokens": 2600271364.0, "step": 4106 }, { "epoch": 0.4856332032635686, "grad_norm": 0.1352386623620987, "learning_rate": 3.557602684815735e-05, "loss": 0.3519, "num_tokens": 2600904411.0, "step": 4107 }, { "epoch": 0.4857514485041977, "grad_norm": 0.14187118411064148, "learning_rate": 3.556573334518806e-05, "loss": 0.3611, "num_tokens": 2601539642.0, "step": 4108 }, { "epoch": 0.48586969374482675, "grad_norm": 0.1425711214542389, "learning_rate": 3.555543946589137e-05, "loss": 0.3419, "num_tokens": 2602177137.0, "step": 4109 }, { "epoch": 0.48598793898545584, "grad_norm": 0.14473024010658264, "learning_rate": 3.554514521177714e-05, "loss": 0.3334, "num_tokens": 2602816454.0, "step": 4110 }, { "epoch": 0.4861061842260849, "grad_norm": 0.14630712568759918, "learning_rate": 3.553485058435526e-05, "loss": 0.3636, "num_tokens": 2603448982.0, "step": 4111 }, { "epoch": 0.48622442946671396, "grad_norm": 0.15151263773441315, "learning_rate": 3.552455558513569e-05, "loss": 0.3672, "num_tokens": 2604076506.0, "step": 4112 }, { "epoch": 0.48634267470734305, "grad_norm": 0.13674379885196686, "learning_rate": 3.551426021562845e-05, "loss": 0.3593, "num_tokens": 2604712335.0, "step": 4113 }, { "epoch": 0.4864609199479721, "grad_norm": 0.14820034801959991, "learning_rate": 3.550396447734361e-05, "loss": 0.376, "num_tokens": 2605344298.0, "step": 4114 }, { "epoch": 0.48657916518860117, "grad_norm": 0.14232975244522095, "learning_rate": 3.549366837179128e-05, "loss": 0.3538, "num_tokens": 2605983210.0, "step": 4115 }, { "epoch": 0.4866974104292302, "grad_norm": 0.14556898176670074, "learning_rate": 3.548337190048164e-05, "loss": 0.3729, "num_tokens": 2606618726.0, "step": 4116 }, { "epoch": 0.4868156556698593, "grad_norm": 0.1420363336801529, "learning_rate": 3.547307506492493e-05, "loss": 0.3266, "num_tokens": 2607231898.0, "step": 4117 }, { "epoch": 0.4869339009104883, "grad_norm": 0.1365519016981125, "learning_rate": 3.5462777866631405e-05, "loss": 0.3487, "num_tokens": 2607866416.0, "step": 4118 }, { "epoch": 0.4870521461511174, "grad_norm": 0.12212308496236801, "learning_rate": 3.545248030711143e-05, "loss": 0.3265, "num_tokens": 2608499216.0, "step": 4119 }, { "epoch": 0.4871703913917465, "grad_norm": 0.13433539867401123, "learning_rate": 3.544218238787536e-05, "loss": 0.322, "num_tokens": 2609129587.0, "step": 4120 }, { "epoch": 0.48728863663237554, "grad_norm": 0.15073136985301971, "learning_rate": 3.543188411043369e-05, "loss": 0.376, "num_tokens": 2609765867.0, "step": 4121 }, { "epoch": 0.4874068818730046, "grad_norm": 0.1368079036474228, "learning_rate": 3.542158547629686e-05, "loss": 0.363, "num_tokens": 2610401103.0, "step": 4122 }, { "epoch": 0.48752512711363366, "grad_norm": 0.15114285051822662, "learning_rate": 3.541128648697543e-05, "loss": 0.3544, "num_tokens": 2611024131.0, "step": 4123 }, { "epoch": 0.48764337235426275, "grad_norm": 0.13432766497135162, "learning_rate": 3.5400987143980015e-05, "loss": 0.3304, "num_tokens": 2611659436.0, "step": 4124 }, { "epoch": 0.4877616175948918, "grad_norm": 0.12747666239738464, "learning_rate": 3.539068744882124e-05, "loss": 0.3422, "num_tokens": 2612286715.0, "step": 4125 }, { "epoch": 0.48787986283552087, "grad_norm": 0.14622461795806885, "learning_rate": 3.538038740300984e-05, "loss": 0.3464, "num_tokens": 2612920747.0, "step": 4126 }, { "epoch": 0.48799810807614996, "grad_norm": 0.1379140317440033, "learning_rate": 3.537008700805654e-05, "loss": 0.3624, "num_tokens": 2613556999.0, "step": 4127 }, { "epoch": 0.488116353316779, "grad_norm": 0.14647388458251953, "learning_rate": 3.535978626547215e-05, "loss": 0.3876, "num_tokens": 2614194849.0, "step": 4128 }, { "epoch": 0.4882345985574081, "grad_norm": 0.13551151752471924, "learning_rate": 3.5349485176767526e-05, "loss": 0.344, "num_tokens": 2614833787.0, "step": 4129 }, { "epoch": 0.4883528437980371, "grad_norm": 0.14090850949287415, "learning_rate": 3.5339183743453587e-05, "loss": 0.3285, "num_tokens": 2615466692.0, "step": 4130 }, { "epoch": 0.4884710890386662, "grad_norm": 0.14261981844902039, "learning_rate": 3.532888196704127e-05, "loss": 0.3654, "num_tokens": 2616104019.0, "step": 4131 }, { "epoch": 0.48858933427929524, "grad_norm": 0.13445858657360077, "learning_rate": 3.5318579849041595e-05, "loss": 0.3534, "num_tokens": 2616740153.0, "step": 4132 }, { "epoch": 0.48870757951992433, "grad_norm": 0.13411261141300201, "learning_rate": 3.5308277390965606e-05, "loss": 0.3317, "num_tokens": 2617371807.0, "step": 4133 }, { "epoch": 0.48882582476055336, "grad_norm": 0.14508196711540222, "learning_rate": 3.529797459432443e-05, "loss": 0.3527, "num_tokens": 2618000444.0, "step": 4134 }, { "epoch": 0.48894407000118245, "grad_norm": 0.1280847191810608, "learning_rate": 3.528767146062919e-05, "loss": 0.3338, "num_tokens": 2618628979.0, "step": 4135 }, { "epoch": 0.48906231524181154, "grad_norm": 0.13238674402236938, "learning_rate": 3.527736799139114e-05, "loss": 0.3588, "num_tokens": 2619263037.0, "step": 4136 }, { "epoch": 0.4891805604824406, "grad_norm": 0.13411609828472137, "learning_rate": 3.52670641881215e-05, "loss": 0.3237, "num_tokens": 2619902367.0, "step": 4137 }, { "epoch": 0.48929880572306966, "grad_norm": 0.1492214947938919, "learning_rate": 3.525676005233158e-05, "loss": 0.3009, "num_tokens": 2620518979.0, "step": 4138 }, { "epoch": 0.4894170509636987, "grad_norm": 0.13254965841770172, "learning_rate": 3.524645558553274e-05, "loss": 0.3571, "num_tokens": 2621156948.0, "step": 4139 }, { "epoch": 0.4895352962043278, "grad_norm": 0.1256634145975113, "learning_rate": 3.5236150789236376e-05, "loss": 0.3376, "num_tokens": 2621795795.0, "step": 4140 }, { "epoch": 0.4896535414449568, "grad_norm": 0.129542276263237, "learning_rate": 3.522584566495393e-05, "loss": 0.3237, "num_tokens": 2622428734.0, "step": 4141 }, { "epoch": 0.4897717866855859, "grad_norm": 0.13579097390174866, "learning_rate": 3.5215540214196924e-05, "loss": 0.3461, "num_tokens": 2623060921.0, "step": 4142 }, { "epoch": 0.489890031926215, "grad_norm": 0.12879833579063416, "learning_rate": 3.520523443847688e-05, "loss": 0.3649, "num_tokens": 2623700144.0, "step": 4143 }, { "epoch": 0.49000827716684403, "grad_norm": 0.13606125116348267, "learning_rate": 3.51949283393054e-05, "loss": 0.3447, "num_tokens": 2624338566.0, "step": 4144 }, { "epoch": 0.4901265224074731, "grad_norm": 0.1381816864013672, "learning_rate": 3.5184621918194136e-05, "loss": 0.3466, "num_tokens": 2624973627.0, "step": 4145 }, { "epoch": 0.49024476764810215, "grad_norm": 0.12976513803005219, "learning_rate": 3.517431517665475e-05, "loss": 0.335, "num_tokens": 2625606658.0, "step": 4146 }, { "epoch": 0.49036301288873124, "grad_norm": 0.11561882495880127, "learning_rate": 3.5164008116199e-05, "loss": 0.3005, "num_tokens": 2626242113.0, "step": 4147 }, { "epoch": 0.4904812581293603, "grad_norm": 0.1402827948331833, "learning_rate": 3.515370073833866e-05, "loss": 0.3367, "num_tokens": 2626876910.0, "step": 4148 }, { "epoch": 0.49059950336998936, "grad_norm": 0.14285992085933685, "learning_rate": 3.514339304458556e-05, "loss": 0.3826, "num_tokens": 2627515687.0, "step": 4149 }, { "epoch": 0.4907177486106184, "grad_norm": 0.13603876531124115, "learning_rate": 3.513308503645157e-05, "loss": 0.344, "num_tokens": 2628147754.0, "step": 4150 }, { "epoch": 0.4908359938512475, "grad_norm": 0.1294233500957489, "learning_rate": 3.512277671544862e-05, "loss": 0.3331, "num_tokens": 2628786205.0, "step": 4151 }, { "epoch": 0.4909542390918766, "grad_norm": 0.14283709228038788, "learning_rate": 3.5112468083088666e-05, "loss": 0.3272, "num_tokens": 2629423686.0, "step": 4152 }, { "epoch": 0.4910724843325056, "grad_norm": 0.12521356344223022, "learning_rate": 3.510215914088373e-05, "loss": 0.3118, "num_tokens": 2630055049.0, "step": 4153 }, { "epoch": 0.4911907295731347, "grad_norm": 0.14060862362384796, "learning_rate": 3.509184989034586e-05, "loss": 0.3406, "num_tokens": 2630693770.0, "step": 4154 }, { "epoch": 0.49130897481376373, "grad_norm": 0.13871480524539948, "learning_rate": 3.5081540332987167e-05, "loss": 0.3671, "num_tokens": 2631322976.0, "step": 4155 }, { "epoch": 0.4914272200543928, "grad_norm": 0.12852779030799866, "learning_rate": 3.50712304703198e-05, "loss": 0.3419, "num_tokens": 2631955978.0, "step": 4156 }, { "epoch": 0.49154546529502186, "grad_norm": 0.13459856808185577, "learning_rate": 3.506092030385594e-05, "loss": 0.3416, "num_tokens": 2632573481.0, "step": 4157 }, { "epoch": 0.49166371053565094, "grad_norm": 0.1298157274723053, "learning_rate": 3.505060983510785e-05, "loss": 0.3477, "num_tokens": 2633205397.0, "step": 4158 }, { "epoch": 0.49178195577628, "grad_norm": 0.1467588245868683, "learning_rate": 3.504029906558778e-05, "loss": 0.3864, "num_tokens": 2633837956.0, "step": 4159 }, { "epoch": 0.49190020101690907, "grad_norm": 0.13736259937286377, "learning_rate": 3.5029987996808084e-05, "loss": 0.3335, "num_tokens": 2634471462.0, "step": 4160 }, { "epoch": 0.49201844625753816, "grad_norm": 0.13906556367874146, "learning_rate": 3.5019676630281106e-05, "loss": 0.3604, "num_tokens": 2635108889.0, "step": 4161 }, { "epoch": 0.4921366914981672, "grad_norm": 0.131052166223526, "learning_rate": 3.500936496751928e-05, "loss": 0.3425, "num_tokens": 2635744729.0, "step": 4162 }, { "epoch": 0.4922549367387963, "grad_norm": 0.13207662105560303, "learning_rate": 3.499905301003505e-05, "loss": 0.3221, "num_tokens": 2636377848.0, "step": 4163 }, { "epoch": 0.4923731819794253, "grad_norm": 0.1605994552373886, "learning_rate": 3.498874075934093e-05, "loss": 0.377, "num_tokens": 2637014126.0, "step": 4164 }, { "epoch": 0.4924914272200544, "grad_norm": 0.1389603316783905, "learning_rate": 3.4978428216949445e-05, "loss": 0.3509, "num_tokens": 2637653800.0, "step": 4165 }, { "epoch": 0.49260967246068343, "grad_norm": 0.1342684030532837, "learning_rate": 3.49681153843732e-05, "loss": 0.327, "num_tokens": 2638285867.0, "step": 4166 }, { "epoch": 0.4927279177013125, "grad_norm": 0.1423492729663849, "learning_rate": 3.4957802263124806e-05, "loss": 0.315, "num_tokens": 2638915795.0, "step": 4167 }, { "epoch": 0.4928461629419416, "grad_norm": 0.14419640600681305, "learning_rate": 3.494748885471695e-05, "loss": 0.3475, "num_tokens": 2639545789.0, "step": 4168 }, { "epoch": 0.49296440818257065, "grad_norm": 0.12171070277690887, "learning_rate": 3.493717516066233e-05, "loss": 0.3146, "num_tokens": 2640183046.0, "step": 4169 }, { "epoch": 0.49308265342319973, "grad_norm": 0.13157206773757935, "learning_rate": 3.4926861182473705e-05, "loss": 0.3172, "num_tokens": 2640815149.0, "step": 4170 }, { "epoch": 0.49320089866382877, "grad_norm": 0.13332614302635193, "learning_rate": 3.491654692166388e-05, "loss": 0.334, "num_tokens": 2641448838.0, "step": 4171 }, { "epoch": 0.49331914390445786, "grad_norm": 0.13443900644779205, "learning_rate": 3.490623237974569e-05, "loss": 0.3667, "num_tokens": 2642084929.0, "step": 4172 }, { "epoch": 0.4934373891450869, "grad_norm": 0.1395246982574463, "learning_rate": 3.4895917558232e-05, "loss": 0.3492, "num_tokens": 2642714886.0, "step": 4173 }, { "epoch": 0.493555634385716, "grad_norm": 0.13717785477638245, "learning_rate": 3.4885602458635756e-05, "loss": 0.3293, "num_tokens": 2643348719.0, "step": 4174 }, { "epoch": 0.493673879626345, "grad_norm": 0.13508754968643188, "learning_rate": 3.48752870824699e-05, "loss": 0.3671, "num_tokens": 2643987236.0, "step": 4175 }, { "epoch": 0.4937921248669741, "grad_norm": 0.13642603158950806, "learning_rate": 3.486497143124743e-05, "loss": 0.3483, "num_tokens": 2644616605.0, "step": 4176 }, { "epoch": 0.4939103701076032, "grad_norm": 0.13110430538654327, "learning_rate": 3.485465550648141e-05, "loss": 0.3537, "num_tokens": 2645249339.0, "step": 4177 }, { "epoch": 0.4940286153482322, "grad_norm": 0.14112728834152222, "learning_rate": 3.4844339309684896e-05, "loss": 0.3521, "num_tokens": 2645886792.0, "step": 4178 }, { "epoch": 0.4941468605888613, "grad_norm": 0.1532880663871765, "learning_rate": 3.483402284237103e-05, "loss": 0.3477, "num_tokens": 2646523900.0, "step": 4179 }, { "epoch": 0.49426510582949035, "grad_norm": 0.12895214557647705, "learning_rate": 3.482370610605297e-05, "loss": 0.346, "num_tokens": 2647158011.0, "step": 4180 }, { "epoch": 0.49438335107011944, "grad_norm": 0.13196022808551788, "learning_rate": 3.481338910224391e-05, "loss": 0.3356, "num_tokens": 2647796634.0, "step": 4181 }, { "epoch": 0.49450159631074847, "grad_norm": 0.1278996467590332, "learning_rate": 3.480307183245709e-05, "loss": 0.3759, "num_tokens": 2648434070.0, "step": 4182 }, { "epoch": 0.49461984155137756, "grad_norm": 0.1457846462726593, "learning_rate": 3.4792754298205793e-05, "loss": 0.3562, "num_tokens": 2649069083.0, "step": 4183 }, { "epoch": 0.49473808679200665, "grad_norm": 0.13205194473266602, "learning_rate": 3.478243650100334e-05, "loss": 0.3097, "num_tokens": 2649703732.0, "step": 4184 }, { "epoch": 0.4948563320326357, "grad_norm": 0.12790246307849884, "learning_rate": 3.477211844236309e-05, "loss": 0.3306, "num_tokens": 2650338181.0, "step": 4185 }, { "epoch": 0.49497457727326477, "grad_norm": 0.14772215485572815, "learning_rate": 3.4761800123798415e-05, "loss": 0.3591, "num_tokens": 2650971906.0, "step": 4186 }, { "epoch": 0.4950928225138938, "grad_norm": 0.13219520449638367, "learning_rate": 3.475148154682278e-05, "loss": 0.3209, "num_tokens": 2651607769.0, "step": 4187 }, { "epoch": 0.4952110677545229, "grad_norm": 0.15164072811603546, "learning_rate": 3.474116271294964e-05, "loss": 0.3734, "num_tokens": 2652245010.0, "step": 4188 }, { "epoch": 0.4953293129951519, "grad_norm": 0.1399146020412445, "learning_rate": 3.47308436236925e-05, "loss": 0.3294, "num_tokens": 2652882035.0, "step": 4189 }, { "epoch": 0.495447558235781, "grad_norm": 0.11933863908052444, "learning_rate": 3.472052428056492e-05, "loss": 0.3085, "num_tokens": 2653514033.0, "step": 4190 }, { "epoch": 0.49556580347641005, "grad_norm": 0.14277394115924835, "learning_rate": 3.471020468508047e-05, "loss": 0.3756, "num_tokens": 2654146612.0, "step": 4191 }, { "epoch": 0.49568404871703914, "grad_norm": 0.12868241965770721, "learning_rate": 3.469988483875277e-05, "loss": 0.3414, "num_tokens": 2654782604.0, "step": 4192 }, { "epoch": 0.4958022939576682, "grad_norm": 0.13501137495040894, "learning_rate": 3.4689564743095495e-05, "loss": 0.3162, "num_tokens": 2655415827.0, "step": 4193 }, { "epoch": 0.49592053919829726, "grad_norm": 0.14407940208911896, "learning_rate": 3.467924439962232e-05, "loss": 0.3726, "num_tokens": 2656047468.0, "step": 4194 }, { "epoch": 0.49603878443892635, "grad_norm": 0.12575429677963257, "learning_rate": 3.466892380984699e-05, "loss": 0.2945, "num_tokens": 2656685495.0, "step": 4195 }, { "epoch": 0.4961570296795554, "grad_norm": 0.1355261355638504, "learning_rate": 3.4658602975283255e-05, "loss": 0.3199, "num_tokens": 2657318848.0, "step": 4196 }, { "epoch": 0.4962752749201845, "grad_norm": 0.1493084877729416, "learning_rate": 3.4648281897444926e-05, "loss": 0.3558, "num_tokens": 2657950152.0, "step": 4197 }, { "epoch": 0.4963935201608135, "grad_norm": 0.1381651908159256, "learning_rate": 3.4637960577845856e-05, "loss": 0.3556, "num_tokens": 2658583906.0, "step": 4198 }, { "epoch": 0.4965117654014426, "grad_norm": 0.14919638633728027, "learning_rate": 3.462763901799988e-05, "loss": 0.3374, "num_tokens": 2659219550.0, "step": 4199 }, { "epoch": 0.4966300106420717, "grad_norm": 0.16682785749435425, "learning_rate": 3.461731721942094e-05, "loss": 0.3412, "num_tokens": 2659848969.0, "step": 4200 }, { "epoch": 0.4967482558827007, "grad_norm": 0.15126492083072662, "learning_rate": 3.460699518362297e-05, "loss": 0.3643, "num_tokens": 2660485787.0, "step": 4201 }, { "epoch": 0.4968665011233298, "grad_norm": 0.12974666059017181, "learning_rate": 3.4596672912119945e-05, "loss": 0.3326, "num_tokens": 2661121259.0, "step": 4202 }, { "epoch": 0.49698474636395884, "grad_norm": 0.13601620495319366, "learning_rate": 3.4586350406425884e-05, "loss": 0.3233, "num_tokens": 2661756643.0, "step": 4203 }, { "epoch": 0.49710299160458793, "grad_norm": 0.16229425370693207, "learning_rate": 3.457602766805483e-05, "loss": 0.3592, "num_tokens": 2662389533.0, "step": 4204 }, { "epoch": 0.49722123684521696, "grad_norm": 0.16106867790222168, "learning_rate": 3.456570469852086e-05, "loss": 0.334, "num_tokens": 2663024177.0, "step": 4205 }, { "epoch": 0.49733948208584605, "grad_norm": 0.14738799631595612, "learning_rate": 3.45553814993381e-05, "loss": 0.3681, "num_tokens": 2663653832.0, "step": 4206 }, { "epoch": 0.4974577273264751, "grad_norm": 0.13845351338386536, "learning_rate": 3.454505807202069e-05, "loss": 0.3359, "num_tokens": 2664283466.0, "step": 4207 }, { "epoch": 0.4975759725671042, "grad_norm": 0.13969829678535461, "learning_rate": 3.453473441808282e-05, "loss": 0.3491, "num_tokens": 2664917911.0, "step": 4208 }, { "epoch": 0.49769421780773326, "grad_norm": 0.13912709057331085, "learning_rate": 3.4524410539038695e-05, "loss": 0.3419, "num_tokens": 2665555236.0, "step": 4209 }, { "epoch": 0.4978124630483623, "grad_norm": 0.14119914174079895, "learning_rate": 3.451408643640258e-05, "loss": 0.3298, "num_tokens": 2666194698.0, "step": 4210 }, { "epoch": 0.4979307082889914, "grad_norm": 0.1436162292957306, "learning_rate": 3.4503762111688744e-05, "loss": 0.3263, "num_tokens": 2666830222.0, "step": 4211 }, { "epoch": 0.4980489535296204, "grad_norm": 0.13840103149414062, "learning_rate": 3.44934375664115e-05, "loss": 0.3317, "num_tokens": 2667464246.0, "step": 4212 }, { "epoch": 0.4981671987702495, "grad_norm": 0.15591959655284882, "learning_rate": 3.448311280208519e-05, "loss": 0.3428, "num_tokens": 2668094370.0, "step": 4213 }, { "epoch": 0.49828544401087854, "grad_norm": 0.13981321454048157, "learning_rate": 3.4472787820224216e-05, "loss": 0.2964, "num_tokens": 2668724951.0, "step": 4214 }, { "epoch": 0.49840368925150763, "grad_norm": 0.13262978196144104, "learning_rate": 3.446246262234298e-05, "loss": 0.3132, "num_tokens": 2669360441.0, "step": 4215 }, { "epoch": 0.49852193449213666, "grad_norm": 0.15402524173259735, "learning_rate": 3.44521372099559e-05, "loss": 0.3624, "num_tokens": 2669996225.0, "step": 4216 }, { "epoch": 0.49864017973276575, "grad_norm": 0.1375131905078888, "learning_rate": 3.444181158457748e-05, "loss": 0.3423, "num_tokens": 2670635387.0, "step": 4217 }, { "epoch": 0.49875842497339484, "grad_norm": 0.12969988584518433, "learning_rate": 3.4431485747722206e-05, "loss": 0.3618, "num_tokens": 2671271382.0, "step": 4218 }, { "epoch": 0.4988766702140239, "grad_norm": 0.14088723063468933, "learning_rate": 3.442115970090462e-05, "loss": 0.3236, "num_tokens": 2671907386.0, "step": 4219 }, { "epoch": 0.49899491545465297, "grad_norm": 0.1271054744720459, "learning_rate": 3.4410833445639303e-05, "loss": 0.2969, "num_tokens": 2672543585.0, "step": 4220 }, { "epoch": 0.499113160695282, "grad_norm": 0.1303916573524475, "learning_rate": 3.440050698344083e-05, "loss": 0.3194, "num_tokens": 2673176090.0, "step": 4221 }, { "epoch": 0.4992314059359111, "grad_norm": 0.14752157032489777, "learning_rate": 3.4390180315823824e-05, "loss": 0.3457, "num_tokens": 2673810810.0, "step": 4222 }, { "epoch": 0.4993496511765401, "grad_norm": 0.13021235167980194, "learning_rate": 3.4379853444302964e-05, "loss": 0.2991, "num_tokens": 2674441066.0, "step": 4223 }, { "epoch": 0.4994678964171692, "grad_norm": 0.13429340720176697, "learning_rate": 3.436952637039294e-05, "loss": 0.3221, "num_tokens": 2675074381.0, "step": 4224 }, { "epoch": 0.4995861416577983, "grad_norm": 0.14172106981277466, "learning_rate": 3.435919909560845e-05, "loss": 0.3616, "num_tokens": 2675710269.0, "step": 4225 }, { "epoch": 0.49970438689842733, "grad_norm": 0.130488321185112, "learning_rate": 3.4348871621464245e-05, "loss": 0.3135, "num_tokens": 2676341704.0, "step": 4226 }, { "epoch": 0.4998226321390564, "grad_norm": 0.1281290501356125, "learning_rate": 3.433854394947511e-05, "loss": 0.3455, "num_tokens": 2676975376.0, "step": 4227 }, { "epoch": 0.49994087737968546, "grad_norm": 0.12438276410102844, "learning_rate": 3.432821608115585e-05, "loss": 0.3369, "num_tokens": 2677609354.0, "step": 4228 }, { "epoch": 0.5000591226203145, "grad_norm": 0.12472159415483475, "learning_rate": 3.431788801802129e-05, "loss": 0.3387, "num_tokens": 2678244809.0, "step": 4229 }, { "epoch": 0.5001773678609436, "grad_norm": 0.13397561013698578, "learning_rate": 3.43075597615863e-05, "loss": 0.3906, "num_tokens": 2678877318.0, "step": 4230 }, { "epoch": 0.5002956131015727, "grad_norm": 0.13377828896045685, "learning_rate": 3.429723131336577e-05, "loss": 0.3652, "num_tokens": 2679510589.0, "step": 4231 }, { "epoch": 0.5004138583422018, "grad_norm": 0.11871729791164398, "learning_rate": 3.4286902674874616e-05, "loss": 0.3339, "num_tokens": 2680141850.0, "step": 4232 }, { "epoch": 0.5005321035828308, "grad_norm": 0.11867047846317291, "learning_rate": 3.427657384762778e-05, "loss": 0.3401, "num_tokens": 2680778950.0, "step": 4233 }, { "epoch": 0.5006503488234598, "grad_norm": 0.13714860379695892, "learning_rate": 3.4266244833140246e-05, "loss": 0.3719, "num_tokens": 2681418371.0, "step": 4234 }, { "epoch": 0.5007685940640889, "grad_norm": 0.13612151145935059, "learning_rate": 3.425591563292701e-05, "loss": 0.358, "num_tokens": 2682053628.0, "step": 4235 }, { "epoch": 0.500886839304718, "grad_norm": 0.12488899379968643, "learning_rate": 3.424558624850311e-05, "loss": 0.3311, "num_tokens": 2682683953.0, "step": 4236 }, { "epoch": 0.5010050845453471, "grad_norm": 0.1336156129837036, "learning_rate": 3.4235256681383586e-05, "loss": 0.3666, "num_tokens": 2683317131.0, "step": 4237 }, { "epoch": 0.5011233297859761, "grad_norm": 0.1304590106010437, "learning_rate": 3.422492693308353e-05, "loss": 0.3329, "num_tokens": 2683953898.0, "step": 4238 }, { "epoch": 0.5012415750266052, "grad_norm": 0.13023251295089722, "learning_rate": 3.421459700511807e-05, "loss": 0.3188, "num_tokens": 2684589412.0, "step": 4239 }, { "epoch": 0.5013598202672342, "grad_norm": 0.1346651315689087, "learning_rate": 3.420426689900231e-05, "loss": 0.349, "num_tokens": 2685226379.0, "step": 4240 }, { "epoch": 0.5014780655078633, "grad_norm": 1.2696667909622192, "learning_rate": 3.419393661625144e-05, "loss": 0.3939, "num_tokens": 2685819658.0, "step": 4241 }, { "epoch": 0.5015963107484924, "grad_norm": 0.14897817373275757, "learning_rate": 3.418360615838061e-05, "loss": 0.3322, "num_tokens": 2686457624.0, "step": 4242 }, { "epoch": 0.5017145559891214, "grad_norm": 0.1385791152715683, "learning_rate": 3.4173275526905077e-05, "loss": 0.345, "num_tokens": 2687088812.0, "step": 4243 }, { "epoch": 0.5018328012297505, "grad_norm": 0.145106703042984, "learning_rate": 3.416294472334005e-05, "loss": 0.3699, "num_tokens": 2687722334.0, "step": 4244 }, { "epoch": 0.5019510464703796, "grad_norm": 0.1424427628517151, "learning_rate": 3.4152613749200816e-05, "loss": 0.3691, "num_tokens": 2688361255.0, "step": 4245 }, { "epoch": 0.5020692917110087, "grad_norm": 0.1422753632068634, "learning_rate": 3.414228260600265e-05, "loss": 0.3433, "num_tokens": 2689000100.0, "step": 4246 }, { "epoch": 0.5021875369516376, "grad_norm": 0.13340966403484344, "learning_rate": 3.4131951295260876e-05, "loss": 0.3531, "num_tokens": 2689633536.0, "step": 4247 }, { "epoch": 0.5023057821922667, "grad_norm": 0.1369578093290329, "learning_rate": 3.412161981849081e-05, "loss": 0.3533, "num_tokens": 2690268524.0, "step": 4248 }, { "epoch": 0.5024240274328958, "grad_norm": 0.14394892752170563, "learning_rate": 3.411128817720784e-05, "loss": 0.3804, "num_tokens": 2690906748.0, "step": 4249 }, { "epoch": 0.5025422726735249, "grad_norm": 0.12651318311691284, "learning_rate": 3.410095637292735e-05, "loss": 0.3456, "num_tokens": 2691546078.0, "step": 4250 }, { "epoch": 0.502660517914154, "grad_norm": 0.14396196603775024, "learning_rate": 3.409062440716474e-05, "loss": 0.3641, "num_tokens": 2692184604.0, "step": 4251 }, { "epoch": 0.502778763154783, "grad_norm": 0.1432640552520752, "learning_rate": 3.408029228143544e-05, "loss": 0.3014, "num_tokens": 2692815925.0, "step": 4252 }, { "epoch": 0.5028970083954121, "grad_norm": 0.12709850072860718, "learning_rate": 3.406995999725494e-05, "loss": 0.3207, "num_tokens": 2693452545.0, "step": 4253 }, { "epoch": 0.5030152536360412, "grad_norm": 0.13210740685462952, "learning_rate": 3.405962755613869e-05, "loss": 0.3295, "num_tokens": 2694084670.0, "step": 4254 }, { "epoch": 0.5031334988766702, "grad_norm": 0.13774734735488892, "learning_rate": 3.4049294959602206e-05, "loss": 0.3401, "num_tokens": 2694717043.0, "step": 4255 }, { "epoch": 0.5032517441172992, "grad_norm": 0.13880518078804016, "learning_rate": 3.403896220916101e-05, "loss": 0.3596, "num_tokens": 2695346908.0, "step": 4256 }, { "epoch": 0.5033699893579283, "grad_norm": 0.1396685391664505, "learning_rate": 3.402862930633066e-05, "loss": 0.3469, "num_tokens": 2695986036.0, "step": 4257 }, { "epoch": 0.5034882345985574, "grad_norm": 0.12972547113895416, "learning_rate": 3.401829625262673e-05, "loss": 0.3505, "num_tokens": 2696620917.0, "step": 4258 }, { "epoch": 0.5036064798391865, "grad_norm": 0.13677559792995453, "learning_rate": 3.40079630495648e-05, "loss": 0.3348, "num_tokens": 2697256694.0, "step": 4259 }, { "epoch": 0.5037247250798156, "grad_norm": 0.14170341193675995, "learning_rate": 3.399762969866051e-05, "loss": 0.3499, "num_tokens": 2697895635.0, "step": 4260 }, { "epoch": 0.5038429703204446, "grad_norm": 0.14167846739292145, "learning_rate": 3.398729620142947e-05, "loss": 0.3563, "num_tokens": 2698529973.0, "step": 4261 }, { "epoch": 0.5039612155610736, "grad_norm": 0.13320842385292053, "learning_rate": 3.397696255938736e-05, "loss": 0.3264, "num_tokens": 2699162602.0, "step": 4262 }, { "epoch": 0.5040794608017027, "grad_norm": 0.11501599848270416, "learning_rate": 3.396662877404985e-05, "loss": 0.2651, "num_tokens": 2699798212.0, "step": 4263 }, { "epoch": 0.5041977060423318, "grad_norm": 0.13217060267925262, "learning_rate": 3.395629484693265e-05, "loss": 0.3184, "num_tokens": 2700437040.0, "step": 4264 }, { "epoch": 0.5043159512829608, "grad_norm": 0.13935016095638275, "learning_rate": 3.394596077955147e-05, "loss": 0.3734, "num_tokens": 2701074107.0, "step": 4265 }, { "epoch": 0.5044341965235899, "grad_norm": 0.13708184659481049, "learning_rate": 3.3935626573422066e-05, "loss": 0.3686, "num_tokens": 2701709125.0, "step": 4266 }, { "epoch": 0.504552441764219, "grad_norm": 0.13118486106395721, "learning_rate": 3.3925292230060196e-05, "loss": 0.3196, "num_tokens": 2702338456.0, "step": 4267 }, { "epoch": 0.5046706870048481, "grad_norm": 0.12831558287143707, "learning_rate": 3.3914957750981646e-05, "loss": 0.3189, "num_tokens": 2702972592.0, "step": 4268 }, { "epoch": 0.5047889322454772, "grad_norm": 0.14281296730041504, "learning_rate": 3.3904623137702224e-05, "loss": 0.3607, "num_tokens": 2703608656.0, "step": 4269 }, { "epoch": 0.5049071774861061, "grad_norm": 0.12626832723617554, "learning_rate": 3.3894288391737734e-05, "loss": 0.2991, "num_tokens": 2704239703.0, "step": 4270 }, { "epoch": 0.5050254227267352, "grad_norm": 0.14343659579753876, "learning_rate": 3.388395351460404e-05, "loss": 0.3356, "num_tokens": 2704845527.0, "step": 4271 }, { "epoch": 0.5051436679673643, "grad_norm": 0.12673059105873108, "learning_rate": 3.3873618507816983e-05, "loss": 0.3117, "num_tokens": 2705479748.0, "step": 4272 }, { "epoch": 0.5052619132079934, "grad_norm": 0.14020366966724396, "learning_rate": 3.386328337289248e-05, "loss": 0.3492, "num_tokens": 2706118430.0, "step": 4273 }, { "epoch": 0.5053801584486225, "grad_norm": 0.12373946607112885, "learning_rate": 3.385294811134639e-05, "loss": 0.3259, "num_tokens": 2706757015.0, "step": 4274 }, { "epoch": 0.5054984036892515, "grad_norm": 0.14105355739593506, "learning_rate": 3.384261272469466e-05, "loss": 0.3743, "num_tokens": 2707393767.0, "step": 4275 }, { "epoch": 0.5056166489298806, "grad_norm": 0.13561658561229706, "learning_rate": 3.3832277214453206e-05, "loss": 0.3256, "num_tokens": 2708028606.0, "step": 4276 }, { "epoch": 0.5057348941705097, "grad_norm": 0.14896999299526215, "learning_rate": 3.382194158213799e-05, "loss": 0.3419, "num_tokens": 2708662215.0, "step": 4277 }, { "epoch": 0.5058531394111387, "grad_norm": 0.13363991677761078, "learning_rate": 3.381160582926499e-05, "loss": 0.3413, "num_tokens": 2709292984.0, "step": 4278 }, { "epoch": 0.5059713846517677, "grad_norm": 0.14005868136882782, "learning_rate": 3.380126995735019e-05, "loss": 0.353, "num_tokens": 2709928689.0, "step": 4279 }, { "epoch": 0.5060896298923968, "grad_norm": 0.136679545044899, "learning_rate": 3.379093396790961e-05, "loss": 0.3332, "num_tokens": 2710558403.0, "step": 4280 }, { "epoch": 0.5062078751330259, "grad_norm": 0.14714841544628143, "learning_rate": 3.3780597862459265e-05, "loss": 0.3631, "num_tokens": 2711196196.0, "step": 4281 }, { "epoch": 0.506326120373655, "grad_norm": 0.14747333526611328, "learning_rate": 3.377026164251519e-05, "loss": 0.3561, "num_tokens": 2711834363.0, "step": 4282 }, { "epoch": 0.5064443656142841, "grad_norm": 0.14267125725746155, "learning_rate": 3.3759925309593446e-05, "loss": 0.3365, "num_tokens": 2712472933.0, "step": 4283 }, { "epoch": 0.506562610854913, "grad_norm": 0.1312456727027893, "learning_rate": 3.3749588865210125e-05, "loss": 0.3586, "num_tokens": 2713108343.0, "step": 4284 }, { "epoch": 0.5066808560955421, "grad_norm": 0.13675612211227417, "learning_rate": 3.373925231088131e-05, "loss": 0.3435, "num_tokens": 2713743141.0, "step": 4285 }, { "epoch": 0.5067991013361712, "grad_norm": 0.15073366463184357, "learning_rate": 3.37289156481231e-05, "loss": 0.3619, "num_tokens": 2714381332.0, "step": 4286 }, { "epoch": 0.5069173465768003, "grad_norm": 0.14299766719341278, "learning_rate": 3.371857887845163e-05, "loss": 0.3568, "num_tokens": 2715016160.0, "step": 4287 }, { "epoch": 0.5070355918174293, "grad_norm": 0.12667277455329895, "learning_rate": 3.370824200338303e-05, "loss": 0.3341, "num_tokens": 2715655335.0, "step": 4288 }, { "epoch": 0.5071538370580584, "grad_norm": 0.1397131383419037, "learning_rate": 3.369790502443346e-05, "loss": 0.3597, "num_tokens": 2716284123.0, "step": 4289 }, { "epoch": 0.5072720822986875, "grad_norm": 0.13297130167484283, "learning_rate": 3.368756794311909e-05, "loss": 0.3332, "num_tokens": 2716921831.0, "step": 4290 }, { "epoch": 0.5073903275393166, "grad_norm": 0.13355493545532227, "learning_rate": 3.36772307609561e-05, "loss": 0.3288, "num_tokens": 2717558731.0, "step": 4291 }, { "epoch": 0.5075085727799457, "grad_norm": 0.12521106004714966, "learning_rate": 3.366689347946071e-05, "loss": 0.3032, "num_tokens": 2718192185.0, "step": 4292 }, { "epoch": 0.5076268180205746, "grad_norm": 0.14257171750068665, "learning_rate": 3.3656556100149115e-05, "loss": 0.3679, "num_tokens": 2718831361.0, "step": 4293 }, { "epoch": 0.5077450632612037, "grad_norm": 0.12331908196210861, "learning_rate": 3.3646218624537544e-05, "loss": 0.3229, "num_tokens": 2719467263.0, "step": 4294 }, { "epoch": 0.5078633085018328, "grad_norm": 0.14436398446559906, "learning_rate": 3.363588105414225e-05, "loss": 0.3372, "num_tokens": 2720104461.0, "step": 4295 }, { "epoch": 0.5079815537424619, "grad_norm": 0.13744190335273743, "learning_rate": 3.362554339047948e-05, "loss": 0.3513, "num_tokens": 2720740190.0, "step": 4296 }, { "epoch": 0.5080997989830909, "grad_norm": 0.1338513344526291, "learning_rate": 3.361520563506551e-05, "loss": 0.3459, "num_tokens": 2721376744.0, "step": 4297 }, { "epoch": 0.50821804422372, "grad_norm": 0.13838458061218262, "learning_rate": 3.3604867789416625e-05, "loss": 0.3234, "num_tokens": 2722005043.0, "step": 4298 }, { "epoch": 0.5083362894643491, "grad_norm": 0.12755118310451508, "learning_rate": 3.359452985504913e-05, "loss": 0.3347, "num_tokens": 2722638447.0, "step": 4299 }, { "epoch": 0.5084545347049781, "grad_norm": 0.13709282875061035, "learning_rate": 3.358419183347931e-05, "loss": 0.3593, "num_tokens": 2723274574.0, "step": 4300 }, { "epoch": 0.5085727799456072, "grad_norm": 0.13341310620307922, "learning_rate": 3.357385372622351e-05, "loss": 0.3351, "num_tokens": 2723901211.0, "step": 4301 }, { "epoch": 0.5086910251862362, "grad_norm": 0.14542871713638306, "learning_rate": 3.3563515534798055e-05, "loss": 0.3204, "num_tokens": 2724532736.0, "step": 4302 }, { "epoch": 0.5088092704268653, "grad_norm": 0.1273500621318817, "learning_rate": 3.35531772607193e-05, "loss": 0.2711, "num_tokens": 2725162997.0, "step": 4303 }, { "epoch": 0.5089275156674944, "grad_norm": 0.12943150103092194, "learning_rate": 3.354283890550361e-05, "loss": 0.3214, "num_tokens": 2725800991.0, "step": 4304 }, { "epoch": 0.5090457609081235, "grad_norm": 0.1419239193201065, "learning_rate": 3.353250047066734e-05, "loss": 0.3164, "num_tokens": 2726436128.0, "step": 4305 }, { "epoch": 0.5091640061487526, "grad_norm": 0.13370804488658905, "learning_rate": 3.352216195772689e-05, "loss": 0.3122, "num_tokens": 2727072170.0, "step": 4306 }, { "epoch": 0.5092822513893815, "grad_norm": 0.14259478449821472, "learning_rate": 3.3511823368198645e-05, "loss": 0.343, "num_tokens": 2727705904.0, "step": 4307 }, { "epoch": 0.5094004966300106, "grad_norm": 0.13681969046592712, "learning_rate": 3.350148470359901e-05, "loss": 0.3452, "num_tokens": 2728341735.0, "step": 4308 }, { "epoch": 0.5095187418706397, "grad_norm": 0.12984465062618256, "learning_rate": 3.3491145965444425e-05, "loss": 0.2968, "num_tokens": 2728943606.0, "step": 4309 }, { "epoch": 0.5096369871112688, "grad_norm": 0.12371018528938293, "learning_rate": 3.3480807155251285e-05, "loss": 0.3373, "num_tokens": 2729580572.0, "step": 4310 }, { "epoch": 0.5097552323518978, "grad_norm": 0.12464231252670288, "learning_rate": 3.347046827453606e-05, "loss": 0.3161, "num_tokens": 2730217317.0, "step": 4311 }, { "epoch": 0.5098734775925269, "grad_norm": 0.1333385705947876, "learning_rate": 3.346012932481517e-05, "loss": 0.341, "num_tokens": 2730853218.0, "step": 4312 }, { "epoch": 0.509991722833156, "grad_norm": 0.12953224778175354, "learning_rate": 3.3449790307605096e-05, "loss": 0.3428, "num_tokens": 2731492779.0, "step": 4313 }, { "epoch": 0.5101099680737851, "grad_norm": 0.14195652306079865, "learning_rate": 3.3439451224422284e-05, "loss": 0.369, "num_tokens": 2732125555.0, "step": 4314 }, { "epoch": 0.5102282133144141, "grad_norm": 0.14597578346729279, "learning_rate": 3.3429112076783246e-05, "loss": 0.3406, "num_tokens": 2732764048.0, "step": 4315 }, { "epoch": 0.5103464585550431, "grad_norm": 0.13467925786972046, "learning_rate": 3.3418772866204436e-05, "loss": 0.3372, "num_tokens": 2733400640.0, "step": 4316 }, { "epoch": 0.5104647037956722, "grad_norm": 0.13776041567325592, "learning_rate": 3.3408433594202375e-05, "loss": 0.3564, "num_tokens": 2734038270.0, "step": 4317 }, { "epoch": 0.5105829490363013, "grad_norm": 0.1274189054965973, "learning_rate": 3.339809426229357e-05, "loss": 0.3361, "num_tokens": 2734677600.0, "step": 4318 }, { "epoch": 0.5107011942769304, "grad_norm": 0.14822641015052795, "learning_rate": 3.3387754871994513e-05, "loss": 0.3582, "num_tokens": 2735311911.0, "step": 4319 }, { "epoch": 0.5108194395175594, "grad_norm": 0.12869931757450104, "learning_rate": 3.3377415424821756e-05, "loss": 0.3181, "num_tokens": 2735950405.0, "step": 4320 }, { "epoch": 0.5109376847581885, "grad_norm": 0.1444638967514038, "learning_rate": 3.336707592229181e-05, "loss": 0.347, "num_tokens": 2736574875.0, "step": 4321 }, { "epoch": 0.5110559299988175, "grad_norm": 0.1429613083600998, "learning_rate": 3.335673636592123e-05, "loss": 0.3892, "num_tokens": 2737209523.0, "step": 4322 }, { "epoch": 0.5111741752394466, "grad_norm": 0.14722692966461182, "learning_rate": 3.334639675722655e-05, "loss": 0.314, "num_tokens": 2737844930.0, "step": 4323 }, { "epoch": 0.5112924204800757, "grad_norm": 0.14058755338191986, "learning_rate": 3.333605709772435e-05, "loss": 0.3282, "num_tokens": 2738481792.0, "step": 4324 }, { "epoch": 0.5114106657207047, "grad_norm": 0.13245239853858948, "learning_rate": 3.332571738893117e-05, "loss": 0.3428, "num_tokens": 2739119157.0, "step": 4325 }, { "epoch": 0.5115289109613338, "grad_norm": 0.13998375833034515, "learning_rate": 3.331537763236358e-05, "loss": 0.3662, "num_tokens": 2739751092.0, "step": 4326 }, { "epoch": 0.5116471562019629, "grad_norm": 0.1469363421201706, "learning_rate": 3.3305037829538175e-05, "loss": 0.3741, "num_tokens": 2740388473.0, "step": 4327 }, { "epoch": 0.511765401442592, "grad_norm": 0.14689095318317413, "learning_rate": 3.329469798197152e-05, "loss": 0.3511, "num_tokens": 2741025109.0, "step": 4328 }, { "epoch": 0.511883646683221, "grad_norm": 0.13249145448207855, "learning_rate": 3.328435809118022e-05, "loss": 0.3482, "num_tokens": 2741658907.0, "step": 4329 }, { "epoch": 0.51200189192385, "grad_norm": 0.16633141040802002, "learning_rate": 3.327401815868087e-05, "loss": 0.3551, "num_tokens": 2742291031.0, "step": 4330 }, { "epoch": 0.5121201371644791, "grad_norm": 0.135064959526062, "learning_rate": 3.326367818599007e-05, "loss": 0.3417, "num_tokens": 2742926048.0, "step": 4331 }, { "epoch": 0.5122383824051082, "grad_norm": 0.14188624918460846, "learning_rate": 3.325333817462443e-05, "loss": 0.3293, "num_tokens": 2743564509.0, "step": 4332 }, { "epoch": 0.5123566276457373, "grad_norm": 0.14461566507816315, "learning_rate": 3.324299812610056e-05, "loss": 0.3375, "num_tokens": 2744199566.0, "step": 4333 }, { "epoch": 0.5124748728863663, "grad_norm": 0.1456352323293686, "learning_rate": 3.323265804193509e-05, "loss": 0.3619, "num_tokens": 2744837541.0, "step": 4334 }, { "epoch": 0.5125931181269954, "grad_norm": 0.14926594495773315, "learning_rate": 3.322231792364464e-05, "loss": 0.3466, "num_tokens": 2745466919.0, "step": 4335 }, { "epoch": 0.5127113633676245, "grad_norm": 0.12891361117362976, "learning_rate": 3.321197777274585e-05, "loss": 0.3325, "num_tokens": 2746099728.0, "step": 4336 }, { "epoch": 0.5128296086082536, "grad_norm": 0.13448065519332886, "learning_rate": 3.320163759075535e-05, "loss": 0.3426, "num_tokens": 2746719012.0, "step": 4337 }, { "epoch": 0.5129478538488825, "grad_norm": 0.1508650779724121, "learning_rate": 3.319129737918977e-05, "loss": 0.3848, "num_tokens": 2747349182.0, "step": 4338 }, { "epoch": 0.5130660990895116, "grad_norm": 0.13755759596824646, "learning_rate": 3.318095713956576e-05, "loss": 0.3234, "num_tokens": 2747988516.0, "step": 4339 }, { "epoch": 0.5131843443301407, "grad_norm": 0.12890096008777618, "learning_rate": 3.317061687339997e-05, "loss": 0.3441, "num_tokens": 2748623904.0, "step": 4340 }, { "epoch": 0.5133025895707698, "grad_norm": 0.13304780423641205, "learning_rate": 3.316027658220906e-05, "loss": 0.3367, "num_tokens": 2749259777.0, "step": 4341 }, { "epoch": 0.5134208348113989, "grad_norm": 0.13831275701522827, "learning_rate": 3.314993626750967e-05, "loss": 0.3331, "num_tokens": 2749890852.0, "step": 4342 }, { "epoch": 0.5135390800520279, "grad_norm": 0.13639675080776215, "learning_rate": 3.3139595930818475e-05, "loss": 0.3463, "num_tokens": 2750529382.0, "step": 4343 }, { "epoch": 0.513657325292657, "grad_norm": 0.13781847059726715, "learning_rate": 3.312925557365213e-05, "loss": 0.3408, "num_tokens": 2751164405.0, "step": 4344 }, { "epoch": 0.513775570533286, "grad_norm": 0.1312367171049118, "learning_rate": 3.311891519752729e-05, "loss": 0.3474, "num_tokens": 2751793060.0, "step": 4345 }, { "epoch": 0.5138938157739151, "grad_norm": 0.13094070553779602, "learning_rate": 3.310857480396065e-05, "loss": 0.3339, "num_tokens": 2752427779.0, "step": 4346 }, { "epoch": 0.5140120610145442, "grad_norm": 0.14280563592910767, "learning_rate": 3.309823439446886e-05, "loss": 0.3313, "num_tokens": 2753059334.0, "step": 4347 }, { "epoch": 0.5141303062551732, "grad_norm": 0.13303902745246887, "learning_rate": 3.30878939705686e-05, "loss": 0.3713, "num_tokens": 2753693506.0, "step": 4348 }, { "epoch": 0.5142485514958023, "grad_norm": 0.14079269766807556, "learning_rate": 3.307755353377654e-05, "loss": 0.3618, "num_tokens": 2754329810.0, "step": 4349 }, { "epoch": 0.5143667967364314, "grad_norm": 0.12959611415863037, "learning_rate": 3.3067213085609366e-05, "loss": 0.3543, "num_tokens": 2754965872.0, "step": 4350 }, { "epoch": 0.5144850419770605, "grad_norm": 0.1360633224248886, "learning_rate": 3.305687262758375e-05, "loss": 0.309, "num_tokens": 2755571636.0, "step": 4351 }, { "epoch": 0.5146032872176894, "grad_norm": 0.13547050952911377, "learning_rate": 3.304653216121639e-05, "loss": 0.3507, "num_tokens": 2756206645.0, "step": 4352 }, { "epoch": 0.5147215324583185, "grad_norm": 0.13930854201316833, "learning_rate": 3.3036191688023934e-05, "loss": 0.3347, "num_tokens": 2756840745.0, "step": 4353 }, { "epoch": 0.5148397776989476, "grad_norm": 0.1336626261472702, "learning_rate": 3.3025851209523104e-05, "loss": 0.3557, "num_tokens": 2757479979.0, "step": 4354 }, { "epoch": 0.5149580229395767, "grad_norm": 0.12198323756456375, "learning_rate": 3.301551072723054e-05, "loss": 0.3287, "num_tokens": 2758117848.0, "step": 4355 }, { "epoch": 0.5150762681802058, "grad_norm": 0.15089742839336395, "learning_rate": 3.300517024266297e-05, "loss": 0.378, "num_tokens": 2758752771.0, "step": 4356 }, { "epoch": 0.5151945134208348, "grad_norm": 0.135053813457489, "learning_rate": 3.2994829757337034e-05, "loss": 0.3611, "num_tokens": 2759389448.0, "step": 4357 }, { "epoch": 0.5153127586614639, "grad_norm": 0.13089625537395477, "learning_rate": 3.298448927276947e-05, "loss": 0.3326, "num_tokens": 2760021572.0, "step": 4358 }, { "epoch": 0.515431003902093, "grad_norm": 0.13579261302947998, "learning_rate": 3.297414879047691e-05, "loss": 0.3603, "num_tokens": 2760655007.0, "step": 4359 }, { "epoch": 0.515549249142722, "grad_norm": 0.13628718256950378, "learning_rate": 3.2963808311976064e-05, "loss": 0.3246, "num_tokens": 2761290737.0, "step": 4360 }, { "epoch": 0.515667494383351, "grad_norm": 0.13145118951797485, "learning_rate": 3.2953467838783624e-05, "loss": 0.3379, "num_tokens": 2761925508.0, "step": 4361 }, { "epoch": 0.5157857396239801, "grad_norm": 0.12890103459358215, "learning_rate": 3.294312737241624e-05, "loss": 0.3384, "num_tokens": 2762562203.0, "step": 4362 }, { "epoch": 0.5159039848646092, "grad_norm": 0.13432826101779938, "learning_rate": 3.293278691439064e-05, "loss": 0.3574, "num_tokens": 2763193624.0, "step": 4363 }, { "epoch": 0.5160222301052383, "grad_norm": 0.13956794142723083, "learning_rate": 3.292244646622346e-05, "loss": 0.3373, "num_tokens": 2763825792.0, "step": 4364 }, { "epoch": 0.5161404753458674, "grad_norm": 0.12901918590068817, "learning_rate": 3.2912106029431416e-05, "loss": 0.3388, "num_tokens": 2764460601.0, "step": 4365 }, { "epoch": 0.5162587205864964, "grad_norm": 0.1397905796766281, "learning_rate": 3.290176560553114e-05, "loss": 0.3549, "num_tokens": 2765095265.0, "step": 4366 }, { "epoch": 0.5163769658271254, "grad_norm": 0.12709425389766693, "learning_rate": 3.289142519603937e-05, "loss": 0.3161, "num_tokens": 2765728556.0, "step": 4367 }, { "epoch": 0.5164952110677545, "grad_norm": 0.12512819468975067, "learning_rate": 3.288108480247271e-05, "loss": 0.3053, "num_tokens": 2766364254.0, "step": 4368 }, { "epoch": 0.5166134563083836, "grad_norm": 0.11500777304172516, "learning_rate": 3.287074442634788e-05, "loss": 0.3245, "num_tokens": 2766980734.0, "step": 4369 }, { "epoch": 0.5167317015490126, "grad_norm": 0.1231665313243866, "learning_rate": 3.286040406918153e-05, "loss": 0.3154, "num_tokens": 2767618764.0, "step": 4370 }, { "epoch": 0.5168499467896417, "grad_norm": 0.13701462745666504, "learning_rate": 3.285006373249033e-05, "loss": 0.3566, "num_tokens": 2768253191.0, "step": 4371 }, { "epoch": 0.5169681920302708, "grad_norm": 0.12913160026073456, "learning_rate": 3.283972341779095e-05, "loss": 0.3253, "num_tokens": 2768885327.0, "step": 4372 }, { "epoch": 0.5170864372708999, "grad_norm": 0.1234656423330307, "learning_rate": 3.282938312660003e-05, "loss": 0.316, "num_tokens": 2769523332.0, "step": 4373 }, { "epoch": 0.517204682511529, "grad_norm": 0.1218821331858635, "learning_rate": 3.281904286043425e-05, "loss": 0.327, "num_tokens": 2770160249.0, "step": 4374 }, { "epoch": 0.5173229277521579, "grad_norm": 0.12821948528289795, "learning_rate": 3.280870262081024e-05, "loss": 0.3344, "num_tokens": 2770790177.0, "step": 4375 }, { "epoch": 0.517441172992787, "grad_norm": 0.1447463184595108, "learning_rate": 3.2798362409244657e-05, "loss": 0.3734, "num_tokens": 2771423512.0, "step": 4376 }, { "epoch": 0.5175594182334161, "grad_norm": 0.15610899031162262, "learning_rate": 3.2788022227254154e-05, "loss": 0.3995, "num_tokens": 2772059654.0, "step": 4377 }, { "epoch": 0.5176776634740452, "grad_norm": 0.13827823102474213, "learning_rate": 3.277768207635536e-05, "loss": 0.346, "num_tokens": 2772692935.0, "step": 4378 }, { "epoch": 0.5177959087146742, "grad_norm": 0.1357641965150833, "learning_rate": 3.276734195806491e-05, "loss": 0.3636, "num_tokens": 2773327360.0, "step": 4379 }, { "epoch": 0.5179141539553033, "grad_norm": 0.12887753546237946, "learning_rate": 3.2757001873899444e-05, "loss": 0.3546, "num_tokens": 2773964565.0, "step": 4380 }, { "epoch": 0.5180323991959324, "grad_norm": 0.1432798057794571, "learning_rate": 3.274666182537558e-05, "loss": 0.3884, "num_tokens": 2774600622.0, "step": 4381 }, { "epoch": 0.5181506444365614, "grad_norm": 0.13307106494903564, "learning_rate": 3.273632181400994e-05, "loss": 0.3118, "num_tokens": 2775239939.0, "step": 4382 }, { "epoch": 0.5182688896771905, "grad_norm": 0.13313323259353638, "learning_rate": 3.272598184131914e-05, "loss": 0.3154, "num_tokens": 2775870933.0, "step": 4383 }, { "epoch": 0.5183871349178195, "grad_norm": 0.14657188951969147, "learning_rate": 3.271564190881979e-05, "loss": 0.3502, "num_tokens": 2776507115.0, "step": 4384 }, { "epoch": 0.5185053801584486, "grad_norm": 0.12686577439308167, "learning_rate": 3.2705302018028485e-05, "loss": 0.3348, "num_tokens": 2777141430.0, "step": 4385 }, { "epoch": 0.5186236253990777, "grad_norm": 0.13183827698230743, "learning_rate": 3.2694962170461844e-05, "loss": 0.334, "num_tokens": 2777768334.0, "step": 4386 }, { "epoch": 0.5187418706397068, "grad_norm": 0.14970363676548004, "learning_rate": 3.2684622367636425e-05, "loss": 0.3433, "num_tokens": 2778406493.0, "step": 4387 }, { "epoch": 0.5188601158803359, "grad_norm": 0.1463664025068283, "learning_rate": 3.2674282611068844e-05, "loss": 0.3448, "num_tokens": 2779033882.0, "step": 4388 }, { "epoch": 0.5189783611209648, "grad_norm": 0.1458437144756317, "learning_rate": 3.2663942902275656e-05, "loss": 0.3431, "num_tokens": 2779671565.0, "step": 4389 }, { "epoch": 0.5190966063615939, "grad_norm": 0.14625556766986847, "learning_rate": 3.265360324277344e-05, "loss": 0.3445, "num_tokens": 2780304203.0, "step": 4390 }, { "epoch": 0.519214851602223, "grad_norm": 0.1411028951406479, "learning_rate": 3.264326363407878e-05, "loss": 0.3687, "num_tokens": 2780943837.0, "step": 4391 }, { "epoch": 0.5193330968428521, "grad_norm": 0.12397205829620361, "learning_rate": 3.263292407770819e-05, "loss": 0.3463, "num_tokens": 2781582246.0, "step": 4392 }, { "epoch": 0.5194513420834811, "grad_norm": 0.14086659252643585, "learning_rate": 3.262258457517825e-05, "loss": 0.3765, "num_tokens": 2782215219.0, "step": 4393 }, { "epoch": 0.5195695873241102, "grad_norm": 0.14263395965099335, "learning_rate": 3.261224512800549e-05, "loss": 0.3555, "num_tokens": 2782845501.0, "step": 4394 }, { "epoch": 0.5196878325647393, "grad_norm": 0.1238996610045433, "learning_rate": 3.260190573770644e-05, "loss": 0.3154, "num_tokens": 2783480889.0, "step": 4395 }, { "epoch": 0.5198060778053684, "grad_norm": 0.1281658113002777, "learning_rate": 3.259156640579763e-05, "loss": 0.3207, "num_tokens": 2784118333.0, "step": 4396 }, { "epoch": 0.5199243230459974, "grad_norm": 0.1275750696659088, "learning_rate": 3.258122713379556e-05, "loss": 0.3239, "num_tokens": 2784749776.0, "step": 4397 }, { "epoch": 0.5200425682866264, "grad_norm": 0.1386025995016098, "learning_rate": 3.257088792321677e-05, "loss": 0.3182, "num_tokens": 2785375603.0, "step": 4398 }, { "epoch": 0.5201608135272555, "grad_norm": 0.13421015441417694, "learning_rate": 3.256054877557772e-05, "loss": 0.3563, "num_tokens": 2786010783.0, "step": 4399 }, { "epoch": 0.5202790587678846, "grad_norm": 0.13540783524513245, "learning_rate": 3.255020969239492e-05, "loss": 0.3452, "num_tokens": 2786640195.0, "step": 4400 }, { "epoch": 0.5203973040085137, "grad_norm": 0.13247185945510864, "learning_rate": 3.253987067518484e-05, "loss": 0.3582, "num_tokens": 2787279842.0, "step": 4401 }, { "epoch": 0.5205155492491427, "grad_norm": 0.12009762227535248, "learning_rate": 3.252953172546395e-05, "loss": 0.2884, "num_tokens": 2787915196.0, "step": 4402 }, { "epoch": 0.5206337944897718, "grad_norm": 0.13493642210960388, "learning_rate": 3.251919284474872e-05, "loss": 0.3167, "num_tokens": 2788544073.0, "step": 4403 }, { "epoch": 0.5207520397304009, "grad_norm": 0.1378370225429535, "learning_rate": 3.250885403455558e-05, "loss": 0.3519, "num_tokens": 2789177036.0, "step": 4404 }, { "epoch": 0.5208702849710299, "grad_norm": 0.13489918410778046, "learning_rate": 3.2498515296400994e-05, "loss": 0.3312, "num_tokens": 2789813084.0, "step": 4405 }, { "epoch": 0.520988530211659, "grad_norm": 0.12931835651397705, "learning_rate": 3.248817663180135e-05, "loss": 0.32, "num_tokens": 2790446339.0, "step": 4406 }, { "epoch": 0.521106775452288, "grad_norm": 0.1425992101430893, "learning_rate": 3.2477838042273124e-05, "loss": 0.3665, "num_tokens": 2791082610.0, "step": 4407 }, { "epoch": 0.5212250206929171, "grad_norm": 0.13714702427387238, "learning_rate": 3.2467499529332665e-05, "loss": 0.3461, "num_tokens": 2791718049.0, "step": 4408 }, { "epoch": 0.5213432659335462, "grad_norm": 0.1337772160768509, "learning_rate": 3.2457161094496396e-05, "loss": 0.3558, "num_tokens": 2792353461.0, "step": 4409 }, { "epoch": 0.5214615111741753, "grad_norm": 0.1303798258304596, "learning_rate": 3.244682273928071e-05, "loss": 0.3076, "num_tokens": 2792990152.0, "step": 4410 }, { "epoch": 0.5215797564148043, "grad_norm": 0.12623848021030426, "learning_rate": 3.243648446520195e-05, "loss": 0.3093, "num_tokens": 2793610158.0, "step": 4411 }, { "epoch": 0.5216980016554333, "grad_norm": 0.13823366165161133, "learning_rate": 3.24261462737765e-05, "loss": 0.3436, "num_tokens": 2794240663.0, "step": 4412 }, { "epoch": 0.5218162468960624, "grad_norm": 0.13156113028526306, "learning_rate": 3.24158081665207e-05, "loss": 0.3324, "num_tokens": 2794849467.0, "step": 4413 }, { "epoch": 0.5219344921366915, "grad_norm": 0.1348951905965805, "learning_rate": 3.240547014495089e-05, "loss": 0.3002, "num_tokens": 2795488760.0, "step": 4414 }, { "epoch": 0.5220527373773206, "grad_norm": 0.1353495568037033, "learning_rate": 3.239513221058339e-05, "loss": 0.327, "num_tokens": 2796124001.0, "step": 4415 }, { "epoch": 0.5221709826179496, "grad_norm": 0.13248348236083984, "learning_rate": 3.23847943649345e-05, "loss": 0.3127, "num_tokens": 2796762534.0, "step": 4416 }, { "epoch": 0.5222892278585787, "grad_norm": 0.14123935997486115, "learning_rate": 3.2374456609520524e-05, "loss": 0.3533, "num_tokens": 2797396419.0, "step": 4417 }, { "epoch": 0.5224074730992078, "grad_norm": 0.13137567043304443, "learning_rate": 3.236411894585775e-05, "loss": 0.3344, "num_tokens": 2798029586.0, "step": 4418 }, { "epoch": 0.5225257183398369, "grad_norm": 0.1312532126903534, "learning_rate": 3.235378137546246e-05, "loss": 0.3355, "num_tokens": 2798664497.0, "step": 4419 }, { "epoch": 0.5226439635804658, "grad_norm": 0.14854486286640167, "learning_rate": 3.234344389985088e-05, "loss": 0.3658, "num_tokens": 2799302908.0, "step": 4420 }, { "epoch": 0.5227622088210949, "grad_norm": 0.123899444937706, "learning_rate": 3.2333106520539294e-05, "loss": 0.3201, "num_tokens": 2799935434.0, "step": 4421 }, { "epoch": 0.522880454061724, "grad_norm": 0.13732899725437164, "learning_rate": 3.2322769239043896e-05, "loss": 0.3738, "num_tokens": 2800571655.0, "step": 4422 }, { "epoch": 0.5229986993023531, "grad_norm": 0.1200191080570221, "learning_rate": 3.231243205688092e-05, "loss": 0.3292, "num_tokens": 2801210405.0, "step": 4423 }, { "epoch": 0.5231169445429822, "grad_norm": 0.13458675146102905, "learning_rate": 3.230209497556655e-05, "loss": 0.313, "num_tokens": 2801844700.0, "step": 4424 }, { "epoch": 0.5232351897836112, "grad_norm": 0.1328052282333374, "learning_rate": 3.229175799661697e-05, "loss": 0.3134, "num_tokens": 2802479360.0, "step": 4425 }, { "epoch": 0.5233534350242403, "grad_norm": 0.12626305222511292, "learning_rate": 3.2281421121548384e-05, "loss": 0.3367, "num_tokens": 2803111868.0, "step": 4426 }, { "epoch": 0.5234716802648693, "grad_norm": 0.1351298689842224, "learning_rate": 3.22710843518769e-05, "loss": 0.3427, "num_tokens": 2803743719.0, "step": 4427 }, { "epoch": 0.5235899255054984, "grad_norm": 0.14293397963047028, "learning_rate": 3.2260747689118694e-05, "loss": 0.3625, "num_tokens": 2804376742.0, "step": 4428 }, { "epoch": 0.5237081707461275, "grad_norm": 0.12691456079483032, "learning_rate": 3.225041113478988e-05, "loss": 0.3007, "num_tokens": 2805008809.0, "step": 4429 }, { "epoch": 0.5238264159867565, "grad_norm": 0.14208610355854034, "learning_rate": 3.224007469040655e-05, "loss": 0.3587, "num_tokens": 2805647725.0, "step": 4430 }, { "epoch": 0.5239446612273856, "grad_norm": 0.1398632824420929, "learning_rate": 3.222973835748482e-05, "loss": 0.369, "num_tokens": 2806278806.0, "step": 4431 }, { "epoch": 0.5240629064680147, "grad_norm": 0.1288808286190033, "learning_rate": 3.2219402137540746e-05, "loss": 0.3157, "num_tokens": 2806915022.0, "step": 4432 }, { "epoch": 0.5241811517086438, "grad_norm": 0.1304522007703781, "learning_rate": 3.22090660320904e-05, "loss": 0.3461, "num_tokens": 2807551959.0, "step": 4433 }, { "epoch": 0.5242993969492727, "grad_norm": 0.12589798867702484, "learning_rate": 3.219873004264981e-05, "loss": 0.3242, "num_tokens": 2808188571.0, "step": 4434 }, { "epoch": 0.5244176421899018, "grad_norm": 0.12361311912536621, "learning_rate": 3.218839417073502e-05, "loss": 0.3403, "num_tokens": 2808824613.0, "step": 4435 }, { "epoch": 0.5245358874305309, "grad_norm": 0.11911483108997345, "learning_rate": 3.2178058417862015e-05, "loss": 0.3197, "num_tokens": 2809455876.0, "step": 4436 }, { "epoch": 0.52465413267116, "grad_norm": 0.14217838644981384, "learning_rate": 3.216772278554681e-05, "loss": 0.313, "num_tokens": 2810095494.0, "step": 4437 }, { "epoch": 0.5247723779117891, "grad_norm": 0.13972318172454834, "learning_rate": 3.215738727530535e-05, "loss": 0.3661, "num_tokens": 2810729967.0, "step": 4438 }, { "epoch": 0.5248906231524181, "grad_norm": 0.12330927699804306, "learning_rate": 3.214705188865362e-05, "loss": 0.2959, "num_tokens": 2811369223.0, "step": 4439 }, { "epoch": 0.5250088683930472, "grad_norm": 0.12357178330421448, "learning_rate": 3.213671662710753e-05, "loss": 0.3546, "num_tokens": 2811996641.0, "step": 4440 }, { "epoch": 0.5251271136336763, "grad_norm": 0.12222689390182495, "learning_rate": 3.212638149218301e-05, "loss": 0.3097, "num_tokens": 2812617201.0, "step": 4441 }, { "epoch": 0.5252453588743053, "grad_norm": 0.12438540905714035, "learning_rate": 3.211604648539597e-05, "loss": 0.3066, "num_tokens": 2813255638.0, "step": 4442 }, { "epoch": 0.5253636041149343, "grad_norm": 0.13485212624073029, "learning_rate": 3.210571160826227e-05, "loss": 0.3772, "num_tokens": 2813895289.0, "step": 4443 }, { "epoch": 0.5254818493555634, "grad_norm": 0.12412358075380325, "learning_rate": 3.2095376862297794e-05, "loss": 0.32, "num_tokens": 2814528665.0, "step": 4444 }, { "epoch": 0.5256000945961925, "grad_norm": 0.13262785971164703, "learning_rate": 3.208504224901836e-05, "loss": 0.3081, "num_tokens": 2815156131.0, "step": 4445 }, { "epoch": 0.5257183398368216, "grad_norm": 0.13527683913707733, "learning_rate": 3.2074707769939816e-05, "loss": 0.3358, "num_tokens": 2815793252.0, "step": 4446 }, { "epoch": 0.5258365850774507, "grad_norm": 0.12618765234947205, "learning_rate": 3.206437342657794e-05, "loss": 0.3458, "num_tokens": 2816425934.0, "step": 4447 }, { "epoch": 0.5259548303180797, "grad_norm": 0.12965553998947144, "learning_rate": 3.205403922044853e-05, "loss": 0.355, "num_tokens": 2817053614.0, "step": 4448 }, { "epoch": 0.5260730755587087, "grad_norm": 0.13043104112148285, "learning_rate": 3.204370515306736e-05, "loss": 0.3282, "num_tokens": 2817690368.0, "step": 4449 }, { "epoch": 0.5261913207993378, "grad_norm": 0.12002342194318771, "learning_rate": 3.2033371225950157e-05, "loss": 0.3091, "num_tokens": 2818300405.0, "step": 4450 }, { "epoch": 0.5263095660399669, "grad_norm": 0.12761610746383667, "learning_rate": 3.202303744061266e-05, "loss": 0.3225, "num_tokens": 2818939983.0, "step": 4451 }, { "epoch": 0.5264278112805959, "grad_norm": 0.1299896091222763, "learning_rate": 3.201270379857054e-05, "loss": 0.3251, "num_tokens": 2819577049.0, "step": 4452 }, { "epoch": 0.526546056521225, "grad_norm": 0.1275833398103714, "learning_rate": 3.200237030133951e-05, "loss": 0.3362, "num_tokens": 2820215299.0, "step": 4453 }, { "epoch": 0.5266643017618541, "grad_norm": 0.138396754860878, "learning_rate": 3.1992036950435206e-05, "loss": 0.3265, "num_tokens": 2820843243.0, "step": 4454 }, { "epoch": 0.5267825470024832, "grad_norm": 0.14828316867351532, "learning_rate": 3.1981703747373274e-05, "loss": 0.3455, "num_tokens": 2821482462.0, "step": 4455 }, { "epoch": 0.5269007922431123, "grad_norm": 0.13491690158843994, "learning_rate": 3.197137069366935e-05, "loss": 0.3442, "num_tokens": 2822118977.0, "step": 4456 }, { "epoch": 0.5270190374837412, "grad_norm": 0.1521153450012207, "learning_rate": 3.196103779083899e-05, "loss": 0.3366, "num_tokens": 2822738463.0, "step": 4457 }, { "epoch": 0.5271372827243703, "grad_norm": 0.1559378057718277, "learning_rate": 3.1950705040397806e-05, "loss": 0.3037, "num_tokens": 2823370514.0, "step": 4458 }, { "epoch": 0.5272555279649994, "grad_norm": 0.13340570032596588, "learning_rate": 3.194037244386132e-05, "loss": 0.3214, "num_tokens": 2823990484.0, "step": 4459 }, { "epoch": 0.5273737732056285, "grad_norm": 0.16179561614990234, "learning_rate": 3.193004000274507e-05, "loss": 0.3981, "num_tokens": 2824618205.0, "step": 4460 }, { "epoch": 0.5274920184462576, "grad_norm": 0.1522541046142578, "learning_rate": 3.191970771856456e-05, "loss": 0.3151, "num_tokens": 2825257597.0, "step": 4461 }, { "epoch": 0.5276102636868866, "grad_norm": 0.13836775720119476, "learning_rate": 3.190937559283527e-05, "loss": 0.3416, "num_tokens": 2825890050.0, "step": 4462 }, { "epoch": 0.5277285089275157, "grad_norm": 0.13746298849582672, "learning_rate": 3.1899043627072665e-05, "loss": 0.3087, "num_tokens": 2826525718.0, "step": 4463 }, { "epoch": 0.5278467541681447, "grad_norm": 0.13608817756175995, "learning_rate": 3.188871182279216e-05, "loss": 0.3155, "num_tokens": 2827153866.0, "step": 4464 }, { "epoch": 0.5279649994087738, "grad_norm": 0.142583429813385, "learning_rate": 3.1878380181509194e-05, "loss": 0.3246, "num_tokens": 2827790629.0, "step": 4465 }, { "epoch": 0.5280832446494028, "grad_norm": 0.13943246006965637, "learning_rate": 3.1868048704739135e-05, "loss": 0.3646, "num_tokens": 2828425462.0, "step": 4466 }, { "epoch": 0.5282014898900319, "grad_norm": 0.1414436548948288, "learning_rate": 3.185771739399736e-05, "loss": 0.3498, "num_tokens": 2829062302.0, "step": 4467 }, { "epoch": 0.528319735130661, "grad_norm": 0.1335756778717041, "learning_rate": 3.184738625079919e-05, "loss": 0.3354, "num_tokens": 2829690810.0, "step": 4468 }, { "epoch": 0.5284379803712901, "grad_norm": 0.1487240344285965, "learning_rate": 3.1837055276659945e-05, "loss": 0.3928, "num_tokens": 2830316524.0, "step": 4469 }, { "epoch": 0.5285562256119192, "grad_norm": 0.13935624063014984, "learning_rate": 3.182672447309493e-05, "loss": 0.3064, "num_tokens": 2830950150.0, "step": 4470 }, { "epoch": 0.5286744708525482, "grad_norm": 0.1535981148481369, "learning_rate": 3.1816393841619394e-05, "loss": 0.3747, "num_tokens": 2831578458.0, "step": 4471 }, { "epoch": 0.5287927160931772, "grad_norm": 0.1287432461977005, "learning_rate": 3.180606338374858e-05, "loss": 0.3256, "num_tokens": 2832204137.0, "step": 4472 }, { "epoch": 0.5289109613338063, "grad_norm": 0.12710289657115936, "learning_rate": 3.1795733100997696e-05, "loss": 0.3174, "num_tokens": 2832835080.0, "step": 4473 }, { "epoch": 0.5290292065744354, "grad_norm": 0.1293431669473648, "learning_rate": 3.178540299488194e-05, "loss": 0.3258, "num_tokens": 2833472903.0, "step": 4474 }, { "epoch": 0.5291474518150644, "grad_norm": 0.12803402543067932, "learning_rate": 3.177507306691647e-05, "loss": 0.327, "num_tokens": 2834108484.0, "step": 4475 }, { "epoch": 0.5292656970556935, "grad_norm": 0.12964743375778198, "learning_rate": 3.176474331861641e-05, "loss": 0.3451, "num_tokens": 2834748148.0, "step": 4476 }, { "epoch": 0.5293839422963226, "grad_norm": 0.12737919390201569, "learning_rate": 3.17544137514969e-05, "loss": 0.3305, "num_tokens": 2835386453.0, "step": 4477 }, { "epoch": 0.5295021875369517, "grad_norm": 0.12180329859256744, "learning_rate": 3.174408436707299e-05, "loss": 0.3438, "num_tokens": 2836024471.0, "step": 4478 }, { "epoch": 0.5296204327775808, "grad_norm": 0.1279119849205017, "learning_rate": 3.173375516685976e-05, "loss": 0.3179, "num_tokens": 2836654484.0, "step": 4479 }, { "epoch": 0.5297386780182097, "grad_norm": 0.12197789549827576, "learning_rate": 3.172342615237223e-05, "loss": 0.3106, "num_tokens": 2837292579.0, "step": 4480 }, { "epoch": 0.5298569232588388, "grad_norm": 0.12274414300918579, "learning_rate": 3.1713097325125396e-05, "loss": 0.3119, "num_tokens": 2837921700.0, "step": 4481 }, { "epoch": 0.5299751684994679, "grad_norm": 0.1298719197511673, "learning_rate": 3.170276868663424e-05, "loss": 0.3419, "num_tokens": 2838559329.0, "step": 4482 }, { "epoch": 0.530093413740097, "grad_norm": 0.14335808157920837, "learning_rate": 3.1692440238413704e-05, "loss": 0.3621, "num_tokens": 2839197595.0, "step": 4483 }, { "epoch": 0.530211658980726, "grad_norm": 0.1414574384689331, "learning_rate": 3.1682111981978716e-05, "loss": 0.3727, "num_tokens": 2839829226.0, "step": 4484 }, { "epoch": 0.5303299042213551, "grad_norm": 0.1297711282968521, "learning_rate": 3.167178391884415e-05, "loss": 0.3412, "num_tokens": 2840463245.0, "step": 4485 }, { "epoch": 0.5304481494619842, "grad_norm": 0.14485140144824982, "learning_rate": 3.16614560505249e-05, "loss": 0.3718, "num_tokens": 2841089755.0, "step": 4486 }, { "epoch": 0.5305663947026132, "grad_norm": 0.13886789977550507, "learning_rate": 3.165112837853576e-05, "loss": 0.3485, "num_tokens": 2841722308.0, "step": 4487 }, { "epoch": 0.5306846399432423, "grad_norm": 0.13945627212524414, "learning_rate": 3.164080090439157e-05, "loss": 0.3795, "num_tokens": 2842354810.0, "step": 4488 }, { "epoch": 0.5308028851838713, "grad_norm": 0.13052865862846375, "learning_rate": 3.163047362960707e-05, "loss": 0.3091, "num_tokens": 2842987750.0, "step": 4489 }, { "epoch": 0.5309211304245004, "grad_norm": 0.1344643235206604, "learning_rate": 3.162014655569704e-05, "loss": 0.3371, "num_tokens": 2843621209.0, "step": 4490 }, { "epoch": 0.5310393756651295, "grad_norm": 0.14256803691387177, "learning_rate": 3.160981968417618e-05, "loss": 0.368, "num_tokens": 2844253945.0, "step": 4491 }, { "epoch": 0.5311576209057586, "grad_norm": 0.13688264787197113, "learning_rate": 3.1599493016559185e-05, "loss": 0.3721, "num_tokens": 2844888738.0, "step": 4492 }, { "epoch": 0.5312758661463876, "grad_norm": 0.1345473676919937, "learning_rate": 3.1589166554360715e-05, "loss": 0.3447, "num_tokens": 2845524956.0, "step": 4493 }, { "epoch": 0.5313941113870166, "grad_norm": 0.14109358191490173, "learning_rate": 3.157884029909538e-05, "loss": 0.3461, "num_tokens": 2846161664.0, "step": 4494 }, { "epoch": 0.5315123566276457, "grad_norm": 0.13970284163951874, "learning_rate": 3.1568514252277805e-05, "loss": 0.3547, "num_tokens": 2846795160.0, "step": 4495 }, { "epoch": 0.5316306018682748, "grad_norm": 0.131881445646286, "learning_rate": 3.1558188415422524e-05, "loss": 0.3499, "num_tokens": 2847430328.0, "step": 4496 }, { "epoch": 0.5317488471089039, "grad_norm": 0.13926444947719574, "learning_rate": 3.15478627900441e-05, "loss": 0.3302, "num_tokens": 2848064972.0, "step": 4497 }, { "epoch": 0.5318670923495329, "grad_norm": 0.12392543256282806, "learning_rate": 3.153753737765703e-05, "loss": 0.2833, "num_tokens": 2848695806.0, "step": 4498 }, { "epoch": 0.531985337590162, "grad_norm": 0.12532900273799896, "learning_rate": 3.152721217977578e-05, "loss": 0.343, "num_tokens": 2849328971.0, "step": 4499 }, { "epoch": 0.5321035828307911, "grad_norm": 0.1343892216682434, "learning_rate": 3.1516887197914806e-05, "loss": 0.3179, "num_tokens": 2849960387.0, "step": 4500 }, { "epoch": 0.5322218280714202, "grad_norm": 0.12777821719646454, "learning_rate": 3.150656243358851e-05, "loss": 0.3359, "num_tokens": 2850592110.0, "step": 4501 }, { "epoch": 0.5323400733120492, "grad_norm": 0.13186711072921753, "learning_rate": 3.1496237888311275e-05, "loss": 0.3345, "num_tokens": 2851222271.0, "step": 4502 }, { "epoch": 0.5324583185526782, "grad_norm": 0.1241973489522934, "learning_rate": 3.1485913563597426e-05, "loss": 0.3177, "num_tokens": 2851812251.0, "step": 4503 }, { "epoch": 0.5325765637933073, "grad_norm": 0.14013512432575226, "learning_rate": 3.1475589460961304e-05, "loss": 0.3281, "num_tokens": 2852446195.0, "step": 4504 }, { "epoch": 0.5326948090339364, "grad_norm": 0.14507374167442322, "learning_rate": 3.1465265581917194e-05, "loss": 0.3314, "num_tokens": 2853080067.0, "step": 4505 }, { "epoch": 0.5328130542745655, "grad_norm": 0.12750408053398132, "learning_rate": 3.1454941927979314e-05, "loss": 0.3517, "num_tokens": 2853713010.0, "step": 4506 }, { "epoch": 0.5329312995151945, "grad_norm": 0.12521685659885406, "learning_rate": 3.1444618500661906e-05, "loss": 0.3324, "num_tokens": 2854350371.0, "step": 4507 }, { "epoch": 0.5330495447558236, "grad_norm": 0.12183456122875214, "learning_rate": 3.143429530147914e-05, "loss": 0.3282, "num_tokens": 2854985302.0, "step": 4508 }, { "epoch": 0.5331677899964526, "grad_norm": 0.1368231475353241, "learning_rate": 3.1423972331945184e-05, "loss": 0.3489, "num_tokens": 2855621691.0, "step": 4509 }, { "epoch": 0.5332860352370817, "grad_norm": 0.1344636082649231, "learning_rate": 3.141364959357412e-05, "loss": 0.3538, "num_tokens": 2856251856.0, "step": 4510 }, { "epoch": 0.5334042804777108, "grad_norm": 0.13617056608200073, "learning_rate": 3.140332708788006e-05, "loss": 0.3793, "num_tokens": 2856888198.0, "step": 4511 }, { "epoch": 0.5335225257183398, "grad_norm": 0.12791381776332855, "learning_rate": 3.1393004816377036e-05, "loss": 0.337, "num_tokens": 2857524494.0, "step": 4512 }, { "epoch": 0.5336407709589689, "grad_norm": 0.1289823204278946, "learning_rate": 3.138268278057906e-05, "loss": 0.3116, "num_tokens": 2858160948.0, "step": 4513 }, { "epoch": 0.533759016199598, "grad_norm": 0.12830911576747894, "learning_rate": 3.137236098200013e-05, "loss": 0.3145, "num_tokens": 2858796339.0, "step": 4514 }, { "epoch": 0.5338772614402271, "grad_norm": 0.12719693779945374, "learning_rate": 3.136203942215415e-05, "loss": 0.3259, "num_tokens": 2859434274.0, "step": 4515 }, { "epoch": 0.533995506680856, "grad_norm": 0.1359463483095169, "learning_rate": 3.1351718102555086e-05, "loss": 0.3254, "num_tokens": 2860068640.0, "step": 4516 }, { "epoch": 0.5341137519214851, "grad_norm": 0.12524300813674927, "learning_rate": 3.134139702471675e-05, "loss": 0.3192, "num_tokens": 2860695006.0, "step": 4517 }, { "epoch": 0.5342319971621142, "grad_norm": 0.14347638189792633, "learning_rate": 3.133107619015301e-05, "loss": 0.3516, "num_tokens": 2861322434.0, "step": 4518 }, { "epoch": 0.5343502424027433, "grad_norm": 0.1405467540025711, "learning_rate": 3.132075560037768e-05, "loss": 0.3616, "num_tokens": 2861953782.0, "step": 4519 }, { "epoch": 0.5344684876433724, "grad_norm": 0.12365765124559402, "learning_rate": 3.131043525690451e-05, "loss": 0.3226, "num_tokens": 2862584555.0, "step": 4520 }, { "epoch": 0.5345867328840014, "grad_norm": 0.13775530457496643, "learning_rate": 3.130011516124723e-05, "loss": 0.3093, "num_tokens": 2863220828.0, "step": 4521 }, { "epoch": 0.5347049781246305, "grad_norm": 0.14558236300945282, "learning_rate": 3.128979531491954e-05, "loss": 0.382, "num_tokens": 2863856539.0, "step": 4522 }, { "epoch": 0.5348232233652596, "grad_norm": 0.1401584893465042, "learning_rate": 3.1279475719435094e-05, "loss": 0.3402, "num_tokens": 2864490653.0, "step": 4523 }, { "epoch": 0.5349414686058886, "grad_norm": 0.12122541666030884, "learning_rate": 3.126915637630751e-05, "loss": 0.307, "num_tokens": 2865124178.0, "step": 4524 }, { "epoch": 0.5350597138465176, "grad_norm": 0.129710853099823, "learning_rate": 3.125883728705037e-05, "loss": 0.345, "num_tokens": 2865761983.0, "step": 4525 }, { "epoch": 0.5351779590871467, "grad_norm": 0.13920558989048004, "learning_rate": 3.124851845317722e-05, "loss": 0.3671, "num_tokens": 2866398238.0, "step": 4526 }, { "epoch": 0.5352962043277758, "grad_norm": 0.1276615709066391, "learning_rate": 3.123819987620159e-05, "loss": 0.3303, "num_tokens": 2867037763.0, "step": 4527 }, { "epoch": 0.5354144495684049, "grad_norm": 0.14444683492183685, "learning_rate": 3.1227881557636925e-05, "loss": 0.3882, "num_tokens": 2867674866.0, "step": 4528 }, { "epoch": 0.535532694809034, "grad_norm": 0.12817177176475525, "learning_rate": 3.121756349899667e-05, "loss": 0.3174, "num_tokens": 2868306716.0, "step": 4529 }, { "epoch": 0.535650940049663, "grad_norm": 0.13109171390533447, "learning_rate": 3.120724570179422e-05, "loss": 0.3414, "num_tokens": 2868946212.0, "step": 4530 }, { "epoch": 0.535769185290292, "grad_norm": 0.13075508177280426, "learning_rate": 3.1196928167542916e-05, "loss": 0.3519, "num_tokens": 2869577803.0, "step": 4531 }, { "epoch": 0.5358874305309211, "grad_norm": 0.14080758392810822, "learning_rate": 3.1186610897756104e-05, "loss": 0.3113, "num_tokens": 2870210570.0, "step": 4532 }, { "epoch": 0.5360056757715502, "grad_norm": 0.14662696421146393, "learning_rate": 3.1176293893947035e-05, "loss": 0.3595, "num_tokens": 2870849044.0, "step": 4533 }, { "epoch": 0.5361239210121792, "grad_norm": 0.13410794734954834, "learning_rate": 3.1165977157628965e-05, "loss": 0.3534, "num_tokens": 2871483773.0, "step": 4534 }, { "epoch": 0.5362421662528083, "grad_norm": 0.1303064078092575, "learning_rate": 3.11556606903151e-05, "loss": 0.3494, "num_tokens": 2872115430.0, "step": 4535 }, { "epoch": 0.5363604114934374, "grad_norm": 0.13950741291046143, "learning_rate": 3.114534449351859e-05, "loss": 0.3817, "num_tokens": 2872750240.0, "step": 4536 }, { "epoch": 0.5364786567340665, "grad_norm": 0.14669805765151978, "learning_rate": 3.1135028568752574e-05, "loss": 0.3516, "num_tokens": 2873384978.0, "step": 4537 }, { "epoch": 0.5365969019746956, "grad_norm": 0.15502840280532837, "learning_rate": 3.112471291753011e-05, "loss": 0.3394, "num_tokens": 2874017063.0, "step": 4538 }, { "epoch": 0.5367151472153245, "grad_norm": 0.12950372695922852, "learning_rate": 3.1114397541364256e-05, "loss": 0.3349, "num_tokens": 2874648778.0, "step": 4539 }, { "epoch": 0.5368333924559536, "grad_norm": 0.1326417177915573, "learning_rate": 3.1104082441768e-05, "loss": 0.3181, "num_tokens": 2875287028.0, "step": 4540 }, { "epoch": 0.5369516376965827, "grad_norm": 0.13379524648189545, "learning_rate": 3.1093767620254316e-05, "loss": 0.2842, "num_tokens": 2875919124.0, "step": 4541 }, { "epoch": 0.5370698829372118, "grad_norm": 0.1449882984161377, "learning_rate": 3.108345307833612e-05, "loss": 0.381, "num_tokens": 2876557242.0, "step": 4542 }, { "epoch": 0.5371881281778409, "grad_norm": 0.1254216581583023, "learning_rate": 3.107313881752629e-05, "loss": 0.3426, "num_tokens": 2877191148.0, "step": 4543 }, { "epoch": 0.5373063734184699, "grad_norm": 0.14550235867500305, "learning_rate": 3.106282483933768e-05, "loss": 0.3347, "num_tokens": 2877825608.0, "step": 4544 }, { "epoch": 0.537424618659099, "grad_norm": 0.14066602289676666, "learning_rate": 3.1052511145283055e-05, "loss": 0.3376, "num_tokens": 2878458906.0, "step": 4545 }, { "epoch": 0.537542863899728, "grad_norm": 0.14838829636573792, "learning_rate": 3.1042197736875205e-05, "loss": 0.3593, "num_tokens": 2879090894.0, "step": 4546 }, { "epoch": 0.5376611091403571, "grad_norm": 0.13313329219818115, "learning_rate": 3.1031884615626805e-05, "loss": 0.3321, "num_tokens": 2879726312.0, "step": 4547 }, { "epoch": 0.5377793543809861, "grad_norm": 0.1268407106399536, "learning_rate": 3.1021571783050554e-05, "loss": 0.3173, "num_tokens": 2880353585.0, "step": 4548 }, { "epoch": 0.5378975996216152, "grad_norm": 0.1329355388879776, "learning_rate": 3.1011259240659076e-05, "loss": 0.3524, "num_tokens": 2880988614.0, "step": 4549 }, { "epoch": 0.5380158448622443, "grad_norm": 0.13745461404323578, "learning_rate": 3.100094698996495e-05, "loss": 0.349, "num_tokens": 2881618273.0, "step": 4550 }, { "epoch": 0.5381340901028734, "grad_norm": 0.14565438032150269, "learning_rate": 3.099063503248072e-05, "loss": 0.3536, "num_tokens": 2882253526.0, "step": 4551 }, { "epoch": 0.5382523353435025, "grad_norm": 0.12764719128608704, "learning_rate": 3.098032336971889e-05, "loss": 0.3163, "num_tokens": 2882885013.0, "step": 4552 }, { "epoch": 0.5383705805841315, "grad_norm": 0.12336045503616333, "learning_rate": 3.097001200319193e-05, "loss": 0.3386, "num_tokens": 2883517578.0, "step": 4553 }, { "epoch": 0.5384888258247605, "grad_norm": 0.12353664636611938, "learning_rate": 3.0959700934412223e-05, "loss": 0.3212, "num_tokens": 2884152294.0, "step": 4554 }, { "epoch": 0.5386070710653896, "grad_norm": 0.12828172743320465, "learning_rate": 3.094939016489215e-05, "loss": 0.343, "num_tokens": 2884781946.0, "step": 4555 }, { "epoch": 0.5387253163060187, "grad_norm": 0.13579270243644714, "learning_rate": 3.093907969614406e-05, "loss": 0.3523, "num_tokens": 2885415387.0, "step": 4556 }, { "epoch": 0.5388435615466477, "grad_norm": 0.13422158360481262, "learning_rate": 3.0928769529680205e-05, "loss": 0.3378, "num_tokens": 2886053525.0, "step": 4557 }, { "epoch": 0.5389618067872768, "grad_norm": 0.12798093259334564, "learning_rate": 3.091845966701284e-05, "loss": 0.3083, "num_tokens": 2886688706.0, "step": 4558 }, { "epoch": 0.5390800520279059, "grad_norm": 0.15729419887065887, "learning_rate": 3.090815010965414e-05, "loss": 0.3747, "num_tokens": 2887321466.0, "step": 4559 }, { "epoch": 0.539198297268535, "grad_norm": 0.13011299073696136, "learning_rate": 3.0897840859116276e-05, "loss": 0.3053, "num_tokens": 2887952219.0, "step": 4560 }, { "epoch": 0.539316542509164, "grad_norm": 0.140217587351799, "learning_rate": 3.088753191691134e-05, "loss": 0.3179, "num_tokens": 2888586824.0, "step": 4561 }, { "epoch": 0.539434787749793, "grad_norm": 0.1561988741159439, "learning_rate": 3.087722328455139e-05, "loss": 0.3396, "num_tokens": 2889223810.0, "step": 4562 }, { "epoch": 0.5395530329904221, "grad_norm": 0.13991893827915192, "learning_rate": 3.086691496354844e-05, "loss": 0.3493, "num_tokens": 2889859298.0, "step": 4563 }, { "epoch": 0.5396712782310512, "grad_norm": 0.13708215951919556, "learning_rate": 3.085660695541444e-05, "loss": 0.3094, "num_tokens": 2890451202.0, "step": 4564 }, { "epoch": 0.5397895234716803, "grad_norm": 0.1190737932920456, "learning_rate": 3.084629926166135e-05, "loss": 0.3155, "num_tokens": 2891081402.0, "step": 4565 }, { "epoch": 0.5399077687123093, "grad_norm": 0.14303728938102722, "learning_rate": 3.0835991883801004e-05, "loss": 0.3472, "num_tokens": 2891717667.0, "step": 4566 }, { "epoch": 0.5400260139529384, "grad_norm": 0.13096755743026733, "learning_rate": 3.082568482334526e-05, "loss": 0.3326, "num_tokens": 2892357038.0, "step": 4567 }, { "epoch": 0.5401442591935675, "grad_norm": 0.1413148045539856, "learning_rate": 3.0815378081805876e-05, "loss": 0.3319, "num_tokens": 2892990181.0, "step": 4568 }, { "epoch": 0.5402625044341965, "grad_norm": 0.13072682917118073, "learning_rate": 3.0805071660694604e-05, "loss": 0.3389, "num_tokens": 2893627866.0, "step": 4569 }, { "epoch": 0.5403807496748256, "grad_norm": 0.14950864017009735, "learning_rate": 3.079476556152313e-05, "loss": 0.376, "num_tokens": 2894256639.0, "step": 4570 }, { "epoch": 0.5404989949154546, "grad_norm": 0.15612107515335083, "learning_rate": 3.078445978580309e-05, "loss": 0.3538, "num_tokens": 2894891074.0, "step": 4571 }, { "epoch": 0.5406172401560837, "grad_norm": 0.13681839406490326, "learning_rate": 3.077415433504607e-05, "loss": 0.3825, "num_tokens": 2895530011.0, "step": 4572 }, { "epoch": 0.5407354853967128, "grad_norm": 0.11796949058771133, "learning_rate": 3.076384921076363e-05, "loss": 0.2962, "num_tokens": 2896163933.0, "step": 4573 }, { "epoch": 0.5408537306373419, "grad_norm": 0.1466299295425415, "learning_rate": 3.075354441446727e-05, "loss": 0.3577, "num_tokens": 2896791367.0, "step": 4574 }, { "epoch": 0.5409719758779709, "grad_norm": 0.12981289625167847, "learning_rate": 3.0743239947668424e-05, "loss": 0.304, "num_tokens": 2897422482.0, "step": 4575 }, { "epoch": 0.5410902211186, "grad_norm": 0.1325444132089615, "learning_rate": 3.07329358118785e-05, "loss": 0.3359, "num_tokens": 2898061679.0, "step": 4576 }, { "epoch": 0.541208466359229, "grad_norm": 0.14680616557598114, "learning_rate": 3.072263200860886e-05, "loss": 0.3513, "num_tokens": 2898690490.0, "step": 4577 }, { "epoch": 0.5413267115998581, "grad_norm": 0.13593360781669617, "learning_rate": 3.0712328539370805e-05, "loss": 0.3316, "num_tokens": 2899326497.0, "step": 4578 }, { "epoch": 0.5414449568404872, "grad_norm": 0.11878427863121033, "learning_rate": 3.070202540567558e-05, "loss": 0.3411, "num_tokens": 2899963744.0, "step": 4579 }, { "epoch": 0.5415632020811162, "grad_norm": 0.13710777461528778, "learning_rate": 3.069172260903439e-05, "loss": 0.3494, "num_tokens": 2900600262.0, "step": 4580 }, { "epoch": 0.5416814473217453, "grad_norm": 0.13264553248882294, "learning_rate": 3.068142015095842e-05, "loss": 0.3065, "num_tokens": 2901236194.0, "step": 4581 }, { "epoch": 0.5417996925623744, "grad_norm": 0.14045804738998413, "learning_rate": 3.067111803295874e-05, "loss": 0.3477, "num_tokens": 2901875159.0, "step": 4582 }, { "epoch": 0.5419179378030035, "grad_norm": 0.13735030591487885, "learning_rate": 3.066081625654642e-05, "loss": 0.3594, "num_tokens": 2902511772.0, "step": 4583 }, { "epoch": 0.5420361830436325, "grad_norm": 0.1373601108789444, "learning_rate": 3.065051482323248e-05, "loss": 0.3277, "num_tokens": 2903142654.0, "step": 4584 }, { "epoch": 0.5421544282842615, "grad_norm": 0.13407739996910095, "learning_rate": 3.064021373452785e-05, "loss": 0.3267, "num_tokens": 2903781387.0, "step": 4585 }, { "epoch": 0.5422726735248906, "grad_norm": 0.1219981238245964, "learning_rate": 3.0629912991943475e-05, "loss": 0.3349, "num_tokens": 2904419606.0, "step": 4586 }, { "epoch": 0.5423909187655197, "grad_norm": 0.12963177263736725, "learning_rate": 3.061961259699017e-05, "loss": 0.3301, "num_tokens": 2905055950.0, "step": 4587 }, { "epoch": 0.5425091640061488, "grad_norm": 0.12387169152498245, "learning_rate": 3.060931255117876e-05, "loss": 0.3439, "num_tokens": 2905688837.0, "step": 4588 }, { "epoch": 0.5426274092467778, "grad_norm": 0.13615617156028748, "learning_rate": 3.0599012856019996e-05, "loss": 0.3248, "num_tokens": 2906324007.0, "step": 4589 }, { "epoch": 0.5427456544874069, "grad_norm": 0.13167648017406464, "learning_rate": 3.0588713513024576e-05, "loss": 0.3442, "num_tokens": 2906963633.0, "step": 4590 }, { "epoch": 0.542863899728036, "grad_norm": 0.12965233623981476, "learning_rate": 3.0578414523703155e-05, "loss": 0.3493, "num_tokens": 2907599527.0, "step": 4591 }, { "epoch": 0.542982144968665, "grad_norm": 0.12943953275680542, "learning_rate": 3.0568115889566324e-05, "loss": 0.3397, "num_tokens": 2908223532.0, "step": 4592 }, { "epoch": 0.5431003902092941, "grad_norm": 0.12004944682121277, "learning_rate": 3.0557817612124636e-05, "loss": 0.3168, "num_tokens": 2908848709.0, "step": 4593 }, { "epoch": 0.5432186354499231, "grad_norm": 0.12604735791683197, "learning_rate": 3.0547519692888574e-05, "loss": 0.3348, "num_tokens": 2909482619.0, "step": 4594 }, { "epoch": 0.5433368806905522, "grad_norm": 0.12683911621570587, "learning_rate": 3.053722213336861e-05, "loss": 0.3167, "num_tokens": 2910114935.0, "step": 4595 }, { "epoch": 0.5434551259311813, "grad_norm": 0.13369600474834442, "learning_rate": 3.052692493507508e-05, "loss": 0.3433, "num_tokens": 2910749408.0, "step": 4596 }, { "epoch": 0.5435733711718104, "grad_norm": 0.14493054151535034, "learning_rate": 3.051662809951836e-05, "loss": 0.3933, "num_tokens": 2911387975.0, "step": 4597 }, { "epoch": 0.5436916164124393, "grad_norm": 0.12986412644386292, "learning_rate": 3.0506331628208725e-05, "loss": 0.3235, "num_tokens": 2912022052.0, "step": 4598 }, { "epoch": 0.5438098616530684, "grad_norm": 0.12951986491680145, "learning_rate": 3.0496035522656394e-05, "loss": 0.3654, "num_tokens": 2912658371.0, "step": 4599 }, { "epoch": 0.5439281068936975, "grad_norm": 0.11981291323900223, "learning_rate": 3.0485739784371556e-05, "loss": 0.2853, "num_tokens": 2913289697.0, "step": 4600 }, { "epoch": 0.5440463521343266, "grad_norm": 0.13923956453800201, "learning_rate": 3.0475444414864317e-05, "loss": 0.3461, "num_tokens": 2913927794.0, "step": 4601 }, { "epoch": 0.5441645973749557, "grad_norm": 0.13197728991508484, "learning_rate": 3.0465149415644754e-05, "loss": 0.3649, "num_tokens": 2914562474.0, "step": 4602 }, { "epoch": 0.5442828426155847, "grad_norm": 0.11822417378425598, "learning_rate": 3.0454854788222873e-05, "loss": 0.303, "num_tokens": 2915196283.0, "step": 4603 }, { "epoch": 0.5444010878562138, "grad_norm": 0.13266828656196594, "learning_rate": 3.044456053410864e-05, "loss": 0.3579, "num_tokens": 2915809500.0, "step": 4604 }, { "epoch": 0.5445193330968429, "grad_norm": 0.13357321918010712, "learning_rate": 3.043426665481195e-05, "loss": 0.3441, "num_tokens": 2916445823.0, "step": 4605 }, { "epoch": 0.544637578337472, "grad_norm": 0.11986372619867325, "learning_rate": 3.042397315184265e-05, "loss": 0.3176, "num_tokens": 2917077617.0, "step": 4606 }, { "epoch": 0.5447558235781009, "grad_norm": 0.135109081864357, "learning_rate": 3.0413680026710552e-05, "loss": 0.3624, "num_tokens": 2917709254.0, "step": 4607 }, { "epoch": 0.54487406881873, "grad_norm": 0.14258895814418793, "learning_rate": 3.040338728092537e-05, "loss": 0.3656, "num_tokens": 2918344277.0, "step": 4608 }, { "epoch": 0.5449923140593591, "grad_norm": 0.12875501811504364, "learning_rate": 3.0393094915996814e-05, "loss": 0.3284, "num_tokens": 2918980006.0, "step": 4609 }, { "epoch": 0.5451105592999882, "grad_norm": 0.1304452270269394, "learning_rate": 3.038280293343448e-05, "loss": 0.3578, "num_tokens": 2919612317.0, "step": 4610 }, { "epoch": 0.5452288045406173, "grad_norm": 0.13012197613716125, "learning_rate": 3.0372511334747964e-05, "loss": 0.3221, "num_tokens": 2920246478.0, "step": 4611 }, { "epoch": 0.5453470497812463, "grad_norm": 0.13202176988124847, "learning_rate": 3.0362220121446757e-05, "loss": 0.3234, "num_tokens": 2920874579.0, "step": 4612 }, { "epoch": 0.5454652950218754, "grad_norm": 0.11895111203193665, "learning_rate": 3.035192929504032e-05, "loss": 0.3281, "num_tokens": 2921512138.0, "step": 4613 }, { "epoch": 0.5455835402625044, "grad_norm": 0.138818621635437, "learning_rate": 3.034163885703809e-05, "loss": 0.3807, "num_tokens": 2922132256.0, "step": 4614 }, { "epoch": 0.5457017855031335, "grad_norm": 0.13614849746227264, "learning_rate": 3.0331348808949357e-05, "loss": 0.3782, "num_tokens": 2922771348.0, "step": 4615 }, { "epoch": 0.5458200307437626, "grad_norm": 0.13040447235107422, "learning_rate": 3.0321059152283457e-05, "loss": 0.3221, "num_tokens": 2923409574.0, "step": 4616 }, { "epoch": 0.5459382759843916, "grad_norm": 0.15488547086715698, "learning_rate": 3.0310769888549572e-05, "loss": 0.3723, "num_tokens": 2924043457.0, "step": 4617 }, { "epoch": 0.5460565212250207, "grad_norm": 0.13938048481941223, "learning_rate": 3.0300481019256908e-05, "loss": 0.3676, "num_tokens": 2924680957.0, "step": 4618 }, { "epoch": 0.5461747664656498, "grad_norm": 0.12366258352994919, "learning_rate": 3.0290192545914562e-05, "loss": 0.3106, "num_tokens": 2925318479.0, "step": 4619 }, { "epoch": 0.5462930117062789, "grad_norm": 0.14661805331707, "learning_rate": 3.0279904470031602e-05, "loss": 0.3486, "num_tokens": 2925953311.0, "step": 4620 }, { "epoch": 0.5464112569469078, "grad_norm": 0.14342649281024933, "learning_rate": 3.026961679311702e-05, "loss": 0.3773, "num_tokens": 2926590434.0, "step": 4621 }, { "epoch": 0.5465295021875369, "grad_norm": 0.12844231724739075, "learning_rate": 3.0259329516679747e-05, "loss": 0.3176, "num_tokens": 2927220886.0, "step": 4622 }, { "epoch": 0.546647747428166, "grad_norm": 0.13051363825798035, "learning_rate": 3.0249042642228674e-05, "loss": 0.3138, "num_tokens": 2927856701.0, "step": 4623 }, { "epoch": 0.5467659926687951, "grad_norm": 0.14437390863895416, "learning_rate": 3.023875617127262e-05, "loss": 0.3478, "num_tokens": 2928479092.0, "step": 4624 }, { "epoch": 0.5468842379094242, "grad_norm": 0.13333760201931, "learning_rate": 3.022847010532034e-05, "loss": 0.3355, "num_tokens": 2929110695.0, "step": 4625 }, { "epoch": 0.5470024831500532, "grad_norm": 0.12677398324012756, "learning_rate": 3.0218184445880533e-05, "loss": 0.3345, "num_tokens": 2929744488.0, "step": 4626 }, { "epoch": 0.5471207283906823, "grad_norm": 0.1290328949689865, "learning_rate": 3.020789919446186e-05, "loss": 0.3013, "num_tokens": 2930378333.0, "step": 4627 }, { "epoch": 0.5472389736313114, "grad_norm": 0.1310172975063324, "learning_rate": 3.01976143525729e-05, "loss": 0.3343, "num_tokens": 2931016799.0, "step": 4628 }, { "epoch": 0.5473572188719404, "grad_norm": 0.12499239295721054, "learning_rate": 3.0187329921722162e-05, "loss": 0.3053, "num_tokens": 2931652641.0, "step": 4629 }, { "epoch": 0.5474754641125694, "grad_norm": 0.12911757826805115, "learning_rate": 3.017704590341812e-05, "loss": 0.2971, "num_tokens": 2932281980.0, "step": 4630 }, { "epoch": 0.5475937093531985, "grad_norm": 0.12312507629394531, "learning_rate": 3.016676229916917e-05, "loss": 0.3155, "num_tokens": 2932910395.0, "step": 4631 }, { "epoch": 0.5477119545938276, "grad_norm": 0.1201612800359726, "learning_rate": 3.0156479110483665e-05, "loss": 0.3168, "num_tokens": 2933549551.0, "step": 4632 }, { "epoch": 0.5478301998344567, "grad_norm": 0.13052192330360413, "learning_rate": 3.0146196338869868e-05, "loss": 0.3162, "num_tokens": 2934180846.0, "step": 4633 }, { "epoch": 0.5479484450750858, "grad_norm": 0.1382652223110199, "learning_rate": 3.0135913985836002e-05, "loss": 0.2871, "num_tokens": 2934783984.0, "step": 4634 }, { "epoch": 0.5480666903157148, "grad_norm": 0.1441129744052887, "learning_rate": 3.0125632052890254e-05, "loss": 0.3664, "num_tokens": 2935422030.0, "step": 4635 }, { "epoch": 0.5481849355563438, "grad_norm": 0.12682127952575684, "learning_rate": 3.011535054154067e-05, "loss": 0.3273, "num_tokens": 2936024443.0, "step": 4636 }, { "epoch": 0.5483031807969729, "grad_norm": 0.14051422476768494, "learning_rate": 3.0105069453295326e-05, "loss": 0.3416, "num_tokens": 2936657395.0, "step": 4637 }, { "epoch": 0.548421426037602, "grad_norm": 0.12424027919769287, "learning_rate": 3.009478878966217e-05, "loss": 0.3053, "num_tokens": 2937292953.0, "step": 4638 }, { "epoch": 0.548539671278231, "grad_norm": 0.1464979201555252, "learning_rate": 3.0084508552149138e-05, "loss": 0.3653, "num_tokens": 2937923016.0, "step": 4639 }, { "epoch": 0.5486579165188601, "grad_norm": 0.12419566512107849, "learning_rate": 3.0074228742264042e-05, "loss": 0.3418, "num_tokens": 2938559511.0, "step": 4640 }, { "epoch": 0.5487761617594892, "grad_norm": 0.12949226796627045, "learning_rate": 3.00639493615147e-05, "loss": 0.3358, "num_tokens": 2939158778.0, "step": 4641 }, { "epoch": 0.5488944070001183, "grad_norm": 0.1297057867050171, "learning_rate": 3.0053670411408823e-05, "loss": 0.3248, "num_tokens": 2939796137.0, "step": 4642 }, { "epoch": 0.5490126522407474, "grad_norm": 0.1254677027463913, "learning_rate": 3.004339189345406e-05, "loss": 0.3184, "num_tokens": 2940406483.0, "step": 4643 }, { "epoch": 0.5491308974813763, "grad_norm": 0.13701191544532776, "learning_rate": 3.0033113809158016e-05, "loss": 0.376, "num_tokens": 2941045425.0, "step": 4644 }, { "epoch": 0.5492491427220054, "grad_norm": 0.13341163098812103, "learning_rate": 3.002283616002821e-05, "loss": 0.3488, "num_tokens": 2941675756.0, "step": 4645 }, { "epoch": 0.5493673879626345, "grad_norm": 0.12945379316806793, "learning_rate": 3.001255894757214e-05, "loss": 0.3589, "num_tokens": 2942311651.0, "step": 4646 }, { "epoch": 0.5494856332032636, "grad_norm": 0.1415429413318634, "learning_rate": 3.0002282173297174e-05, "loss": 0.3254, "num_tokens": 2942945533.0, "step": 4647 }, { "epoch": 0.5496038784438926, "grad_norm": 0.13073867559432983, "learning_rate": 2.999200583871067e-05, "loss": 0.3193, "num_tokens": 2943584386.0, "step": 4648 }, { "epoch": 0.5497221236845217, "grad_norm": 0.12632104754447937, "learning_rate": 2.998172994531991e-05, "loss": 0.3509, "num_tokens": 2944219607.0, "step": 4649 }, { "epoch": 0.5498403689251508, "grad_norm": 0.1489030122756958, "learning_rate": 2.9971454494632093e-05, "loss": 0.3481, "num_tokens": 2944853758.0, "step": 4650 }, { "epoch": 0.5499586141657798, "grad_norm": 0.1315758228302002, "learning_rate": 2.996117948815437e-05, "loss": 0.3735, "num_tokens": 2945485495.0, "step": 4651 }, { "epoch": 0.5500768594064089, "grad_norm": 0.13730177283287048, "learning_rate": 2.995090492739381e-05, "loss": 0.3713, "num_tokens": 2946124429.0, "step": 4652 }, { "epoch": 0.5501951046470379, "grad_norm": 0.12769606709480286, "learning_rate": 2.994063081385745e-05, "loss": 0.3123, "num_tokens": 2946737591.0, "step": 4653 }, { "epoch": 0.550313349887667, "grad_norm": 0.12812687456607819, "learning_rate": 2.9930357149052215e-05, "loss": 0.3236, "num_tokens": 2947374875.0, "step": 4654 }, { "epoch": 0.5504315951282961, "grad_norm": 0.12623640894889832, "learning_rate": 2.992008393448499e-05, "loss": 0.3411, "num_tokens": 2948009139.0, "step": 4655 }, { "epoch": 0.5505498403689252, "grad_norm": 0.143077090382576, "learning_rate": 2.9909811171662624e-05, "loss": 0.3536, "num_tokens": 2948645843.0, "step": 4656 }, { "epoch": 0.5506680856095543, "grad_norm": 0.12945066392421722, "learning_rate": 2.989953886209184e-05, "loss": 0.3605, "num_tokens": 2949281670.0, "step": 4657 }, { "epoch": 0.5507863308501832, "grad_norm": 0.14323307573795319, "learning_rate": 2.9889267007279325e-05, "loss": 0.3414, "num_tokens": 2949914012.0, "step": 4658 }, { "epoch": 0.5509045760908123, "grad_norm": 0.1426634043455124, "learning_rate": 2.9878995608731706e-05, "loss": 0.3236, "num_tokens": 2950548012.0, "step": 4659 }, { "epoch": 0.5510228213314414, "grad_norm": 0.1293485015630722, "learning_rate": 2.9868724667955532e-05, "loss": 0.3511, "num_tokens": 2951177385.0, "step": 4660 }, { "epoch": 0.5511410665720705, "grad_norm": 0.12778067588806152, "learning_rate": 2.985845418645728e-05, "loss": 0.3652, "num_tokens": 2951814066.0, "step": 4661 }, { "epoch": 0.5512593118126995, "grad_norm": 0.1299331933259964, "learning_rate": 2.984818416574337e-05, "loss": 0.3142, "num_tokens": 2952451681.0, "step": 4662 }, { "epoch": 0.5513775570533286, "grad_norm": 0.1452513337135315, "learning_rate": 2.9837914607320166e-05, "loss": 0.3343, "num_tokens": 2953081872.0, "step": 4663 }, { "epoch": 0.5514958022939577, "grad_norm": 0.14050456881523132, "learning_rate": 2.9827645512693917e-05, "loss": 0.379, "num_tokens": 2953717472.0, "step": 4664 }, { "epoch": 0.5516140475345868, "grad_norm": 0.12956516444683075, "learning_rate": 2.9817376883370872e-05, "loss": 0.3167, "num_tokens": 2954352570.0, "step": 4665 }, { "epoch": 0.5517322927752158, "grad_norm": 0.12273352593183517, "learning_rate": 2.9807108720857147e-05, "loss": 0.326, "num_tokens": 2954990805.0, "step": 4666 }, { "epoch": 0.5518505380158448, "grad_norm": 0.13977205753326416, "learning_rate": 2.979684102665884e-05, "loss": 0.3849, "num_tokens": 2955627375.0, "step": 4667 }, { "epoch": 0.5519687832564739, "grad_norm": 0.12857426702976227, "learning_rate": 2.9786573802281947e-05, "loss": 0.3447, "num_tokens": 2956263558.0, "step": 4668 }, { "epoch": 0.552087028497103, "grad_norm": 0.12647801637649536, "learning_rate": 2.9776307049232405e-05, "loss": 0.321, "num_tokens": 2956893463.0, "step": 4669 }, { "epoch": 0.5522052737377321, "grad_norm": 0.1265292763710022, "learning_rate": 2.9766040769016095e-05, "loss": 0.3326, "num_tokens": 2957526213.0, "step": 4670 }, { "epoch": 0.5523235189783611, "grad_norm": 0.13011233508586884, "learning_rate": 2.975577496313881e-05, "loss": 0.2949, "num_tokens": 2958160232.0, "step": 4671 }, { "epoch": 0.5524417642189902, "grad_norm": 0.1453290432691574, "learning_rate": 2.9745509633106285e-05, "loss": 0.3782, "num_tokens": 2958798516.0, "step": 4672 }, { "epoch": 0.5525600094596193, "grad_norm": 0.13991543650627136, "learning_rate": 2.9735244780424174e-05, "loss": 0.3683, "num_tokens": 2959428945.0, "step": 4673 }, { "epoch": 0.5526782547002483, "grad_norm": 0.13394972681999207, "learning_rate": 2.972498040659807e-05, "loss": 0.3485, "num_tokens": 2960055396.0, "step": 4674 }, { "epoch": 0.5527964999408774, "grad_norm": 0.12765048444271088, "learning_rate": 2.971471651313349e-05, "loss": 0.3337, "num_tokens": 2960690935.0, "step": 4675 }, { "epoch": 0.5529147451815064, "grad_norm": 0.1380266398191452, "learning_rate": 2.9704453101535904e-05, "loss": 0.3237, "num_tokens": 2961325948.0, "step": 4676 }, { "epoch": 0.5530329904221355, "grad_norm": 0.13531430065631866, "learning_rate": 2.9694190173310673e-05, "loss": 0.3351, "num_tokens": 2961955423.0, "step": 4677 }, { "epoch": 0.5531512356627646, "grad_norm": 0.1396808922290802, "learning_rate": 2.9683927729963114e-05, "loss": 0.3811, "num_tokens": 2962591254.0, "step": 4678 }, { "epoch": 0.5532694809033937, "grad_norm": 0.12224498391151428, "learning_rate": 2.967366577299846e-05, "loss": 0.3081, "num_tokens": 2963229366.0, "step": 4679 }, { "epoch": 0.5533877261440227, "grad_norm": 0.1330767273902893, "learning_rate": 2.9663404303921878e-05, "loss": 0.3608, "num_tokens": 2963868549.0, "step": 4680 }, { "epoch": 0.5535059713846517, "grad_norm": 0.12426838278770447, "learning_rate": 2.965314332423847e-05, "loss": 0.3338, "num_tokens": 2964505273.0, "step": 4681 }, { "epoch": 0.5536242166252808, "grad_norm": 0.1434025764465332, "learning_rate": 2.964288283545325e-05, "loss": 0.3499, "num_tokens": 2965138504.0, "step": 4682 }, { "epoch": 0.5537424618659099, "grad_norm": 0.12465034425258636, "learning_rate": 2.9632622839071165e-05, "loss": 0.3262, "num_tokens": 2965771735.0, "step": 4683 }, { "epoch": 0.553860707106539, "grad_norm": 0.1450902819633484, "learning_rate": 2.9622363336597105e-05, "loss": 0.4065, "num_tokens": 2966408944.0, "step": 4684 }, { "epoch": 0.553978952347168, "grad_norm": 0.13101564347743988, "learning_rate": 2.9612104329535866e-05, "loss": 0.3686, "num_tokens": 2967044112.0, "step": 4685 }, { "epoch": 0.5540971975877971, "grad_norm": 0.1171937808394432, "learning_rate": 2.9601845819392204e-05, "loss": 0.2858, "num_tokens": 2967675823.0, "step": 4686 }, { "epoch": 0.5542154428284262, "grad_norm": 0.15617471933364868, "learning_rate": 2.959158780767075e-05, "loss": 0.3835, "num_tokens": 2968308637.0, "step": 4687 }, { "epoch": 0.5543336880690553, "grad_norm": 0.1380336880683899, "learning_rate": 2.958133029587611e-05, "loss": 0.3687, "num_tokens": 2968942297.0, "step": 4688 }, { "epoch": 0.5544519333096842, "grad_norm": 0.13384568691253662, "learning_rate": 2.9571073285512792e-05, "loss": 0.3087, "num_tokens": 2969572249.0, "step": 4689 }, { "epoch": 0.5545701785503133, "grad_norm": 0.12940359115600586, "learning_rate": 2.956081677808525e-05, "loss": 0.3228, "num_tokens": 2970211594.0, "step": 4690 }, { "epoch": 0.5546884237909424, "grad_norm": 0.14926238358020782, "learning_rate": 2.9550560775097822e-05, "loss": 0.3648, "num_tokens": 2970846321.0, "step": 4691 }, { "epoch": 0.5548066690315715, "grad_norm": 0.12755157053470612, "learning_rate": 2.9540305278054827e-05, "loss": 0.3315, "num_tokens": 2971476908.0, "step": 4692 }, { "epoch": 0.5549249142722006, "grad_norm": 0.12470575422048569, "learning_rate": 2.9530050288460482e-05, "loss": 0.3318, "num_tokens": 2972111012.0, "step": 4693 }, { "epoch": 0.5550431595128296, "grad_norm": 0.12430764734745026, "learning_rate": 2.951979580781891e-05, "loss": 0.314, "num_tokens": 2972749986.0, "step": 4694 }, { "epoch": 0.5551614047534587, "grad_norm": 0.13771064579486847, "learning_rate": 2.950954183763421e-05, "loss": 0.3216, "num_tokens": 2973383027.0, "step": 4695 }, { "epoch": 0.5552796499940877, "grad_norm": 0.1357216089963913, "learning_rate": 2.9499288379410355e-05, "loss": 0.3452, "num_tokens": 2974020759.0, "step": 4696 }, { "epoch": 0.5553978952347168, "grad_norm": 0.1275547593832016, "learning_rate": 2.9489035434651285e-05, "loss": 0.3445, "num_tokens": 2974656155.0, "step": 4697 }, { "epoch": 0.5555161404753459, "grad_norm": 0.1479395627975464, "learning_rate": 2.9478783004860807e-05, "loss": 0.3784, "num_tokens": 2975292870.0, "step": 4698 }, { "epoch": 0.5556343857159749, "grad_norm": 0.1393517404794693, "learning_rate": 2.946853109154273e-05, "loss": 0.3652, "num_tokens": 2975930737.0, "step": 4699 }, { "epoch": 0.555752630956604, "grad_norm": 0.14285938441753387, "learning_rate": 2.9458279696200735e-05, "loss": 0.3353, "num_tokens": 2976566363.0, "step": 4700 }, { "epoch": 0.5558708761972331, "grad_norm": 0.13739249110221863, "learning_rate": 2.944802882033843e-05, "loss": 0.3251, "num_tokens": 2977191113.0, "step": 4701 }, { "epoch": 0.5559891214378622, "grad_norm": 0.1398620307445526, "learning_rate": 2.943777846545937e-05, "loss": 0.3242, "num_tokens": 2977824806.0, "step": 4702 }, { "epoch": 0.5561073666784911, "grad_norm": 0.13715268671512604, "learning_rate": 2.9427528633067006e-05, "loss": 0.3691, "num_tokens": 2978459587.0, "step": 4703 }, { "epoch": 0.5562256119191202, "grad_norm": 0.12215851247310638, "learning_rate": 2.9417279324664737e-05, "loss": 0.3123, "num_tokens": 2979089884.0, "step": 4704 }, { "epoch": 0.5563438571597493, "grad_norm": 0.13108830153942108, "learning_rate": 2.9407030541755863e-05, "loss": 0.3463, "num_tokens": 2979725148.0, "step": 4705 }, { "epoch": 0.5564621024003784, "grad_norm": 0.1453961282968521, "learning_rate": 2.9396782285843625e-05, "loss": 0.3623, "num_tokens": 2980355116.0, "step": 4706 }, { "epoch": 0.5565803476410075, "grad_norm": 0.1450875699520111, "learning_rate": 2.938653455843118e-05, "loss": 0.3526, "num_tokens": 2980993874.0, "step": 4707 }, { "epoch": 0.5566985928816365, "grad_norm": 0.12484719604253769, "learning_rate": 2.9376287361021613e-05, "loss": 0.305, "num_tokens": 2981625744.0, "step": 4708 }, { "epoch": 0.5568168381222656, "grad_norm": 0.13839255273342133, "learning_rate": 2.936604069511792e-05, "loss": 0.3734, "num_tokens": 2982261450.0, "step": 4709 }, { "epoch": 0.5569350833628947, "grad_norm": 0.12941516935825348, "learning_rate": 2.9355794562223016e-05, "loss": 0.3285, "num_tokens": 2982883505.0, "step": 4710 }, { "epoch": 0.5570533286035237, "grad_norm": 0.13880260288715363, "learning_rate": 2.934554896383976e-05, "loss": 0.3256, "num_tokens": 2983519205.0, "step": 4711 }, { "epoch": 0.5571715738441527, "grad_norm": 0.14001944661140442, "learning_rate": 2.933530390147091e-05, "loss": 0.329, "num_tokens": 2984155890.0, "step": 4712 }, { "epoch": 0.5572898190847818, "grad_norm": 0.12339422851800919, "learning_rate": 2.9325059376619142e-05, "loss": 0.3367, "num_tokens": 2984794281.0, "step": 4713 }, { "epoch": 0.5574080643254109, "grad_norm": 0.1353040486574173, "learning_rate": 2.9314815390787107e-05, "loss": 0.3495, "num_tokens": 2985430573.0, "step": 4714 }, { "epoch": 0.55752630956604, "grad_norm": 0.12176649272441864, "learning_rate": 2.930457194547729e-05, "loss": 0.2937, "num_tokens": 2986046442.0, "step": 4715 }, { "epoch": 0.5576445548066691, "grad_norm": 0.12893836200237274, "learning_rate": 2.929432904219218e-05, "loss": 0.3287, "num_tokens": 2986679600.0, "step": 4716 }, { "epoch": 0.5577628000472981, "grad_norm": 0.15930968523025513, "learning_rate": 2.928408668243411e-05, "loss": 0.3703, "num_tokens": 2987307562.0, "step": 4717 }, { "epoch": 0.5578810452879271, "grad_norm": 0.13330434262752533, "learning_rate": 2.927384486770541e-05, "loss": 0.3414, "num_tokens": 2987942447.0, "step": 4718 }, { "epoch": 0.5579992905285562, "grad_norm": 0.12262073904275894, "learning_rate": 2.9263603599508263e-05, "loss": 0.3569, "num_tokens": 2988579239.0, "step": 4719 }, { "epoch": 0.5581175357691853, "grad_norm": 0.1280517578125, "learning_rate": 2.925336287934481e-05, "loss": 0.3251, "num_tokens": 2989211027.0, "step": 4720 }, { "epoch": 0.5582357810098143, "grad_norm": 0.12984614074230194, "learning_rate": 2.924312270871711e-05, "loss": 0.3344, "num_tokens": 2989842077.0, "step": 4721 }, { "epoch": 0.5583540262504434, "grad_norm": 0.12519779801368713, "learning_rate": 2.923288308912713e-05, "loss": 0.3365, "num_tokens": 2990477935.0, "step": 4722 }, { "epoch": 0.5584722714910725, "grad_norm": 0.1363186091184616, "learning_rate": 2.9222644022076756e-05, "loss": 0.3474, "num_tokens": 2991113386.0, "step": 4723 }, { "epoch": 0.5585905167317016, "grad_norm": 0.13838958740234375, "learning_rate": 2.9212405509067793e-05, "loss": 0.3651, "num_tokens": 2991753111.0, "step": 4724 }, { "epoch": 0.5587087619723307, "grad_norm": 0.13261725008487701, "learning_rate": 2.9202167551601993e-05, "loss": 0.3333, "num_tokens": 2992388237.0, "step": 4725 }, { "epoch": 0.5588270072129596, "grad_norm": 0.12993498146533966, "learning_rate": 2.919193015118096e-05, "loss": 0.3508, "num_tokens": 2993023042.0, "step": 4726 }, { "epoch": 0.5589452524535887, "grad_norm": 0.12272331118583679, "learning_rate": 2.9181693309306298e-05, "loss": 0.301, "num_tokens": 2993649840.0, "step": 4727 }, { "epoch": 0.5590634976942178, "grad_norm": 0.1314024031162262, "learning_rate": 2.9171457027479473e-05, "loss": 0.306, "num_tokens": 2994280622.0, "step": 4728 }, { "epoch": 0.5591817429348469, "grad_norm": 0.13289964199066162, "learning_rate": 2.9161221307201884e-05, "loss": 0.2936, "num_tokens": 2994913652.0, "step": 4729 }, { "epoch": 0.5592999881754759, "grad_norm": 0.13850125670433044, "learning_rate": 2.9150986149974862e-05, "loss": 0.338, "num_tokens": 2995547993.0, "step": 4730 }, { "epoch": 0.559418233416105, "grad_norm": 0.14361771941184998, "learning_rate": 2.9140751557299622e-05, "loss": 0.3859, "num_tokens": 2996185163.0, "step": 4731 }, { "epoch": 0.5595364786567341, "grad_norm": 0.12969015538692474, "learning_rate": 2.9130517530677337e-05, "loss": 0.3555, "num_tokens": 2996820372.0, "step": 4732 }, { "epoch": 0.5596547238973631, "grad_norm": 0.13500678539276123, "learning_rate": 2.9120284071609052e-05, "loss": 0.3478, "num_tokens": 2997459423.0, "step": 4733 }, { "epoch": 0.5597729691379922, "grad_norm": 0.1410571187734604, "learning_rate": 2.9110051181595773e-05, "loss": 0.3246, "num_tokens": 2998086417.0, "step": 4734 }, { "epoch": 0.5598912143786212, "grad_norm": 0.14483830332756042, "learning_rate": 2.909981886213841e-05, "loss": 0.3552, "num_tokens": 2998720304.0, "step": 4735 }, { "epoch": 0.5600094596192503, "grad_norm": 0.1385873407125473, "learning_rate": 2.9089587114737756e-05, "loss": 0.3633, "num_tokens": 2999349739.0, "step": 4736 }, { "epoch": 0.5601277048598794, "grad_norm": 0.1423257291316986, "learning_rate": 2.9079355940894574e-05, "loss": 0.3289, "num_tokens": 2999986410.0, "step": 4737 }, { "epoch": 0.5602459501005085, "grad_norm": 0.13436084985733032, "learning_rate": 2.9069125342109493e-05, "loss": 0.3359, "num_tokens": 3000618606.0, "step": 4738 }, { "epoch": 0.5603641953411376, "grad_norm": 0.13036927580833435, "learning_rate": 2.9058895319883104e-05, "loss": 0.3058, "num_tokens": 3001251833.0, "step": 4739 }, { "epoch": 0.5604824405817666, "grad_norm": 0.13279187679290771, "learning_rate": 2.9048665875715857e-05, "loss": 0.3597, "num_tokens": 3001884427.0, "step": 4740 }, { "epoch": 0.5606006858223956, "grad_norm": 0.1311497539281845, "learning_rate": 2.9038437011108174e-05, "loss": 0.3483, "num_tokens": 3002517745.0, "step": 4741 }, { "epoch": 0.5607189310630247, "grad_norm": 0.15608030557632446, "learning_rate": 2.9028208727560362e-05, "loss": 0.3574, "num_tokens": 3003154912.0, "step": 4742 }, { "epoch": 0.5608371763036538, "grad_norm": 0.1378091275691986, "learning_rate": 2.9017981026572647e-05, "loss": 0.3425, "num_tokens": 3003779594.0, "step": 4743 }, { "epoch": 0.5609554215442828, "grad_norm": 0.12835277616977692, "learning_rate": 2.9007753909645177e-05, "loss": 0.3456, "num_tokens": 3004416468.0, "step": 4744 }, { "epoch": 0.5610736667849119, "grad_norm": 0.1316828578710556, "learning_rate": 2.8997527378277994e-05, "loss": 0.3441, "num_tokens": 3005052431.0, "step": 4745 }, { "epoch": 0.561191912025541, "grad_norm": 0.14184559881687164, "learning_rate": 2.898730143397109e-05, "loss": 0.3237, "num_tokens": 3005686972.0, "step": 4746 }, { "epoch": 0.5613101572661701, "grad_norm": 0.13118331134319305, "learning_rate": 2.8977076078224314e-05, "loss": 0.3088, "num_tokens": 3006286195.0, "step": 4747 }, { "epoch": 0.5614284025067992, "grad_norm": 0.13296613097190857, "learning_rate": 2.8966851312537493e-05, "loss": 0.3273, "num_tokens": 3006914732.0, "step": 4748 }, { "epoch": 0.5615466477474281, "grad_norm": 0.1327364295721054, "learning_rate": 2.8956627138410344e-05, "loss": 0.3386, "num_tokens": 3007549766.0, "step": 4749 }, { "epoch": 0.5616648929880572, "grad_norm": 0.1353210210800171, "learning_rate": 2.8946403557342468e-05, "loss": 0.3608, "num_tokens": 3008188387.0, "step": 4750 }, { "epoch": 0.5617831382286863, "grad_norm": 0.1294814646244049, "learning_rate": 2.8936180570833417e-05, "loss": 0.317, "num_tokens": 3008827711.0, "step": 4751 }, { "epoch": 0.5619013834693154, "grad_norm": 0.12475042790174484, "learning_rate": 2.8925958180382638e-05, "loss": 0.3386, "num_tokens": 3009462862.0, "step": 4752 }, { "epoch": 0.5620196287099444, "grad_norm": 0.1220722571015358, "learning_rate": 2.89157363874895e-05, "loss": 0.3286, "num_tokens": 3010098115.0, "step": 4753 }, { "epoch": 0.5621378739505735, "grad_norm": 0.13289958238601685, "learning_rate": 2.8905515193653254e-05, "loss": 0.359, "num_tokens": 3010731748.0, "step": 4754 }, { "epoch": 0.5622561191912026, "grad_norm": 0.13439960777759552, "learning_rate": 2.889529460037312e-05, "loss": 0.3093, "num_tokens": 3011370415.0, "step": 4755 }, { "epoch": 0.5623743644318316, "grad_norm": 0.14214621484279633, "learning_rate": 2.8885074609148177e-05, "loss": 0.366, "num_tokens": 3012005356.0, "step": 4756 }, { "epoch": 0.5624926096724607, "grad_norm": 0.1400797963142395, "learning_rate": 2.887485522147745e-05, "loss": 0.3766, "num_tokens": 3012642401.0, "step": 4757 }, { "epoch": 0.5626108549130897, "grad_norm": 0.1304653137922287, "learning_rate": 2.8864636438859845e-05, "loss": 0.3516, "num_tokens": 3013282010.0, "step": 4758 }, { "epoch": 0.5627291001537188, "grad_norm": 0.13103795051574707, "learning_rate": 2.8854418262794213e-05, "loss": 0.3327, "num_tokens": 3013919435.0, "step": 4759 }, { "epoch": 0.5628473453943479, "grad_norm": 0.12832878530025482, "learning_rate": 2.8844200694779284e-05, "loss": 0.3602, "num_tokens": 3014549425.0, "step": 4760 }, { "epoch": 0.562965590634977, "grad_norm": 0.12637506425380707, "learning_rate": 2.883398373631372e-05, "loss": 0.2991, "num_tokens": 3015182526.0, "step": 4761 }, { "epoch": 0.563083835875606, "grad_norm": 0.11733275651931763, "learning_rate": 2.8823767388896085e-05, "loss": 0.3036, "num_tokens": 3015814333.0, "step": 4762 }, { "epoch": 0.563202081116235, "grad_norm": 0.13067573308944702, "learning_rate": 2.8813551654024866e-05, "loss": 0.3211, "num_tokens": 3016451910.0, "step": 4763 }, { "epoch": 0.5633203263568641, "grad_norm": 0.13346967101097107, "learning_rate": 2.8803336533198436e-05, "loss": 0.3, "num_tokens": 3017081036.0, "step": 4764 }, { "epoch": 0.5634385715974932, "grad_norm": 0.13233347237110138, "learning_rate": 2.8793122027915106e-05, "loss": 0.3317, "num_tokens": 3017716253.0, "step": 4765 }, { "epoch": 0.5635568168381223, "grad_norm": 0.14388686418533325, "learning_rate": 2.8782908139673057e-05, "loss": 0.3735, "num_tokens": 3018352487.0, "step": 4766 }, { "epoch": 0.5636750620787513, "grad_norm": 0.12594769895076752, "learning_rate": 2.8772694869970434e-05, "loss": 0.3289, "num_tokens": 3018988683.0, "step": 4767 }, { "epoch": 0.5637933073193804, "grad_norm": 0.12846118211746216, "learning_rate": 2.8762482220305245e-05, "loss": 0.3627, "num_tokens": 3019624005.0, "step": 4768 }, { "epoch": 0.5639115525600095, "grad_norm": 0.13552458584308624, "learning_rate": 2.8752270192175432e-05, "loss": 0.3381, "num_tokens": 3020261274.0, "step": 4769 }, { "epoch": 0.5640297978006386, "grad_norm": 0.13627943396568298, "learning_rate": 2.874205878707883e-05, "loss": 0.34, "num_tokens": 3020897812.0, "step": 4770 }, { "epoch": 0.5641480430412676, "grad_norm": 0.13905057311058044, "learning_rate": 2.87318480065132e-05, "loss": 0.3649, "num_tokens": 3021530441.0, "step": 4771 }, { "epoch": 0.5642662882818966, "grad_norm": 0.12724965810775757, "learning_rate": 2.87216378519762e-05, "loss": 0.3362, "num_tokens": 3022163868.0, "step": 4772 }, { "epoch": 0.5643845335225257, "grad_norm": 0.12489200383424759, "learning_rate": 2.8711428324965384e-05, "loss": 0.3027, "num_tokens": 3022795195.0, "step": 4773 }, { "epoch": 0.5645027787631548, "grad_norm": 0.13057805597782135, "learning_rate": 2.8701219426978258e-05, "loss": 0.3439, "num_tokens": 3023430919.0, "step": 4774 }, { "epoch": 0.5646210240037839, "grad_norm": 0.13217253983020782, "learning_rate": 2.8691011159512174e-05, "loss": 0.3309, "num_tokens": 3024066579.0, "step": 4775 }, { "epoch": 0.5647392692444129, "grad_norm": 0.12652873992919922, "learning_rate": 2.868080352406445e-05, "loss": 0.2945, "num_tokens": 3024703806.0, "step": 4776 }, { "epoch": 0.564857514485042, "grad_norm": 0.13296230137348175, "learning_rate": 2.8670596522132263e-05, "loss": 0.3272, "num_tokens": 3025322896.0, "step": 4777 }, { "epoch": 0.564975759725671, "grad_norm": 0.14832791686058044, "learning_rate": 2.8660390155212733e-05, "loss": 0.3772, "num_tokens": 3025962712.0, "step": 4778 }, { "epoch": 0.5650940049663001, "grad_norm": 0.143338143825531, "learning_rate": 2.8650184424802864e-05, "loss": 0.3272, "num_tokens": 3026597986.0, "step": 4779 }, { "epoch": 0.5652122502069292, "grad_norm": 0.1341952681541443, "learning_rate": 2.8639979332399587e-05, "loss": 0.3292, "num_tokens": 3027234046.0, "step": 4780 }, { "epoch": 0.5653304954475582, "grad_norm": 0.12003029882907867, "learning_rate": 2.8629774879499717e-05, "loss": 0.2899, "num_tokens": 3027873357.0, "step": 4781 }, { "epoch": 0.5654487406881873, "grad_norm": 0.15666699409484863, "learning_rate": 2.861957106759999e-05, "loss": 0.3451, "num_tokens": 3028510051.0, "step": 4782 }, { "epoch": 0.5655669859288164, "grad_norm": 0.14598889648914337, "learning_rate": 2.8609367898197048e-05, "loss": 0.3452, "num_tokens": 3029142839.0, "step": 4783 }, { "epoch": 0.5656852311694455, "grad_norm": 0.13970455527305603, "learning_rate": 2.859916537278742e-05, "loss": 0.3568, "num_tokens": 3029778877.0, "step": 4784 }, { "epoch": 0.5658034764100744, "grad_norm": 0.14531385898590088, "learning_rate": 2.858896349286756e-05, "loss": 0.3493, "num_tokens": 3030399555.0, "step": 4785 }, { "epoch": 0.5659217216507035, "grad_norm": 0.1340916007757187, "learning_rate": 2.8578762259933843e-05, "loss": 0.3149, "num_tokens": 3031036283.0, "step": 4786 }, { "epoch": 0.5660399668913326, "grad_norm": 0.15150173008441925, "learning_rate": 2.856856167548251e-05, "loss": 0.3541, "num_tokens": 3031673829.0, "step": 4787 }, { "epoch": 0.5661582121319617, "grad_norm": 0.14587737619876862, "learning_rate": 2.8558361741009732e-05, "loss": 0.3842, "num_tokens": 3032309314.0, "step": 4788 }, { "epoch": 0.5662764573725908, "grad_norm": 0.1365949660539627, "learning_rate": 2.854816245801157e-05, "loss": 0.3318, "num_tokens": 3032944322.0, "step": 4789 }, { "epoch": 0.5663947026132198, "grad_norm": 0.1404065191745758, "learning_rate": 2.8537963827984005e-05, "loss": 0.3268, "num_tokens": 3033582564.0, "step": 4790 }, { "epoch": 0.5665129478538489, "grad_norm": 0.16226065158843994, "learning_rate": 2.8527765852422915e-05, "loss": 0.3436, "num_tokens": 3034216233.0, "step": 4791 }, { "epoch": 0.566631193094478, "grad_norm": 0.1445150077342987, "learning_rate": 2.851756853282407e-05, "loss": 0.3438, "num_tokens": 3034852139.0, "step": 4792 }, { "epoch": 0.566749438335107, "grad_norm": 0.1415191888809204, "learning_rate": 2.8507371870683186e-05, "loss": 0.3281, "num_tokens": 3035482708.0, "step": 4793 }, { "epoch": 0.566867683575736, "grad_norm": 0.14420393109321594, "learning_rate": 2.8497175867495814e-05, "loss": 0.3632, "num_tokens": 3036116067.0, "step": 4794 }, { "epoch": 0.5669859288163651, "grad_norm": 0.14180314540863037, "learning_rate": 2.8486980524757482e-05, "loss": 0.335, "num_tokens": 3036749219.0, "step": 4795 }, { "epoch": 0.5671041740569942, "grad_norm": 0.13119079172611237, "learning_rate": 2.847678584396355e-05, "loss": 0.3419, "num_tokens": 3037383464.0, "step": 4796 }, { "epoch": 0.5672224192976233, "grad_norm": 0.12735165655612946, "learning_rate": 2.8466591826609348e-05, "loss": 0.313, "num_tokens": 3038015374.0, "step": 4797 }, { "epoch": 0.5673406645382524, "grad_norm": 0.13023538887500763, "learning_rate": 2.8456398474190056e-05, "loss": 0.3489, "num_tokens": 3038652443.0, "step": 4798 }, { "epoch": 0.5674589097788814, "grad_norm": 0.12888909876346588, "learning_rate": 2.8446205788200792e-05, "loss": 0.3658, "num_tokens": 3039283481.0, "step": 4799 }, { "epoch": 0.5675771550195104, "grad_norm": 0.13038040697574615, "learning_rate": 2.8436013770136558e-05, "loss": 0.3248, "num_tokens": 3039922368.0, "step": 4800 }, { "epoch": 0.5676954002601395, "grad_norm": 0.1572960466146469, "learning_rate": 2.8425822421492256e-05, "loss": 0.3895, "num_tokens": 3040555460.0, "step": 4801 }, { "epoch": 0.5678136455007686, "grad_norm": 0.11915724724531174, "learning_rate": 2.841563174376271e-05, "loss": 0.357, "num_tokens": 3041188802.0, "step": 4802 }, { "epoch": 0.5679318907413976, "grad_norm": 0.1456926167011261, "learning_rate": 2.8405441738442615e-05, "loss": 0.3722, "num_tokens": 3041803532.0, "step": 4803 }, { "epoch": 0.5680501359820267, "grad_norm": 0.12876029312610626, "learning_rate": 2.839525240702659e-05, "loss": 0.2948, "num_tokens": 3042438553.0, "step": 4804 }, { "epoch": 0.5681683812226558, "grad_norm": 0.1336798071861267, "learning_rate": 2.8385063751009144e-05, "loss": 0.3472, "num_tokens": 3043063353.0, "step": 4805 }, { "epoch": 0.5682866264632849, "grad_norm": 0.13073159754276276, "learning_rate": 2.8374875771884706e-05, "loss": 0.3458, "num_tokens": 3043692672.0, "step": 4806 }, { "epoch": 0.568404871703914, "grad_norm": 0.1369251012802124, "learning_rate": 2.8364688471147586e-05, "loss": 0.3363, "num_tokens": 3044329706.0, "step": 4807 }, { "epoch": 0.5685231169445429, "grad_norm": 0.12570255994796753, "learning_rate": 2.835450185029199e-05, "loss": 0.285, "num_tokens": 3044958452.0, "step": 4808 }, { "epoch": 0.568641362185172, "grad_norm": 0.14240115880966187, "learning_rate": 2.834431591081205e-05, "loss": 0.3196, "num_tokens": 3045587184.0, "step": 4809 }, { "epoch": 0.5687596074258011, "grad_norm": 0.12686067819595337, "learning_rate": 2.8334130654201776e-05, "loss": 0.3242, "num_tokens": 3046219861.0, "step": 4810 }, { "epoch": 0.5688778526664302, "grad_norm": 0.13335004448890686, "learning_rate": 2.8323946081955084e-05, "loss": 0.3619, "num_tokens": 3046852675.0, "step": 4811 }, { "epoch": 0.5689960979070593, "grad_norm": 0.1289409101009369, "learning_rate": 2.831376219556578e-05, "loss": 0.3156, "num_tokens": 3047492266.0, "step": 4812 }, { "epoch": 0.5691143431476883, "grad_norm": 0.14068540930747986, "learning_rate": 2.830357899652759e-05, "loss": 0.3727, "num_tokens": 3048128044.0, "step": 4813 }, { "epoch": 0.5692325883883174, "grad_norm": 0.12587130069732666, "learning_rate": 2.8293396486334146e-05, "loss": 0.3179, "num_tokens": 3048761753.0, "step": 4814 }, { "epoch": 0.5693508336289465, "grad_norm": 0.14222373068332672, "learning_rate": 2.8283214666478922e-05, "loss": 0.3755, "num_tokens": 3049401340.0, "step": 4815 }, { "epoch": 0.5694690788695755, "grad_norm": 0.1282602697610855, "learning_rate": 2.8273033538455364e-05, "loss": 0.2922, "num_tokens": 3050038283.0, "step": 4816 }, { "epoch": 0.5695873241102045, "grad_norm": 0.12231062352657318, "learning_rate": 2.8262853103756764e-05, "loss": 0.3116, "num_tokens": 3050667589.0, "step": 4817 }, { "epoch": 0.5697055693508336, "grad_norm": 0.13159188628196716, "learning_rate": 2.825267336387634e-05, "loss": 0.3093, "num_tokens": 3051302564.0, "step": 4818 }, { "epoch": 0.5698238145914627, "grad_norm": 0.13908398151397705, "learning_rate": 2.8242494320307196e-05, "loss": 0.3548, "num_tokens": 3051936573.0, "step": 4819 }, { "epoch": 0.5699420598320918, "grad_norm": 0.13234072923660278, "learning_rate": 2.8232315974542334e-05, "loss": 0.36, "num_tokens": 3052573480.0, "step": 4820 }, { "epoch": 0.5700603050727209, "grad_norm": 0.12214532494544983, "learning_rate": 2.8222138328074664e-05, "loss": 0.3314, "num_tokens": 3053207957.0, "step": 4821 }, { "epoch": 0.5701785503133499, "grad_norm": 0.13618502020835876, "learning_rate": 2.821196138239698e-05, "loss": 0.3502, "num_tokens": 3053840763.0, "step": 4822 }, { "epoch": 0.5702967955539789, "grad_norm": 0.14503705501556396, "learning_rate": 2.8201785139001987e-05, "loss": 0.3448, "num_tokens": 3054474615.0, "step": 4823 }, { "epoch": 0.570415040794608, "grad_norm": 0.13278581202030182, "learning_rate": 2.819160959938226e-05, "loss": 0.3464, "num_tokens": 3055113369.0, "step": 4824 }, { "epoch": 0.5705332860352371, "grad_norm": 0.1336623877286911, "learning_rate": 2.8181434765030333e-05, "loss": 0.3273, "num_tokens": 3055749774.0, "step": 4825 }, { "epoch": 0.5706515312758661, "grad_norm": 0.13133534789085388, "learning_rate": 2.8171260637438534e-05, "loss": 0.3087, "num_tokens": 3056388438.0, "step": 4826 }, { "epoch": 0.5707697765164952, "grad_norm": 0.1376994103193283, "learning_rate": 2.816108721809919e-05, "loss": 0.3546, "num_tokens": 3057021503.0, "step": 4827 }, { "epoch": 0.5708880217571243, "grad_norm": 0.1403975784778595, "learning_rate": 2.8150914508504483e-05, "loss": 0.3439, "num_tokens": 3057653478.0, "step": 4828 }, { "epoch": 0.5710062669977534, "grad_norm": 0.1268882304430008, "learning_rate": 2.814074251014646e-05, "loss": 0.3057, "num_tokens": 3058291300.0, "step": 4829 }, { "epoch": 0.5711245122383825, "grad_norm": 0.13612399995326996, "learning_rate": 2.8130571224517113e-05, "loss": 0.3471, "num_tokens": 3058924713.0, "step": 4830 }, { "epoch": 0.5712427574790114, "grad_norm": 0.13707154989242554, "learning_rate": 2.8120400653108304e-05, "loss": 0.34, "num_tokens": 3059561495.0, "step": 4831 }, { "epoch": 0.5713610027196405, "grad_norm": 0.13159269094467163, "learning_rate": 2.8110230797411798e-05, "loss": 0.3575, "num_tokens": 3060195610.0, "step": 4832 }, { "epoch": 0.5714792479602696, "grad_norm": 0.12121181935071945, "learning_rate": 2.8100061658919248e-05, "loss": 0.306, "num_tokens": 3060834491.0, "step": 4833 }, { "epoch": 0.5715974932008987, "grad_norm": 0.1309276521205902, "learning_rate": 2.8089893239122206e-05, "loss": 0.3515, "num_tokens": 3061473139.0, "step": 4834 }, { "epoch": 0.5717157384415277, "grad_norm": 0.1230047419667244, "learning_rate": 2.8079725539512123e-05, "loss": 0.3326, "num_tokens": 3062108785.0, "step": 4835 }, { "epoch": 0.5718339836821568, "grad_norm": 0.12972773611545563, "learning_rate": 2.8069558561580342e-05, "loss": 0.3381, "num_tokens": 3062745455.0, "step": 4836 }, { "epoch": 0.5719522289227859, "grad_norm": 0.12409421801567078, "learning_rate": 2.805939230681809e-05, "loss": 0.3159, "num_tokens": 3063381589.0, "step": 4837 }, { "epoch": 0.5720704741634149, "grad_norm": 0.14521381258964539, "learning_rate": 2.8049226776716507e-05, "loss": 0.3758, "num_tokens": 3064012749.0, "step": 4838 }, { "epoch": 0.572188719404044, "grad_norm": 0.14089520275592804, "learning_rate": 2.8039061972766617e-05, "loss": 0.3428, "num_tokens": 3064648736.0, "step": 4839 }, { "epoch": 0.572306964644673, "grad_norm": 0.1315004974603653, "learning_rate": 2.8028897896459316e-05, "loss": 0.3401, "num_tokens": 3065280207.0, "step": 4840 }, { "epoch": 0.5724252098853021, "grad_norm": 0.12384464591741562, "learning_rate": 2.8018734549285423e-05, "loss": 0.3374, "num_tokens": 3065917657.0, "step": 4841 }, { "epoch": 0.5725434551259312, "grad_norm": 0.13768650591373444, "learning_rate": 2.8008571932735672e-05, "loss": 0.369, "num_tokens": 3066552542.0, "step": 4842 }, { "epoch": 0.5726617003665603, "grad_norm": 0.12164103239774704, "learning_rate": 2.7998410048300604e-05, "loss": 0.2992, "num_tokens": 3067180109.0, "step": 4843 }, { "epoch": 0.5727799456071893, "grad_norm": 0.12639504671096802, "learning_rate": 2.7988248897470756e-05, "loss": 0.3392, "num_tokens": 3067809103.0, "step": 4844 }, { "epoch": 0.5728981908478183, "grad_norm": 0.12443830072879791, "learning_rate": 2.7978088481736473e-05, "loss": 0.2869, "num_tokens": 3068440810.0, "step": 4845 }, { "epoch": 0.5730164360884474, "grad_norm": 0.13065537810325623, "learning_rate": 2.796792880258805e-05, "loss": 0.3332, "num_tokens": 3069061799.0, "step": 4846 }, { "epoch": 0.5731346813290765, "grad_norm": 0.12528462707996368, "learning_rate": 2.795776986151565e-05, "loss": 0.3488, "num_tokens": 3069701126.0, "step": 4847 }, { "epoch": 0.5732529265697056, "grad_norm": 0.12407174706459045, "learning_rate": 2.7947611660009323e-05, "loss": 0.3057, "num_tokens": 3070334286.0, "step": 4848 }, { "epoch": 0.5733711718103346, "grad_norm": 0.1288670003414154, "learning_rate": 2.7937454199559013e-05, "loss": 0.341, "num_tokens": 3070964162.0, "step": 4849 }, { "epoch": 0.5734894170509637, "grad_norm": 0.12044865638017654, "learning_rate": 2.7927297481654567e-05, "loss": 0.3395, "num_tokens": 3071597600.0, "step": 4850 }, { "epoch": 0.5736076622915928, "grad_norm": 0.13069286942481995, "learning_rate": 2.7917141507785714e-05, "loss": 0.3798, "num_tokens": 3072235275.0, "step": 4851 }, { "epoch": 0.5737259075322219, "grad_norm": 0.1297805905342102, "learning_rate": 2.7906986279442075e-05, "loss": 0.3975, "num_tokens": 3072869353.0, "step": 4852 }, { "epoch": 0.573844152772851, "grad_norm": 0.11915034055709839, "learning_rate": 2.789683179811316e-05, "loss": 0.3211, "num_tokens": 3073502185.0, "step": 4853 }, { "epoch": 0.5739623980134799, "grad_norm": 0.12418359518051147, "learning_rate": 2.7886678065288373e-05, "loss": 0.2969, "num_tokens": 3074137714.0, "step": 4854 }, { "epoch": 0.574080643254109, "grad_norm": 0.11765977740287781, "learning_rate": 2.7876525082457016e-05, "loss": 0.2985, "num_tokens": 3074771326.0, "step": 4855 }, { "epoch": 0.5741988884947381, "grad_norm": 0.12750069797039032, "learning_rate": 2.7866372851108244e-05, "loss": 0.3379, "num_tokens": 3075404223.0, "step": 4856 }, { "epoch": 0.5743171337353672, "grad_norm": 0.11935320496559143, "learning_rate": 2.7856221372731157e-05, "loss": 0.3123, "num_tokens": 3076037574.0, "step": 4857 }, { "epoch": 0.5744353789759962, "grad_norm": 0.13127093017101288, "learning_rate": 2.7846070648814717e-05, "loss": 0.3866, "num_tokens": 3076674008.0, "step": 4858 }, { "epoch": 0.5745536242166253, "grad_norm": 0.12393956631422043, "learning_rate": 2.783592068084776e-05, "loss": 0.3349, "num_tokens": 3077311977.0, "step": 4859 }, { "epoch": 0.5746718694572543, "grad_norm": 0.12561221420764923, "learning_rate": 2.7825771470319036e-05, "loss": 0.3464, "num_tokens": 3077946693.0, "step": 4860 }, { "epoch": 0.5747901146978834, "grad_norm": 0.12446265667676926, "learning_rate": 2.7815623018717163e-05, "loss": 0.3251, "num_tokens": 3078581843.0, "step": 4861 }, { "epoch": 0.5749083599385125, "grad_norm": 0.12924252450466156, "learning_rate": 2.7805475327530678e-05, "loss": 0.3503, "num_tokens": 3079218183.0, "step": 4862 }, { "epoch": 0.5750266051791415, "grad_norm": 0.12878188490867615, "learning_rate": 2.779532839824797e-05, "loss": 0.3326, "num_tokens": 3079857460.0, "step": 4863 }, { "epoch": 0.5751448504197706, "grad_norm": 0.15288905799388885, "learning_rate": 2.7785182232357336e-05, "loss": 0.3789, "num_tokens": 3080489891.0, "step": 4864 }, { "epoch": 0.5752630956603997, "grad_norm": 0.1299024075269699, "learning_rate": 2.777503683134697e-05, "loss": 0.3125, "num_tokens": 3081121677.0, "step": 4865 }, { "epoch": 0.5753813409010288, "grad_norm": 0.12932272255420685, "learning_rate": 2.776489219670493e-05, "loss": 0.3658, "num_tokens": 3081755898.0, "step": 4866 }, { "epoch": 0.5754995861416577, "grad_norm": 0.12484418600797653, "learning_rate": 2.7754748329919192e-05, "loss": 0.3035, "num_tokens": 3082389382.0, "step": 4867 }, { "epoch": 0.5756178313822868, "grad_norm": 0.14563296735286713, "learning_rate": 2.7744605232477576e-05, "loss": 0.379, "num_tokens": 3083028918.0, "step": 4868 }, { "epoch": 0.5757360766229159, "grad_norm": 0.1261821836233139, "learning_rate": 2.7734462905867842e-05, "loss": 0.333, "num_tokens": 3083665469.0, "step": 4869 }, { "epoch": 0.575854321863545, "grad_norm": 0.1288059502840042, "learning_rate": 2.7724321351577585e-05, "loss": 0.3243, "num_tokens": 3084299445.0, "step": 4870 }, { "epoch": 0.5759725671041741, "grad_norm": 0.14175377786159515, "learning_rate": 2.7714180571094324e-05, "loss": 0.3507, "num_tokens": 3084935150.0, "step": 4871 }, { "epoch": 0.5760908123448031, "grad_norm": 0.13915686309337616, "learning_rate": 2.7704040565905454e-05, "loss": 0.3694, "num_tokens": 3085572391.0, "step": 4872 }, { "epoch": 0.5762090575854322, "grad_norm": 0.13372111320495605, "learning_rate": 2.7693901337498247e-05, "loss": 0.3288, "num_tokens": 3086208662.0, "step": 4873 }, { "epoch": 0.5763273028260613, "grad_norm": 0.13713672757148743, "learning_rate": 2.768376288735988e-05, "loss": 0.3352, "num_tokens": 3086841048.0, "step": 4874 }, { "epoch": 0.5764455480666903, "grad_norm": 0.12931889295578003, "learning_rate": 2.767362521697738e-05, "loss": 0.3305, "num_tokens": 3087480323.0, "step": 4875 }, { "epoch": 0.5765637933073193, "grad_norm": 0.14369727671146393, "learning_rate": 2.766348832783772e-05, "loss": 0.3911, "num_tokens": 3088110709.0, "step": 4876 }, { "epoch": 0.5766820385479484, "grad_norm": 0.14111854135990143, "learning_rate": 2.7653352221427684e-05, "loss": 0.3667, "num_tokens": 3088748167.0, "step": 4877 }, { "epoch": 0.5768002837885775, "grad_norm": 0.13891051709651947, "learning_rate": 2.7643216899234e-05, "loss": 0.333, "num_tokens": 3089387672.0, "step": 4878 }, { "epoch": 0.5769185290292066, "grad_norm": 0.1371324360370636, "learning_rate": 2.763308236274326e-05, "loss": 0.3355, "num_tokens": 3090025616.0, "step": 4879 }, { "epoch": 0.5770367742698357, "grad_norm": 0.12909723818302155, "learning_rate": 2.7622948613441942e-05, "loss": 0.3053, "num_tokens": 3090660081.0, "step": 4880 }, { "epoch": 0.5771550195104647, "grad_norm": 0.13537858426570892, "learning_rate": 2.7612815652816406e-05, "loss": 0.3195, "num_tokens": 3091296005.0, "step": 4881 }, { "epoch": 0.5772732647510938, "grad_norm": 0.42363205552101135, "learning_rate": 2.7602683482352886e-05, "loss": 0.3802, "num_tokens": 3091903102.0, "step": 4882 }, { "epoch": 0.5773915099917228, "grad_norm": 0.1347428411245346, "learning_rate": 2.7592552103537526e-05, "loss": 0.3239, "num_tokens": 3092533668.0, "step": 4883 }, { "epoch": 0.5775097552323519, "grad_norm": 0.12768055498600006, "learning_rate": 2.7582421517856326e-05, "loss": 0.3186, "num_tokens": 3093169558.0, "step": 4884 }, { "epoch": 0.577628000472981, "grad_norm": 0.15951009094715118, "learning_rate": 2.7572291726795203e-05, "loss": 0.3608, "num_tokens": 3093805119.0, "step": 4885 }, { "epoch": 0.57774624571361, "grad_norm": 0.13202936947345734, "learning_rate": 2.7562162731839934e-05, "loss": 0.3131, "num_tokens": 3094430592.0, "step": 4886 }, { "epoch": 0.5778644909542391, "grad_norm": 0.12263690680265427, "learning_rate": 2.755203453447617e-05, "loss": 0.3096, "num_tokens": 3095063746.0, "step": 4887 }, { "epoch": 0.5779827361948682, "grad_norm": 0.1343851387500763, "learning_rate": 2.754190713618947e-05, "loss": 0.3613, "num_tokens": 3095700365.0, "step": 4888 }, { "epoch": 0.5781009814354973, "grad_norm": 0.13119418919086456, "learning_rate": 2.7531780538465253e-05, "loss": 0.337, "num_tokens": 3096334430.0, "step": 4889 }, { "epoch": 0.5782192266761262, "grad_norm": 0.13180528581142426, "learning_rate": 2.7521654742788845e-05, "loss": 0.3579, "num_tokens": 3096971720.0, "step": 4890 }, { "epoch": 0.5783374719167553, "grad_norm": 0.1438124030828476, "learning_rate": 2.751152975064543e-05, "loss": 0.3662, "num_tokens": 3097607504.0, "step": 4891 }, { "epoch": 0.5784557171573844, "grad_norm": 0.12804941833019257, "learning_rate": 2.7501405563520084e-05, "loss": 0.3304, "num_tokens": 3098244477.0, "step": 4892 }, { "epoch": 0.5785739623980135, "grad_norm": 0.13856256008148193, "learning_rate": 2.7491282182897784e-05, "loss": 0.3657, "num_tokens": 3098880209.0, "step": 4893 }, { "epoch": 0.5786922076386426, "grad_norm": 0.12783263623714447, "learning_rate": 2.748115961026335e-05, "loss": 0.3555, "num_tokens": 3099513933.0, "step": 4894 }, { "epoch": 0.5788104528792716, "grad_norm": 0.13598720729351044, "learning_rate": 2.747103784710152e-05, "loss": 0.3291, "num_tokens": 3100150361.0, "step": 4895 }, { "epoch": 0.5789286981199007, "grad_norm": 0.14102181792259216, "learning_rate": 2.7460916894896882e-05, "loss": 0.3594, "num_tokens": 3100786025.0, "step": 4896 }, { "epoch": 0.5790469433605298, "grad_norm": 0.15792040526866913, "learning_rate": 2.745079675513394e-05, "loss": 0.3841, "num_tokens": 3101420214.0, "step": 4897 }, { "epoch": 0.5791651886011588, "grad_norm": 0.14579033851623535, "learning_rate": 2.7440677429297033e-05, "loss": 0.3317, "num_tokens": 3102059580.0, "step": 4898 }, { "epoch": 0.5792834338417878, "grad_norm": 0.13739870488643646, "learning_rate": 2.743055891887043e-05, "loss": 0.3388, "num_tokens": 3102697836.0, "step": 4899 }, { "epoch": 0.5794016790824169, "grad_norm": 0.1328890025615692, "learning_rate": 2.742044122533825e-05, "loss": 0.3128, "num_tokens": 3103333081.0, "step": 4900 }, { "epoch": 0.579519924323046, "grad_norm": 0.13614985346794128, "learning_rate": 2.7410324350184505e-05, "loss": 0.3148, "num_tokens": 3103969275.0, "step": 4901 }, { "epoch": 0.5796381695636751, "grad_norm": 0.14370904862880707, "learning_rate": 2.7400208294893068e-05, "loss": 0.3848, "num_tokens": 3104605397.0, "step": 4902 }, { "epoch": 0.5797564148043042, "grad_norm": 0.1270517259836197, "learning_rate": 2.73900930609477e-05, "loss": 0.3186, "num_tokens": 3105236530.0, "step": 4903 }, { "epoch": 0.5798746600449332, "grad_norm": 0.13519485294818878, "learning_rate": 2.7379978649832088e-05, "loss": 0.3472, "num_tokens": 3105869621.0, "step": 4904 }, { "epoch": 0.5799929052855622, "grad_norm": 0.14802402257919312, "learning_rate": 2.736986506302971e-05, "loss": 0.357, "num_tokens": 3106505069.0, "step": 4905 }, { "epoch": 0.5801111505261913, "grad_norm": 0.13162340223789215, "learning_rate": 2.7359752302023994e-05, "loss": 0.3453, "num_tokens": 3107137661.0, "step": 4906 }, { "epoch": 0.5802293957668204, "grad_norm": 0.1362345665693283, "learning_rate": 2.734964036829823e-05, "loss": 0.3635, "num_tokens": 3107771510.0, "step": 4907 }, { "epoch": 0.5803476410074494, "grad_norm": 0.13036547601222992, "learning_rate": 2.733952926333555e-05, "loss": 0.3289, "num_tokens": 3108410922.0, "step": 4908 }, { "epoch": 0.5804658862480785, "grad_norm": 0.11769413203001022, "learning_rate": 2.7329418988619023e-05, "loss": 0.3295, "num_tokens": 3109046912.0, "step": 4909 }, { "epoch": 0.5805841314887076, "grad_norm": 0.1390754133462906, "learning_rate": 2.7319309545631556e-05, "loss": 0.3649, "num_tokens": 3109681584.0, "step": 4910 }, { "epoch": 0.5807023767293367, "grad_norm": 0.13335414230823517, "learning_rate": 2.7309200935855945e-05, "loss": 0.3712, "num_tokens": 3110321294.0, "step": 4911 }, { "epoch": 0.5808206219699658, "grad_norm": 0.12057428807020187, "learning_rate": 2.729909316077487e-05, "loss": 0.3121, "num_tokens": 3110957569.0, "step": 4912 }, { "epoch": 0.5809388672105947, "grad_norm": 0.13167425990104675, "learning_rate": 2.728898622187087e-05, "loss": 0.3626, "num_tokens": 3111596649.0, "step": 4913 }, { "epoch": 0.5810571124512238, "grad_norm": 0.13587965071201324, "learning_rate": 2.7278880120626385e-05, "loss": 0.3164, "num_tokens": 3112231478.0, "step": 4914 }, { "epoch": 0.5811753576918529, "grad_norm": 0.13638092577457428, "learning_rate": 2.726877485852372e-05, "loss": 0.3583, "num_tokens": 3112868692.0, "step": 4915 }, { "epoch": 0.581293602932482, "grad_norm": 0.13706693053245544, "learning_rate": 2.7258670437045062e-05, "loss": 0.3473, "num_tokens": 3113508273.0, "step": 4916 }, { "epoch": 0.581411848173111, "grad_norm": 0.12911364436149597, "learning_rate": 2.7248566857672466e-05, "loss": 0.3115, "num_tokens": 3114136155.0, "step": 4917 }, { "epoch": 0.5815300934137401, "grad_norm": 0.11837147921323776, "learning_rate": 2.7238464121887867e-05, "loss": 0.3076, "num_tokens": 3114770160.0, "step": 4918 }, { "epoch": 0.5816483386543692, "grad_norm": 0.1280023753643036, "learning_rate": 2.7228362231173077e-05, "loss": 0.3278, "num_tokens": 3115407197.0, "step": 4919 }, { "epoch": 0.5817665838949982, "grad_norm": 0.1379202902317047, "learning_rate": 2.7218261187009787e-05, "loss": 0.3215, "num_tokens": 3116038507.0, "step": 4920 }, { "epoch": 0.5818848291356273, "grad_norm": 0.13650895655155182, "learning_rate": 2.720816099087957e-05, "loss": 0.3585, "num_tokens": 3116666692.0, "step": 4921 }, { "epoch": 0.5820030743762563, "grad_norm": 0.1368371546268463, "learning_rate": 2.7198061644263844e-05, "loss": 0.3615, "num_tokens": 3117306204.0, "step": 4922 }, { "epoch": 0.5821213196168854, "grad_norm": 0.13002456724643707, "learning_rate": 2.718796314864396e-05, "loss": 0.3427, "num_tokens": 3117940992.0, "step": 4923 }, { "epoch": 0.5822395648575145, "grad_norm": 0.13257886469364166, "learning_rate": 2.717786550550106e-05, "loss": 0.3599, "num_tokens": 3118575824.0, "step": 4924 }, { "epoch": 0.5823578100981436, "grad_norm": 0.13295061886310577, "learning_rate": 2.716776871631626e-05, "loss": 0.3459, "num_tokens": 3119213302.0, "step": 4925 }, { "epoch": 0.5824760553387727, "grad_norm": 0.14778810739517212, "learning_rate": 2.7157672782570463e-05, "loss": 0.3531, "num_tokens": 3119850733.0, "step": 4926 }, { "epoch": 0.5825943005794016, "grad_norm": 0.13112978637218475, "learning_rate": 2.7147577705744512e-05, "loss": 0.3328, "num_tokens": 3120488160.0, "step": 4927 }, { "epoch": 0.5827125458200307, "grad_norm": 0.1317385584115982, "learning_rate": 2.713748348731907e-05, "loss": 0.3146, "num_tokens": 3121126054.0, "step": 4928 }, { "epoch": 0.5828307910606598, "grad_norm": 0.13345059752464294, "learning_rate": 2.7127390128774717e-05, "loss": 0.3205, "num_tokens": 3121758255.0, "step": 4929 }, { "epoch": 0.5829490363012889, "grad_norm": 0.13584038615226746, "learning_rate": 2.711729763159189e-05, "loss": 0.3208, "num_tokens": 3122391977.0, "step": 4930 }, { "epoch": 0.5830672815419179, "grad_norm": 0.12380063533782959, "learning_rate": 2.7107205997250897e-05, "loss": 0.314, "num_tokens": 3123019910.0, "step": 4931 }, { "epoch": 0.583185526782547, "grad_norm": 0.13450871407985687, "learning_rate": 2.7097115227231922e-05, "loss": 0.3427, "num_tokens": 3123651031.0, "step": 4932 }, { "epoch": 0.5833037720231761, "grad_norm": 0.14210808277130127, "learning_rate": 2.7087025323015013e-05, "loss": 0.3577, "num_tokens": 3124284217.0, "step": 4933 }, { "epoch": 0.5834220172638052, "grad_norm": 0.1407274454832077, "learning_rate": 2.707693628608013e-05, "loss": 0.3147, "num_tokens": 3124916134.0, "step": 4934 }, { "epoch": 0.5835402625044342, "grad_norm": 0.15880313515663147, "learning_rate": 2.7066848117907035e-05, "loss": 0.3501, "num_tokens": 3125549255.0, "step": 4935 }, { "epoch": 0.5836585077450632, "grad_norm": 0.13399799168109894, "learning_rate": 2.7056760819975434e-05, "loss": 0.3078, "num_tokens": 3126148022.0, "step": 4936 }, { "epoch": 0.5837767529856923, "grad_norm": 0.12085355818271637, "learning_rate": 2.704667439376487e-05, "loss": 0.3375, "num_tokens": 3126784285.0, "step": 4937 }, { "epoch": 0.5838949982263214, "grad_norm": 0.12426553666591644, "learning_rate": 2.7036588840754758e-05, "loss": 0.3299, "num_tokens": 3127414409.0, "step": 4938 }, { "epoch": 0.5840132434669505, "grad_norm": 0.1321752816438675, "learning_rate": 2.7026504162424396e-05, "loss": 0.3193, "num_tokens": 3128052652.0, "step": 4939 }, { "epoch": 0.5841314887075795, "grad_norm": 0.15325699746608734, "learning_rate": 2.7016420360252944e-05, "loss": 0.3187, "num_tokens": 3128686856.0, "step": 4940 }, { "epoch": 0.5842497339482086, "grad_norm": 0.13514329493045807, "learning_rate": 2.7006337435719437e-05, "loss": 0.3513, "num_tokens": 3129318856.0, "step": 4941 }, { "epoch": 0.5843679791888376, "grad_norm": 0.13028091192245483, "learning_rate": 2.6996255390302785e-05, "loss": 0.314, "num_tokens": 3129954050.0, "step": 4942 }, { "epoch": 0.5844862244294667, "grad_norm": 0.14671501517295837, "learning_rate": 2.6986174225481754e-05, "loss": 0.3801, "num_tokens": 3130552215.0, "step": 4943 }, { "epoch": 0.5846044696700958, "grad_norm": 0.14126726984977722, "learning_rate": 2.6976093942735024e-05, "loss": 0.366, "num_tokens": 3131187535.0, "step": 4944 }, { "epoch": 0.5847227149107248, "grad_norm": 0.13243252038955688, "learning_rate": 2.6966014543541073e-05, "loss": 0.3293, "num_tokens": 3131824361.0, "step": 4945 }, { "epoch": 0.5848409601513539, "grad_norm": 0.12733623385429382, "learning_rate": 2.695593602937833e-05, "loss": 0.3282, "num_tokens": 3132459377.0, "step": 4946 }, { "epoch": 0.584959205391983, "grad_norm": 0.1315654218196869, "learning_rate": 2.6945858401725024e-05, "loss": 0.3544, "num_tokens": 3133097207.0, "step": 4947 }, { "epoch": 0.5850774506326121, "grad_norm": 0.13287466764450073, "learning_rate": 2.693578166205931e-05, "loss": 0.3395, "num_tokens": 3133726399.0, "step": 4948 }, { "epoch": 0.585195695873241, "grad_norm": 0.12637192010879517, "learning_rate": 2.6925705811859158e-05, "loss": 0.3466, "num_tokens": 3134363799.0, "step": 4949 }, { "epoch": 0.5853139411138701, "grad_norm": 0.14394859969615936, "learning_rate": 2.6915630852602466e-05, "loss": 0.3739, "num_tokens": 3134996393.0, "step": 4950 }, { "epoch": 0.5854321863544992, "grad_norm": 0.12379495054483414, "learning_rate": 2.690555678576696e-05, "loss": 0.2762, "num_tokens": 3135633192.0, "step": 4951 }, { "epoch": 0.5855504315951283, "grad_norm": 0.14735004305839539, "learning_rate": 2.6895483612830245e-05, "loss": 0.3863, "num_tokens": 3136262823.0, "step": 4952 }, { "epoch": 0.5856686768357574, "grad_norm": 0.13880063593387604, "learning_rate": 2.6885411335269808e-05, "loss": 0.3628, "num_tokens": 3136900141.0, "step": 4953 }, { "epoch": 0.5857869220763864, "grad_norm": 0.13745932281017303, "learning_rate": 2.6875339954562977e-05, "loss": 0.345, "num_tokens": 3137511549.0, "step": 4954 }, { "epoch": 0.5859051673170155, "grad_norm": 0.13085809350013733, "learning_rate": 2.6865269472186997e-05, "loss": 0.3273, "num_tokens": 3138142740.0, "step": 4955 }, { "epoch": 0.5860234125576446, "grad_norm": 0.15091407299041748, "learning_rate": 2.6855199889618905e-05, "loss": 0.3459, "num_tokens": 3138776101.0, "step": 4956 }, { "epoch": 0.5861416577982737, "grad_norm": 0.12276977300643921, "learning_rate": 2.6845131208335685e-05, "loss": 0.3299, "num_tokens": 3139409273.0, "step": 4957 }, { "epoch": 0.5862599030389026, "grad_norm": 0.14980152249336243, "learning_rate": 2.6835063429814152e-05, "loss": 0.361, "num_tokens": 3140037355.0, "step": 4958 }, { "epoch": 0.5863781482795317, "grad_norm": 0.1409127414226532, "learning_rate": 2.682499655553098e-05, "loss": 0.3212, "num_tokens": 3140667641.0, "step": 4959 }, { "epoch": 0.5864963935201608, "grad_norm": 0.1572718769311905, "learning_rate": 2.6814930586962728e-05, "loss": 0.3768, "num_tokens": 3141298885.0, "step": 4960 }, { "epoch": 0.5866146387607899, "grad_norm": 0.1265023648738861, "learning_rate": 2.680486552558581e-05, "loss": 0.3184, "num_tokens": 3141924395.0, "step": 4961 }, { "epoch": 0.586732884001419, "grad_norm": 0.12826286256313324, "learning_rate": 2.679480137287652e-05, "loss": 0.353, "num_tokens": 3142557170.0, "step": 4962 }, { "epoch": 0.586851129242048, "grad_norm": 0.1363903433084488, "learning_rate": 2.678473813031101e-05, "loss": 0.3435, "num_tokens": 3143190719.0, "step": 4963 }, { "epoch": 0.586969374482677, "grad_norm": 0.1559261977672577, "learning_rate": 2.6774675799365285e-05, "loss": 0.3696, "num_tokens": 3143826656.0, "step": 4964 }, { "epoch": 0.5870876197233061, "grad_norm": 0.13685084879398346, "learning_rate": 2.6764614381515253e-05, "loss": 0.3424, "num_tokens": 3144466189.0, "step": 4965 }, { "epoch": 0.5872058649639352, "grad_norm": 0.1287839263677597, "learning_rate": 2.675455387823666e-05, "loss": 0.337, "num_tokens": 3145102275.0, "step": 4966 }, { "epoch": 0.5873241102045643, "grad_norm": 0.1584591269493103, "learning_rate": 2.6744494291005126e-05, "loss": 0.3734, "num_tokens": 3145738220.0, "step": 4967 }, { "epoch": 0.5874423554451933, "grad_norm": 0.15506227314472198, "learning_rate": 2.673443562129613e-05, "loss": 0.3414, "num_tokens": 3146376524.0, "step": 4968 }, { "epoch": 0.5875606006858224, "grad_norm": 0.13725633919239044, "learning_rate": 2.6724377870585025e-05, "loss": 0.3328, "num_tokens": 3147011675.0, "step": 4969 }, { "epoch": 0.5876788459264515, "grad_norm": 0.1283891350030899, "learning_rate": 2.6714321040347015e-05, "loss": 0.3093, "num_tokens": 3147641755.0, "step": 4970 }, { "epoch": 0.5877970911670806, "grad_norm": 0.1451350599527359, "learning_rate": 2.6704265132057185e-05, "loss": 0.3653, "num_tokens": 3148271193.0, "step": 4971 }, { "epoch": 0.5879153364077095, "grad_norm": 0.15056556463241577, "learning_rate": 2.66942101471905e-05, "loss": 0.3452, "num_tokens": 3148902053.0, "step": 4972 }, { "epoch": 0.5880335816483386, "grad_norm": 0.16993173956871033, "learning_rate": 2.6684156087221734e-05, "loss": 0.3346, "num_tokens": 3149538160.0, "step": 4973 }, { "epoch": 0.5881518268889677, "grad_norm": 0.1431465893983841, "learning_rate": 2.667410295362559e-05, "loss": 0.3187, "num_tokens": 3150174962.0, "step": 4974 }, { "epoch": 0.5882700721295968, "grad_norm": 0.1370813399553299, "learning_rate": 2.666405074787658e-05, "loss": 0.3657, "num_tokens": 3150810085.0, "step": 4975 }, { "epoch": 0.5883883173702259, "grad_norm": 0.14625753462314606, "learning_rate": 2.6653999471449133e-05, "loss": 0.3475, "num_tokens": 3151440959.0, "step": 4976 }, { "epoch": 0.5885065626108549, "grad_norm": 0.16104483604431152, "learning_rate": 2.6643949125817483e-05, "loss": 0.3252, "num_tokens": 3152078238.0, "step": 4977 }, { "epoch": 0.588624807851484, "grad_norm": 0.15258730947971344, "learning_rate": 2.6633899712455783e-05, "loss": 0.3557, "num_tokens": 3152710573.0, "step": 4978 }, { "epoch": 0.5887430530921131, "grad_norm": 0.14531481266021729, "learning_rate": 2.6623851232838024e-05, "loss": 0.3485, "num_tokens": 3153338660.0, "step": 4979 }, { "epoch": 0.5888612983327421, "grad_norm": 0.14305394887924194, "learning_rate": 2.6613803688438033e-05, "loss": 0.322, "num_tokens": 3153970090.0, "step": 4980 }, { "epoch": 0.5889795435733711, "grad_norm": 0.15512114763259888, "learning_rate": 2.660375708072956e-05, "loss": 0.3925, "num_tokens": 3154601835.0, "step": 4981 }, { "epoch": 0.5890977888140002, "grad_norm": 0.1287706196308136, "learning_rate": 2.6593711411186163e-05, "loss": 0.324, "num_tokens": 3155235367.0, "step": 4982 }, { "epoch": 0.5892160340546293, "grad_norm": 0.13339164853096008, "learning_rate": 2.658366668128131e-05, "loss": 0.3423, "num_tokens": 3155870980.0, "step": 4983 }, { "epoch": 0.5893342792952584, "grad_norm": 0.13711270689964294, "learning_rate": 2.6573622892488275e-05, "loss": 0.3235, "num_tokens": 3156504579.0, "step": 4984 }, { "epoch": 0.5894525245358875, "grad_norm": 0.13873513042926788, "learning_rate": 2.656358004628024e-05, "loss": 0.3275, "num_tokens": 3157138083.0, "step": 4985 }, { "epoch": 0.5895707697765165, "grad_norm": 0.14465875923633575, "learning_rate": 2.655353814413024e-05, "loss": 0.3525, "num_tokens": 3157775957.0, "step": 4986 }, { "epoch": 0.5896890150171455, "grad_norm": 0.14062054455280304, "learning_rate": 2.654349718751115e-05, "loss": 0.3522, "num_tokens": 3158408690.0, "step": 4987 }, { "epoch": 0.5898072602577746, "grad_norm": 0.13835962116718292, "learning_rate": 2.653345717789574e-05, "loss": 0.3145, "num_tokens": 3159046999.0, "step": 4988 }, { "epoch": 0.5899255054984037, "grad_norm": 0.11916203796863556, "learning_rate": 2.6523418116756594e-05, "loss": 0.2812, "num_tokens": 3159679535.0, "step": 4989 }, { "epoch": 0.5900437507390327, "grad_norm": 0.14218366146087646, "learning_rate": 2.651338000556622e-05, "loss": 0.3876, "num_tokens": 3160316911.0, "step": 4990 }, { "epoch": 0.5901619959796618, "grad_norm": 0.12111547589302063, "learning_rate": 2.6503342845796925e-05, "loss": 0.2999, "num_tokens": 3160952929.0, "step": 4991 }, { "epoch": 0.5902802412202909, "grad_norm": 0.1453481912612915, "learning_rate": 2.649330663892091e-05, "loss": 0.3398, "num_tokens": 3161587989.0, "step": 4992 }, { "epoch": 0.59039848646092, "grad_norm": 0.13178685307502747, "learning_rate": 2.648327138641024e-05, "loss": 0.3313, "num_tokens": 3162219278.0, "step": 4993 }, { "epoch": 0.5905167317015491, "grad_norm": 0.12832584977149963, "learning_rate": 2.6473237089736812e-05, "loss": 0.3012, "num_tokens": 3162856880.0, "step": 4994 }, { "epoch": 0.590634976942178, "grad_norm": 0.13767936825752258, "learning_rate": 2.6463203750372423e-05, "loss": 0.3116, "num_tokens": 3163486609.0, "step": 4995 }, { "epoch": 0.5907532221828071, "grad_norm": 0.14545345306396484, "learning_rate": 2.6453171369788686e-05, "loss": 0.3516, "num_tokens": 3164125158.0, "step": 4996 }, { "epoch": 0.5908714674234362, "grad_norm": 0.13309422135353088, "learning_rate": 2.644313994945711e-05, "loss": 0.3671, "num_tokens": 3164763531.0, "step": 4997 }, { "epoch": 0.5909897126640653, "grad_norm": 0.11672263592481613, "learning_rate": 2.6433109490849027e-05, "loss": 0.2998, "num_tokens": 3165402251.0, "step": 4998 }, { "epoch": 0.5911079579046943, "grad_norm": 0.1273418515920639, "learning_rate": 2.642307999543567e-05, "loss": 0.305, "num_tokens": 3166038526.0, "step": 4999 }, { "epoch": 0.5912262031453234, "grad_norm": 0.13947945833206177, "learning_rate": 2.6413051464688108e-05, "loss": 0.347, "num_tokens": 3166669256.0, "step": 5000 }, { "epoch": 0.5913444483859525, "grad_norm": 0.1379593312740326, "learning_rate": 2.6403023900077248e-05, "loss": 0.3335, "num_tokens": 3167300579.0, "step": 5001 }, { "epoch": 0.5914626936265815, "grad_norm": 0.13181087374687195, "learning_rate": 2.639299730307391e-05, "loss": 0.3568, "num_tokens": 3167939267.0, "step": 5002 }, { "epoch": 0.5915809388672106, "grad_norm": 0.14505653083324432, "learning_rate": 2.63829716751487e-05, "loss": 0.3918, "num_tokens": 3168572135.0, "step": 5003 }, { "epoch": 0.5916991841078396, "grad_norm": 0.11869428306818008, "learning_rate": 2.637294701777217e-05, "loss": 0.3177, "num_tokens": 3169206624.0, "step": 5004 }, { "epoch": 0.5918174293484687, "grad_norm": 0.11590024083852768, "learning_rate": 2.6362923332414622e-05, "loss": 0.34, "num_tokens": 3169843111.0, "step": 5005 }, { "epoch": 0.5919356745890978, "grad_norm": 0.1305178552865982, "learning_rate": 2.6352900620546315e-05, "loss": 0.3232, "num_tokens": 3170481941.0, "step": 5006 }, { "epoch": 0.5920539198297269, "grad_norm": 0.1430405229330063, "learning_rate": 2.6342878883637325e-05, "loss": 0.3556, "num_tokens": 3171118853.0, "step": 5007 }, { "epoch": 0.592172165070356, "grad_norm": 0.12259833514690399, "learning_rate": 2.6332858123157555e-05, "loss": 0.3324, "num_tokens": 3171755680.0, "step": 5008 }, { "epoch": 0.592290410310985, "grad_norm": 0.1334698498249054, "learning_rate": 2.6322838340576832e-05, "loss": 0.3489, "num_tokens": 3172369004.0, "step": 5009 }, { "epoch": 0.592408655551614, "grad_norm": 0.13413065671920776, "learning_rate": 2.631281953736477e-05, "loss": 0.3544, "num_tokens": 3173005485.0, "step": 5010 }, { "epoch": 0.5925269007922431, "grad_norm": 0.12142693996429443, "learning_rate": 2.6302801714990883e-05, "loss": 0.3041, "num_tokens": 3173639587.0, "step": 5011 }, { "epoch": 0.5926451460328722, "grad_norm": 0.1328512579202652, "learning_rate": 2.629278487492453e-05, "loss": 0.3592, "num_tokens": 3174272603.0, "step": 5012 }, { "epoch": 0.5927633912735012, "grad_norm": 0.12398266047239304, "learning_rate": 2.6282769018634922e-05, "loss": 0.3522, "num_tokens": 3174911647.0, "step": 5013 }, { "epoch": 0.5928816365141303, "grad_norm": 0.13155953586101532, "learning_rate": 2.6272754147591128e-05, "loss": 0.3118, "num_tokens": 3175544149.0, "step": 5014 }, { "epoch": 0.5929998817547594, "grad_norm": 0.13229453563690186, "learning_rate": 2.6262740263262076e-05, "loss": 0.3272, "num_tokens": 3176183466.0, "step": 5015 }, { "epoch": 0.5931181269953885, "grad_norm": 0.12863990664482117, "learning_rate": 2.6252727367116552e-05, "loss": 0.3034, "num_tokens": 3176817016.0, "step": 5016 }, { "epoch": 0.5932363722360176, "grad_norm": 0.13365541398525238, "learning_rate": 2.624271546062318e-05, "loss": 0.3544, "num_tokens": 3177448381.0, "step": 5017 }, { "epoch": 0.5933546174766465, "grad_norm": 0.15565578639507294, "learning_rate": 2.6232704545250465e-05, "loss": 0.3124, "num_tokens": 3178084171.0, "step": 5018 }, { "epoch": 0.5934728627172756, "grad_norm": 0.14917327463626862, "learning_rate": 2.622269462246673e-05, "loss": 0.346, "num_tokens": 3178721223.0, "step": 5019 }, { "epoch": 0.5935911079579047, "grad_norm": 0.13597726821899414, "learning_rate": 2.6212685693740194e-05, "loss": 0.3273, "num_tokens": 3179360429.0, "step": 5020 }, { "epoch": 0.5937093531985338, "grad_norm": 0.1398583948612213, "learning_rate": 2.62026777605389e-05, "loss": 0.356, "num_tokens": 3179992524.0, "step": 5021 }, { "epoch": 0.5938275984391628, "grad_norm": 0.12264390289783478, "learning_rate": 2.619267082433076e-05, "loss": 0.2759, "num_tokens": 3180617297.0, "step": 5022 }, { "epoch": 0.5939458436797919, "grad_norm": 0.13109777867794037, "learning_rate": 2.618266488658354e-05, "loss": 0.3392, "num_tokens": 3181255637.0, "step": 5023 }, { "epoch": 0.594064088920421, "grad_norm": 0.1526833325624466, "learning_rate": 2.617265994876484e-05, "loss": 0.3514, "num_tokens": 3181890713.0, "step": 5024 }, { "epoch": 0.59418233416105, "grad_norm": 0.13810202479362488, "learning_rate": 2.6162656012342147e-05, "loss": 0.3429, "num_tokens": 3182529429.0, "step": 5025 }, { "epoch": 0.5943005794016791, "grad_norm": 0.12475105375051498, "learning_rate": 2.6152653078782764e-05, "loss": 0.3405, "num_tokens": 3183162091.0, "step": 5026 }, { "epoch": 0.5944188246423081, "grad_norm": 0.1291893571615219, "learning_rate": 2.6142651149553888e-05, "loss": 0.3205, "num_tokens": 3183800650.0, "step": 5027 }, { "epoch": 0.5945370698829372, "grad_norm": 0.12858067452907562, "learning_rate": 2.6132650226122524e-05, "loss": 0.3172, "num_tokens": 3184431159.0, "step": 5028 }, { "epoch": 0.5946553151235663, "grad_norm": 0.14694294333457947, "learning_rate": 2.612265030995556e-05, "loss": 0.3681, "num_tokens": 3185068282.0, "step": 5029 }, { "epoch": 0.5947735603641954, "grad_norm": 0.13046540319919586, "learning_rate": 2.6112651402519727e-05, "loss": 0.3361, "num_tokens": 3185706120.0, "step": 5030 }, { "epoch": 0.5948918056048244, "grad_norm": 0.1415421962738037, "learning_rate": 2.6102653505281612e-05, "loss": 0.3175, "num_tokens": 3186321425.0, "step": 5031 }, { "epoch": 0.5950100508454534, "grad_norm": 0.1348298341035843, "learning_rate": 2.6092656619707652e-05, "loss": 0.3372, "num_tokens": 3186957213.0, "step": 5032 }, { "epoch": 0.5951282960860825, "grad_norm": 0.13365337252616882, "learning_rate": 2.6082660747264115e-05, "loss": 0.3395, "num_tokens": 3187595847.0, "step": 5033 }, { "epoch": 0.5952465413267116, "grad_norm": 0.1391095519065857, "learning_rate": 2.6072665889417174e-05, "loss": 0.348, "num_tokens": 3188229115.0, "step": 5034 }, { "epoch": 0.5953647865673407, "grad_norm": 0.12245971709489822, "learning_rate": 2.6062672047632786e-05, "loss": 0.3344, "num_tokens": 3188868604.0, "step": 5035 }, { "epoch": 0.5954830318079697, "grad_norm": 0.14646904170513153, "learning_rate": 2.6052679223376806e-05, "loss": 0.3497, "num_tokens": 3189504938.0, "step": 5036 }, { "epoch": 0.5956012770485988, "grad_norm": 0.117867112159729, "learning_rate": 2.6042687418114936e-05, "loss": 0.3211, "num_tokens": 3190142634.0, "step": 5037 }, { "epoch": 0.5957195222892279, "grad_norm": 0.12404090911149979, "learning_rate": 2.6032696633312692e-05, "loss": 0.3299, "num_tokens": 3190776591.0, "step": 5038 }, { "epoch": 0.595837767529857, "grad_norm": 0.1422877311706543, "learning_rate": 2.6022706870435492e-05, "loss": 0.3536, "num_tokens": 3191413601.0, "step": 5039 }, { "epoch": 0.595956012770486, "grad_norm": 0.13432134687900543, "learning_rate": 2.601271813094856e-05, "loss": 0.3478, "num_tokens": 3192052031.0, "step": 5040 }, { "epoch": 0.596074258011115, "grad_norm": 0.13519638776779175, "learning_rate": 2.6002730416317005e-05, "loss": 0.357, "num_tokens": 3192691046.0, "step": 5041 }, { "epoch": 0.5961925032517441, "grad_norm": 0.15101492404937744, "learning_rate": 2.599274372800575e-05, "loss": 0.3566, "num_tokens": 3193310256.0, "step": 5042 }, { "epoch": 0.5963107484923732, "grad_norm": 0.1180817186832428, "learning_rate": 2.5982758067479594e-05, "loss": 0.3143, "num_tokens": 3193945442.0, "step": 5043 }, { "epoch": 0.5964289937330023, "grad_norm": 0.12348415702581406, "learning_rate": 2.5972773436203187e-05, "loss": 0.2946, "num_tokens": 3194581789.0, "step": 5044 }, { "epoch": 0.5965472389736313, "grad_norm": 0.13846060633659363, "learning_rate": 2.5962789835641016e-05, "loss": 0.3478, "num_tokens": 3195214259.0, "step": 5045 }, { "epoch": 0.5966654842142604, "grad_norm": 0.13220107555389404, "learning_rate": 2.5952807267257413e-05, "loss": 0.3475, "num_tokens": 3195847103.0, "step": 5046 }, { "epoch": 0.5967837294548894, "grad_norm": 0.145167276263237, "learning_rate": 2.5942825732516563e-05, "loss": 0.3628, "num_tokens": 3196484739.0, "step": 5047 }, { "epoch": 0.5969019746955185, "grad_norm": 0.12188220769166946, "learning_rate": 2.5932845232882513e-05, "loss": 0.3246, "num_tokens": 3197119935.0, "step": 5048 }, { "epoch": 0.5970202199361476, "grad_norm": 0.13713207840919495, "learning_rate": 2.5922865769819134e-05, "loss": 0.3335, "num_tokens": 3197757529.0, "step": 5049 }, { "epoch": 0.5971384651767766, "grad_norm": 0.12698593735694885, "learning_rate": 2.5912887344790164e-05, "loss": 0.3298, "num_tokens": 3198392942.0, "step": 5050 }, { "epoch": 0.5972567104174057, "grad_norm": 0.14276793599128723, "learning_rate": 2.590290995925919e-05, "loss": 0.3473, "num_tokens": 3199031995.0, "step": 5051 }, { "epoch": 0.5973749556580348, "grad_norm": 0.13087496161460876, "learning_rate": 2.589293361468962e-05, "loss": 0.3301, "num_tokens": 3199666259.0, "step": 5052 }, { "epoch": 0.5974932008986639, "grad_norm": 0.12877961993217468, "learning_rate": 2.5882958312544754e-05, "loss": 0.3588, "num_tokens": 3200305853.0, "step": 5053 }, { "epoch": 0.5976114461392928, "grad_norm": 0.13817144930362701, "learning_rate": 2.5872984054287677e-05, "loss": 0.3191, "num_tokens": 3200944416.0, "step": 5054 }, { "epoch": 0.5977296913799219, "grad_norm": 0.14216451346874237, "learning_rate": 2.58630108413814e-05, "loss": 0.3457, "num_tokens": 3201571105.0, "step": 5055 }, { "epoch": 0.597847936620551, "grad_norm": 0.15668219327926636, "learning_rate": 2.5853038675288708e-05, "loss": 0.3674, "num_tokens": 3202208006.0, "step": 5056 }, { "epoch": 0.5979661818611801, "grad_norm": 0.12923480570316315, "learning_rate": 2.5843067557472274e-05, "loss": 0.3147, "num_tokens": 3202843334.0, "step": 5057 }, { "epoch": 0.5980844271018092, "grad_norm": 0.12925608456134796, "learning_rate": 2.583309748939461e-05, "loss": 0.317, "num_tokens": 3203474033.0, "step": 5058 }, { "epoch": 0.5982026723424382, "grad_norm": 0.14814504981040955, "learning_rate": 2.5823128472518065e-05, "loss": 0.3518, "num_tokens": 3204108443.0, "step": 5059 }, { "epoch": 0.5983209175830673, "grad_norm": 0.12932492792606354, "learning_rate": 2.581316050830483e-05, "loss": 0.3351, "num_tokens": 3204743633.0, "step": 5060 }, { "epoch": 0.5984391628236964, "grad_norm": 0.13720907270908356, "learning_rate": 2.5803193598216953e-05, "loss": 0.3531, "num_tokens": 3205381629.0, "step": 5061 }, { "epoch": 0.5985574080643254, "grad_norm": 0.12950506806373596, "learning_rate": 2.5793227743716337e-05, "loss": 0.3125, "num_tokens": 3206014283.0, "step": 5062 }, { "epoch": 0.5986756533049544, "grad_norm": 0.12923873960971832, "learning_rate": 2.57832629462647e-05, "loss": 0.3295, "num_tokens": 3206651796.0, "step": 5063 }, { "epoch": 0.5987938985455835, "grad_norm": 0.12632738053798676, "learning_rate": 2.5773299207323636e-05, "loss": 0.3459, "num_tokens": 3207290575.0, "step": 5064 }, { "epoch": 0.5989121437862126, "grad_norm": 0.13572031259536743, "learning_rate": 2.5763336528354578e-05, "loss": 0.3348, "num_tokens": 3207926714.0, "step": 5065 }, { "epoch": 0.5990303890268417, "grad_norm": 0.14190003275871277, "learning_rate": 2.5753374910818772e-05, "loss": 0.3333, "num_tokens": 3208562045.0, "step": 5066 }, { "epoch": 0.5991486342674708, "grad_norm": 0.12799905240535736, "learning_rate": 2.5743414356177347e-05, "loss": 0.3066, "num_tokens": 3209191509.0, "step": 5067 }, { "epoch": 0.5992668795080998, "grad_norm": 0.12612783908843994, "learning_rate": 2.5733454865891263e-05, "loss": 0.3563, "num_tokens": 3209829703.0, "step": 5068 }, { "epoch": 0.5993851247487288, "grad_norm": 0.14581556618213654, "learning_rate": 2.5723496441421318e-05, "loss": 0.3387, "num_tokens": 3210461477.0, "step": 5069 }, { "epoch": 0.5995033699893579, "grad_norm": 0.1313004046678543, "learning_rate": 2.5713539084228157e-05, "loss": 0.3347, "num_tokens": 3211097006.0, "step": 5070 }, { "epoch": 0.599621615229987, "grad_norm": 0.1415020227432251, "learning_rate": 2.5703582795772255e-05, "loss": 0.3393, "num_tokens": 3211733462.0, "step": 5071 }, { "epoch": 0.599739860470616, "grad_norm": 0.11660237610340118, "learning_rate": 2.569362757751399e-05, "loss": 0.3221, "num_tokens": 3212367807.0, "step": 5072 }, { "epoch": 0.5998581057112451, "grad_norm": 0.1410379558801651, "learning_rate": 2.5683673430913482e-05, "loss": 0.3358, "num_tokens": 3213000763.0, "step": 5073 }, { "epoch": 0.5999763509518742, "grad_norm": 0.13556289672851562, "learning_rate": 2.5673720357430796e-05, "loss": 0.3693, "num_tokens": 3213640138.0, "step": 5074 }, { "epoch": 0.6000945961925033, "grad_norm": 0.13811537623405457, "learning_rate": 2.5663768358525755e-05, "loss": 0.3232, "num_tokens": 3214274191.0, "step": 5075 }, { "epoch": 0.6002128414331324, "grad_norm": 0.1367601603269577, "learning_rate": 2.5653817435658096e-05, "loss": 0.3732, "num_tokens": 3214909600.0, "step": 5076 }, { "epoch": 0.6003310866737613, "grad_norm": 0.14274181425571442, "learning_rate": 2.5643867590287334e-05, "loss": 0.3699, "num_tokens": 3215548633.0, "step": 5077 }, { "epoch": 0.6004493319143904, "grad_norm": 0.15550951659679413, "learning_rate": 2.5633918823872882e-05, "loss": 0.3776, "num_tokens": 3216182604.0, "step": 5078 }, { "epoch": 0.6005675771550195, "grad_norm": 0.13933899998664856, "learning_rate": 2.5623971137873953e-05, "loss": 0.3133, "num_tokens": 3216819647.0, "step": 5079 }, { "epoch": 0.6006858223956486, "grad_norm": 0.13121749460697174, "learning_rate": 2.561402453374963e-05, "loss": 0.3586, "num_tokens": 3217457005.0, "step": 5080 }, { "epoch": 0.6008040676362777, "grad_norm": 0.14595471322536469, "learning_rate": 2.5604079012958815e-05, "loss": 0.3329, "num_tokens": 3218092782.0, "step": 5081 }, { "epoch": 0.6009223128769067, "grad_norm": 0.12408778816461563, "learning_rate": 2.5594134576960255e-05, "loss": 0.3022, "num_tokens": 3218730096.0, "step": 5082 }, { "epoch": 0.6010405581175358, "grad_norm": 0.12999886274337769, "learning_rate": 2.5584191227212573e-05, "loss": 0.3082, "num_tokens": 3219359034.0, "step": 5083 }, { "epoch": 0.6011588033581649, "grad_norm": 0.1289994865655899, "learning_rate": 2.5574248965174166e-05, "loss": 0.3505, "num_tokens": 3219985950.0, "step": 5084 }, { "epoch": 0.6012770485987939, "grad_norm": 0.1390872597694397, "learning_rate": 2.5564307792303327e-05, "loss": 0.3263, "num_tokens": 3220618951.0, "step": 5085 }, { "epoch": 0.6013952938394229, "grad_norm": 0.1447249948978424, "learning_rate": 2.5554367710058192e-05, "loss": 0.3205, "num_tokens": 3221251648.0, "step": 5086 }, { "epoch": 0.601513539080052, "grad_norm": 0.13694505393505096, "learning_rate": 2.5544428719896682e-05, "loss": 0.3247, "num_tokens": 3221884608.0, "step": 5087 }, { "epoch": 0.6016317843206811, "grad_norm": 0.12387730181217194, "learning_rate": 2.553449082327662e-05, "loss": 0.3269, "num_tokens": 3222520248.0, "step": 5088 }, { "epoch": 0.6017500295613102, "grad_norm": 0.13502119481563568, "learning_rate": 2.552455402165562e-05, "loss": 0.3177, "num_tokens": 3223147657.0, "step": 5089 }, { "epoch": 0.6018682748019393, "grad_norm": 0.13091272115707397, "learning_rate": 2.551461831649117e-05, "loss": 0.3154, "num_tokens": 3223783679.0, "step": 5090 }, { "epoch": 0.6019865200425683, "grad_norm": 0.12297334522008896, "learning_rate": 2.5504683709240572e-05, "loss": 0.2906, "num_tokens": 3224407972.0, "step": 5091 }, { "epoch": 0.6021047652831973, "grad_norm": 0.14336705207824707, "learning_rate": 2.5494750201360997e-05, "loss": 0.3337, "num_tokens": 3225046240.0, "step": 5092 }, { "epoch": 0.6022230105238264, "grad_norm": 0.14583246409893036, "learning_rate": 2.548481779430941e-05, "loss": 0.3579, "num_tokens": 3225680984.0, "step": 5093 }, { "epoch": 0.6023412557644555, "grad_norm": 0.14469586312770844, "learning_rate": 2.547488648954266e-05, "loss": 0.3317, "num_tokens": 3226317682.0, "step": 5094 }, { "epoch": 0.6024595010050845, "grad_norm": 0.13000698387622833, "learning_rate": 2.5464956288517413e-05, "loss": 0.3432, "num_tokens": 3226952408.0, "step": 5095 }, { "epoch": 0.6025777462457136, "grad_norm": 0.13129043579101562, "learning_rate": 2.5455027192690173e-05, "loss": 0.3431, "num_tokens": 3227589644.0, "step": 5096 }, { "epoch": 0.6026959914863427, "grad_norm": 0.1343054175376892, "learning_rate": 2.5445099203517282e-05, "loss": 0.3381, "num_tokens": 3228223144.0, "step": 5097 }, { "epoch": 0.6028142367269718, "grad_norm": 0.14183586835861206, "learning_rate": 2.543517232245493e-05, "loss": 0.3003, "num_tokens": 3228862387.0, "step": 5098 }, { "epoch": 0.6029324819676009, "grad_norm": 0.1341632902622223, "learning_rate": 2.5425246550959127e-05, "loss": 0.3317, "num_tokens": 3229495676.0, "step": 5099 }, { "epoch": 0.6030507272082298, "grad_norm": 0.13628843426704407, "learning_rate": 2.5415321890485725e-05, "loss": 0.3807, "num_tokens": 3230135266.0, "step": 5100 }, { "epoch": 0.6031689724488589, "grad_norm": 0.13701410591602325, "learning_rate": 2.5405398342490427e-05, "loss": 0.3473, "num_tokens": 3230767233.0, "step": 5101 }, { "epoch": 0.603287217689488, "grad_norm": 0.12303084135055542, "learning_rate": 2.539547590842878e-05, "loss": 0.3148, "num_tokens": 3231399740.0, "step": 5102 }, { "epoch": 0.6034054629301171, "grad_norm": 0.13256220519542694, "learning_rate": 2.538555458975611e-05, "loss": 0.3192, "num_tokens": 3232025444.0, "step": 5103 }, { "epoch": 0.6035237081707461, "grad_norm": 0.13812823593616486, "learning_rate": 2.537563438792766e-05, "loss": 0.3458, "num_tokens": 3232656317.0, "step": 5104 }, { "epoch": 0.6036419534113752, "grad_norm": 0.13567912578582764, "learning_rate": 2.5365715304398438e-05, "loss": 0.3651, "num_tokens": 3233291237.0, "step": 5105 }, { "epoch": 0.6037601986520043, "grad_norm": 0.12668322026729584, "learning_rate": 2.535579734062334e-05, "loss": 0.3173, "num_tokens": 3233928092.0, "step": 5106 }, { "epoch": 0.6038784438926333, "grad_norm": 0.12050856649875641, "learning_rate": 2.5345880498057073e-05, "loss": 0.3135, "num_tokens": 3234562895.0, "step": 5107 }, { "epoch": 0.6039966891332624, "grad_norm": 0.1252853125333786, "learning_rate": 2.533596477815418e-05, "loss": 0.3179, "num_tokens": 3235193833.0, "step": 5108 }, { "epoch": 0.6041149343738914, "grad_norm": 0.14707982540130615, "learning_rate": 2.532605018236906e-05, "loss": 0.3869, "num_tokens": 3235810817.0, "step": 5109 }, { "epoch": 0.6042331796145205, "grad_norm": 0.11910106986761093, "learning_rate": 2.5316136712155898e-05, "loss": 0.3127, "num_tokens": 3236447993.0, "step": 5110 }, { "epoch": 0.6043514248551496, "grad_norm": 0.14109908044338226, "learning_rate": 2.5306224368968776e-05, "loss": 0.3604, "num_tokens": 3237085950.0, "step": 5111 }, { "epoch": 0.6044696700957787, "grad_norm": 0.13098609447479248, "learning_rate": 2.529631315426156e-05, "loss": 0.3459, "num_tokens": 3237723422.0, "step": 5112 }, { "epoch": 0.6045879153364077, "grad_norm": 0.1327727884054184, "learning_rate": 2.5286403069488e-05, "loss": 0.3323, "num_tokens": 3238359829.0, "step": 5113 }, { "epoch": 0.6047061605770367, "grad_norm": 0.1275065541267395, "learning_rate": 2.5276494116101615e-05, "loss": 0.3265, "num_tokens": 3238987843.0, "step": 5114 }, { "epoch": 0.6048244058176658, "grad_norm": 0.13437727093696594, "learning_rate": 2.5266586295555814e-05, "loss": 0.3434, "num_tokens": 3239619795.0, "step": 5115 }, { "epoch": 0.6049426510582949, "grad_norm": 0.14004576206207275, "learning_rate": 2.525667960930383e-05, "loss": 0.362, "num_tokens": 3240224084.0, "step": 5116 }, { "epoch": 0.605060896298924, "grad_norm": 0.1499059498310089, "learning_rate": 2.5246774058798695e-05, "loss": 0.3846, "num_tokens": 3240856284.0, "step": 5117 }, { "epoch": 0.605179141539553, "grad_norm": 0.13300783932209015, "learning_rate": 2.523686964549333e-05, "loss": 0.3623, "num_tokens": 3241488999.0, "step": 5118 }, { "epoch": 0.6052973867801821, "grad_norm": 0.12946875393390656, "learning_rate": 2.5226966370840438e-05, "loss": 0.329, "num_tokens": 3242120537.0, "step": 5119 }, { "epoch": 0.6054156320208112, "grad_norm": 0.1326124370098114, "learning_rate": 2.521706423629258e-05, "loss": 0.3496, "num_tokens": 3242759217.0, "step": 5120 }, { "epoch": 0.6055338772614403, "grad_norm": 0.13469643890857697, "learning_rate": 2.5207163243302137e-05, "loss": 0.3246, "num_tokens": 3243393109.0, "step": 5121 }, { "epoch": 0.6056521225020693, "grad_norm": 0.15184877812862396, "learning_rate": 2.5197263393321344e-05, "loss": 0.3558, "num_tokens": 3244027817.0, "step": 5122 }, { "epoch": 0.6057703677426983, "grad_norm": 0.13290151953697205, "learning_rate": 2.518736468780226e-05, "loss": 0.3264, "num_tokens": 3244660688.0, "step": 5123 }, { "epoch": 0.6058886129833274, "grad_norm": 0.1412486433982849, "learning_rate": 2.5177467128196763e-05, "loss": 0.3618, "num_tokens": 3245298751.0, "step": 5124 }, { "epoch": 0.6060068582239565, "grad_norm": 0.14672155678272247, "learning_rate": 2.5167570715956575e-05, "loss": 0.3629, "num_tokens": 3245934481.0, "step": 5125 }, { "epoch": 0.6061251034645856, "grad_norm": 0.14305898547172546, "learning_rate": 2.5157675452533234e-05, "loss": 0.3232, "num_tokens": 3246566978.0, "step": 5126 }, { "epoch": 0.6062433487052146, "grad_norm": 0.13121609389781952, "learning_rate": 2.5147781339378148e-05, "loss": 0.3441, "num_tokens": 3247206256.0, "step": 5127 }, { "epoch": 0.6063615939458437, "grad_norm": 0.130837544798851, "learning_rate": 2.5137888377942492e-05, "loss": 0.3405, "num_tokens": 3247840224.0, "step": 5128 }, { "epoch": 0.6064798391864727, "grad_norm": 0.12484666705131531, "learning_rate": 2.5127996569677345e-05, "loss": 0.3397, "num_tokens": 3248476461.0, "step": 5129 }, { "epoch": 0.6065980844271018, "grad_norm": 0.1224384680390358, "learning_rate": 2.511810591603357e-05, "loss": 0.3003, "num_tokens": 3249111354.0, "step": 5130 }, { "epoch": 0.6067163296677309, "grad_norm": 0.14634080231189728, "learning_rate": 2.5108216418461857e-05, "loss": 0.3262, "num_tokens": 3249751039.0, "step": 5131 }, { "epoch": 0.6068345749083599, "grad_norm": 0.12907524406909943, "learning_rate": 2.5098328078412784e-05, "loss": 0.3291, "num_tokens": 3250381871.0, "step": 5132 }, { "epoch": 0.606952820148989, "grad_norm": 0.12572365999221802, "learning_rate": 2.508844089733667e-05, "loss": 0.314, "num_tokens": 3251019214.0, "step": 5133 }, { "epoch": 0.6070710653896181, "grad_norm": 0.15132801234722137, "learning_rate": 2.5078554876683744e-05, "loss": 0.3643, "num_tokens": 3251654911.0, "step": 5134 }, { "epoch": 0.6071893106302472, "grad_norm": 0.12888365983963013, "learning_rate": 2.5068670017904004e-05, "loss": 0.3052, "num_tokens": 3252281530.0, "step": 5135 }, { "epoch": 0.6073075558708761, "grad_norm": 0.1316634565591812, "learning_rate": 2.5058786322447326e-05, "loss": 0.3355, "num_tokens": 3252915864.0, "step": 5136 }, { "epoch": 0.6074258011115052, "grad_norm": 0.14191608130931854, "learning_rate": 2.5048903791763395e-05, "loss": 0.3528, "num_tokens": 3253555003.0, "step": 5137 }, { "epoch": 0.6075440463521343, "grad_norm": 0.12262517958879471, "learning_rate": 2.5039022427301717e-05, "loss": 0.3297, "num_tokens": 3254190422.0, "step": 5138 }, { "epoch": 0.6076622915927634, "grad_norm": 0.13619793951511383, "learning_rate": 2.502914223051165e-05, "loss": 0.3641, "num_tokens": 3254823917.0, "step": 5139 }, { "epoch": 0.6077805368333925, "grad_norm": 0.5961082577705383, "learning_rate": 2.5019263202842343e-05, "loss": 0.319, "num_tokens": 3255433503.0, "step": 5140 }, { "epoch": 0.6078987820740215, "grad_norm": 0.14761942625045776, "learning_rate": 2.5009385345742813e-05, "loss": 0.3388, "num_tokens": 3256067974.0, "step": 5141 }, { "epoch": 0.6080170273146506, "grad_norm": 0.14235791563987732, "learning_rate": 2.4999508660661876e-05, "loss": 0.3432, "num_tokens": 3256704598.0, "step": 5142 }, { "epoch": 0.6081352725552797, "grad_norm": 0.12544065713882446, "learning_rate": 2.4989633149048202e-05, "loss": 0.3361, "num_tokens": 3257340765.0, "step": 5143 }, { "epoch": 0.6082535177959087, "grad_norm": 0.1340220719575882, "learning_rate": 2.4979758812350275e-05, "loss": 0.3111, "num_tokens": 3257972749.0, "step": 5144 }, { "epoch": 0.6083717630365377, "grad_norm": 0.12618756294250488, "learning_rate": 2.49698856520164e-05, "loss": 0.3182, "num_tokens": 3258603870.0, "step": 5145 }, { "epoch": 0.6084900082771668, "grad_norm": 0.1414497047662735, "learning_rate": 2.496001366949472e-05, "loss": 0.3272, "num_tokens": 3259229257.0, "step": 5146 }, { "epoch": 0.6086082535177959, "grad_norm": 0.1342356950044632, "learning_rate": 2.49501428662332e-05, "loss": 0.3523, "num_tokens": 3259865431.0, "step": 5147 }, { "epoch": 0.608726498758425, "grad_norm": 0.12855695188045502, "learning_rate": 2.494027324367964e-05, "loss": 0.3423, "num_tokens": 3260495509.0, "step": 5148 }, { "epoch": 0.6088447439990541, "grad_norm": 0.14478057622909546, "learning_rate": 2.4930404803281654e-05, "loss": 0.3563, "num_tokens": 3261134256.0, "step": 5149 }, { "epoch": 0.6089629892396831, "grad_norm": 0.1359102725982666, "learning_rate": 2.4920537546486692e-05, "loss": 0.321, "num_tokens": 3261772690.0, "step": 5150 }, { "epoch": 0.6090812344803122, "grad_norm": 0.1275576502084732, "learning_rate": 2.491067147474203e-05, "loss": 0.3298, "num_tokens": 3262408174.0, "step": 5151 }, { "epoch": 0.6091994797209412, "grad_norm": 0.1204083114862442, "learning_rate": 2.4900806589494765e-05, "loss": 0.3362, "num_tokens": 3263042888.0, "step": 5152 }, { "epoch": 0.6093177249615703, "grad_norm": 0.1340213567018509, "learning_rate": 2.4890942892191835e-05, "loss": 0.3292, "num_tokens": 3263678215.0, "step": 5153 }, { "epoch": 0.6094359702021993, "grad_norm": 0.1404142528772354, "learning_rate": 2.4881080384279963e-05, "loss": 0.3541, "num_tokens": 3264305382.0, "step": 5154 }, { "epoch": 0.6095542154428284, "grad_norm": 0.13139916956424713, "learning_rate": 2.487121906720576e-05, "loss": 0.32, "num_tokens": 3264938408.0, "step": 5155 }, { "epoch": 0.6096724606834575, "grad_norm": 0.12975004315376282, "learning_rate": 2.4861358942415604e-05, "loss": 0.3391, "num_tokens": 3265576395.0, "step": 5156 }, { "epoch": 0.6097907059240866, "grad_norm": 0.13399994373321533, "learning_rate": 2.485150001135574e-05, "loss": 0.312, "num_tokens": 3266212614.0, "step": 5157 }, { "epoch": 0.6099089511647157, "grad_norm": 0.131593257188797, "learning_rate": 2.4841642275472215e-05, "loss": 0.3464, "num_tokens": 3266846940.0, "step": 5158 }, { "epoch": 0.6100271964053446, "grad_norm": 0.13931193947792053, "learning_rate": 2.4831785736210898e-05, "loss": 0.3639, "num_tokens": 3267480220.0, "step": 5159 }, { "epoch": 0.6101454416459737, "grad_norm": 0.12017636001110077, "learning_rate": 2.4821930395017507e-05, "loss": 0.317, "num_tokens": 3268117337.0, "step": 5160 }, { "epoch": 0.6102636868866028, "grad_norm": 0.12749464809894562, "learning_rate": 2.4812076253337543e-05, "loss": 0.3052, "num_tokens": 3268755632.0, "step": 5161 }, { "epoch": 0.6103819321272319, "grad_norm": 0.13347060978412628, "learning_rate": 2.48022233126164e-05, "loss": 0.3521, "num_tokens": 3269390978.0, "step": 5162 }, { "epoch": 0.610500177367861, "grad_norm": 0.13399963080883026, "learning_rate": 2.4792371574299198e-05, "loss": 0.3107, "num_tokens": 3270018235.0, "step": 5163 }, { "epoch": 0.61061842260849, "grad_norm": 0.12949804961681366, "learning_rate": 2.4782521039830965e-05, "loss": 0.3287, "num_tokens": 3270647221.0, "step": 5164 }, { "epoch": 0.6107366678491191, "grad_norm": 0.11592622101306915, "learning_rate": 2.4772671710656534e-05, "loss": 0.3014, "num_tokens": 3271280885.0, "step": 5165 }, { "epoch": 0.6108549130897482, "grad_norm": 0.12676465511322021, "learning_rate": 2.476282358822052e-05, "loss": 0.3302, "num_tokens": 3271919929.0, "step": 5166 }, { "epoch": 0.6109731583303772, "grad_norm": 0.1426728516817093, "learning_rate": 2.4752976673967412e-05, "loss": 0.3195, "num_tokens": 3272554874.0, "step": 5167 }, { "epoch": 0.6110914035710062, "grad_norm": 0.13912402093410492, "learning_rate": 2.4743130969341485e-05, "loss": 0.3443, "num_tokens": 3273191231.0, "step": 5168 }, { "epoch": 0.6112096488116353, "grad_norm": 0.13477365672588348, "learning_rate": 2.4733286475786874e-05, "loss": 0.3699, "num_tokens": 3273829929.0, "step": 5169 }, { "epoch": 0.6113278940522644, "grad_norm": 0.15268374979496002, "learning_rate": 2.472344319474749e-05, "loss": 0.3653, "num_tokens": 3274466279.0, "step": 5170 }, { "epoch": 0.6114461392928935, "grad_norm": 0.12330774962902069, "learning_rate": 2.4713601127667106e-05, "loss": 0.28, "num_tokens": 3275104071.0, "step": 5171 }, { "epoch": 0.6115643845335226, "grad_norm": 0.13140927255153656, "learning_rate": 2.4703760275989283e-05, "loss": 0.3223, "num_tokens": 3275743373.0, "step": 5172 }, { "epoch": 0.6116826297741516, "grad_norm": 0.1271623969078064, "learning_rate": 2.4693920641157434e-05, "loss": 0.3336, "num_tokens": 3276373078.0, "step": 5173 }, { "epoch": 0.6118008750147806, "grad_norm": 0.14208166301250458, "learning_rate": 2.4684082224614798e-05, "loss": 0.3676, "num_tokens": 3277007087.0, "step": 5174 }, { "epoch": 0.6119191202554097, "grad_norm": 0.14011356234550476, "learning_rate": 2.4674245027804387e-05, "loss": 0.3324, "num_tokens": 3277638607.0, "step": 5175 }, { "epoch": 0.6120373654960388, "grad_norm": 0.13721559941768646, "learning_rate": 2.46644090521691e-05, "loss": 0.356, "num_tokens": 3278273724.0, "step": 5176 }, { "epoch": 0.6121556107366678, "grad_norm": 0.13587194681167603, "learning_rate": 2.4654574299151584e-05, "loss": 0.3297, "num_tokens": 3278912493.0, "step": 5177 }, { "epoch": 0.6122738559772969, "grad_norm": 0.1359243541955948, "learning_rate": 2.464474077019438e-05, "loss": 0.3312, "num_tokens": 3279548751.0, "step": 5178 }, { "epoch": 0.612392101217926, "grad_norm": 0.12857699394226074, "learning_rate": 2.46349084667398e-05, "loss": 0.3715, "num_tokens": 3280184086.0, "step": 5179 }, { "epoch": 0.6125103464585551, "grad_norm": 0.13381968438625336, "learning_rate": 2.462507739022998e-05, "loss": 0.3394, "num_tokens": 3280822013.0, "step": 5180 }, { "epoch": 0.6126285916991842, "grad_norm": 0.12580780684947968, "learning_rate": 2.4615247542106913e-05, "loss": 0.3112, "num_tokens": 3281458909.0, "step": 5181 }, { "epoch": 0.6127468369398131, "grad_norm": 0.140984445810318, "learning_rate": 2.4605418923812363e-05, "loss": 0.361, "num_tokens": 3282091074.0, "step": 5182 }, { "epoch": 0.6128650821804422, "grad_norm": 0.1282375603914261, "learning_rate": 2.459559153678796e-05, "loss": 0.3196, "num_tokens": 3282729976.0, "step": 5183 }, { "epoch": 0.6129833274210713, "grad_norm": 0.12457488477230072, "learning_rate": 2.4585765382475097e-05, "loss": 0.3026, "num_tokens": 3283363232.0, "step": 5184 }, { "epoch": 0.6131015726617004, "grad_norm": 0.13191772997379303, "learning_rate": 2.4575940462315057e-05, "loss": 0.3222, "num_tokens": 3283989904.0, "step": 5185 }, { "epoch": 0.6132198179023294, "grad_norm": 0.15047669410705566, "learning_rate": 2.456611677774887e-05, "loss": 0.3616, "num_tokens": 3284617449.0, "step": 5186 }, { "epoch": 0.6133380631429585, "grad_norm": 0.14949476718902588, "learning_rate": 2.455629433021744e-05, "loss": 0.3271, "num_tokens": 3285253717.0, "step": 5187 }, { "epoch": 0.6134563083835876, "grad_norm": 0.1268942952156067, "learning_rate": 2.454647312116147e-05, "loss": 0.327, "num_tokens": 3285890624.0, "step": 5188 }, { "epoch": 0.6135745536242166, "grad_norm": 0.13196642696857452, "learning_rate": 2.4536653152021465e-05, "loss": 0.3356, "num_tokens": 3286522158.0, "step": 5189 }, { "epoch": 0.6136927988648457, "grad_norm": 0.13595178723335266, "learning_rate": 2.4526834424237776e-05, "loss": 0.3338, "num_tokens": 3287150240.0, "step": 5190 }, { "epoch": 0.6138110441054747, "grad_norm": 0.13317511975765228, "learning_rate": 2.4517016939250556e-05, "loss": 0.3518, "num_tokens": 3287783417.0, "step": 5191 }, { "epoch": 0.6139292893461038, "grad_norm": 0.1370093673467636, "learning_rate": 2.4507200698499772e-05, "loss": 0.3256, "num_tokens": 3288421362.0, "step": 5192 }, { "epoch": 0.6140475345867329, "grad_norm": 0.1695561558008194, "learning_rate": 2.449738570342522e-05, "loss": 0.352, "num_tokens": 3289023508.0, "step": 5193 }, { "epoch": 0.614165779827362, "grad_norm": 0.15656988322734833, "learning_rate": 2.448757195546651e-05, "loss": 0.3141, "num_tokens": 3289658976.0, "step": 5194 }, { "epoch": 0.6142840250679911, "grad_norm": 0.1424240916967392, "learning_rate": 2.4477759456063073e-05, "loss": 0.3413, "num_tokens": 3290293697.0, "step": 5195 }, { "epoch": 0.61440227030862, "grad_norm": 0.13750407099723816, "learning_rate": 2.4467948206654133e-05, "loss": 0.3398, "num_tokens": 3290932719.0, "step": 5196 }, { "epoch": 0.6145205155492491, "grad_norm": 0.13948053121566772, "learning_rate": 2.4458138208678773e-05, "loss": 0.3514, "num_tokens": 3291570460.0, "step": 5197 }, { "epoch": 0.6146387607898782, "grad_norm": 0.1414433866739273, "learning_rate": 2.4448329463575847e-05, "loss": 0.3273, "num_tokens": 3292203314.0, "step": 5198 }, { "epoch": 0.6147570060305073, "grad_norm": 0.14113140106201172, "learning_rate": 2.4438521972784062e-05, "loss": 0.3245, "num_tokens": 3292838484.0, "step": 5199 }, { "epoch": 0.6148752512711363, "grad_norm": 0.1370488852262497, "learning_rate": 2.4428715737741922e-05, "loss": 0.3138, "num_tokens": 3293474815.0, "step": 5200 }, { "epoch": 0.6149934965117654, "grad_norm": 0.12731055915355682, "learning_rate": 2.4418910759887736e-05, "loss": 0.3085, "num_tokens": 3294111304.0, "step": 5201 }, { "epoch": 0.6151117417523945, "grad_norm": 0.12654626369476318, "learning_rate": 2.4409107040659667e-05, "loss": 0.3315, "num_tokens": 3294746280.0, "step": 5202 }, { "epoch": 0.6152299869930236, "grad_norm": 0.12981125712394714, "learning_rate": 2.4399304581495656e-05, "loss": 0.3588, "num_tokens": 3295383844.0, "step": 5203 }, { "epoch": 0.6153482322336526, "grad_norm": 0.1508256494998932, "learning_rate": 2.4389503383833477e-05, "loss": 0.349, "num_tokens": 3296015238.0, "step": 5204 }, { "epoch": 0.6154664774742816, "grad_norm": 0.1364593505859375, "learning_rate": 2.4379703449110707e-05, "loss": 0.3489, "num_tokens": 3296654253.0, "step": 5205 }, { "epoch": 0.6155847227149107, "grad_norm": 0.12629801034927368, "learning_rate": 2.4369904778764764e-05, "loss": 0.3269, "num_tokens": 3297267566.0, "step": 5206 }, { "epoch": 0.6157029679555398, "grad_norm": 0.12861089408397675, "learning_rate": 2.436010737423284e-05, "loss": 0.3246, "num_tokens": 3297905522.0, "step": 5207 }, { "epoch": 0.6158212131961689, "grad_norm": 0.1372966468334198, "learning_rate": 2.4350311236951968e-05, "loss": 0.3447, "num_tokens": 3298538686.0, "step": 5208 }, { "epoch": 0.6159394584367979, "grad_norm": 0.13060255348682404, "learning_rate": 2.434051636835901e-05, "loss": 0.3364, "num_tokens": 3299176214.0, "step": 5209 }, { "epoch": 0.616057703677427, "grad_norm": 0.13676610589027405, "learning_rate": 2.4330722769890598e-05, "loss": 0.3337, "num_tokens": 3299814626.0, "step": 5210 }, { "epoch": 0.616175948918056, "grad_norm": 0.13656705617904663, "learning_rate": 2.432093044298322e-05, "loss": 0.3514, "num_tokens": 3300450059.0, "step": 5211 }, { "epoch": 0.6162941941586851, "grad_norm": 0.14119702577590942, "learning_rate": 2.4311139389073144e-05, "loss": 0.3361, "num_tokens": 3301083438.0, "step": 5212 }, { "epoch": 0.6164124393993142, "grad_norm": 0.12343753129243851, "learning_rate": 2.430134960959648e-05, "loss": 0.3123, "num_tokens": 3301722814.0, "step": 5213 }, { "epoch": 0.6165306846399432, "grad_norm": 0.14450901746749878, "learning_rate": 2.4291561105989127e-05, "loss": 0.3502, "num_tokens": 3302359875.0, "step": 5214 }, { "epoch": 0.6166489298805723, "grad_norm": 0.13393588364124298, "learning_rate": 2.4281773879686817e-05, "loss": 0.3215, "num_tokens": 3302998072.0, "step": 5215 }, { "epoch": 0.6167671751212014, "grad_norm": 0.13127733767032623, "learning_rate": 2.4271987932125086e-05, "loss": 0.3331, "num_tokens": 3303631814.0, "step": 5216 }, { "epoch": 0.6168854203618305, "grad_norm": 0.15220019221305847, "learning_rate": 2.4262203264739275e-05, "loss": 0.3265, "num_tokens": 3304266833.0, "step": 5217 }, { "epoch": 0.6170036656024595, "grad_norm": 0.14639513194561005, "learning_rate": 2.425241987896456e-05, "loss": 0.3444, "num_tokens": 3304899778.0, "step": 5218 }, { "epoch": 0.6171219108430885, "grad_norm": 0.1268526315689087, "learning_rate": 2.424263777623588e-05, "loss": 0.3046, "num_tokens": 3305535078.0, "step": 5219 }, { "epoch": 0.6172401560837176, "grad_norm": 0.1398886889219284, "learning_rate": 2.4232856957988056e-05, "loss": 0.3471, "num_tokens": 3306171185.0, "step": 5220 }, { "epoch": 0.6173584013243467, "grad_norm": 0.15526947379112244, "learning_rate": 2.4223077425655658e-05, "loss": 0.3754, "num_tokens": 3306806482.0, "step": 5221 }, { "epoch": 0.6174766465649758, "grad_norm": 0.1354888677597046, "learning_rate": 2.4213299180673095e-05, "loss": 0.3239, "num_tokens": 3307441024.0, "step": 5222 }, { "epoch": 0.6175948918056048, "grad_norm": 0.1366814374923706, "learning_rate": 2.420352222447461e-05, "loss": 0.3441, "num_tokens": 3308080111.0, "step": 5223 }, { "epoch": 0.6177131370462339, "grad_norm": 0.1258581578731537, "learning_rate": 2.4193746558494204e-05, "loss": 0.3298, "num_tokens": 3308713574.0, "step": 5224 }, { "epoch": 0.617831382286863, "grad_norm": 0.1302805095911026, "learning_rate": 2.418397218416574e-05, "loss": 0.3035, "num_tokens": 3309349595.0, "step": 5225 }, { "epoch": 0.617949627527492, "grad_norm": 0.13716426491737366, "learning_rate": 2.4174199102922845e-05, "loss": 0.3247, "num_tokens": 3309985335.0, "step": 5226 }, { "epoch": 0.618067872768121, "grad_norm": 0.140396848320961, "learning_rate": 2.4164427316198998e-05, "loss": 0.3358, "num_tokens": 3310621902.0, "step": 5227 }, { "epoch": 0.6181861180087501, "grad_norm": 0.14620888233184814, "learning_rate": 2.4154656825427458e-05, "loss": 0.3713, "num_tokens": 3311261288.0, "step": 5228 }, { "epoch": 0.6183043632493792, "grad_norm": 0.12948431074619293, "learning_rate": 2.4144887632041302e-05, "loss": 0.2805, "num_tokens": 3311894385.0, "step": 5229 }, { "epoch": 0.6184226084900083, "grad_norm": 0.1429416835308075, "learning_rate": 2.413511973747345e-05, "loss": 0.3473, "num_tokens": 3312526318.0, "step": 5230 }, { "epoch": 0.6185408537306374, "grad_norm": 0.149710014462471, "learning_rate": 2.4125353143156567e-05, "loss": 0.3674, "num_tokens": 3313161971.0, "step": 5231 }, { "epoch": 0.6186590989712664, "grad_norm": 0.13066859543323517, "learning_rate": 2.411558785052318e-05, "loss": 0.3029, "num_tokens": 3313797806.0, "step": 5232 }, { "epoch": 0.6187773442118955, "grad_norm": 0.1478959619998932, "learning_rate": 2.41058238610056e-05, "loss": 0.3519, "num_tokens": 3314434021.0, "step": 5233 }, { "epoch": 0.6188955894525245, "grad_norm": 0.13506822288036346, "learning_rate": 2.409606117603597e-05, "loss": 0.3804, "num_tokens": 3315069709.0, "step": 5234 }, { "epoch": 0.6190138346931536, "grad_norm": 0.13438940048217773, "learning_rate": 2.4086299797046207e-05, "loss": 0.3593, "num_tokens": 3315709276.0, "step": 5235 }, { "epoch": 0.6191320799337827, "grad_norm": 0.13669604063034058, "learning_rate": 2.4076539725468065e-05, "loss": 0.3392, "num_tokens": 3316338624.0, "step": 5236 }, { "epoch": 0.6192503251744117, "grad_norm": 0.12155944108963013, "learning_rate": 2.4066780962733098e-05, "loss": 0.3364, "num_tokens": 3316968673.0, "step": 5237 }, { "epoch": 0.6193685704150408, "grad_norm": 0.1275085061788559, "learning_rate": 2.4057023510272658e-05, "loss": 0.3436, "num_tokens": 3317605436.0, "step": 5238 }, { "epoch": 0.6194868156556699, "grad_norm": 0.1253606379032135, "learning_rate": 2.4047267369517922e-05, "loss": 0.3397, "num_tokens": 3318240047.0, "step": 5239 }, { "epoch": 0.619605060896299, "grad_norm": 0.11846736073493958, "learning_rate": 2.403751254189986e-05, "loss": 0.3247, "num_tokens": 3318876543.0, "step": 5240 }, { "epoch": 0.6197233061369279, "grad_norm": 0.13023492693901062, "learning_rate": 2.4027759028849266e-05, "loss": 0.3273, "num_tokens": 3319481430.0, "step": 5241 }, { "epoch": 0.619841551377557, "grad_norm": 0.1325293928384781, "learning_rate": 2.4018006831796707e-05, "loss": 0.3298, "num_tokens": 3320120861.0, "step": 5242 }, { "epoch": 0.6199597966181861, "grad_norm": 0.12690429389476776, "learning_rate": 2.4008255952172598e-05, "loss": 0.3196, "num_tokens": 3320753446.0, "step": 5243 }, { "epoch": 0.6200780418588152, "grad_norm": 0.12981106340885162, "learning_rate": 2.3998506391407163e-05, "loss": 0.3079, "num_tokens": 3321389270.0, "step": 5244 }, { "epoch": 0.6201962870994443, "grad_norm": 0.12419381737709045, "learning_rate": 2.3988758150930373e-05, "loss": 0.3177, "num_tokens": 3322022372.0, "step": 5245 }, { "epoch": 0.6203145323400733, "grad_norm": 0.13538970053195953, "learning_rate": 2.397901123217207e-05, "loss": 0.3542, "num_tokens": 3322654907.0, "step": 5246 }, { "epoch": 0.6204327775807024, "grad_norm": 0.12483152002096176, "learning_rate": 2.3969265636561874e-05, "loss": 0.306, "num_tokens": 3323290461.0, "step": 5247 }, { "epoch": 0.6205510228213315, "grad_norm": 0.1378663033246994, "learning_rate": 2.3959521365529215e-05, "loss": 0.3386, "num_tokens": 3323922988.0, "step": 5248 }, { "epoch": 0.6206692680619605, "grad_norm": 0.13100086152553558, "learning_rate": 2.3949778420503315e-05, "loss": 0.34, "num_tokens": 3324560216.0, "step": 5249 }, { "epoch": 0.6207875133025895, "grad_norm": 0.13394512236118317, "learning_rate": 2.3940036802913233e-05, "loss": 0.3395, "num_tokens": 3325194306.0, "step": 5250 }, { "epoch": 0.6209057585432186, "grad_norm": 0.1308339387178421, "learning_rate": 2.39302965141878e-05, "loss": 0.2876, "num_tokens": 3325824342.0, "step": 5251 }, { "epoch": 0.6210240037838477, "grad_norm": 0.1339906007051468, "learning_rate": 2.392055755575567e-05, "loss": 0.3151, "num_tokens": 3326453174.0, "step": 5252 }, { "epoch": 0.6211422490244768, "grad_norm": 0.13102863729000092, "learning_rate": 2.3910819929045317e-05, "loss": 0.331, "num_tokens": 3327089711.0, "step": 5253 }, { "epoch": 0.6212604942651059, "grad_norm": 0.11784249544143677, "learning_rate": 2.390108363548498e-05, "loss": 0.3143, "num_tokens": 3327717716.0, "step": 5254 }, { "epoch": 0.6213787395057349, "grad_norm": 0.14015263319015503, "learning_rate": 2.3891348676502732e-05, "loss": 0.3259, "num_tokens": 3328354391.0, "step": 5255 }, { "epoch": 0.621496984746364, "grad_norm": 0.13898451626300812, "learning_rate": 2.3881615053526437e-05, "loss": 0.3637, "num_tokens": 3328962156.0, "step": 5256 }, { "epoch": 0.621615229986993, "grad_norm": 0.12429610639810562, "learning_rate": 2.3871882767983782e-05, "loss": 0.3226, "num_tokens": 3329600092.0, "step": 5257 }, { "epoch": 0.6217334752276221, "grad_norm": 0.12686018645763397, "learning_rate": 2.3862151821302227e-05, "loss": 0.3115, "num_tokens": 3330232035.0, "step": 5258 }, { "epoch": 0.6218517204682511, "grad_norm": 0.12581875920295715, "learning_rate": 2.385242221490906e-05, "loss": 0.3531, "num_tokens": 3330871564.0, "step": 5259 }, { "epoch": 0.6219699657088802, "grad_norm": 0.12621711194515228, "learning_rate": 2.384269395023137e-05, "loss": 0.3236, "num_tokens": 3331506426.0, "step": 5260 }, { "epoch": 0.6220882109495093, "grad_norm": 0.13780003786087036, "learning_rate": 2.3832967028696033e-05, "loss": 0.3203, "num_tokens": 3332142483.0, "step": 5261 }, { "epoch": 0.6222064561901384, "grad_norm": 0.12952998280525208, "learning_rate": 2.3823241451729764e-05, "loss": 0.3114, "num_tokens": 3332772407.0, "step": 5262 }, { "epoch": 0.6223247014307675, "grad_norm": 0.14396962523460388, "learning_rate": 2.3813517220759017e-05, "loss": 0.3781, "num_tokens": 3333402215.0, "step": 5263 }, { "epoch": 0.6224429466713964, "grad_norm": 0.13947910070419312, "learning_rate": 2.3803794337210133e-05, "loss": 0.3542, "num_tokens": 3334034858.0, "step": 5264 }, { "epoch": 0.6225611919120255, "grad_norm": 0.1329195648431778, "learning_rate": 2.3794072802509158e-05, "loss": 0.3196, "num_tokens": 3334660812.0, "step": 5265 }, { "epoch": 0.6226794371526546, "grad_norm": 0.14390259981155396, "learning_rate": 2.3784352618082033e-05, "loss": 0.3549, "num_tokens": 3335294851.0, "step": 5266 }, { "epoch": 0.6227976823932837, "grad_norm": 0.12948542833328247, "learning_rate": 2.377463378535446e-05, "loss": 0.3577, "num_tokens": 3335927888.0, "step": 5267 }, { "epoch": 0.6229159276339127, "grad_norm": 0.12182746082544327, "learning_rate": 2.3764916305751915e-05, "loss": 0.3009, "num_tokens": 3336562116.0, "step": 5268 }, { "epoch": 0.6230341728745418, "grad_norm": 0.12996986508369446, "learning_rate": 2.3755200180699727e-05, "loss": 0.3505, "num_tokens": 3337201483.0, "step": 5269 }, { "epoch": 0.6231524181151709, "grad_norm": 0.12325923144817352, "learning_rate": 2.3745485411622993e-05, "loss": 0.309, "num_tokens": 3337838669.0, "step": 5270 }, { "epoch": 0.6232706633558, "grad_norm": 0.12466917932033539, "learning_rate": 2.3735771999946627e-05, "loss": 0.3279, "num_tokens": 3338476389.0, "step": 5271 }, { "epoch": 0.623388908596429, "grad_norm": 0.1292700469493866, "learning_rate": 2.3726059947095325e-05, "loss": 0.3314, "num_tokens": 3339113839.0, "step": 5272 }, { "epoch": 0.623507153837058, "grad_norm": 0.12961481511592865, "learning_rate": 2.3716349254493606e-05, "loss": 0.3278, "num_tokens": 3339745416.0, "step": 5273 }, { "epoch": 0.6236253990776871, "grad_norm": 0.13907501101493835, "learning_rate": 2.370663992356579e-05, "loss": 0.36, "num_tokens": 3340383007.0, "step": 5274 }, { "epoch": 0.6237436443183162, "grad_norm": 0.1202303096652031, "learning_rate": 2.3696931955735967e-05, "loss": 0.3007, "num_tokens": 3341016451.0, "step": 5275 }, { "epoch": 0.6238618895589453, "grad_norm": 0.12775550782680511, "learning_rate": 2.368722535242807e-05, "loss": 0.3291, "num_tokens": 3341648207.0, "step": 5276 }, { "epoch": 0.6239801347995744, "grad_norm": 0.13186615705490112, "learning_rate": 2.3677520115065787e-05, "loss": 0.3251, "num_tokens": 3342282119.0, "step": 5277 }, { "epoch": 0.6240983800402033, "grad_norm": 0.1257687658071518, "learning_rate": 2.3667816245072637e-05, "loss": 0.3346, "num_tokens": 3342917373.0, "step": 5278 }, { "epoch": 0.6242166252808324, "grad_norm": 0.12833859026432037, "learning_rate": 2.365811374387193e-05, "loss": 0.3171, "num_tokens": 3343551809.0, "step": 5279 }, { "epoch": 0.6243348705214615, "grad_norm": 0.1468733847141266, "learning_rate": 2.3648412612886764e-05, "loss": 0.3477, "num_tokens": 3344182449.0, "step": 5280 }, { "epoch": 0.6244531157620906, "grad_norm": 0.13075831532478333, "learning_rate": 2.3638712853540072e-05, "loss": 0.3223, "num_tokens": 3344813994.0, "step": 5281 }, { "epoch": 0.6245713610027196, "grad_norm": 0.15450094640254974, "learning_rate": 2.3629014467254536e-05, "loss": 0.4078, "num_tokens": 3345449207.0, "step": 5282 }, { "epoch": 0.6246896062433487, "grad_norm": 0.13192905485630035, "learning_rate": 2.3619317455452674e-05, "loss": 0.3466, "num_tokens": 3346086378.0, "step": 5283 }, { "epoch": 0.6248078514839778, "grad_norm": 0.1379285454750061, "learning_rate": 2.3609621819556783e-05, "loss": 0.3597, "num_tokens": 3346720674.0, "step": 5284 }, { "epoch": 0.6249260967246069, "grad_norm": 0.12853316962718964, "learning_rate": 2.3599927560988977e-05, "loss": 0.3391, "num_tokens": 3347358290.0, "step": 5285 }, { "epoch": 0.625044341965236, "grad_norm": 0.18422268331050873, "learning_rate": 2.359023468117113e-05, "loss": 0.3582, "num_tokens": 3347997974.0, "step": 5286 }, { "epoch": 0.6251625872058649, "grad_norm": 0.13707473874092102, "learning_rate": 2.358054318152496e-05, "loss": 0.3259, "num_tokens": 3348631352.0, "step": 5287 }, { "epoch": 0.625280832446494, "grad_norm": 0.13191957771778107, "learning_rate": 2.3570853063471956e-05, "loss": 0.3118, "num_tokens": 3349263470.0, "step": 5288 }, { "epoch": 0.6253990776871231, "grad_norm": 0.12812481820583344, "learning_rate": 2.3561164328433412e-05, "loss": 0.2787, "num_tokens": 3349890154.0, "step": 5289 }, { "epoch": 0.6255173229277522, "grad_norm": 0.15653300285339355, "learning_rate": 2.3551476977830417e-05, "loss": 0.3286, "num_tokens": 3350525331.0, "step": 5290 }, { "epoch": 0.6256355681683812, "grad_norm": 0.15626272559165955, "learning_rate": 2.3541791013083855e-05, "loss": 0.3392, "num_tokens": 3351162660.0, "step": 5291 }, { "epoch": 0.6257538134090103, "grad_norm": 0.1253092885017395, "learning_rate": 2.3532106435614423e-05, "loss": 0.2844, "num_tokens": 3351799876.0, "step": 5292 }, { "epoch": 0.6258720586496394, "grad_norm": 0.13834455609321594, "learning_rate": 2.352242324684256e-05, "loss": 0.3447, "num_tokens": 3352435130.0, "step": 5293 }, { "epoch": 0.6259903038902684, "grad_norm": 0.13640321791172028, "learning_rate": 2.3512741448188587e-05, "loss": 0.3093, "num_tokens": 3353074117.0, "step": 5294 }, { "epoch": 0.6261085491308975, "grad_norm": 0.14825667440891266, "learning_rate": 2.350306104107256e-05, "loss": 0.3318, "num_tokens": 3353709613.0, "step": 5295 }, { "epoch": 0.6262267943715265, "grad_norm": 0.1273735910654068, "learning_rate": 2.349338202691435e-05, "loss": 0.309, "num_tokens": 3354345175.0, "step": 5296 }, { "epoch": 0.6263450396121556, "grad_norm": 0.14025317132472992, "learning_rate": 2.3483704407133607e-05, "loss": 0.3576, "num_tokens": 3354984341.0, "step": 5297 }, { "epoch": 0.6264632848527847, "grad_norm": 0.14234203100204468, "learning_rate": 2.34740281831498e-05, "loss": 0.3109, "num_tokens": 3355619754.0, "step": 5298 }, { "epoch": 0.6265815300934138, "grad_norm": 0.13474233448505402, "learning_rate": 2.3464353356382187e-05, "loss": 0.3198, "num_tokens": 3356248910.0, "step": 5299 }, { "epoch": 0.6266997753340428, "grad_norm": 0.12708310782909393, "learning_rate": 2.3454679928249817e-05, "loss": 0.3304, "num_tokens": 3356882423.0, "step": 5300 }, { "epoch": 0.6268180205746718, "grad_norm": 0.1496780961751938, "learning_rate": 2.344500790017152e-05, "loss": 0.3417, "num_tokens": 3357514147.0, "step": 5301 }, { "epoch": 0.6269362658153009, "grad_norm": 0.13332732021808624, "learning_rate": 2.3435337273565953e-05, "loss": 0.3254, "num_tokens": 3358148745.0, "step": 5302 }, { "epoch": 0.62705451105593, "grad_norm": 0.13960085809230804, "learning_rate": 2.3425668049851535e-05, "loss": 0.3328, "num_tokens": 3358783050.0, "step": 5303 }, { "epoch": 0.6271727562965591, "grad_norm": 0.1461336314678192, "learning_rate": 2.3416000230446515e-05, "loss": 0.34, "num_tokens": 3359420347.0, "step": 5304 }, { "epoch": 0.6272910015371881, "grad_norm": 0.14516399800777435, "learning_rate": 2.3406333816768887e-05, "loss": 0.3512, "num_tokens": 3360054578.0, "step": 5305 }, { "epoch": 0.6274092467778172, "grad_norm": 0.12732654809951782, "learning_rate": 2.339666881023649e-05, "loss": 0.3132, "num_tokens": 3360679304.0, "step": 5306 }, { "epoch": 0.6275274920184463, "grad_norm": 0.13221144676208496, "learning_rate": 2.338700521226691e-05, "loss": 0.3556, "num_tokens": 3361309302.0, "step": 5307 }, { "epoch": 0.6276457372590754, "grad_norm": 0.11857721209526062, "learning_rate": 2.3377343024277565e-05, "loss": 0.2964, "num_tokens": 3361945448.0, "step": 5308 }, { "epoch": 0.6277639824997043, "grad_norm": 0.13173502683639526, "learning_rate": 2.336768224768565e-05, "loss": 0.3467, "num_tokens": 3362582315.0, "step": 5309 }, { "epoch": 0.6278822277403334, "grad_norm": 0.12509602308273315, "learning_rate": 2.3358022883908146e-05, "loss": 0.3097, "num_tokens": 3363219633.0, "step": 5310 }, { "epoch": 0.6280004729809625, "grad_norm": 0.13162162899971008, "learning_rate": 2.334836493436185e-05, "loss": 0.3177, "num_tokens": 3363857694.0, "step": 5311 }, { "epoch": 0.6281187182215916, "grad_norm": 0.13742738962173462, "learning_rate": 2.333870840046332e-05, "loss": 0.3549, "num_tokens": 3364493240.0, "step": 5312 }, { "epoch": 0.6282369634622207, "grad_norm": 0.1298433542251587, "learning_rate": 2.3329053283628933e-05, "loss": 0.3077, "num_tokens": 3365130445.0, "step": 5313 }, { "epoch": 0.6283552087028497, "grad_norm": 0.12382235378026962, "learning_rate": 2.3319399585274826e-05, "loss": 0.3241, "num_tokens": 3365742941.0, "step": 5314 }, { "epoch": 0.6284734539434788, "grad_norm": 0.13083097338676453, "learning_rate": 2.3309747306816974e-05, "loss": 0.3218, "num_tokens": 3366372954.0, "step": 5315 }, { "epoch": 0.6285916991841078, "grad_norm": 0.13849428296089172, "learning_rate": 2.3300096449671116e-05, "loss": 0.36, "num_tokens": 3367004890.0, "step": 5316 }, { "epoch": 0.6287099444247369, "grad_norm": 0.13833214342594147, "learning_rate": 2.3290447015252775e-05, "loss": 0.3215, "num_tokens": 3367641571.0, "step": 5317 }, { "epoch": 0.628828189665366, "grad_norm": 0.12520186603069305, "learning_rate": 2.3280799004977284e-05, "loss": 0.3288, "num_tokens": 3368277782.0, "step": 5318 }, { "epoch": 0.628946434905995, "grad_norm": 0.13023577630519867, "learning_rate": 2.3271152420259754e-05, "loss": 0.3072, "num_tokens": 3368909327.0, "step": 5319 }, { "epoch": 0.6290646801466241, "grad_norm": 0.13567592203617096, "learning_rate": 2.3261507262515104e-05, "loss": 0.3488, "num_tokens": 3369537601.0, "step": 5320 }, { "epoch": 0.6291829253872532, "grad_norm": 0.13472501933574677, "learning_rate": 2.3251863533158003e-05, "loss": 0.3088, "num_tokens": 3370175208.0, "step": 5321 }, { "epoch": 0.6293011706278823, "grad_norm": 0.12090735882520676, "learning_rate": 2.324222123360297e-05, "loss": 0.2617, "num_tokens": 3370808791.0, "step": 5322 }, { "epoch": 0.6294194158685112, "grad_norm": 0.1471964418888092, "learning_rate": 2.3232580365264273e-05, "loss": 0.307, "num_tokens": 3371440214.0, "step": 5323 }, { "epoch": 0.6295376611091403, "grad_norm": 0.1313406527042389, "learning_rate": 2.322294092955598e-05, "loss": 0.3226, "num_tokens": 3372070688.0, "step": 5324 }, { "epoch": 0.6296559063497694, "grad_norm": 0.12611471116542816, "learning_rate": 2.3213302927891945e-05, "loss": 0.2916, "num_tokens": 3372705601.0, "step": 5325 }, { "epoch": 0.6297741515903985, "grad_norm": 0.13384032249450684, "learning_rate": 2.320366636168582e-05, "loss": 0.3346, "num_tokens": 3373344210.0, "step": 5326 }, { "epoch": 0.6298923968310276, "grad_norm": 0.1267920583486557, "learning_rate": 2.319403123235105e-05, "loss": 0.2875, "num_tokens": 3373981947.0, "step": 5327 }, { "epoch": 0.6300106420716566, "grad_norm": 0.1360609382390976, "learning_rate": 2.3184397541300843e-05, "loss": 0.3243, "num_tokens": 3374618023.0, "step": 5328 }, { "epoch": 0.6301288873122857, "grad_norm": 0.14092853665351868, "learning_rate": 2.3174765289948226e-05, "loss": 0.3594, "num_tokens": 3375250046.0, "step": 5329 }, { "epoch": 0.6302471325529148, "grad_norm": 0.13449619710445404, "learning_rate": 2.316513447970602e-05, "loss": 0.3552, "num_tokens": 3375884934.0, "step": 5330 }, { "epoch": 0.6303653777935438, "grad_norm": 0.13008038699626923, "learning_rate": 2.3155505111986784e-05, "loss": 0.3067, "num_tokens": 3376516245.0, "step": 5331 }, { "epoch": 0.6304836230341728, "grad_norm": 0.13644056022167206, "learning_rate": 2.3145877188202937e-05, "loss": 0.3463, "num_tokens": 3377154353.0, "step": 5332 }, { "epoch": 0.6306018682748019, "grad_norm": 0.13304655253887177, "learning_rate": 2.3136250709766603e-05, "loss": 0.3422, "num_tokens": 3377790652.0, "step": 5333 }, { "epoch": 0.630720113515431, "grad_norm": 0.12712746858596802, "learning_rate": 2.312662567808979e-05, "loss": 0.315, "num_tokens": 3378426936.0, "step": 5334 }, { "epoch": 0.6308383587560601, "grad_norm": 0.13543708622455597, "learning_rate": 2.3117002094584213e-05, "loss": 0.3201, "num_tokens": 3379065837.0, "step": 5335 }, { "epoch": 0.6309566039966892, "grad_norm": 0.14401565492153168, "learning_rate": 2.3107379960661415e-05, "loss": 0.33, "num_tokens": 3379698858.0, "step": 5336 }, { "epoch": 0.6310748492373182, "grad_norm": 0.14143580198287964, "learning_rate": 2.3097759277732707e-05, "loss": 0.3224, "num_tokens": 3380336733.0, "step": 5337 }, { "epoch": 0.6311930944779472, "grad_norm": 0.13606590032577515, "learning_rate": 2.3088140047209205e-05, "loss": 0.3514, "num_tokens": 3380971182.0, "step": 5338 }, { "epoch": 0.6313113397185763, "grad_norm": 0.14229008555412292, "learning_rate": 2.3078522270501818e-05, "loss": 0.3792, "num_tokens": 3381609920.0, "step": 5339 }, { "epoch": 0.6314295849592054, "grad_norm": 0.12557902932167053, "learning_rate": 2.3068905949021185e-05, "loss": 0.3326, "num_tokens": 3382207524.0, "step": 5340 }, { "epoch": 0.6315478301998344, "grad_norm": 0.13120096921920776, "learning_rate": 2.3059291084177833e-05, "loss": 0.33, "num_tokens": 3382841902.0, "step": 5341 }, { "epoch": 0.6316660754404635, "grad_norm": 0.12748046219348907, "learning_rate": 2.3049677677381963e-05, "loss": 0.3408, "num_tokens": 3383481118.0, "step": 5342 }, { "epoch": 0.6317843206810926, "grad_norm": 0.12850528955459595, "learning_rate": 2.3040065730043654e-05, "loss": 0.3251, "num_tokens": 3384116644.0, "step": 5343 }, { "epoch": 0.6319025659217217, "grad_norm": 0.15428079664707184, "learning_rate": 2.303045524357271e-05, "loss": 0.3435, "num_tokens": 3384716092.0, "step": 5344 }, { "epoch": 0.6320208111623508, "grad_norm": 0.13495805859565735, "learning_rate": 2.3020846219378746e-05, "loss": 0.3282, "num_tokens": 3385351046.0, "step": 5345 }, { "epoch": 0.6321390564029797, "grad_norm": 0.12748855352401733, "learning_rate": 2.301123865887117e-05, "loss": 0.3281, "num_tokens": 3385989344.0, "step": 5346 }, { "epoch": 0.6322573016436088, "grad_norm": 0.13081611692905426, "learning_rate": 2.3001632563459152e-05, "loss": 0.3282, "num_tokens": 3386625700.0, "step": 5347 }, { "epoch": 0.6323755468842379, "grad_norm": 0.1259125918149948, "learning_rate": 2.2992027934551678e-05, "loss": 0.3018, "num_tokens": 3387255680.0, "step": 5348 }, { "epoch": 0.632493792124867, "grad_norm": 0.12850321829319, "learning_rate": 2.2982424773557482e-05, "loss": 0.299, "num_tokens": 3387884750.0, "step": 5349 }, { "epoch": 0.6326120373654961, "grad_norm": 0.12588472664356232, "learning_rate": 2.2972823081885115e-05, "loss": 0.287, "num_tokens": 3388519767.0, "step": 5350 }, { "epoch": 0.6327302826061251, "grad_norm": 0.13839486241340637, "learning_rate": 2.2963222860942896e-05, "loss": 0.3554, "num_tokens": 3389131956.0, "step": 5351 }, { "epoch": 0.6328485278467542, "grad_norm": 0.12286242842674255, "learning_rate": 2.295362411213892e-05, "loss": 0.332, "num_tokens": 3389769982.0, "step": 5352 }, { "epoch": 0.6329667730873832, "grad_norm": 0.12365791201591492, "learning_rate": 2.2944026836881102e-05, "loss": 0.3433, "num_tokens": 3390400309.0, "step": 5353 }, { "epoch": 0.6330850183280123, "grad_norm": 0.12575611472129822, "learning_rate": 2.29344310365771e-05, "loss": 0.3255, "num_tokens": 3391036531.0, "step": 5354 }, { "epoch": 0.6332032635686413, "grad_norm": 0.13314542174339294, "learning_rate": 2.292483671263437e-05, "loss": 0.352, "num_tokens": 3391674508.0, "step": 5355 }, { "epoch": 0.6333215088092704, "grad_norm": 0.1295817643404007, "learning_rate": 2.291524386646016e-05, "loss": 0.3078, "num_tokens": 3392285904.0, "step": 5356 }, { "epoch": 0.6334397540498995, "grad_norm": 0.13532960414886475, "learning_rate": 2.290565249946149e-05, "loss": 0.346, "num_tokens": 3392920203.0, "step": 5357 }, { "epoch": 0.6335579992905286, "grad_norm": 0.12982763350009918, "learning_rate": 2.289606261304518e-05, "loss": 0.3541, "num_tokens": 3393557899.0, "step": 5358 }, { "epoch": 0.6336762445311577, "grad_norm": 0.1215788796544075, "learning_rate": 2.28864742086178e-05, "loss": 0.331, "num_tokens": 3394192063.0, "step": 5359 }, { "epoch": 0.6337944897717867, "grad_norm": 0.13257426023483276, "learning_rate": 2.2876887287585757e-05, "loss": 0.3476, "num_tokens": 3394826850.0, "step": 5360 }, { "epoch": 0.6339127350124157, "grad_norm": 0.13627028465270996, "learning_rate": 2.2867301851355156e-05, "loss": 0.3427, "num_tokens": 3395464248.0, "step": 5361 }, { "epoch": 0.6340309802530448, "grad_norm": 0.11298656463623047, "learning_rate": 2.2857717901331997e-05, "loss": 0.2966, "num_tokens": 3396093297.0, "step": 5362 }, { "epoch": 0.6341492254936739, "grad_norm": 0.13628485798835754, "learning_rate": 2.2848135438921948e-05, "loss": 0.3514, "num_tokens": 3396724688.0, "step": 5363 }, { "epoch": 0.6342674707343029, "grad_norm": 0.1329411119222641, "learning_rate": 2.2838554465530534e-05, "loss": 0.3384, "num_tokens": 3397357855.0, "step": 5364 }, { "epoch": 0.634385715974932, "grad_norm": 0.13139039278030396, "learning_rate": 2.2828974982563037e-05, "loss": 0.3571, "num_tokens": 3397994177.0, "step": 5365 }, { "epoch": 0.6345039612155611, "grad_norm": 0.13811537623405457, "learning_rate": 2.281939699142452e-05, "loss": 0.3366, "num_tokens": 3398630026.0, "step": 5366 }, { "epoch": 0.6346222064561902, "grad_norm": 0.14048579335212708, "learning_rate": 2.2809820493519835e-05, "loss": 0.3187, "num_tokens": 3399237410.0, "step": 5367 }, { "epoch": 0.6347404516968193, "grad_norm": 0.1260146051645279, "learning_rate": 2.2800245490253592e-05, "loss": 0.2791, "num_tokens": 3399863497.0, "step": 5368 }, { "epoch": 0.6348586969374482, "grad_norm": 0.14695808291435242, "learning_rate": 2.2790671983030222e-05, "loss": 0.335, "num_tokens": 3400495731.0, "step": 5369 }, { "epoch": 0.6349769421780773, "grad_norm": 0.13401566445827484, "learning_rate": 2.2781099973253886e-05, "loss": 0.3555, "num_tokens": 3401134570.0, "step": 5370 }, { "epoch": 0.6350951874187064, "grad_norm": 0.1342829316854477, "learning_rate": 2.277152946232859e-05, "loss": 0.351, "num_tokens": 3401769654.0, "step": 5371 }, { "epoch": 0.6352134326593355, "grad_norm": 0.1298041194677353, "learning_rate": 2.2761960451658043e-05, "loss": 0.3274, "num_tokens": 3402406406.0, "step": 5372 }, { "epoch": 0.6353316778999645, "grad_norm": 0.13673308491706848, "learning_rate": 2.2752392942645793e-05, "loss": 0.322, "num_tokens": 3403044628.0, "step": 5373 }, { "epoch": 0.6354499231405936, "grad_norm": 0.12512601912021637, "learning_rate": 2.2742826936695165e-05, "loss": 0.3188, "num_tokens": 3403682392.0, "step": 5374 }, { "epoch": 0.6355681683812227, "grad_norm": 0.1172887533903122, "learning_rate": 2.2733262435209212e-05, "loss": 0.305, "num_tokens": 3404321775.0, "step": 5375 }, { "epoch": 0.6356864136218517, "grad_norm": 0.1289680451154709, "learning_rate": 2.2723699439590836e-05, "loss": 0.3649, "num_tokens": 3404961533.0, "step": 5376 }, { "epoch": 0.6358046588624808, "grad_norm": 0.13607698678970337, "learning_rate": 2.271413795124266e-05, "loss": 0.3709, "num_tokens": 3405596723.0, "step": 5377 }, { "epoch": 0.6359229041031098, "grad_norm": 0.13490577042102814, "learning_rate": 2.270457797156712e-05, "loss": 0.3372, "num_tokens": 3406235649.0, "step": 5378 }, { "epoch": 0.6360411493437389, "grad_norm": 0.13532541692256927, "learning_rate": 2.2695019501966406e-05, "loss": 0.3168, "num_tokens": 3406872089.0, "step": 5379 }, { "epoch": 0.636159394584368, "grad_norm": 0.13341546058654785, "learning_rate": 2.2685462543842516e-05, "loss": 0.3411, "num_tokens": 3407508760.0, "step": 5380 }, { "epoch": 0.6362776398249971, "grad_norm": 0.13713566958904266, "learning_rate": 2.2675907098597217e-05, "loss": 0.3213, "num_tokens": 3408147716.0, "step": 5381 }, { "epoch": 0.636395885065626, "grad_norm": 0.13877874612808228, "learning_rate": 2.266635316763202e-05, "loss": 0.3397, "num_tokens": 3408778871.0, "step": 5382 }, { "epoch": 0.6365141303062551, "grad_norm": 0.1285872757434845, "learning_rate": 2.2656800752348274e-05, "loss": 0.3094, "num_tokens": 3409410578.0, "step": 5383 }, { "epoch": 0.6366323755468842, "grad_norm": 0.1286468803882599, "learning_rate": 2.2647249854147055e-05, "loss": 0.3264, "num_tokens": 3410043936.0, "step": 5384 }, { "epoch": 0.6367506207875133, "grad_norm": 0.1247689425945282, "learning_rate": 2.2637700474429247e-05, "loss": 0.3252, "num_tokens": 3410679064.0, "step": 5385 }, { "epoch": 0.6368688660281424, "grad_norm": 0.12416106462478638, "learning_rate": 2.262815261459549e-05, "loss": 0.3193, "num_tokens": 3411310734.0, "step": 5386 }, { "epoch": 0.6369871112687714, "grad_norm": 0.13873574137687683, "learning_rate": 2.2618606276046208e-05, "loss": 0.3479, "num_tokens": 3411947092.0, "step": 5387 }, { "epoch": 0.6371053565094005, "grad_norm": 0.13913460075855255, "learning_rate": 2.260906146018162e-05, "loss": 0.3602, "num_tokens": 3412583836.0, "step": 5388 }, { "epoch": 0.6372236017500296, "grad_norm": 0.12325955182313919, "learning_rate": 2.2599518168401675e-05, "loss": 0.336, "num_tokens": 3413217681.0, "step": 5389 }, { "epoch": 0.6373418469906587, "grad_norm": 0.1299816370010376, "learning_rate": 2.2589976402106178e-05, "loss": 0.2807, "num_tokens": 3413839120.0, "step": 5390 }, { "epoch": 0.6374600922312877, "grad_norm": 0.129386767745018, "learning_rate": 2.2580436162694612e-05, "loss": 0.3056, "num_tokens": 3414478401.0, "step": 5391 }, { "epoch": 0.6375783374719167, "grad_norm": 0.12285500019788742, "learning_rate": 2.2570897451566328e-05, "loss": 0.3044, "num_tokens": 3415113430.0, "step": 5392 }, { "epoch": 0.6376965827125458, "grad_norm": 0.1385764181613922, "learning_rate": 2.256136027012037e-05, "loss": 0.3597, "num_tokens": 3415742505.0, "step": 5393 }, { "epoch": 0.6378148279531749, "grad_norm": 0.13005103170871735, "learning_rate": 2.2551824619755636e-05, "loss": 0.3427, "num_tokens": 3416379373.0, "step": 5394 }, { "epoch": 0.637933073193804, "grad_norm": 0.1396203190088272, "learning_rate": 2.254229050187074e-05, "loss": 0.3794, "num_tokens": 3417016296.0, "step": 5395 }, { "epoch": 0.638051318434433, "grad_norm": 0.1284061074256897, "learning_rate": 2.2532757917864093e-05, "loss": 0.3547, "num_tokens": 3417650054.0, "step": 5396 }, { "epoch": 0.6381695636750621, "grad_norm": 0.12097461521625519, "learning_rate": 2.2523226869133896e-05, "loss": 0.3199, "num_tokens": 3418288322.0, "step": 5397 }, { "epoch": 0.6382878089156911, "grad_norm": 0.1224663108587265, "learning_rate": 2.2513697357078095e-05, "loss": 0.3053, "num_tokens": 3418888032.0, "step": 5398 }, { "epoch": 0.6384060541563202, "grad_norm": 0.12895157933235168, "learning_rate": 2.250416938309443e-05, "loss": 0.3122, "num_tokens": 3419525745.0, "step": 5399 }, { "epoch": 0.6385242993969493, "grad_norm": 0.12950937449932098, "learning_rate": 2.249464294858041e-05, "loss": 0.3408, "num_tokens": 3420148012.0, "step": 5400 }, { "epoch": 0.6386425446375783, "grad_norm": 0.13379351794719696, "learning_rate": 2.2485118054933317e-05, "loss": 0.3641, "num_tokens": 3420783116.0, "step": 5401 }, { "epoch": 0.6387607898782074, "grad_norm": 0.128866046667099, "learning_rate": 2.247559470355022e-05, "loss": 0.3271, "num_tokens": 3421421510.0, "step": 5402 }, { "epoch": 0.6388790351188365, "grad_norm": 0.13917915523052216, "learning_rate": 2.246607289582794e-05, "loss": 0.3529, "num_tokens": 3422058986.0, "step": 5403 }, { "epoch": 0.6389972803594656, "grad_norm": 0.1272004246711731, "learning_rate": 2.245655263316309e-05, "loss": 0.3565, "num_tokens": 3422693773.0, "step": 5404 }, { "epoch": 0.6391155256000945, "grad_norm": 0.12785197794437408, "learning_rate": 2.244703391695204e-05, "loss": 0.3094, "num_tokens": 3423330844.0, "step": 5405 }, { "epoch": 0.6392337708407236, "grad_norm": 0.12395614385604858, "learning_rate": 2.243751674859095e-05, "loss": 0.3358, "num_tokens": 3423967869.0, "step": 5406 }, { "epoch": 0.6393520160813527, "grad_norm": 0.1327619105577469, "learning_rate": 2.242800112947574e-05, "loss": 0.3714, "num_tokens": 3424601316.0, "step": 5407 }, { "epoch": 0.6394702613219818, "grad_norm": 0.13251009583473206, "learning_rate": 2.24184870610021e-05, "loss": 0.3235, "num_tokens": 3425234073.0, "step": 5408 }, { "epoch": 0.6395885065626109, "grad_norm": 0.13117139041423798, "learning_rate": 2.2408974544565522e-05, "loss": 0.3204, "num_tokens": 3425860660.0, "step": 5409 }, { "epoch": 0.6397067518032399, "grad_norm": 0.13968917727470398, "learning_rate": 2.239946358156123e-05, "loss": 0.3285, "num_tokens": 3426479911.0, "step": 5410 }, { "epoch": 0.639824997043869, "grad_norm": 0.12764082849025726, "learning_rate": 2.2389954173384258e-05, "loss": 0.2946, "num_tokens": 3427110910.0, "step": 5411 }, { "epoch": 0.6399432422844981, "grad_norm": 0.1339077651500702, "learning_rate": 2.238044632142936e-05, "loss": 0.356, "num_tokens": 3427746998.0, "step": 5412 }, { "epoch": 0.6400614875251271, "grad_norm": 0.12758080661296844, "learning_rate": 2.2370940027091128e-05, "loss": 0.345, "num_tokens": 3428381267.0, "step": 5413 }, { "epoch": 0.6401797327657561, "grad_norm": 0.12687821686267853, "learning_rate": 2.236143529176387e-05, "loss": 0.3002, "num_tokens": 3429014087.0, "step": 5414 }, { "epoch": 0.6402979780063852, "grad_norm": 0.13295425474643707, "learning_rate": 2.2351932116841696e-05, "loss": 0.3588, "num_tokens": 3429651937.0, "step": 5415 }, { "epoch": 0.6404162232470143, "grad_norm": 0.13249534368515015, "learning_rate": 2.234243050371848e-05, "loss": 0.3229, "num_tokens": 3430286090.0, "step": 5416 }, { "epoch": 0.6405344684876434, "grad_norm": 0.13271737098693848, "learning_rate": 2.2332930453787862e-05, "loss": 0.3203, "num_tokens": 3430921289.0, "step": 5417 }, { "epoch": 0.6406527137282725, "grad_norm": 0.13227687776088715, "learning_rate": 2.232343196844326e-05, "loss": 0.3481, "num_tokens": 3431558177.0, "step": 5418 }, { "epoch": 0.6407709589689015, "grad_norm": 0.13221731781959534, "learning_rate": 2.2313935049077854e-05, "loss": 0.3475, "num_tokens": 3432192132.0, "step": 5419 }, { "epoch": 0.6408892042095305, "grad_norm": 0.12796233594417572, "learning_rate": 2.230443969708461e-05, "loss": 0.2988, "num_tokens": 3432827420.0, "step": 5420 }, { "epoch": 0.6410074494501596, "grad_norm": 0.12868885695934296, "learning_rate": 2.2294945913856226e-05, "loss": 0.3051, "num_tokens": 3433458284.0, "step": 5421 }, { "epoch": 0.6411256946907887, "grad_norm": 0.13334982097148895, "learning_rate": 2.228545370078523e-05, "loss": 0.3418, "num_tokens": 3434091883.0, "step": 5422 }, { "epoch": 0.6412439399314177, "grad_norm": 0.12433236092329025, "learning_rate": 2.2275963059263863e-05, "loss": 0.342, "num_tokens": 3434730408.0, "step": 5423 }, { "epoch": 0.6413621851720468, "grad_norm": 0.1271200180053711, "learning_rate": 2.226647399068417e-05, "loss": 0.3384, "num_tokens": 3435360585.0, "step": 5424 }, { "epoch": 0.6414804304126759, "grad_norm": 0.14178764820098877, "learning_rate": 2.2256986496437952e-05, "loss": 0.3363, "num_tokens": 3435993287.0, "step": 5425 }, { "epoch": 0.641598675653305, "grad_norm": 0.12296617031097412, "learning_rate": 2.2247500577916787e-05, "loss": 0.3011, "num_tokens": 3436630077.0, "step": 5426 }, { "epoch": 0.6417169208939341, "grad_norm": 0.15289810299873352, "learning_rate": 2.2238016236512006e-05, "loss": 0.3623, "num_tokens": 3437269096.0, "step": 5427 }, { "epoch": 0.641835166134563, "grad_norm": 0.1350374072790146, "learning_rate": 2.222853347361472e-05, "loss": 0.3101, "num_tokens": 3437872169.0, "step": 5428 }, { "epoch": 0.6419534113751921, "grad_norm": 0.1442604809999466, "learning_rate": 2.2219052290615823e-05, "loss": 0.3187, "num_tokens": 3438504590.0, "step": 5429 }, { "epoch": 0.6420716566158212, "grad_norm": 0.14575369656085968, "learning_rate": 2.2209572688905945e-05, "loss": 0.3347, "num_tokens": 3439141092.0, "step": 5430 }, { "epoch": 0.6421899018564503, "grad_norm": 0.13062600791454315, "learning_rate": 2.2200094669875493e-05, "loss": 0.3266, "num_tokens": 3439775141.0, "step": 5431 }, { "epoch": 0.6423081470970794, "grad_norm": 0.122803695499897, "learning_rate": 2.2190618234914684e-05, "loss": 0.3234, "num_tokens": 3440406802.0, "step": 5432 }, { "epoch": 0.6424263923377084, "grad_norm": 0.14033754169940948, "learning_rate": 2.2181143385413433e-05, "loss": 0.3186, "num_tokens": 3441041337.0, "step": 5433 }, { "epoch": 0.6425446375783375, "grad_norm": 0.14220628142356873, "learning_rate": 2.2171670122761486e-05, "loss": 0.3444, "num_tokens": 3441674791.0, "step": 5434 }, { "epoch": 0.6426628828189666, "grad_norm": 0.13696977496147156, "learning_rate": 2.2162198448348306e-05, "loss": 0.3111, "num_tokens": 3442305343.0, "step": 5435 }, { "epoch": 0.6427811280595956, "grad_norm": 0.12857097387313843, "learning_rate": 2.2152728363563155e-05, "loss": 0.3092, "num_tokens": 3442944116.0, "step": 5436 }, { "epoch": 0.6428993733002246, "grad_norm": 0.13059374690055847, "learning_rate": 2.214325986979505e-05, "loss": 0.3149, "num_tokens": 3443580977.0, "step": 5437 }, { "epoch": 0.6430176185408537, "grad_norm": 0.13515152037143707, "learning_rate": 2.213379296843277e-05, "loss": 0.3273, "num_tokens": 3444216397.0, "step": 5438 }, { "epoch": 0.6431358637814828, "grad_norm": 0.1287073940038681, "learning_rate": 2.212432766086489e-05, "loss": 0.3115, "num_tokens": 3444854105.0, "step": 5439 }, { "epoch": 0.6432541090221119, "grad_norm": 0.13758540153503418, "learning_rate": 2.2114863948479694e-05, "loss": 0.3481, "num_tokens": 3445484622.0, "step": 5440 }, { "epoch": 0.643372354262741, "grad_norm": 0.1306706666946411, "learning_rate": 2.2105401832665306e-05, "loss": 0.3152, "num_tokens": 3446069945.0, "step": 5441 }, { "epoch": 0.64349059950337, "grad_norm": 0.12666574120521545, "learning_rate": 2.2095941314809538e-05, "loss": 0.3352, "num_tokens": 3446706132.0, "step": 5442 }, { "epoch": 0.643608844743999, "grad_norm": 0.1301783323287964, "learning_rate": 2.208648239630003e-05, "loss": 0.3182, "num_tokens": 3447343208.0, "step": 5443 }, { "epoch": 0.6437270899846281, "grad_norm": 0.13590113818645477, "learning_rate": 2.207702507852416e-05, "loss": 0.3207, "num_tokens": 3447976007.0, "step": 5444 }, { "epoch": 0.6438453352252572, "grad_norm": 0.1447359323501587, "learning_rate": 2.206756936286906e-05, "loss": 0.3303, "num_tokens": 3448610455.0, "step": 5445 }, { "epoch": 0.6439635804658862, "grad_norm": 0.13043875992298126, "learning_rate": 2.2058115250721663e-05, "loss": 0.3416, "num_tokens": 3449248080.0, "step": 5446 }, { "epoch": 0.6440818257065153, "grad_norm": 0.13786481320858002, "learning_rate": 2.2048662743468628e-05, "loss": 0.3686, "num_tokens": 3449878414.0, "step": 5447 }, { "epoch": 0.6442000709471444, "grad_norm": 0.12482521682977676, "learning_rate": 2.2039211842496402e-05, "loss": 0.3204, "num_tokens": 3450515210.0, "step": 5448 }, { "epoch": 0.6443183161877735, "grad_norm": 0.132771834731102, "learning_rate": 2.202976254919119e-05, "loss": 0.3399, "num_tokens": 3451153142.0, "step": 5449 }, { "epoch": 0.6444365614284026, "grad_norm": 0.12618611752986908, "learning_rate": 2.2020314864938968e-05, "loss": 0.3062, "num_tokens": 3451790389.0, "step": 5450 }, { "epoch": 0.6445548066690315, "grad_norm": 0.14919303357601166, "learning_rate": 2.2010868791125448e-05, "loss": 0.3504, "num_tokens": 3452427053.0, "step": 5451 }, { "epoch": 0.6446730519096606, "grad_norm": 0.1292175054550171, "learning_rate": 2.200142432913615e-05, "loss": 0.35, "num_tokens": 3453060630.0, "step": 5452 }, { "epoch": 0.6447912971502897, "grad_norm": 0.12475311011075974, "learning_rate": 2.199198148035634e-05, "loss": 0.3253, "num_tokens": 3453696824.0, "step": 5453 }, { "epoch": 0.6449095423909188, "grad_norm": 0.1288503110408783, "learning_rate": 2.198254024617101e-05, "loss": 0.3281, "num_tokens": 3454331554.0, "step": 5454 }, { "epoch": 0.6450277876315478, "grad_norm": 0.13412141799926758, "learning_rate": 2.197310062796498e-05, "loss": 0.3252, "num_tokens": 3454966768.0, "step": 5455 }, { "epoch": 0.6451460328721769, "grad_norm": 0.1349751353263855, "learning_rate": 2.1963662627122778e-05, "loss": 0.3354, "num_tokens": 3455600209.0, "step": 5456 }, { "epoch": 0.645264278112806, "grad_norm": 0.13672010600566864, "learning_rate": 2.1954226245028734e-05, "loss": 0.3564, "num_tokens": 3456233878.0, "step": 5457 }, { "epoch": 0.645382523353435, "grad_norm": 0.12978118658065796, "learning_rate": 2.1944791483066907e-05, "loss": 0.3331, "num_tokens": 3456869479.0, "step": 5458 }, { "epoch": 0.6455007685940641, "grad_norm": 0.1272454410791397, "learning_rate": 2.193535834262114e-05, "loss": 0.3182, "num_tokens": 3457501917.0, "step": 5459 }, { "epoch": 0.6456190138346931, "grad_norm": 0.13822200894355774, "learning_rate": 2.1925926825075052e-05, "loss": 0.3145, "num_tokens": 3458132981.0, "step": 5460 }, { "epoch": 0.6457372590753222, "grad_norm": 0.1207205131649971, "learning_rate": 2.191649693181198e-05, "loss": 0.3176, "num_tokens": 3458760612.0, "step": 5461 }, { "epoch": 0.6458555043159513, "grad_norm": 0.1404033750295639, "learning_rate": 2.190706866421506e-05, "loss": 0.3852, "num_tokens": 3459397155.0, "step": 5462 }, { "epoch": 0.6459737495565804, "grad_norm": 0.13103969395160675, "learning_rate": 2.189764202366717e-05, "loss": 0.338, "num_tokens": 3460036701.0, "step": 5463 }, { "epoch": 0.6460919947972094, "grad_norm": 0.13459327816963196, "learning_rate": 2.1888217011550976e-05, "loss": 0.3637, "num_tokens": 3460670481.0, "step": 5464 }, { "epoch": 0.6462102400378384, "grad_norm": 0.14068764448165894, "learning_rate": 2.1878793629248863e-05, "loss": 0.3352, "num_tokens": 3461306809.0, "step": 5465 }, { "epoch": 0.6463284852784675, "grad_norm": 0.1232738122344017, "learning_rate": 2.1869371878143002e-05, "loss": 0.3272, "num_tokens": 3461941912.0, "step": 5466 }, { "epoch": 0.6464467305190966, "grad_norm": 0.13584744930267334, "learning_rate": 2.1859951759615346e-05, "loss": 0.3146, "num_tokens": 3462579022.0, "step": 5467 }, { "epoch": 0.6465649757597257, "grad_norm": 0.1360841691493988, "learning_rate": 2.1850533275047553e-05, "loss": 0.3295, "num_tokens": 3463213444.0, "step": 5468 }, { "epoch": 0.6466832210003547, "grad_norm": 0.14720581471920013, "learning_rate": 2.1841116425821105e-05, "loss": 0.343, "num_tokens": 3463849444.0, "step": 5469 }, { "epoch": 0.6468014662409838, "grad_norm": 0.13808144629001617, "learning_rate": 2.183170121331718e-05, "loss": 0.3201, "num_tokens": 3464486013.0, "step": 5470 }, { "epoch": 0.6469197114816129, "grad_norm": 0.1270453780889511, "learning_rate": 2.182228763891678e-05, "loss": 0.3324, "num_tokens": 3465120068.0, "step": 5471 }, { "epoch": 0.647037956722242, "grad_norm": 0.14067575335502625, "learning_rate": 2.1812875704000617e-05, "loss": 0.3232, "num_tokens": 3465755727.0, "step": 5472 }, { "epoch": 0.647156201962871, "grad_norm": 0.15171770751476288, "learning_rate": 2.1803465409949177e-05, "loss": 0.3494, "num_tokens": 3466388407.0, "step": 5473 }, { "epoch": 0.6472744472035, "grad_norm": 0.12555240094661713, "learning_rate": 2.179405675814273e-05, "loss": 0.2968, "num_tokens": 3467024528.0, "step": 5474 }, { "epoch": 0.6473926924441291, "grad_norm": 0.13519597053527832, "learning_rate": 2.1784649749961266e-05, "loss": 0.3127, "num_tokens": 3467655667.0, "step": 5475 }, { "epoch": 0.6475109376847582, "grad_norm": 0.1270657628774643, "learning_rate": 2.1775244386784563e-05, "loss": 0.3157, "num_tokens": 3468287981.0, "step": 5476 }, { "epoch": 0.6476291829253873, "grad_norm": 0.13910098373889923, "learning_rate": 2.1765840669992135e-05, "loss": 0.3151, "num_tokens": 3468922882.0, "step": 5477 }, { "epoch": 0.6477474281660163, "grad_norm": 0.1385640650987625, "learning_rate": 2.1756438600963284e-05, "loss": 0.3524, "num_tokens": 3469557453.0, "step": 5478 }, { "epoch": 0.6478656734066454, "grad_norm": 0.13423581421375275, "learning_rate": 2.1747038181077024e-05, "loss": 0.3356, "num_tokens": 3470193698.0, "step": 5479 }, { "epoch": 0.6479839186472744, "grad_norm": 0.13438771665096283, "learning_rate": 2.173763941171219e-05, "loss": 0.3629, "num_tokens": 3470800010.0, "step": 5480 }, { "epoch": 0.6481021638879035, "grad_norm": 0.12098889797925949, "learning_rate": 2.1728242294247324e-05, "loss": 0.2831, "num_tokens": 3471435549.0, "step": 5481 }, { "epoch": 0.6482204091285326, "grad_norm": 0.1284995675086975, "learning_rate": 2.1718846830060733e-05, "loss": 0.3247, "num_tokens": 3472071548.0, "step": 5482 }, { "epoch": 0.6483386543691616, "grad_norm": 0.1371462643146515, "learning_rate": 2.170945302053051e-05, "loss": 0.3253, "num_tokens": 3472701400.0, "step": 5483 }, { "epoch": 0.6484568996097907, "grad_norm": 0.13030193746089935, "learning_rate": 2.1700060867034468e-05, "loss": 0.329, "num_tokens": 3473336509.0, "step": 5484 }, { "epoch": 0.6485751448504198, "grad_norm": 0.12214719504117966, "learning_rate": 2.1690670370950232e-05, "loss": 0.3001, "num_tokens": 3473964771.0, "step": 5485 }, { "epoch": 0.6486933900910489, "grad_norm": 0.134231299161911, "learning_rate": 2.1681281533655094e-05, "loss": 0.3467, "num_tokens": 3474604317.0, "step": 5486 }, { "epoch": 0.6488116353316778, "grad_norm": 0.12819795310497284, "learning_rate": 2.167189435652618e-05, "loss": 0.3263, "num_tokens": 3475239632.0, "step": 5487 }, { "epoch": 0.6489298805723069, "grad_norm": 0.12956936657428741, "learning_rate": 2.1662508840940375e-05, "loss": 0.336, "num_tokens": 3475874869.0, "step": 5488 }, { "epoch": 0.649048125812936, "grad_norm": 3.595876455307007, "learning_rate": 2.1653124988274265e-05, "loss": 0.4146, "num_tokens": 3476475149.0, "step": 5489 }, { "epoch": 0.6491663710535651, "grad_norm": 0.1323203593492508, "learning_rate": 2.164374279990422e-05, "loss": 0.3148, "num_tokens": 3477114356.0, "step": 5490 }, { "epoch": 0.6492846162941942, "grad_norm": 0.12966802716255188, "learning_rate": 2.163436227720637e-05, "loss": 0.317, "num_tokens": 3477747144.0, "step": 5491 }, { "epoch": 0.6494028615348232, "grad_norm": 0.1247754767537117, "learning_rate": 2.1624983421556606e-05, "loss": 0.3193, "num_tokens": 3478383881.0, "step": 5492 }, { "epoch": 0.6495211067754523, "grad_norm": 0.1527627408504486, "learning_rate": 2.1615606234330567e-05, "loss": 0.3804, "num_tokens": 3479018108.0, "step": 5493 }, { "epoch": 0.6496393520160814, "grad_norm": 0.12834769487380981, "learning_rate": 2.1606230716903633e-05, "loss": 0.3225, "num_tokens": 3479657761.0, "step": 5494 }, { "epoch": 0.6497575972567105, "grad_norm": 0.1260865479707718, "learning_rate": 2.1596856870650953e-05, "loss": 0.3122, "num_tokens": 3480293778.0, "step": 5495 }, { "epoch": 0.6498758424973394, "grad_norm": 0.13541975617408752, "learning_rate": 2.1587484696947433e-05, "loss": 0.3508, "num_tokens": 3480933286.0, "step": 5496 }, { "epoch": 0.6499940877379685, "grad_norm": 0.13697515428066254, "learning_rate": 2.157811419716775e-05, "loss": 0.3558, "num_tokens": 3481572221.0, "step": 5497 }, { "epoch": 0.6501123329785976, "grad_norm": 0.12774153053760529, "learning_rate": 2.1568745372686303e-05, "loss": 0.2993, "num_tokens": 3482206729.0, "step": 5498 }, { "epoch": 0.6502305782192267, "grad_norm": 0.12636302411556244, "learning_rate": 2.1559378224877248e-05, "loss": 0.3357, "num_tokens": 3482842818.0, "step": 5499 }, { "epoch": 0.6503488234598558, "grad_norm": 0.12018145620822906, "learning_rate": 2.155001275511451e-05, "loss": 0.325, "num_tokens": 3483472415.0, "step": 5500 }, { "epoch": 0.6504670687004848, "grad_norm": 0.12794066965579987, "learning_rate": 2.154064896477178e-05, "loss": 0.3267, "num_tokens": 3484102729.0, "step": 5501 }, { "epoch": 0.6505853139411139, "grad_norm": 0.12295959144830704, "learning_rate": 2.153128685522247e-05, "loss": 0.3107, "num_tokens": 3484739285.0, "step": 5502 }, { "epoch": 0.6507035591817429, "grad_norm": 0.13873626291751862, "learning_rate": 2.1521926427839756e-05, "loss": 0.3713, "num_tokens": 3485372016.0, "step": 5503 }, { "epoch": 0.650821804422372, "grad_norm": 0.12437690049409866, "learning_rate": 2.15125676839966e-05, "loss": 0.3181, "num_tokens": 3486004121.0, "step": 5504 }, { "epoch": 0.6509400496630011, "grad_norm": 0.12655462324619293, "learning_rate": 2.150321062506565e-05, "loss": 0.3358, "num_tokens": 3486643200.0, "step": 5505 }, { "epoch": 0.6510582949036301, "grad_norm": 0.13139352202415466, "learning_rate": 2.1493855252419404e-05, "loss": 0.3251, "num_tokens": 3487280427.0, "step": 5506 }, { "epoch": 0.6511765401442592, "grad_norm": 0.12303803861141205, "learning_rate": 2.148450156743e-05, "loss": 0.3192, "num_tokens": 3487919233.0, "step": 5507 }, { "epoch": 0.6512947853848883, "grad_norm": 0.13335582613945007, "learning_rate": 2.147514957146941e-05, "loss": 0.3452, "num_tokens": 3488550011.0, "step": 5508 }, { "epoch": 0.6514130306255174, "grad_norm": 0.13508589565753937, "learning_rate": 2.1465799265909325e-05, "loss": 0.3465, "num_tokens": 3489171609.0, "step": 5509 }, { "epoch": 0.6515312758661463, "grad_norm": 0.1267634928226471, "learning_rate": 2.145645065212121e-05, "loss": 0.327, "num_tokens": 3489808377.0, "step": 5510 }, { "epoch": 0.6516495211067754, "grad_norm": 0.1308528482913971, "learning_rate": 2.1447103731476252e-05, "loss": 0.335, "num_tokens": 3490443841.0, "step": 5511 }, { "epoch": 0.6517677663474045, "grad_norm": 0.1345646232366562, "learning_rate": 2.1437758505345398e-05, "loss": 0.3364, "num_tokens": 3491080059.0, "step": 5512 }, { "epoch": 0.6518860115880336, "grad_norm": 0.122528076171875, "learning_rate": 2.1428414975099377e-05, "loss": 0.3142, "num_tokens": 3491719667.0, "step": 5513 }, { "epoch": 0.6520042568286627, "grad_norm": 0.1379760205745697, "learning_rate": 2.1419073142108638e-05, "loss": 0.3179, "num_tokens": 3492356566.0, "step": 5514 }, { "epoch": 0.6521225020692917, "grad_norm": 0.11928132176399231, "learning_rate": 2.1409733007743378e-05, "loss": 0.3027, "num_tokens": 3492987541.0, "step": 5515 }, { "epoch": 0.6522407473099208, "grad_norm": 0.1278296411037445, "learning_rate": 2.1400394573373556e-05, "loss": 0.3107, "num_tokens": 3493616979.0, "step": 5516 }, { "epoch": 0.6523589925505499, "grad_norm": 0.13506820797920227, "learning_rate": 2.1391057840368883e-05, "loss": 0.3411, "num_tokens": 3494253779.0, "step": 5517 }, { "epoch": 0.6524772377911789, "grad_norm": 0.1348469853401184, "learning_rate": 2.1381722810098837e-05, "loss": 0.3237, "num_tokens": 3494882756.0, "step": 5518 }, { "epoch": 0.6525954830318079, "grad_norm": 0.12717540562152863, "learning_rate": 2.1372389483932612e-05, "loss": 0.3296, "num_tokens": 3495520122.0, "step": 5519 }, { "epoch": 0.652713728272437, "grad_norm": 0.12728142738342285, "learning_rate": 2.1363057863239165e-05, "loss": 0.3448, "num_tokens": 3496154794.0, "step": 5520 }, { "epoch": 0.6528319735130661, "grad_norm": 0.14714448153972626, "learning_rate": 2.1353727949387208e-05, "loss": 0.36, "num_tokens": 3496765848.0, "step": 5521 }, { "epoch": 0.6529502187536952, "grad_norm": 0.13289868831634521, "learning_rate": 2.1344399743745213e-05, "loss": 0.3064, "num_tokens": 3497401641.0, "step": 5522 }, { "epoch": 0.6530684639943243, "grad_norm": 0.12825456261634827, "learning_rate": 2.133507324768137e-05, "loss": 0.3328, "num_tokens": 3498036571.0, "step": 5523 }, { "epoch": 0.6531867092349533, "grad_norm": 0.1331453174352646, "learning_rate": 2.1325748462563645e-05, "loss": 0.3192, "num_tokens": 3498671799.0, "step": 5524 }, { "epoch": 0.6533049544755823, "grad_norm": 0.13651984930038452, "learning_rate": 2.1316425389759747e-05, "loss": 0.3405, "num_tokens": 3499310004.0, "step": 5525 }, { "epoch": 0.6534231997162114, "grad_norm": 0.1388423889875412, "learning_rate": 2.130710403063713e-05, "loss": 0.337, "num_tokens": 3499943854.0, "step": 5526 }, { "epoch": 0.6535414449568405, "grad_norm": 0.14546062052249908, "learning_rate": 2.129778438656301e-05, "loss": 0.3547, "num_tokens": 3500578043.0, "step": 5527 }, { "epoch": 0.6536596901974695, "grad_norm": 0.12198447436094284, "learning_rate": 2.128846645890433e-05, "loss": 0.3209, "num_tokens": 3501212886.0, "step": 5528 }, { "epoch": 0.6537779354380986, "grad_norm": 0.12524248659610748, "learning_rate": 2.1279150249027796e-05, "loss": 0.3206, "num_tokens": 3501851197.0, "step": 5529 }, { "epoch": 0.6538961806787277, "grad_norm": 0.12637409567832947, "learning_rate": 2.1269835758299836e-05, "loss": 0.3274, "num_tokens": 3502484532.0, "step": 5530 }, { "epoch": 0.6540144259193568, "grad_norm": 0.1368137151002884, "learning_rate": 2.1260522988086675e-05, "loss": 0.3192, "num_tokens": 3503120627.0, "step": 5531 }, { "epoch": 0.6541326711599859, "grad_norm": 0.14032480120658875, "learning_rate": 2.125121193975425e-05, "loss": 0.3256, "num_tokens": 3503758957.0, "step": 5532 }, { "epoch": 0.6542509164006148, "grad_norm": 0.13026194274425507, "learning_rate": 2.124190261466824e-05, "loss": 0.362, "num_tokens": 3504390355.0, "step": 5533 }, { "epoch": 0.6543691616412439, "grad_norm": 0.13162586092948914, "learning_rate": 2.123259501419411e-05, "loss": 0.3224, "num_tokens": 3505026474.0, "step": 5534 }, { "epoch": 0.654487406881873, "grad_norm": 0.14132168889045715, "learning_rate": 2.122328913969702e-05, "loss": 0.3146, "num_tokens": 3505662677.0, "step": 5535 }, { "epoch": 0.6546056521225021, "grad_norm": 0.1304721236228943, "learning_rate": 2.121398499254194e-05, "loss": 0.3092, "num_tokens": 3506302237.0, "step": 5536 }, { "epoch": 0.6547238973631311, "grad_norm": 0.1413135826587677, "learning_rate": 2.1204682574093498e-05, "loss": 0.3407, "num_tokens": 3506939989.0, "step": 5537 }, { "epoch": 0.6548421426037602, "grad_norm": 0.1407465785741806, "learning_rate": 2.1195381885716154e-05, "loss": 0.3328, "num_tokens": 3507576893.0, "step": 5538 }, { "epoch": 0.6549603878443893, "grad_norm": 0.12954111397266388, "learning_rate": 2.1186082928774088e-05, "loss": 0.3356, "num_tokens": 3508198332.0, "step": 5539 }, { "epoch": 0.6550786330850183, "grad_norm": 0.1588725596666336, "learning_rate": 2.1176785704631204e-05, "loss": 0.3593, "num_tokens": 3508811151.0, "step": 5540 }, { "epoch": 0.6551968783256474, "grad_norm": 0.1335185468196869, "learning_rate": 2.1167490214651166e-05, "loss": 0.3304, "num_tokens": 3509447169.0, "step": 5541 }, { "epoch": 0.6553151235662764, "grad_norm": 0.13275019824504852, "learning_rate": 2.115819646019738e-05, "loss": 0.3204, "num_tokens": 3510082579.0, "step": 5542 }, { "epoch": 0.6554333688069055, "grad_norm": 0.13161025941371918, "learning_rate": 2.1148904442633016e-05, "loss": 0.3669, "num_tokens": 3510716781.0, "step": 5543 }, { "epoch": 0.6555516140475346, "grad_norm": 0.1284611076116562, "learning_rate": 2.1139614163320978e-05, "loss": 0.3183, "num_tokens": 3511351394.0, "step": 5544 }, { "epoch": 0.6556698592881637, "grad_norm": 0.14548717439174652, "learning_rate": 2.113032562362389e-05, "loss": 0.364, "num_tokens": 3511990278.0, "step": 5545 }, { "epoch": 0.6557881045287928, "grad_norm": 0.1384972482919693, "learning_rate": 2.1121038824904165e-05, "loss": 0.3307, "num_tokens": 3512623868.0, "step": 5546 }, { "epoch": 0.6559063497694217, "grad_norm": 0.13483934104442596, "learning_rate": 2.111175376852391e-05, "loss": 0.3199, "num_tokens": 3513254375.0, "step": 5547 }, { "epoch": 0.6560245950100508, "grad_norm": 0.13951319456100464, "learning_rate": 2.110247045584505e-05, "loss": 0.3709, "num_tokens": 3513891158.0, "step": 5548 }, { "epoch": 0.6561428402506799, "grad_norm": 0.1243831142783165, "learning_rate": 2.109318888822918e-05, "loss": 0.328, "num_tokens": 3514528096.0, "step": 5549 }, { "epoch": 0.656261085491309, "grad_norm": 0.12655004858970642, "learning_rate": 2.1083909067037665e-05, "loss": 0.2929, "num_tokens": 3515157138.0, "step": 5550 }, { "epoch": 0.656379330731938, "grad_norm": 0.1221890076994896, "learning_rate": 2.1074630993631615e-05, "loss": 0.2996, "num_tokens": 3515794217.0, "step": 5551 }, { "epoch": 0.6564975759725671, "grad_norm": 0.1272311508655548, "learning_rate": 2.1065354669371917e-05, "loss": 0.3094, "num_tokens": 3516427416.0, "step": 5552 }, { "epoch": 0.6566158212131962, "grad_norm": 0.13787509500980377, "learning_rate": 2.1056080095619136e-05, "loss": 0.3455, "num_tokens": 3517062641.0, "step": 5553 }, { "epoch": 0.6567340664538253, "grad_norm": 0.1350182443857193, "learning_rate": 2.1046807273733627e-05, "loss": 0.3099, "num_tokens": 3517699311.0, "step": 5554 }, { "epoch": 0.6568523116944543, "grad_norm": 0.12651850283145905, "learning_rate": 2.1037536205075484e-05, "loss": 0.3326, "num_tokens": 3518332304.0, "step": 5555 }, { "epoch": 0.6569705569350833, "grad_norm": 0.12643755972385406, "learning_rate": 2.1028266891004505e-05, "loss": 0.3522, "num_tokens": 3518970030.0, "step": 5556 }, { "epoch": 0.6570888021757124, "grad_norm": 0.14628633856773376, "learning_rate": 2.1018999332880303e-05, "loss": 0.3297, "num_tokens": 3519605935.0, "step": 5557 }, { "epoch": 0.6572070474163415, "grad_norm": 0.1422397643327713, "learning_rate": 2.1009733532062177e-05, "loss": 0.3692, "num_tokens": 3520239350.0, "step": 5558 }, { "epoch": 0.6573252926569706, "grad_norm": 0.1285260170698166, "learning_rate": 2.1000469489909154e-05, "loss": 0.3218, "num_tokens": 3520875465.0, "step": 5559 }, { "epoch": 0.6574435378975996, "grad_norm": 0.12030579894781113, "learning_rate": 2.0991207207780074e-05, "loss": 0.3174, "num_tokens": 3521511905.0, "step": 5560 }, { "epoch": 0.6575617831382287, "grad_norm": 0.11959812790155411, "learning_rate": 2.098194668703346e-05, "loss": 0.2877, "num_tokens": 3522146639.0, "step": 5561 }, { "epoch": 0.6576800283788578, "grad_norm": 0.1377599537372589, "learning_rate": 2.0972687929027584e-05, "loss": 0.311, "num_tokens": 3522783847.0, "step": 5562 }, { "epoch": 0.6577982736194868, "grad_norm": 0.1451016515493393, "learning_rate": 2.0963430935120473e-05, "loss": 0.3423, "num_tokens": 3523423368.0, "step": 5563 }, { "epoch": 0.6579165188601159, "grad_norm": 0.1276727318763733, "learning_rate": 2.09541757066699e-05, "loss": 0.3132, "num_tokens": 3524055858.0, "step": 5564 }, { "epoch": 0.6580347641007449, "grad_norm": 0.1275142878293991, "learning_rate": 2.0944922245033354e-05, "loss": 0.3358, "num_tokens": 3524688600.0, "step": 5565 }, { "epoch": 0.658153009341374, "grad_norm": 0.10948234051465988, "learning_rate": 2.0935670551568104e-05, "loss": 0.3239, "num_tokens": 3525323217.0, "step": 5566 }, { "epoch": 0.6582712545820031, "grad_norm": 0.15224969387054443, "learning_rate": 2.092642062763112e-05, "loss": 0.316, "num_tokens": 3525954600.0, "step": 5567 }, { "epoch": 0.6583894998226322, "grad_norm": 0.134739488363266, "learning_rate": 2.0917172474579123e-05, "loss": 0.3304, "num_tokens": 3526587709.0, "step": 5568 }, { "epoch": 0.6585077450632612, "grad_norm": 0.12974339723587036, "learning_rate": 2.0907926093768602e-05, "loss": 0.3363, "num_tokens": 3527195410.0, "step": 5569 }, { "epoch": 0.6586259903038902, "grad_norm": 0.14065738022327423, "learning_rate": 2.089868148655575e-05, "loss": 0.353, "num_tokens": 3527830877.0, "step": 5570 }, { "epoch": 0.6587442355445193, "grad_norm": 0.11513274163007736, "learning_rate": 2.0889438654296517e-05, "loss": 0.3035, "num_tokens": 3528465363.0, "step": 5571 }, { "epoch": 0.6588624807851484, "grad_norm": 0.1279260665178299, "learning_rate": 2.0880197598346583e-05, "loss": 0.3127, "num_tokens": 3529099325.0, "step": 5572 }, { "epoch": 0.6589807260257775, "grad_norm": 0.12500198185443878, "learning_rate": 2.0870958320061376e-05, "loss": 0.325, "num_tokens": 3529729535.0, "step": 5573 }, { "epoch": 0.6590989712664065, "grad_norm": 0.14736978709697723, "learning_rate": 2.0861720820796087e-05, "loss": 0.326, "num_tokens": 3530361486.0, "step": 5574 }, { "epoch": 0.6592172165070356, "grad_norm": 0.12428309768438339, "learning_rate": 2.0852485101905585e-05, "loss": 0.2858, "num_tokens": 3530998734.0, "step": 5575 }, { "epoch": 0.6593354617476647, "grad_norm": 0.13601961731910706, "learning_rate": 2.0843251164744534e-05, "loss": 0.3218, "num_tokens": 3531636396.0, "step": 5576 }, { "epoch": 0.6594537069882938, "grad_norm": 0.13515479862689972, "learning_rate": 2.0834019010667302e-05, "loss": 0.3633, "num_tokens": 3532270686.0, "step": 5577 }, { "epoch": 0.6595719522289227, "grad_norm": 0.1372845619916916, "learning_rate": 2.0824788641028027e-05, "loss": 0.3313, "num_tokens": 3532907473.0, "step": 5578 }, { "epoch": 0.6596901974695518, "grad_norm": 0.12599323689937592, "learning_rate": 2.0815560057180557e-05, "loss": 0.3265, "num_tokens": 3533536067.0, "step": 5579 }, { "epoch": 0.6598084427101809, "grad_norm": 0.12376197427511215, "learning_rate": 2.0806333260478492e-05, "loss": 0.3026, "num_tokens": 3534171486.0, "step": 5580 }, { "epoch": 0.65992668795081, "grad_norm": 0.12885521352291107, "learning_rate": 2.079710825227516e-05, "loss": 0.3162, "num_tokens": 3534800034.0, "step": 5581 }, { "epoch": 0.6600449331914391, "grad_norm": 0.12906767427921295, "learning_rate": 2.078788503392365e-05, "loss": 0.3214, "num_tokens": 3535432076.0, "step": 5582 }, { "epoch": 0.6601631784320681, "grad_norm": 0.1232844665646553, "learning_rate": 2.077866360677676e-05, "loss": 0.3218, "num_tokens": 3536059080.0, "step": 5583 }, { "epoch": 0.6602814236726972, "grad_norm": 0.13186609745025635, "learning_rate": 2.0769443972187024e-05, "loss": 0.3618, "num_tokens": 3536690755.0, "step": 5584 }, { "epoch": 0.6603996689133262, "grad_norm": 0.1308850646018982, "learning_rate": 2.0760226131506757e-05, "loss": 0.3373, "num_tokens": 3537325063.0, "step": 5585 }, { "epoch": 0.6605179141539553, "grad_norm": 0.1367843598127365, "learning_rate": 2.0751010086087952e-05, "loss": 0.3588, "num_tokens": 3537959574.0, "step": 5586 }, { "epoch": 0.6606361593945844, "grad_norm": 0.13776430487632751, "learning_rate": 2.07417958372824e-05, "loss": 0.358, "num_tokens": 3538592976.0, "step": 5587 }, { "epoch": 0.6607544046352134, "grad_norm": 0.13232217729091644, "learning_rate": 2.073258338644155e-05, "loss": 0.3387, "num_tokens": 3539225332.0, "step": 5588 }, { "epoch": 0.6608726498758425, "grad_norm": 0.11812736839056015, "learning_rate": 2.0723372734916663e-05, "loss": 0.32, "num_tokens": 3539862656.0, "step": 5589 }, { "epoch": 0.6609908951164716, "grad_norm": 0.13839353621006012, "learning_rate": 2.071416388405871e-05, "loss": 0.3019, "num_tokens": 3540483763.0, "step": 5590 }, { "epoch": 0.6611091403571007, "grad_norm": 0.13102738559246063, "learning_rate": 2.0704956835218385e-05, "loss": 0.3005, "num_tokens": 3541114539.0, "step": 5591 }, { "epoch": 0.6612273855977296, "grad_norm": 0.12914317846298218, "learning_rate": 2.0695751589746126e-05, "loss": 0.3213, "num_tokens": 3541750494.0, "step": 5592 }, { "epoch": 0.6613456308383587, "grad_norm": 0.12478779256343842, "learning_rate": 2.068654814899209e-05, "loss": 0.3103, "num_tokens": 3542357467.0, "step": 5593 }, { "epoch": 0.6614638760789878, "grad_norm": 0.14503563940525055, "learning_rate": 2.0677346514306217e-05, "loss": 0.3449, "num_tokens": 3542992207.0, "step": 5594 }, { "epoch": 0.6615821213196169, "grad_norm": 0.11880749464035034, "learning_rate": 2.0668146687038126e-05, "loss": 0.3115, "num_tokens": 3543624155.0, "step": 5595 }, { "epoch": 0.661700366560246, "grad_norm": 0.13263869285583496, "learning_rate": 2.0658948668537214e-05, "loss": 0.3294, "num_tokens": 3544258190.0, "step": 5596 }, { "epoch": 0.661818611800875, "grad_norm": 0.1292896419763565, "learning_rate": 2.0649752460152592e-05, "loss": 0.3053, "num_tokens": 3544889421.0, "step": 5597 }, { "epoch": 0.6619368570415041, "grad_norm": 0.1487559974193573, "learning_rate": 2.0640558063233092e-05, "loss": 0.3572, "num_tokens": 3545522420.0, "step": 5598 }, { "epoch": 0.6620551022821332, "grad_norm": 0.11853431165218353, "learning_rate": 2.0631365479127315e-05, "loss": 0.299, "num_tokens": 3546160230.0, "step": 5599 }, { "epoch": 0.6621733475227622, "grad_norm": 0.13187246024608612, "learning_rate": 2.0622174709183576e-05, "loss": 0.3289, "num_tokens": 3546793784.0, "step": 5600 }, { "epoch": 0.6622915927633912, "grad_norm": 0.1325283944606781, "learning_rate": 2.0612985754749917e-05, "loss": 0.3271, "num_tokens": 3547425243.0, "step": 5601 }, { "epoch": 0.6624098380040203, "grad_norm": 0.12779060006141663, "learning_rate": 2.0603798617174116e-05, "loss": 0.3195, "num_tokens": 3548064832.0, "step": 5602 }, { "epoch": 0.6625280832446494, "grad_norm": 0.13281095027923584, "learning_rate": 2.0594613297803706e-05, "loss": 0.343, "num_tokens": 3548703381.0, "step": 5603 }, { "epoch": 0.6626463284852785, "grad_norm": 0.13368625938892365, "learning_rate": 2.058542979798595e-05, "loss": 0.3585, "num_tokens": 3549332582.0, "step": 5604 }, { "epoch": 0.6627645737259076, "grad_norm": 0.13284972310066223, "learning_rate": 2.0576248119067797e-05, "loss": 0.358, "num_tokens": 3549970579.0, "step": 5605 }, { "epoch": 0.6628828189665366, "grad_norm": 0.12677001953125, "learning_rate": 2.0567068262395988e-05, "loss": 0.3275, "num_tokens": 3550603449.0, "step": 5606 }, { "epoch": 0.6630010642071656, "grad_norm": 0.13610674440860748, "learning_rate": 2.0557890229316963e-05, "loss": 0.3718, "num_tokens": 3551238904.0, "step": 5607 }, { "epoch": 0.6631193094477947, "grad_norm": 0.11876419186592102, "learning_rate": 2.0548714021176915e-05, "loss": 0.3273, "num_tokens": 3551877729.0, "step": 5608 }, { "epoch": 0.6632375546884238, "grad_norm": 0.13496428728103638, "learning_rate": 2.0539539639321758e-05, "loss": 0.3442, "num_tokens": 3552492976.0, "step": 5609 }, { "epoch": 0.6633557999290528, "grad_norm": 0.1277645230293274, "learning_rate": 2.0530367085097123e-05, "loss": 0.3366, "num_tokens": 3553122756.0, "step": 5610 }, { "epoch": 0.6634740451696819, "grad_norm": 0.1312544196844101, "learning_rate": 2.0521196359848407e-05, "loss": 0.3371, "num_tokens": 3553754103.0, "step": 5611 }, { "epoch": 0.663592290410311, "grad_norm": 0.1251879334449768, "learning_rate": 2.0512027464920718e-05, "loss": 0.3085, "num_tokens": 3554391914.0, "step": 5612 }, { "epoch": 0.6637105356509401, "grad_norm": 0.1370690017938614, "learning_rate": 2.0502860401658887e-05, "loss": 0.3294, "num_tokens": 3555026663.0, "step": 5613 }, { "epoch": 0.6638287808915692, "grad_norm": 0.12553325295448303, "learning_rate": 2.049369517140749e-05, "loss": 0.3224, "num_tokens": 3555661822.0, "step": 5614 }, { "epoch": 0.6639470261321981, "grad_norm": 0.12520168721675873, "learning_rate": 2.0484531775510842e-05, "loss": 0.319, "num_tokens": 3556295917.0, "step": 5615 }, { "epoch": 0.6640652713728272, "grad_norm": 0.1275336742401123, "learning_rate": 2.0475370215312968e-05, "loss": 0.312, "num_tokens": 3556934763.0, "step": 5616 }, { "epoch": 0.6641835166134563, "grad_norm": 0.1298343539237976, "learning_rate": 2.0466210492157643e-05, "loss": 0.3236, "num_tokens": 3557567385.0, "step": 5617 }, { "epoch": 0.6643017618540854, "grad_norm": 0.12200264632701874, "learning_rate": 2.0457052607388366e-05, "loss": 0.2864, "num_tokens": 3558201300.0, "step": 5618 }, { "epoch": 0.6644200070947145, "grad_norm": 0.12383028119802475, "learning_rate": 2.044789656234834e-05, "loss": 0.2899, "num_tokens": 3558836622.0, "step": 5619 }, { "epoch": 0.6645382523353435, "grad_norm": 0.12919668853282928, "learning_rate": 2.0438742358380554e-05, "loss": 0.302, "num_tokens": 3559471310.0, "step": 5620 }, { "epoch": 0.6646564975759726, "grad_norm": 0.13016080856323242, "learning_rate": 2.042958999682768e-05, "loss": 0.3329, "num_tokens": 3560110662.0, "step": 5621 }, { "epoch": 0.6647747428166016, "grad_norm": 0.13390590250492096, "learning_rate": 2.042043947903214e-05, "loss": 0.3591, "num_tokens": 3560745392.0, "step": 5622 }, { "epoch": 0.6648929880572307, "grad_norm": 0.13174307346343994, "learning_rate": 2.041129080633606e-05, "loss": 0.305, "num_tokens": 3561380502.0, "step": 5623 }, { "epoch": 0.6650112332978597, "grad_norm": 0.1257958859205246, "learning_rate": 2.0402143980081342e-05, "loss": 0.3315, "num_tokens": 3562014792.0, "step": 5624 }, { "epoch": 0.6651294785384888, "grad_norm": 0.12740851938724518, "learning_rate": 2.0392999001609596e-05, "loss": 0.3459, "num_tokens": 3562652093.0, "step": 5625 }, { "epoch": 0.6652477237791179, "grad_norm": 0.13950899243354797, "learning_rate": 2.0383855872262135e-05, "loss": 0.3457, "num_tokens": 3563287536.0, "step": 5626 }, { "epoch": 0.665365969019747, "grad_norm": 0.1327143758535385, "learning_rate": 2.0374714593380032e-05, "loss": 0.3306, "num_tokens": 3563925490.0, "step": 5627 }, { "epoch": 0.6654842142603761, "grad_norm": 0.1322358101606369, "learning_rate": 2.0365575166304062e-05, "loss": 0.3422, "num_tokens": 3564552390.0, "step": 5628 }, { "epoch": 0.665602459501005, "grad_norm": 0.1374218612909317, "learning_rate": 2.035643759237478e-05, "loss": 0.34, "num_tokens": 3565183332.0, "step": 5629 }, { "epoch": 0.6657207047416341, "grad_norm": 0.1362171471118927, "learning_rate": 2.0347301872932403e-05, "loss": 0.3325, "num_tokens": 3565820544.0, "step": 5630 }, { "epoch": 0.6658389499822632, "grad_norm": 0.13023962080478668, "learning_rate": 2.0338168009316912e-05, "loss": 0.3521, "num_tokens": 3566455912.0, "step": 5631 }, { "epoch": 0.6659571952228923, "grad_norm": 0.12380507588386536, "learning_rate": 2.0329036002868026e-05, "loss": 0.3201, "num_tokens": 3567094745.0, "step": 5632 }, { "epoch": 0.6660754404635213, "grad_norm": 0.13213573396205902, "learning_rate": 2.0319905854925162e-05, "loss": 0.3336, "num_tokens": 3567729643.0, "step": 5633 }, { "epoch": 0.6661936857041504, "grad_norm": 0.13204820454120636, "learning_rate": 2.0310777566827502e-05, "loss": 0.3565, "num_tokens": 3568365757.0, "step": 5634 }, { "epoch": 0.6663119309447795, "grad_norm": 0.1254914402961731, "learning_rate": 2.030165113991389e-05, "loss": 0.3186, "num_tokens": 3569004142.0, "step": 5635 }, { "epoch": 0.6664301761854086, "grad_norm": 0.12254994362592697, "learning_rate": 2.029252657552298e-05, "loss": 0.2712, "num_tokens": 3569636035.0, "step": 5636 }, { "epoch": 0.6665484214260377, "grad_norm": 0.12025221437215805, "learning_rate": 2.028340387499308e-05, "loss": 0.3452, "num_tokens": 3570272763.0, "step": 5637 }, { "epoch": 0.6666666666666666, "grad_norm": 0.1318490356206894, "learning_rate": 2.0274283039662287e-05, "loss": 0.3339, "num_tokens": 3570908618.0, "step": 5638 }, { "epoch": 0.6667849119072957, "grad_norm": 0.13466046750545502, "learning_rate": 2.026516407086838e-05, "loss": 0.3542, "num_tokens": 3571544228.0, "step": 5639 }, { "epoch": 0.6669031571479248, "grad_norm": 0.12355106323957443, "learning_rate": 2.0256046969948866e-05, "loss": 0.3024, "num_tokens": 3572176732.0, "step": 5640 }, { "epoch": 0.6670214023885539, "grad_norm": 0.12524010241031647, "learning_rate": 2.024693173824101e-05, "loss": 0.3144, "num_tokens": 3572810420.0, "step": 5641 }, { "epoch": 0.6671396476291829, "grad_norm": 0.1331588327884674, "learning_rate": 2.0237818377081776e-05, "loss": 0.2928, "num_tokens": 3573443879.0, "step": 5642 }, { "epoch": 0.667257892869812, "grad_norm": 0.12851698696613312, "learning_rate": 2.0228706887807858e-05, "loss": 0.3144, "num_tokens": 3574074364.0, "step": 5643 }, { "epoch": 0.667376138110441, "grad_norm": 0.12935872375965118, "learning_rate": 2.021959727175567e-05, "loss": 0.2993, "num_tokens": 3574706959.0, "step": 5644 }, { "epoch": 0.6674943833510701, "grad_norm": 0.12816590070724487, "learning_rate": 2.0210489530261374e-05, "loss": 0.3154, "num_tokens": 3575345190.0, "step": 5645 }, { "epoch": 0.6676126285916992, "grad_norm": 0.1376039832830429, "learning_rate": 2.0201383664660848e-05, "loss": 0.3109, "num_tokens": 3575983538.0, "step": 5646 }, { "epoch": 0.6677308738323282, "grad_norm": 0.1448696255683899, "learning_rate": 2.0192279676289674e-05, "loss": 0.3337, "num_tokens": 3576621467.0, "step": 5647 }, { "epoch": 0.6678491190729573, "grad_norm": 0.12528084218502045, "learning_rate": 2.018317756648318e-05, "loss": 0.314, "num_tokens": 3577257903.0, "step": 5648 }, { "epoch": 0.6679673643135864, "grad_norm": 0.12169931083917618, "learning_rate": 2.0174077336576406e-05, "loss": 0.3243, "num_tokens": 3577894590.0, "step": 5649 }, { "epoch": 0.6680856095542155, "grad_norm": 0.1396203339099884, "learning_rate": 2.0164978987904136e-05, "loss": 0.345, "num_tokens": 3578528451.0, "step": 5650 }, { "epoch": 0.6682038547948445, "grad_norm": 0.1294197291135788, "learning_rate": 2.0155882521800856e-05, "loss": 0.324, "num_tokens": 3579166095.0, "step": 5651 }, { "epoch": 0.6683221000354735, "grad_norm": 0.13628444075584412, "learning_rate": 2.014678793960078e-05, "loss": 0.3554, "num_tokens": 3579800189.0, "step": 5652 }, { "epoch": 0.6684403452761026, "grad_norm": 0.14487317204475403, "learning_rate": 2.0137695242637865e-05, "loss": 0.3642, "num_tokens": 3580439066.0, "step": 5653 }, { "epoch": 0.6685585905167317, "grad_norm": 0.12514755129814148, "learning_rate": 2.0128604432245758e-05, "loss": 0.3516, "num_tokens": 3581077272.0, "step": 5654 }, { "epoch": 0.6686768357573608, "grad_norm": 0.13309691846370697, "learning_rate": 2.011951550975788e-05, "loss": 0.3257, "num_tokens": 3581714522.0, "step": 5655 }, { "epoch": 0.6687950809979898, "grad_norm": 0.14527840912342072, "learning_rate": 2.01104284765073e-05, "loss": 0.3376, "num_tokens": 3582346000.0, "step": 5656 }, { "epoch": 0.6689133262386189, "grad_norm": 0.12818120419979095, "learning_rate": 2.0101343333826883e-05, "loss": 0.3126, "num_tokens": 3582982659.0, "step": 5657 }, { "epoch": 0.669031571479248, "grad_norm": 0.1349005103111267, "learning_rate": 2.009226008304917e-05, "loss": 0.347, "num_tokens": 3583618499.0, "step": 5658 }, { "epoch": 0.669149816719877, "grad_norm": 0.13478173315525055, "learning_rate": 2.008317872550646e-05, "loss": 0.3449, "num_tokens": 3584251877.0, "step": 5659 }, { "epoch": 0.6692680619605061, "grad_norm": 0.12832127511501312, "learning_rate": 2.007409926253075e-05, "loss": 0.3202, "num_tokens": 3584884466.0, "step": 5660 }, { "epoch": 0.6693863072011351, "grad_norm": 0.13011200726032257, "learning_rate": 2.0065021695453748e-05, "loss": 0.3535, "num_tokens": 3585521946.0, "step": 5661 }, { "epoch": 0.6695045524417642, "grad_norm": 0.1251049041748047, "learning_rate": 2.0055946025606922e-05, "loss": 0.3155, "num_tokens": 3586161324.0, "step": 5662 }, { "epoch": 0.6696227976823933, "grad_norm": 0.12424767017364502, "learning_rate": 2.0046872254321422e-05, "loss": 0.3295, "num_tokens": 3586800041.0, "step": 5663 }, { "epoch": 0.6697410429230224, "grad_norm": 0.12798379361629486, "learning_rate": 2.0037800382928166e-05, "loss": 0.3159, "num_tokens": 3587437669.0, "step": 5664 }, { "epoch": 0.6698592881636514, "grad_norm": 0.13504616916179657, "learning_rate": 2.0028730412757728e-05, "loss": 0.3595, "num_tokens": 3588068283.0, "step": 5665 }, { "epoch": 0.6699775334042805, "grad_norm": 0.11989165842533112, "learning_rate": 2.0019662345140468e-05, "loss": 0.3163, "num_tokens": 3588703211.0, "step": 5666 }, { "epoch": 0.6700957786449095, "grad_norm": 0.12114997953176498, "learning_rate": 2.0010596181406424e-05, "loss": 0.319, "num_tokens": 3589342580.0, "step": 5667 }, { "epoch": 0.6702140238855386, "grad_norm": 0.12650656700134277, "learning_rate": 2.0001531922885388e-05, "loss": 0.3198, "num_tokens": 3589976550.0, "step": 5668 }, { "epoch": 0.6703322691261677, "grad_norm": 0.13822555541992188, "learning_rate": 1.9992469570906834e-05, "loss": 0.3298, "num_tokens": 3590613199.0, "step": 5669 }, { "epoch": 0.6704505143667967, "grad_norm": 0.12249580025672913, "learning_rate": 1.9983409126799982e-05, "loss": 0.3295, "num_tokens": 3591244725.0, "step": 5670 }, { "epoch": 0.6705687596074258, "grad_norm": 0.13916794955730438, "learning_rate": 1.997435059189379e-05, "loss": 0.3551, "num_tokens": 3591884135.0, "step": 5671 }, { "epoch": 0.6706870048480549, "grad_norm": 0.12290985137224197, "learning_rate": 1.9965293967516887e-05, "loss": 0.3454, "num_tokens": 3592511615.0, "step": 5672 }, { "epoch": 0.670805250088684, "grad_norm": 0.13233032822608948, "learning_rate": 1.9956239254997653e-05, "loss": 0.3372, "num_tokens": 3593147453.0, "step": 5673 }, { "epoch": 0.670923495329313, "grad_norm": 0.12748852372169495, "learning_rate": 1.9947186455664175e-05, "loss": 0.3291, "num_tokens": 3593754822.0, "step": 5674 }, { "epoch": 0.671041740569942, "grad_norm": 0.12822382152080536, "learning_rate": 1.9938135570844286e-05, "loss": 0.3525, "num_tokens": 3594388732.0, "step": 5675 }, { "epoch": 0.6711599858105711, "grad_norm": 0.11573929339647293, "learning_rate": 1.9929086601865516e-05, "loss": 0.2892, "num_tokens": 3595027934.0, "step": 5676 }, { "epoch": 0.6712782310512002, "grad_norm": 0.12797491252422333, "learning_rate": 1.9920039550055108e-05, "loss": 0.3534, "num_tokens": 3595663128.0, "step": 5677 }, { "epoch": 0.6713964762918293, "grad_norm": 0.12622922658920288, "learning_rate": 1.9910994416740037e-05, "loss": 0.3386, "num_tokens": 3596300388.0, "step": 5678 }, { "epoch": 0.6715147215324583, "grad_norm": 0.12231127172708511, "learning_rate": 1.9901951203246985e-05, "loss": 0.3292, "num_tokens": 3596931694.0, "step": 5679 }, { "epoch": 0.6716329667730874, "grad_norm": 0.12787406146526337, "learning_rate": 1.9892909910902378e-05, "loss": 0.3459, "num_tokens": 3597560095.0, "step": 5680 }, { "epoch": 0.6717512120137165, "grad_norm": 0.12631197273731232, "learning_rate": 1.9883870541032323e-05, "loss": 0.3447, "num_tokens": 3598197596.0, "step": 5681 }, { "epoch": 0.6718694572543455, "grad_norm": 0.131700336933136, "learning_rate": 1.9874833094962665e-05, "loss": 0.3266, "num_tokens": 3598831634.0, "step": 5682 }, { "epoch": 0.6719877024949745, "grad_norm": 0.12206864356994629, "learning_rate": 1.9865797574018986e-05, "loss": 0.3332, "num_tokens": 3599466238.0, "step": 5683 }, { "epoch": 0.6721059477356036, "grad_norm": 0.1365828514099121, "learning_rate": 1.985676397952654e-05, "loss": 0.3812, "num_tokens": 3600103347.0, "step": 5684 }, { "epoch": 0.6722241929762327, "grad_norm": 0.13132484257221222, "learning_rate": 1.984773231281036e-05, "loss": 0.3269, "num_tokens": 3600731691.0, "step": 5685 }, { "epoch": 0.6723424382168618, "grad_norm": 0.13166776299476624, "learning_rate": 1.9838702575195113e-05, "loss": 0.3434, "num_tokens": 3601363006.0, "step": 5686 }, { "epoch": 0.6724606834574909, "grad_norm": 0.1270170509815216, "learning_rate": 1.9829674768005264e-05, "loss": 0.3287, "num_tokens": 3602000522.0, "step": 5687 }, { "epoch": 0.6725789286981199, "grad_norm": 0.13185624778270721, "learning_rate": 1.982064889256494e-05, "loss": 0.3392, "num_tokens": 3602634017.0, "step": 5688 }, { "epoch": 0.672697173938749, "grad_norm": 0.12181585282087326, "learning_rate": 1.9811624950198034e-05, "loss": 0.3096, "num_tokens": 3603267912.0, "step": 5689 }, { "epoch": 0.672815419179378, "grad_norm": 0.12455198913812637, "learning_rate": 1.98026029422281e-05, "loss": 0.3353, "num_tokens": 3603905827.0, "step": 5690 }, { "epoch": 0.6729336644200071, "grad_norm": 0.13113805651664734, "learning_rate": 1.9793582869978443e-05, "loss": 0.3419, "num_tokens": 3604544810.0, "step": 5691 }, { "epoch": 0.6730519096606361, "grad_norm": 0.12851063907146454, "learning_rate": 1.978456473477209e-05, "loss": 0.3253, "num_tokens": 3605179714.0, "step": 5692 }, { "epoch": 0.6731701549012652, "grad_norm": 0.13695712387561798, "learning_rate": 1.9775548537931765e-05, "loss": 0.3216, "num_tokens": 3605810578.0, "step": 5693 }, { "epoch": 0.6732884001418943, "grad_norm": 0.13072636723518372, "learning_rate": 1.97665342807799e-05, "loss": 0.3894, "num_tokens": 3606448802.0, "step": 5694 }, { "epoch": 0.6734066453825234, "grad_norm": 0.11420021951198578, "learning_rate": 1.9757521964638653e-05, "loss": 0.2893, "num_tokens": 3607084721.0, "step": 5695 }, { "epoch": 0.6735248906231525, "grad_norm": 0.15249435603618622, "learning_rate": 1.974851159082992e-05, "loss": 0.3496, "num_tokens": 3607718424.0, "step": 5696 }, { "epoch": 0.6736431358637814, "grad_norm": 0.1336076259613037, "learning_rate": 1.973950316067529e-05, "loss": 0.3431, "num_tokens": 3608353995.0, "step": 5697 }, { "epoch": 0.6737613811044105, "grad_norm": 0.1233532577753067, "learning_rate": 1.9730496675496056e-05, "loss": 0.3221, "num_tokens": 3608988829.0, "step": 5698 }, { "epoch": 0.6738796263450396, "grad_norm": 0.13503381609916687, "learning_rate": 1.9721492136613254e-05, "loss": 0.3527, "num_tokens": 3609626025.0, "step": 5699 }, { "epoch": 0.6739978715856687, "grad_norm": 0.12815816700458527, "learning_rate": 1.97124895453476e-05, "loss": 0.3332, "num_tokens": 3610262919.0, "step": 5700 }, { "epoch": 0.6741161168262978, "grad_norm": 0.12849435210227966, "learning_rate": 1.9703488903019558e-05, "loss": 0.3318, "num_tokens": 3610899205.0, "step": 5701 }, { "epoch": 0.6742343620669268, "grad_norm": 0.1313813477754593, "learning_rate": 1.9694490210949295e-05, "loss": 0.3521, "num_tokens": 3611531494.0, "step": 5702 }, { "epoch": 0.6743526073075559, "grad_norm": 0.1261393129825592, "learning_rate": 1.968549347045667e-05, "loss": 0.3147, "num_tokens": 3612167269.0, "step": 5703 }, { "epoch": 0.674470852548185, "grad_norm": 0.12816305458545685, "learning_rate": 1.9676498682861294e-05, "loss": 0.3472, "num_tokens": 3612801835.0, "step": 5704 }, { "epoch": 0.674589097788814, "grad_norm": 0.13346783816814423, "learning_rate": 1.9667505849482456e-05, "loss": 0.3547, "num_tokens": 3613437357.0, "step": 5705 }, { "epoch": 0.674707343029443, "grad_norm": 0.1336047202348709, "learning_rate": 1.9658514971639198e-05, "loss": 0.3249, "num_tokens": 3614072549.0, "step": 5706 }, { "epoch": 0.6748255882700721, "grad_norm": 0.13437899947166443, "learning_rate": 1.964952605065023e-05, "loss": 0.3248, "num_tokens": 3614709263.0, "step": 5707 }, { "epoch": 0.6749438335107012, "grad_norm": 0.13149748742580414, "learning_rate": 1.9640539087834006e-05, "loss": 0.3309, "num_tokens": 3615347994.0, "step": 5708 }, { "epoch": 0.6750620787513303, "grad_norm": 0.13318048417568207, "learning_rate": 1.9631554084508665e-05, "loss": 0.3669, "num_tokens": 3615982398.0, "step": 5709 }, { "epoch": 0.6751803239919594, "grad_norm": 0.1391725242137909, "learning_rate": 1.9622571041992107e-05, "loss": 0.326, "num_tokens": 3616620124.0, "step": 5710 }, { "epoch": 0.6752985692325884, "grad_norm": 0.12274601310491562, "learning_rate": 1.9613589961601903e-05, "loss": 0.3203, "num_tokens": 3617257893.0, "step": 5711 }, { "epoch": 0.6754168144732174, "grad_norm": 0.1220487505197525, "learning_rate": 1.9604610844655332e-05, "loss": 0.3136, "num_tokens": 3617872193.0, "step": 5712 }, { "epoch": 0.6755350597138465, "grad_norm": 0.12898799777030945, "learning_rate": 1.9595633692469424e-05, "loss": 0.3181, "num_tokens": 3618507822.0, "step": 5713 }, { "epoch": 0.6756533049544756, "grad_norm": 0.13841524720191956, "learning_rate": 1.958665850636087e-05, "loss": 0.3429, "num_tokens": 3619145383.0, "step": 5714 }, { "epoch": 0.6757715501951046, "grad_norm": 0.1375419646501541, "learning_rate": 1.957768528764614e-05, "loss": 0.3667, "num_tokens": 3619782277.0, "step": 5715 }, { "epoch": 0.6758897954357337, "grad_norm": 0.13990676403045654, "learning_rate": 1.9568714037641337e-05, "loss": 0.3169, "num_tokens": 3620420660.0, "step": 5716 }, { "epoch": 0.6760080406763628, "grad_norm": 0.12822432816028595, "learning_rate": 1.955974475766232e-05, "loss": 0.3148, "num_tokens": 3621052412.0, "step": 5717 }, { "epoch": 0.6761262859169919, "grad_norm": 0.1221461147069931, "learning_rate": 1.9550777449024677e-05, "loss": 0.3528, "num_tokens": 3621684202.0, "step": 5718 }, { "epoch": 0.676244531157621, "grad_norm": 0.13270297646522522, "learning_rate": 1.9541812113043668e-05, "loss": 0.3544, "num_tokens": 3622289455.0, "step": 5719 }, { "epoch": 0.6763627763982499, "grad_norm": 0.13523884117603302, "learning_rate": 1.9532848751034277e-05, "loss": 0.3109, "num_tokens": 3622923701.0, "step": 5720 }, { "epoch": 0.676481021638879, "grad_norm": 0.13840872049331665, "learning_rate": 1.9523887364311187e-05, "loss": 0.3614, "num_tokens": 3623555497.0, "step": 5721 }, { "epoch": 0.6765992668795081, "grad_norm": 0.13944745063781738, "learning_rate": 1.9514927954188824e-05, "loss": 0.3469, "num_tokens": 3624190469.0, "step": 5722 }, { "epoch": 0.6767175121201372, "grad_norm": 0.1297198385000229, "learning_rate": 1.95059705219813e-05, "loss": 0.3085, "num_tokens": 3624827494.0, "step": 5723 }, { "epoch": 0.6768357573607662, "grad_norm": 0.12857937812805176, "learning_rate": 1.9497015069002432e-05, "loss": 0.3389, "num_tokens": 3625465293.0, "step": 5724 }, { "epoch": 0.6769540026013953, "grad_norm": 0.12153079360723495, "learning_rate": 1.9488061596565766e-05, "loss": 0.3242, "num_tokens": 3626096205.0, "step": 5725 }, { "epoch": 0.6770722478420244, "grad_norm": 0.12570884823799133, "learning_rate": 1.9479110105984535e-05, "loss": 0.315, "num_tokens": 3626729044.0, "step": 5726 }, { "epoch": 0.6771904930826534, "grad_norm": 0.1346295326948166, "learning_rate": 1.9470160598571715e-05, "loss": 0.3199, "num_tokens": 3627367584.0, "step": 5727 }, { "epoch": 0.6773087383232825, "grad_norm": 0.13504832983016968, "learning_rate": 1.9461213075639952e-05, "loss": 0.3445, "num_tokens": 3628000664.0, "step": 5728 }, { "epoch": 0.6774269835639115, "grad_norm": 0.13605667650699615, "learning_rate": 1.9452267538501623e-05, "loss": 0.3508, "num_tokens": 3628635374.0, "step": 5729 }, { "epoch": 0.6775452288045406, "grad_norm": 0.1295412927865982, "learning_rate": 1.9443323988468798e-05, "loss": 0.3278, "num_tokens": 3629265917.0, "step": 5730 }, { "epoch": 0.6776634740451697, "grad_norm": 0.13222138583660126, "learning_rate": 1.943438242685328e-05, "loss": 0.3391, "num_tokens": 3629900795.0, "step": 5731 }, { "epoch": 0.6777817192857988, "grad_norm": 0.13267286121845245, "learning_rate": 1.9425442854966583e-05, "loss": 0.3306, "num_tokens": 3630533175.0, "step": 5732 }, { "epoch": 0.6778999645264278, "grad_norm": 0.12362632900476456, "learning_rate": 1.941650527411988e-05, "loss": 0.2874, "num_tokens": 3631165015.0, "step": 5733 }, { "epoch": 0.6780182097670568, "grad_norm": 0.13626307249069214, "learning_rate": 1.94075696856241e-05, "loss": 0.335, "num_tokens": 3631802685.0, "step": 5734 }, { "epoch": 0.6781364550076859, "grad_norm": 0.13283585011959076, "learning_rate": 1.9398636090789863e-05, "loss": 0.3149, "num_tokens": 3632437261.0, "step": 5735 }, { "epoch": 0.678254700248315, "grad_norm": 0.12867988646030426, "learning_rate": 1.9389704490927512e-05, "loss": 0.3553, "num_tokens": 3633075709.0, "step": 5736 }, { "epoch": 0.6783729454889441, "grad_norm": 0.12217909097671509, "learning_rate": 1.9380774887347065e-05, "loss": 0.287, "num_tokens": 3633707776.0, "step": 5737 }, { "epoch": 0.6784911907295731, "grad_norm": 0.14047159254550934, "learning_rate": 1.9371847281358282e-05, "loss": 0.34, "num_tokens": 3634344698.0, "step": 5738 }, { "epoch": 0.6786094359702022, "grad_norm": 0.13562101125717163, "learning_rate": 1.9362921674270598e-05, "loss": 0.3651, "num_tokens": 3634984377.0, "step": 5739 }, { "epoch": 0.6787276812108313, "grad_norm": 0.13381552696228027, "learning_rate": 1.9353998067393186e-05, "loss": 0.347, "num_tokens": 3635620339.0, "step": 5740 }, { "epoch": 0.6788459264514604, "grad_norm": 0.13231174647808075, "learning_rate": 1.9345076462034904e-05, "loss": 0.3082, "num_tokens": 3636254641.0, "step": 5741 }, { "epoch": 0.6789641716920894, "grad_norm": 0.14602969586849213, "learning_rate": 1.9336156859504314e-05, "loss": 0.389, "num_tokens": 3636886016.0, "step": 5742 }, { "epoch": 0.6790824169327184, "grad_norm": 0.13158610463142395, "learning_rate": 1.9327239261109714e-05, "loss": 0.3381, "num_tokens": 3637514078.0, "step": 5743 }, { "epoch": 0.6792006621733475, "grad_norm": 0.13185791671276093, "learning_rate": 1.9318323668159068e-05, "loss": 0.2917, "num_tokens": 3638148987.0, "step": 5744 }, { "epoch": 0.6793189074139766, "grad_norm": 0.13656750321388245, "learning_rate": 1.9309410081960096e-05, "loss": 0.3334, "num_tokens": 3638785614.0, "step": 5745 }, { "epoch": 0.6794371526546057, "grad_norm": 0.1252235770225525, "learning_rate": 1.9300498503820144e-05, "loss": 0.3489, "num_tokens": 3639415345.0, "step": 5746 }, { "epoch": 0.6795553978952347, "grad_norm": 0.12138763815164566, "learning_rate": 1.929158893504634e-05, "loss": 0.3329, "num_tokens": 3640051917.0, "step": 5747 }, { "epoch": 0.6796736431358638, "grad_norm": 0.1308400183916092, "learning_rate": 1.92826813769455e-05, "loss": 0.3179, "num_tokens": 3640681417.0, "step": 5748 }, { "epoch": 0.6797918883764928, "grad_norm": 0.13222740590572357, "learning_rate": 1.927377583082412e-05, "loss": 0.3367, "num_tokens": 3641313227.0, "step": 5749 }, { "epoch": 0.6799101336171219, "grad_norm": 0.13343703746795654, "learning_rate": 1.9264872297988426e-05, "loss": 0.3185, "num_tokens": 3641948745.0, "step": 5750 }, { "epoch": 0.680028378857751, "grad_norm": 0.12501174211502075, "learning_rate": 1.9255970779744312e-05, "loss": 0.2991, "num_tokens": 3642587266.0, "step": 5751 }, { "epoch": 0.68014662409838, "grad_norm": 0.13697752356529236, "learning_rate": 1.9247071277397437e-05, "loss": 0.316, "num_tokens": 3643221186.0, "step": 5752 }, { "epoch": 0.6802648693390091, "grad_norm": 0.1346348077058792, "learning_rate": 1.9238173792253115e-05, "loss": 0.3337, "num_tokens": 3643855497.0, "step": 5753 }, { "epoch": 0.6803831145796382, "grad_norm": 0.12009430676698685, "learning_rate": 1.9229278325616364e-05, "loss": 0.2983, "num_tokens": 3644494845.0, "step": 5754 }, { "epoch": 0.6805013598202673, "grad_norm": 0.1273733228445053, "learning_rate": 1.9220384878791952e-05, "loss": 0.3097, "num_tokens": 3645131277.0, "step": 5755 }, { "epoch": 0.6806196050608962, "grad_norm": 0.12941694259643555, "learning_rate": 1.9211493453084298e-05, "loss": 0.309, "num_tokens": 3645761243.0, "step": 5756 }, { "epoch": 0.6807378503015253, "grad_norm": 0.1269921213388443, "learning_rate": 1.9202604049797554e-05, "loss": 0.3525, "num_tokens": 3646383746.0, "step": 5757 }, { "epoch": 0.6808560955421544, "grad_norm": 0.13147908449172974, "learning_rate": 1.9193716670235575e-05, "loss": 0.3218, "num_tokens": 3647020848.0, "step": 5758 }, { "epoch": 0.6809743407827835, "grad_norm": 0.1246328353881836, "learning_rate": 1.91848313157019e-05, "loss": 0.3268, "num_tokens": 3647658888.0, "step": 5759 }, { "epoch": 0.6810925860234126, "grad_norm": 0.1285143941640854, "learning_rate": 1.9175947987499778e-05, "loss": 0.321, "num_tokens": 3648289603.0, "step": 5760 }, { "epoch": 0.6812108312640416, "grad_norm": 0.12624110281467438, "learning_rate": 1.9167066686932185e-05, "loss": 0.3403, "num_tokens": 3648920471.0, "step": 5761 }, { "epoch": 0.6813290765046707, "grad_norm": 0.13026796281337738, "learning_rate": 1.9158187415301765e-05, "loss": 0.3329, "num_tokens": 3649554612.0, "step": 5762 }, { "epoch": 0.6814473217452998, "grad_norm": 0.1253342181444168, "learning_rate": 1.9149310173910884e-05, "loss": 0.3161, "num_tokens": 3650187931.0, "step": 5763 }, { "epoch": 0.6815655669859289, "grad_norm": 0.13296747207641602, "learning_rate": 1.914043496406161e-05, "loss": 0.3615, "num_tokens": 3650822144.0, "step": 5764 }, { "epoch": 0.6816838122265578, "grad_norm": 0.1219678521156311, "learning_rate": 1.9131561787055705e-05, "loss": 0.3066, "num_tokens": 3651454782.0, "step": 5765 }, { "epoch": 0.6818020574671869, "grad_norm": 0.13411971926689148, "learning_rate": 1.912269064419464e-05, "loss": 0.3522, "num_tokens": 3652091141.0, "step": 5766 }, { "epoch": 0.681920302707816, "grad_norm": 0.11491860449314117, "learning_rate": 1.9113821536779585e-05, "loss": 0.2664, "num_tokens": 3652720102.0, "step": 5767 }, { "epoch": 0.6820385479484451, "grad_norm": 0.12562204897403717, "learning_rate": 1.9104954466111397e-05, "loss": 0.3433, "num_tokens": 3653359543.0, "step": 5768 }, { "epoch": 0.6821567931890742, "grad_norm": 0.1292973905801773, "learning_rate": 1.9096089433490674e-05, "loss": 0.3425, "num_tokens": 3653987648.0, "step": 5769 }, { "epoch": 0.6822750384297032, "grad_norm": 0.13409486413002014, "learning_rate": 1.9087226440217673e-05, "loss": 0.3304, "num_tokens": 3654626371.0, "step": 5770 }, { "epoch": 0.6823932836703323, "grad_norm": 0.13401633501052856, "learning_rate": 1.907836548759237e-05, "loss": 0.3629, "num_tokens": 3655261802.0, "step": 5771 }, { "epoch": 0.6825115289109613, "grad_norm": 0.12296122312545776, "learning_rate": 1.906950657691443e-05, "loss": 0.3234, "num_tokens": 3655896823.0, "step": 5772 }, { "epoch": 0.6826297741515904, "grad_norm": 0.1274804025888443, "learning_rate": 1.906064970948325e-05, "loss": 0.3273, "num_tokens": 3656534001.0, "step": 5773 }, { "epoch": 0.6827480193922195, "grad_norm": 0.13229185342788696, "learning_rate": 1.9051794886597883e-05, "loss": 0.3474, "num_tokens": 3657164737.0, "step": 5774 }, { "epoch": 0.6828662646328485, "grad_norm": 0.1368621289730072, "learning_rate": 1.904294210955713e-05, "loss": 0.3438, "num_tokens": 3657794947.0, "step": 5775 }, { "epoch": 0.6829845098734776, "grad_norm": 0.13201753795146942, "learning_rate": 1.9034091379659452e-05, "loss": 0.3356, "num_tokens": 3658425634.0, "step": 5776 }, { "epoch": 0.6831027551141067, "grad_norm": 0.12885883450508118, "learning_rate": 1.902524269820301e-05, "loss": 0.3357, "num_tokens": 3659063745.0, "step": 5777 }, { "epoch": 0.6832210003547358, "grad_norm": 0.12605787813663483, "learning_rate": 1.9016396066485712e-05, "loss": 0.3047, "num_tokens": 3659690835.0, "step": 5778 }, { "epoch": 0.6833392455953647, "grad_norm": 0.1319957971572876, "learning_rate": 1.900755148580511e-05, "loss": 0.3228, "num_tokens": 3660321713.0, "step": 5779 }, { "epoch": 0.6834574908359938, "grad_norm": 0.1336536854505539, "learning_rate": 1.8998708957458485e-05, "loss": 0.3339, "num_tokens": 3660960415.0, "step": 5780 }, { "epoch": 0.6835757360766229, "grad_norm": 0.1361416131258011, "learning_rate": 1.8989868482742794e-05, "loss": 0.3506, "num_tokens": 3661578355.0, "step": 5781 }, { "epoch": 0.683693981317252, "grad_norm": 0.12940379977226257, "learning_rate": 1.8981030062954726e-05, "loss": 0.2883, "num_tokens": 3662208259.0, "step": 5782 }, { "epoch": 0.6838122265578811, "grad_norm": 0.1337784230709076, "learning_rate": 1.8972193699390665e-05, "loss": 0.3246, "num_tokens": 3662843601.0, "step": 5783 }, { "epoch": 0.6839304717985101, "grad_norm": 0.1230391412973404, "learning_rate": 1.896335939334664e-05, "loss": 0.316, "num_tokens": 3663470688.0, "step": 5784 }, { "epoch": 0.6840487170391392, "grad_norm": 0.12934806942939758, "learning_rate": 1.895452714611845e-05, "loss": 0.3073, "num_tokens": 3664106808.0, "step": 5785 }, { "epoch": 0.6841669622797683, "grad_norm": 0.1323843002319336, "learning_rate": 1.894569695900153e-05, "loss": 0.3097, "num_tokens": 3664743688.0, "step": 5786 }, { "epoch": 0.6842852075203973, "grad_norm": 0.12519405782222748, "learning_rate": 1.893686883329108e-05, "loss": 0.2946, "num_tokens": 3665377391.0, "step": 5787 }, { "epoch": 0.6844034527610263, "grad_norm": 0.14248107373714447, "learning_rate": 1.8928042770281934e-05, "loss": 0.3459, "num_tokens": 3666015081.0, "step": 5788 }, { "epoch": 0.6845216980016554, "grad_norm": 0.13496893644332886, "learning_rate": 1.8919218771268645e-05, "loss": 0.3527, "num_tokens": 3666644501.0, "step": 5789 }, { "epoch": 0.6846399432422845, "grad_norm": 0.12713220715522766, "learning_rate": 1.8910396837545494e-05, "loss": 0.3091, "num_tokens": 3667277761.0, "step": 5790 }, { "epoch": 0.6847581884829136, "grad_norm": 0.13310560584068298, "learning_rate": 1.8901576970406413e-05, "loss": 0.3367, "num_tokens": 3667916515.0, "step": 5791 }, { "epoch": 0.6848764337235427, "grad_norm": 0.13430428504943848, "learning_rate": 1.8892759171145054e-05, "loss": 0.3286, "num_tokens": 3668553661.0, "step": 5792 }, { "epoch": 0.6849946789641717, "grad_norm": 0.1341521143913269, "learning_rate": 1.8883943441054753e-05, "loss": 0.3325, "num_tokens": 3669184318.0, "step": 5793 }, { "epoch": 0.6851129242048007, "grad_norm": 0.13181273639202118, "learning_rate": 1.8875129781428576e-05, "loss": 0.3588, "num_tokens": 3669823703.0, "step": 5794 }, { "epoch": 0.6852311694454298, "grad_norm": 0.12139171361923218, "learning_rate": 1.8866318193559234e-05, "loss": 0.3, "num_tokens": 3670462154.0, "step": 5795 }, { "epoch": 0.6853494146860589, "grad_norm": 0.13317234814167023, "learning_rate": 1.8857508678739185e-05, "loss": 0.3216, "num_tokens": 3671093973.0, "step": 5796 }, { "epoch": 0.6854676599266879, "grad_norm": 0.12991021573543549, "learning_rate": 1.8848701238260553e-05, "loss": 0.3225, "num_tokens": 3671731757.0, "step": 5797 }, { "epoch": 0.685585905167317, "grad_norm": 0.14479711651802063, "learning_rate": 1.8839895873415146e-05, "loss": 0.3442, "num_tokens": 3672364837.0, "step": 5798 }, { "epoch": 0.6857041504079461, "grad_norm": 0.13436703383922577, "learning_rate": 1.8831092585494508e-05, "loss": 0.367, "num_tokens": 3673000781.0, "step": 5799 }, { "epoch": 0.6858223956485752, "grad_norm": 0.11590973287820816, "learning_rate": 1.8822291375789856e-05, "loss": 0.2849, "num_tokens": 3673637257.0, "step": 5800 }, { "epoch": 0.6859406408892043, "grad_norm": 0.14299945533275604, "learning_rate": 1.8813492245592087e-05, "loss": 0.3342, "num_tokens": 3674274360.0, "step": 5801 }, { "epoch": 0.6860588861298332, "grad_norm": 0.11937914788722992, "learning_rate": 1.8804695196191803e-05, "loss": 0.2963, "num_tokens": 3674908911.0, "step": 5802 }, { "epoch": 0.6861771313704623, "grad_norm": 0.13128729164600372, "learning_rate": 1.8795900228879325e-05, "loss": 0.3449, "num_tokens": 3675545591.0, "step": 5803 }, { "epoch": 0.6862953766110914, "grad_norm": 0.1294240951538086, "learning_rate": 1.8787107344944645e-05, "loss": 0.3843, "num_tokens": 3676178062.0, "step": 5804 }, { "epoch": 0.6864136218517205, "grad_norm": 0.12306107580661774, "learning_rate": 1.877831654567746e-05, "loss": 0.298, "num_tokens": 3676814424.0, "step": 5805 }, { "epoch": 0.6865318670923495, "grad_norm": 0.13529321551322937, "learning_rate": 1.876952783236714e-05, "loss": 0.331, "num_tokens": 3677441143.0, "step": 5806 }, { "epoch": 0.6866501123329786, "grad_norm": 0.13990989327430725, "learning_rate": 1.8760741206302762e-05, "loss": 0.344, "num_tokens": 3678075577.0, "step": 5807 }, { "epoch": 0.6867683575736077, "grad_norm": 0.1441233903169632, "learning_rate": 1.875195666877312e-05, "loss": 0.3703, "num_tokens": 3678708756.0, "step": 5808 }, { "epoch": 0.6868866028142367, "grad_norm": 0.14841344952583313, "learning_rate": 1.8743174221066667e-05, "loss": 0.3625, "num_tokens": 3679346612.0, "step": 5809 }, { "epoch": 0.6870048480548658, "grad_norm": 0.13532261550426483, "learning_rate": 1.8734393864471552e-05, "loss": 0.3267, "num_tokens": 3679980429.0, "step": 5810 }, { "epoch": 0.6871230932954948, "grad_norm": 0.14109835028648376, "learning_rate": 1.8725615600275655e-05, "loss": 0.32, "num_tokens": 3680601370.0, "step": 5811 }, { "epoch": 0.6872413385361239, "grad_norm": 0.14897282421588898, "learning_rate": 1.8716839429766503e-05, "loss": 0.3341, "num_tokens": 3681237676.0, "step": 5812 }, { "epoch": 0.687359583776753, "grad_norm": 0.1292956918478012, "learning_rate": 1.8708065354231353e-05, "loss": 0.3204, "num_tokens": 3681871887.0, "step": 5813 }, { "epoch": 0.6874778290173821, "grad_norm": 0.11455988883972168, "learning_rate": 1.8699293374957108e-05, "loss": 0.2892, "num_tokens": 3682507493.0, "step": 5814 }, { "epoch": 0.6875960742580112, "grad_norm": 0.14378902316093445, "learning_rate": 1.8690523493230426e-05, "loss": 0.3607, "num_tokens": 3683139957.0, "step": 5815 }, { "epoch": 0.6877143194986401, "grad_norm": 0.13629965484142303, "learning_rate": 1.8681755710337593e-05, "loss": 0.3125, "num_tokens": 3683775772.0, "step": 5816 }, { "epoch": 0.6878325647392692, "grad_norm": 0.12297064810991287, "learning_rate": 1.8672990027564646e-05, "loss": 0.3264, "num_tokens": 3684411000.0, "step": 5817 }, { "epoch": 0.6879508099798983, "grad_norm": 0.13420647382736206, "learning_rate": 1.8664226446197276e-05, "loss": 0.3412, "num_tokens": 3685042765.0, "step": 5818 }, { "epoch": 0.6880690552205274, "grad_norm": 0.13430431485176086, "learning_rate": 1.865546496752086e-05, "loss": 0.3618, "num_tokens": 3685681556.0, "step": 5819 }, { "epoch": 0.6881873004611564, "grad_norm": 0.13524684309959412, "learning_rate": 1.864670559282051e-05, "loss": 0.3394, "num_tokens": 3686309890.0, "step": 5820 }, { "epoch": 0.6883055457017855, "grad_norm": 0.12971335649490356, "learning_rate": 1.863794832338099e-05, "loss": 0.3304, "num_tokens": 3686949397.0, "step": 5821 }, { "epoch": 0.6884237909424146, "grad_norm": 0.1285020411014557, "learning_rate": 1.8629193160486765e-05, "loss": 0.3239, "num_tokens": 3687567509.0, "step": 5822 }, { "epoch": 0.6885420361830437, "grad_norm": 0.14252929389476776, "learning_rate": 1.8620440105421983e-05, "loss": 0.3903, "num_tokens": 3688198883.0, "step": 5823 }, { "epoch": 0.6886602814236727, "grad_norm": 0.14251406490802765, "learning_rate": 1.861168915947052e-05, "loss": 0.3515, "num_tokens": 3688830943.0, "step": 5824 }, { "epoch": 0.6887785266643017, "grad_norm": 0.12622565031051636, "learning_rate": 1.8602940323915882e-05, "loss": 0.3243, "num_tokens": 3689464533.0, "step": 5825 }, { "epoch": 0.6888967719049308, "grad_norm": 0.127378910779953, "learning_rate": 1.8594193600041328e-05, "loss": 0.3115, "num_tokens": 3690102461.0, "step": 5826 }, { "epoch": 0.6890150171455599, "grad_norm": 0.13644152879714966, "learning_rate": 1.8585448989129776e-05, "loss": 0.3177, "num_tokens": 3690731582.0, "step": 5827 }, { "epoch": 0.689133262386189, "grad_norm": 0.12838402390480042, "learning_rate": 1.8576706492463818e-05, "loss": 0.3353, "num_tokens": 3691364036.0, "step": 5828 }, { "epoch": 0.689251507626818, "grad_norm": 0.13879919052124023, "learning_rate": 1.8567966111325767e-05, "loss": 0.3385, "num_tokens": 3691964823.0, "step": 5829 }, { "epoch": 0.6893697528674471, "grad_norm": 0.15734964609146118, "learning_rate": 1.8559227846997616e-05, "loss": 0.362, "num_tokens": 3692599731.0, "step": 5830 }, { "epoch": 0.6894879981080761, "grad_norm": 0.12720921635627747, "learning_rate": 1.8550491700761037e-05, "loss": 0.3405, "num_tokens": 3693227291.0, "step": 5831 }, { "epoch": 0.6896062433487052, "grad_norm": 0.12157858163118362, "learning_rate": 1.8541757673897392e-05, "loss": 0.3078, "num_tokens": 3693860216.0, "step": 5832 }, { "epoch": 0.6897244885893343, "grad_norm": 0.13956192135810852, "learning_rate": 1.8533025767687755e-05, "loss": 0.367, "num_tokens": 3694498912.0, "step": 5833 }, { "epoch": 0.6898427338299633, "grad_norm": 0.12501904368400574, "learning_rate": 1.852429598341288e-05, "loss": 0.3079, "num_tokens": 3695129842.0, "step": 5834 }, { "epoch": 0.6899609790705924, "grad_norm": 0.12412240356206894, "learning_rate": 1.8515568322353172e-05, "loss": 0.3107, "num_tokens": 3695760216.0, "step": 5835 }, { "epoch": 0.6900792243112215, "grad_norm": 0.1253201961517334, "learning_rate": 1.8506842785788784e-05, "loss": 0.307, "num_tokens": 3696396860.0, "step": 5836 }, { "epoch": 0.6901974695518506, "grad_norm": 0.13254958391189575, "learning_rate": 1.849811937499951e-05, "loss": 0.3608, "num_tokens": 3697032087.0, "step": 5837 }, { "epoch": 0.6903157147924796, "grad_norm": 0.12601864337921143, "learning_rate": 1.8489398091264867e-05, "loss": 0.3188, "num_tokens": 3697667689.0, "step": 5838 }, { "epoch": 0.6904339600331086, "grad_norm": 0.13481323421001434, "learning_rate": 1.8480678935864037e-05, "loss": 0.3366, "num_tokens": 3698301353.0, "step": 5839 }, { "epoch": 0.6905522052737377, "grad_norm": 0.13996563851833344, "learning_rate": 1.8471961910075884e-05, "loss": 0.3396, "num_tokens": 3698937753.0, "step": 5840 }, { "epoch": 0.6906704505143668, "grad_norm": 0.13765645027160645, "learning_rate": 1.8463247015178995e-05, "loss": 0.3525, "num_tokens": 3699561264.0, "step": 5841 }, { "epoch": 0.6907886957549959, "grad_norm": 0.12768568098545074, "learning_rate": 1.8454534252451598e-05, "loss": 0.3422, "num_tokens": 3700162875.0, "step": 5842 }, { "epoch": 0.6909069409956249, "grad_norm": 0.12752121686935425, "learning_rate": 1.8445823623171675e-05, "loss": 0.319, "num_tokens": 3700793418.0, "step": 5843 }, { "epoch": 0.691025186236254, "grad_norm": 0.12035366892814636, "learning_rate": 1.8437115128616794e-05, "loss": 0.3251, "num_tokens": 3701424074.0, "step": 5844 }, { "epoch": 0.6911434314768831, "grad_norm": 0.12277904152870178, "learning_rate": 1.8428408770064313e-05, "loss": 0.333, "num_tokens": 3702061398.0, "step": 5845 }, { "epoch": 0.6912616767175122, "grad_norm": 0.13956542313098907, "learning_rate": 1.8419704548791207e-05, "loss": 0.3495, "num_tokens": 3702694538.0, "step": 5846 }, { "epoch": 0.6913799219581411, "grad_norm": 0.1440926343202591, "learning_rate": 1.8411002466074176e-05, "loss": 0.384, "num_tokens": 3703331572.0, "step": 5847 }, { "epoch": 0.6914981671987702, "grad_norm": 0.12320835143327713, "learning_rate": 1.8402302523189596e-05, "loss": 0.3523, "num_tokens": 3703969200.0, "step": 5848 }, { "epoch": 0.6916164124393993, "grad_norm": 0.12968814373016357, "learning_rate": 1.8393604721413503e-05, "loss": 0.3529, "num_tokens": 3704604595.0, "step": 5849 }, { "epoch": 0.6917346576800284, "grad_norm": 0.12651167809963226, "learning_rate": 1.8384909062021665e-05, "loss": 0.3253, "num_tokens": 3705238759.0, "step": 5850 }, { "epoch": 0.6918529029206575, "grad_norm": 0.1557781845331192, "learning_rate": 1.837621554628951e-05, "loss": 0.3304, "num_tokens": 3705872573.0, "step": 5851 }, { "epoch": 0.6919711481612865, "grad_norm": 0.13032633066177368, "learning_rate": 1.8367524175492147e-05, "loss": 0.3055, "num_tokens": 3706509299.0, "step": 5852 }, { "epoch": 0.6920893934019156, "grad_norm": 0.1324736475944519, "learning_rate": 1.835883495090437e-05, "loss": 0.3456, "num_tokens": 3707142267.0, "step": 5853 }, { "epoch": 0.6922076386425446, "grad_norm": 0.13086870312690735, "learning_rate": 1.8350147873800677e-05, "loss": 0.33, "num_tokens": 3707778525.0, "step": 5854 }, { "epoch": 0.6923258838831737, "grad_norm": 0.1258751004934311, "learning_rate": 1.834146294545524e-05, "loss": 0.3049, "num_tokens": 3708416113.0, "step": 5855 }, { "epoch": 0.6924441291238028, "grad_norm": 0.1445545107126236, "learning_rate": 1.8332780167141922e-05, "loss": 0.3256, "num_tokens": 3709052784.0, "step": 5856 }, { "epoch": 0.6925623743644318, "grad_norm": 0.13186568021774292, "learning_rate": 1.8324099540134252e-05, "loss": 0.3295, "num_tokens": 3709688552.0, "step": 5857 }, { "epoch": 0.6926806196050609, "grad_norm": 0.12505275011062622, "learning_rate": 1.8315421065705452e-05, "loss": 0.3193, "num_tokens": 3710324473.0, "step": 5858 }, { "epoch": 0.69279886484569, "grad_norm": 0.13638946413993835, "learning_rate": 1.830674474512845e-05, "loss": 0.335, "num_tokens": 3710960838.0, "step": 5859 }, { "epoch": 0.6929171100863191, "grad_norm": 0.13417398929595947, "learning_rate": 1.829807057967583e-05, "loss": 0.3604, "num_tokens": 3711596350.0, "step": 5860 }, { "epoch": 0.693035355326948, "grad_norm": 0.12994061410427094, "learning_rate": 1.8289398570619858e-05, "loss": 0.3248, "num_tokens": 3712232302.0, "step": 5861 }, { "epoch": 0.6931536005675771, "grad_norm": 0.13772404193878174, "learning_rate": 1.828072871923252e-05, "loss": 0.3536, "num_tokens": 3712864388.0, "step": 5862 }, { "epoch": 0.6932718458082062, "grad_norm": 0.13559986650943756, "learning_rate": 1.827206102678544e-05, "loss": 0.3645, "num_tokens": 3713501823.0, "step": 5863 }, { "epoch": 0.6933900910488353, "grad_norm": 0.12994882464408875, "learning_rate": 1.826339549454997e-05, "loss": 0.319, "num_tokens": 3714137941.0, "step": 5864 }, { "epoch": 0.6935083362894644, "grad_norm": 0.1348172128200531, "learning_rate": 1.8254732123797088e-05, "loss": 0.3096, "num_tokens": 3714772937.0, "step": 5865 }, { "epoch": 0.6936265815300934, "grad_norm": 0.13262900710105896, "learning_rate": 1.8246070915797524e-05, "loss": 0.3223, "num_tokens": 3715404799.0, "step": 5866 }, { "epoch": 0.6937448267707225, "grad_norm": 0.1294858604669571, "learning_rate": 1.8237411871821627e-05, "loss": 0.3325, "num_tokens": 3716033831.0, "step": 5867 }, { "epoch": 0.6938630720113516, "grad_norm": 0.12642933428287506, "learning_rate": 1.8228754993139475e-05, "loss": 0.3444, "num_tokens": 3716663303.0, "step": 5868 }, { "epoch": 0.6939813172519806, "grad_norm": 0.13638174533843994, "learning_rate": 1.822010028102081e-05, "loss": 0.3735, "num_tokens": 3717296989.0, "step": 5869 }, { "epoch": 0.6940995624926096, "grad_norm": 0.12498006224632263, "learning_rate": 1.821144773673504e-05, "loss": 0.3237, "num_tokens": 3717935228.0, "step": 5870 }, { "epoch": 0.6942178077332387, "grad_norm": 0.1240546777844429, "learning_rate": 1.8202797361551294e-05, "loss": 0.3403, "num_tokens": 3718568154.0, "step": 5871 }, { "epoch": 0.6943360529738678, "grad_norm": 0.1277850866317749, "learning_rate": 1.8194149156738334e-05, "loss": 0.3167, "num_tokens": 3719166742.0, "step": 5872 }, { "epoch": 0.6944542982144969, "grad_norm": 0.12373281270265579, "learning_rate": 1.8185503123564677e-05, "loss": 0.3403, "num_tokens": 3719806266.0, "step": 5873 }, { "epoch": 0.694572543455126, "grad_norm": 0.14084646105766296, "learning_rate": 1.8176859263298416e-05, "loss": 0.3419, "num_tokens": 3720438000.0, "step": 5874 }, { "epoch": 0.694690788695755, "grad_norm": 0.12635065615177155, "learning_rate": 1.8168217577207413e-05, "loss": 0.3171, "num_tokens": 3721072113.0, "step": 5875 }, { "epoch": 0.694809033936384, "grad_norm": 0.12144511193037033, "learning_rate": 1.8159578066559193e-05, "loss": 0.301, "num_tokens": 3721669813.0, "step": 5876 }, { "epoch": 0.6949272791770131, "grad_norm": 0.11597161740064621, "learning_rate": 1.815094073262094e-05, "loss": 0.2839, "num_tokens": 3722301682.0, "step": 5877 }, { "epoch": 0.6950455244176422, "grad_norm": 0.11677677184343338, "learning_rate": 1.8142305576659523e-05, "loss": 0.3159, "num_tokens": 3722941144.0, "step": 5878 }, { "epoch": 0.6951637696582712, "grad_norm": 0.138946533203125, "learning_rate": 1.81336725999415e-05, "loss": 0.3364, "num_tokens": 3723565854.0, "step": 5879 }, { "epoch": 0.6952820148989003, "grad_norm": 0.1254957616329193, "learning_rate": 1.8125041803733117e-05, "loss": 0.308, "num_tokens": 3724201759.0, "step": 5880 }, { "epoch": 0.6954002601395294, "grad_norm": 0.12992896139621735, "learning_rate": 1.811641318930029e-05, "loss": 0.3517, "num_tokens": 3724841381.0, "step": 5881 }, { "epoch": 0.6955185053801585, "grad_norm": 0.13455608487129211, "learning_rate": 1.81077867579086e-05, "loss": 0.3785, "num_tokens": 3725478716.0, "step": 5882 }, { "epoch": 0.6956367506207876, "grad_norm": 0.12912684679031372, "learning_rate": 1.8099162510823345e-05, "loss": 0.3531, "num_tokens": 3726118385.0, "step": 5883 }, { "epoch": 0.6957549958614165, "grad_norm": 0.13361915946006775, "learning_rate": 1.8090540449309462e-05, "loss": 0.348, "num_tokens": 3726757062.0, "step": 5884 }, { "epoch": 0.6958732411020456, "grad_norm": 0.12414980679750443, "learning_rate": 1.8081920574631605e-05, "loss": 0.3165, "num_tokens": 3727394687.0, "step": 5885 }, { "epoch": 0.6959914863426747, "grad_norm": 0.12516480684280396, "learning_rate": 1.807330288805408e-05, "loss": 0.3049, "num_tokens": 3728030034.0, "step": 5886 }, { "epoch": 0.6961097315833038, "grad_norm": 0.1287129819393158, "learning_rate": 1.8064687390840885e-05, "loss": 0.3331, "num_tokens": 3728663540.0, "step": 5887 }, { "epoch": 0.6962279768239328, "grad_norm": 0.12457302212715149, "learning_rate": 1.8056074084255675e-05, "loss": 0.3058, "num_tokens": 3729296436.0, "step": 5888 }, { "epoch": 0.6963462220645619, "grad_norm": 0.1275339275598526, "learning_rate": 1.8047462969561825e-05, "loss": 0.3124, "num_tokens": 3729935899.0, "step": 5889 }, { "epoch": 0.696464467305191, "grad_norm": 0.1217694878578186, "learning_rate": 1.8038854048022355e-05, "loss": 0.3256, "num_tokens": 3730568085.0, "step": 5890 }, { "epoch": 0.69658271254582, "grad_norm": 0.13456638157367706, "learning_rate": 1.8030247320899968e-05, "loss": 0.3431, "num_tokens": 3731198446.0, "step": 5891 }, { "epoch": 0.6967009577864491, "grad_norm": 0.13324302434921265, "learning_rate": 1.8021642789457064e-05, "loss": 0.3226, "num_tokens": 3731835236.0, "step": 5892 }, { "epoch": 0.6968192030270781, "grad_norm": 0.13632163405418396, "learning_rate": 1.801304045495569e-05, "loss": 0.3215, "num_tokens": 3732468927.0, "step": 5893 }, { "epoch": 0.6969374482677072, "grad_norm": 0.12338250130414963, "learning_rate": 1.8004440318657613e-05, "loss": 0.2885, "num_tokens": 3733106897.0, "step": 5894 }, { "epoch": 0.6970556935083363, "grad_norm": 0.13845548033714294, "learning_rate": 1.799584238182422e-05, "loss": 0.3412, "num_tokens": 3733740560.0, "step": 5895 }, { "epoch": 0.6971739387489654, "grad_norm": 0.12935960292816162, "learning_rate": 1.7987246645716636e-05, "loss": 0.3216, "num_tokens": 3734334682.0, "step": 5896 }, { "epoch": 0.6972921839895945, "grad_norm": 0.12830761075019836, "learning_rate": 1.7978653111595624e-05, "loss": 0.3355, "num_tokens": 3734970731.0, "step": 5897 }, { "epoch": 0.6974104292302234, "grad_norm": 0.13360562920570374, "learning_rate": 1.797006178072164e-05, "loss": 0.3283, "num_tokens": 3735602143.0, "step": 5898 }, { "epoch": 0.6975286744708525, "grad_norm": 0.14855296909809113, "learning_rate": 1.7961472654354816e-05, "loss": 0.3562, "num_tokens": 3736241071.0, "step": 5899 }, { "epoch": 0.6976469197114816, "grad_norm": 0.13049012422561646, "learning_rate": 1.7952885733754933e-05, "loss": 0.3215, "num_tokens": 3736872867.0, "step": 5900 }, { "epoch": 0.6977651649521107, "grad_norm": 0.1217912882566452, "learning_rate": 1.79443010201815e-05, "loss": 0.3094, "num_tokens": 3737493742.0, "step": 5901 }, { "epoch": 0.6978834101927397, "grad_norm": 0.13400143384933472, "learning_rate": 1.7935718514893667e-05, "loss": 0.3339, "num_tokens": 3738123798.0, "step": 5902 }, { "epoch": 0.6980016554333688, "grad_norm": 0.14052388072013855, "learning_rate": 1.7927138219150267e-05, "loss": 0.3832, "num_tokens": 3738751599.0, "step": 5903 }, { "epoch": 0.6981199006739979, "grad_norm": 0.13133621215820312, "learning_rate": 1.7918560134209794e-05, "loss": 0.3427, "num_tokens": 3739390411.0, "step": 5904 }, { "epoch": 0.698238145914627, "grad_norm": 0.12761370837688446, "learning_rate": 1.7909984261330453e-05, "loss": 0.329, "num_tokens": 3740023173.0, "step": 5905 }, { "epoch": 0.698356391155256, "grad_norm": 0.1106487363576889, "learning_rate": 1.7901410601770097e-05, "loss": 0.2964, "num_tokens": 3740655693.0, "step": 5906 }, { "epoch": 0.698474636395885, "grad_norm": 0.16570104658603668, "learning_rate": 1.7892839156786272e-05, "loss": 0.3268, "num_tokens": 3741231195.0, "step": 5907 }, { "epoch": 0.6985928816365141, "grad_norm": 0.13732890784740448, "learning_rate": 1.7884269927636185e-05, "loss": 0.3276, "num_tokens": 3741852626.0, "step": 5908 }, { "epoch": 0.6987111268771432, "grad_norm": 0.1352613866329193, "learning_rate": 1.787570291557671e-05, "loss": 0.347, "num_tokens": 3742488743.0, "step": 5909 }, { "epoch": 0.6988293721177723, "grad_norm": 0.13195863366127014, "learning_rate": 1.7867138121864427e-05, "loss": 0.3293, "num_tokens": 3743118485.0, "step": 5910 }, { "epoch": 0.6989476173584013, "grad_norm": 0.13302285969257355, "learning_rate": 1.7858575547755563e-05, "loss": 0.3402, "num_tokens": 3743751512.0, "step": 5911 }, { "epoch": 0.6990658625990304, "grad_norm": 0.13765741884708405, "learning_rate": 1.785001519450602e-05, "loss": 0.3221, "num_tokens": 3744382004.0, "step": 5912 }, { "epoch": 0.6991841078396595, "grad_norm": 0.1335068792104721, "learning_rate": 1.7841457063371398e-05, "loss": 0.3032, "num_tokens": 3745012636.0, "step": 5913 }, { "epoch": 0.6993023530802885, "grad_norm": 0.1334938257932663, "learning_rate": 1.7832901155606938e-05, "loss": 0.3322, "num_tokens": 3745648567.0, "step": 5914 }, { "epoch": 0.6994205983209176, "grad_norm": 0.15474091470241547, "learning_rate": 1.78243474724676e-05, "loss": 0.3636, "num_tokens": 3746275333.0, "step": 5915 }, { "epoch": 0.6995388435615466, "grad_norm": 0.14363034069538116, "learning_rate": 1.7815796015207967e-05, "loss": 0.331, "num_tokens": 3746908633.0, "step": 5916 }, { "epoch": 0.6996570888021757, "grad_norm": 0.13288013637065887, "learning_rate": 1.7807246785082327e-05, "loss": 0.3591, "num_tokens": 3747548103.0, "step": 5917 }, { "epoch": 0.6997753340428048, "grad_norm": 0.13713829219341278, "learning_rate": 1.7798699783344625e-05, "loss": 0.3356, "num_tokens": 3748180942.0, "step": 5918 }, { "epoch": 0.6998935792834339, "grad_norm": 0.15143044292926788, "learning_rate": 1.7790155011248504e-05, "loss": 0.3323, "num_tokens": 3748820278.0, "step": 5919 }, { "epoch": 0.7000118245240629, "grad_norm": 0.1269940733909607, "learning_rate": 1.778161247004725e-05, "loss": 0.3491, "num_tokens": 3749457787.0, "step": 5920 }, { "epoch": 0.7001300697646919, "grad_norm": 0.13239197432994843, "learning_rate": 1.777307216099383e-05, "loss": 0.3533, "num_tokens": 3750093749.0, "step": 5921 }, { "epoch": 0.700248315005321, "grad_norm": 0.13065047562122345, "learning_rate": 1.776453408534091e-05, "loss": 0.3059, "num_tokens": 3750732722.0, "step": 5922 }, { "epoch": 0.7003665602459501, "grad_norm": 0.12075690925121307, "learning_rate": 1.7755998244340784e-05, "loss": 0.2874, "num_tokens": 3751369271.0, "step": 5923 }, { "epoch": 0.7004848054865792, "grad_norm": 0.15137693285942078, "learning_rate": 1.774746463924547e-05, "loss": 0.3179, "num_tokens": 3752007879.0, "step": 5924 }, { "epoch": 0.7006030507272082, "grad_norm": 0.1457999348640442, "learning_rate": 1.7738933271306592e-05, "loss": 0.3282, "num_tokens": 3752642055.0, "step": 5925 }, { "epoch": 0.7007212959678373, "grad_norm": 0.12405742704868317, "learning_rate": 1.7730404141775504e-05, "loss": 0.3223, "num_tokens": 3753276595.0, "step": 5926 }, { "epoch": 0.7008395412084664, "grad_norm": 0.1320282369852066, "learning_rate": 1.772187725190322e-05, "loss": 0.3563, "num_tokens": 3753911850.0, "step": 5927 }, { "epoch": 0.7009577864490955, "grad_norm": 0.1359589695930481, "learning_rate": 1.77133526029404e-05, "loss": 0.3523, "num_tokens": 3754549639.0, "step": 5928 }, { "epoch": 0.7010760316897245, "grad_norm": 0.13008180260658264, "learning_rate": 1.7704830196137403e-05, "loss": 0.3231, "num_tokens": 3755157196.0, "step": 5929 }, { "epoch": 0.7011942769303535, "grad_norm": 0.1260058432817459, "learning_rate": 1.769631003274423e-05, "loss": 0.3504, "num_tokens": 3755794739.0, "step": 5930 }, { "epoch": 0.7013125221709826, "grad_norm": 0.15542006492614746, "learning_rate": 1.7687792114010596e-05, "loss": 0.3209, "num_tokens": 3756428557.0, "step": 5931 }, { "epoch": 0.7014307674116117, "grad_norm": 0.12867040932178497, "learning_rate": 1.7679276441185848e-05, "loss": 0.3143, "num_tokens": 3757063138.0, "step": 5932 }, { "epoch": 0.7015490126522408, "grad_norm": 0.13452968001365662, "learning_rate": 1.7670763015519008e-05, "loss": 0.348, "num_tokens": 3757697105.0, "step": 5933 }, { "epoch": 0.7016672578928698, "grad_norm": 0.1208498403429985, "learning_rate": 1.76622518382588e-05, "loss": 0.3172, "num_tokens": 3758328787.0, "step": 5934 }, { "epoch": 0.7017855031334989, "grad_norm": 0.1266033798456192, "learning_rate": 1.765374291065357e-05, "loss": 0.3229, "num_tokens": 3758961930.0, "step": 5935 }, { "epoch": 0.701903748374128, "grad_norm": 0.1323128342628479, "learning_rate": 1.7645236233951385e-05, "loss": 0.315, "num_tokens": 3759593371.0, "step": 5936 }, { "epoch": 0.702021993614757, "grad_norm": 0.1348852515220642, "learning_rate": 1.7636731809399942e-05, "loss": 0.3268, "num_tokens": 3760230826.0, "step": 5937 }, { "epoch": 0.7021402388553861, "grad_norm": 0.12249930202960968, "learning_rate": 1.7628229638246628e-05, "loss": 0.3334, "num_tokens": 3760865267.0, "step": 5938 }, { "epoch": 0.7022584840960151, "grad_norm": 0.13622398674488068, "learning_rate": 1.7619729721738488e-05, "loss": 0.3426, "num_tokens": 3761494899.0, "step": 5939 }, { "epoch": 0.7023767293366442, "grad_norm": 0.12729552388191223, "learning_rate": 1.761123206112224e-05, "loss": 0.3402, "num_tokens": 3762128399.0, "step": 5940 }, { "epoch": 0.7024949745772733, "grad_norm": 0.1256004422903061, "learning_rate": 1.7602736657644293e-05, "loss": 0.3159, "num_tokens": 3762766951.0, "step": 5941 }, { "epoch": 0.7026132198179024, "grad_norm": 0.1314542144536972, "learning_rate": 1.7594243512550678e-05, "loss": 0.3525, "num_tokens": 3763398115.0, "step": 5942 }, { "epoch": 0.7027314650585313, "grad_norm": 0.12950840592384338, "learning_rate": 1.7585752627087147e-05, "loss": 0.3149, "num_tokens": 3764035725.0, "step": 5943 }, { "epoch": 0.7028497102991604, "grad_norm": 0.12112457305192947, "learning_rate": 1.7577264002499066e-05, "loss": 0.2977, "num_tokens": 3764667048.0, "step": 5944 }, { "epoch": 0.7029679555397895, "grad_norm": 0.12373477220535278, "learning_rate": 1.7568777640031533e-05, "loss": 0.3364, "num_tokens": 3765305075.0, "step": 5945 }, { "epoch": 0.7030862007804186, "grad_norm": 0.12770794332027435, "learning_rate": 1.7560293540929256e-05, "loss": 0.365, "num_tokens": 3765941596.0, "step": 5946 }, { "epoch": 0.7032044460210477, "grad_norm": 0.12122304737567902, "learning_rate": 1.755181170643664e-05, "loss": 0.3029, "num_tokens": 3766572533.0, "step": 5947 }, { "epoch": 0.7033226912616767, "grad_norm": 0.13427914679050446, "learning_rate": 1.7543332137797762e-05, "loss": 0.3589, "num_tokens": 3767208403.0, "step": 5948 }, { "epoch": 0.7034409365023058, "grad_norm": 0.136050745844841, "learning_rate": 1.753485483625635e-05, "loss": 0.3324, "num_tokens": 3767843106.0, "step": 5949 }, { "epoch": 0.7035591817429349, "grad_norm": 0.11816053837537766, "learning_rate": 1.752637980305581e-05, "loss": 0.2991, "num_tokens": 3768480847.0, "step": 5950 }, { "epoch": 0.703677426983564, "grad_norm": 0.13640017807483673, "learning_rate": 1.7517907039439202e-05, "loss": 0.3316, "num_tokens": 3769114573.0, "step": 5951 }, { "epoch": 0.7037956722241929, "grad_norm": 0.12880422174930573, "learning_rate": 1.750943654664928e-05, "loss": 0.3162, "num_tokens": 3769746313.0, "step": 5952 }, { "epoch": 0.703913917464822, "grad_norm": 0.12790872156620026, "learning_rate": 1.7500968325928432e-05, "loss": 0.3325, "num_tokens": 3770382666.0, "step": 5953 }, { "epoch": 0.7040321627054511, "grad_norm": 0.11633957922458649, "learning_rate": 1.749250237851875e-05, "loss": 0.281, "num_tokens": 3771013101.0, "step": 5954 }, { "epoch": 0.7041504079460802, "grad_norm": 0.13028587400913239, "learning_rate": 1.7484038705661957e-05, "loss": 0.3326, "num_tokens": 3771648902.0, "step": 5955 }, { "epoch": 0.7042686531867093, "grad_norm": 0.12643644213676453, "learning_rate": 1.7475577308599446e-05, "loss": 0.3051, "num_tokens": 3772285028.0, "step": 5956 }, { "epoch": 0.7043868984273383, "grad_norm": 0.13222385942935944, "learning_rate": 1.7467118188572316e-05, "loss": 0.3477, "num_tokens": 3772921626.0, "step": 5957 }, { "epoch": 0.7045051436679673, "grad_norm": 0.130381777882576, "learning_rate": 1.745866134682129e-05, "loss": 0.3244, "num_tokens": 3773549346.0, "step": 5958 }, { "epoch": 0.7046233889085964, "grad_norm": 0.12099155783653259, "learning_rate": 1.7450206784586765e-05, "loss": 0.3085, "num_tokens": 3774184784.0, "step": 5959 }, { "epoch": 0.7047416341492255, "grad_norm": 0.12213980406522751, "learning_rate": 1.7441754503108802e-05, "loss": 0.2883, "num_tokens": 3774818601.0, "step": 5960 }, { "epoch": 0.7048598793898545, "grad_norm": 0.13477879762649536, "learning_rate": 1.7433304503627144e-05, "loss": 0.3139, "num_tokens": 3775457338.0, "step": 5961 }, { "epoch": 0.7049781246304836, "grad_norm": 0.12299639731645584, "learning_rate": 1.7424856787381217e-05, "loss": 0.3171, "num_tokens": 3776081158.0, "step": 5962 }, { "epoch": 0.7050963698711127, "grad_norm": 0.12891604006290436, "learning_rate": 1.7416411355610037e-05, "loss": 0.3491, "num_tokens": 3776716189.0, "step": 5963 }, { "epoch": 0.7052146151117418, "grad_norm": 0.12535972893238068, "learning_rate": 1.7407968209552355e-05, "loss": 0.3237, "num_tokens": 3777354449.0, "step": 5964 }, { "epoch": 0.7053328603523709, "grad_norm": 0.1294996440410614, "learning_rate": 1.739952735044656e-05, "loss": 0.3825, "num_tokens": 3777990017.0, "step": 5965 }, { "epoch": 0.7054511055929998, "grad_norm": 0.13155752420425415, "learning_rate": 1.7391088779530717e-05, "loss": 0.3362, "num_tokens": 3778625872.0, "step": 5966 }, { "epoch": 0.7055693508336289, "grad_norm": 0.1283806413412094, "learning_rate": 1.738265249804255e-05, "loss": 0.3308, "num_tokens": 3779264221.0, "step": 5967 }, { "epoch": 0.705687596074258, "grad_norm": 0.1282111555337906, "learning_rate": 1.737421850721943e-05, "loss": 0.3474, "num_tokens": 3779896776.0, "step": 5968 }, { "epoch": 0.7058058413148871, "grad_norm": 0.13246379792690277, "learning_rate": 1.7365786808298423e-05, "loss": 0.3003, "num_tokens": 3780534683.0, "step": 5969 }, { "epoch": 0.7059240865555162, "grad_norm": 0.13067099452018738, "learning_rate": 1.7357357402516243e-05, "loss": 0.3049, "num_tokens": 3781171298.0, "step": 5970 }, { "epoch": 0.7060423317961452, "grad_norm": 0.13618256151676178, "learning_rate": 1.7348930291109254e-05, "loss": 0.3441, "num_tokens": 3781801653.0, "step": 5971 }, { "epoch": 0.7061605770367743, "grad_norm": 0.13174451887607574, "learning_rate": 1.7340505475313502e-05, "loss": 0.3669, "num_tokens": 3782434709.0, "step": 5972 }, { "epoch": 0.7062788222774034, "grad_norm": 0.13330353796482086, "learning_rate": 1.7332082956364708e-05, "loss": 0.3467, "num_tokens": 3783070999.0, "step": 5973 }, { "epoch": 0.7063970675180324, "grad_norm": 0.13794635236263275, "learning_rate": 1.7323662735498218e-05, "loss": 0.3453, "num_tokens": 3783705559.0, "step": 5974 }, { "epoch": 0.7065153127586614, "grad_norm": 0.12331514805555344, "learning_rate": 1.731524481394908e-05, "loss": 0.3275, "num_tokens": 3784339502.0, "step": 5975 }, { "epoch": 0.7066335579992905, "grad_norm": 0.1341167539358139, "learning_rate": 1.7306829192951982e-05, "loss": 0.3632, "num_tokens": 3784978842.0, "step": 5976 }, { "epoch": 0.7067518032399196, "grad_norm": 0.11850544810295105, "learning_rate": 1.7298415873741266e-05, "loss": 0.2866, "num_tokens": 3785617791.0, "step": 5977 }, { "epoch": 0.7068700484805487, "grad_norm": 0.11415688693523407, "learning_rate": 1.7290004857550975e-05, "loss": 0.2971, "num_tokens": 3786254170.0, "step": 5978 }, { "epoch": 0.7069882937211778, "grad_norm": 0.1370972841978073, "learning_rate": 1.7281596145614775e-05, "loss": 0.326, "num_tokens": 3786892778.0, "step": 5979 }, { "epoch": 0.7071065389618068, "grad_norm": 0.13354960083961487, "learning_rate": 1.7273189739166014e-05, "loss": 0.3347, "num_tokens": 3787529712.0, "step": 5980 }, { "epoch": 0.7072247842024358, "grad_norm": 0.13305804133415222, "learning_rate": 1.7264785639437684e-05, "loss": 0.3461, "num_tokens": 3788165888.0, "step": 5981 }, { "epoch": 0.7073430294430649, "grad_norm": 0.13702531158924103, "learning_rate": 1.7256383847662472e-05, "loss": 0.3337, "num_tokens": 3788804753.0, "step": 5982 }, { "epoch": 0.707461274683694, "grad_norm": 0.1377696692943573, "learning_rate": 1.7247984365072685e-05, "loss": 0.3607, "num_tokens": 3789442988.0, "step": 5983 }, { "epoch": 0.707579519924323, "grad_norm": 0.14974941313266754, "learning_rate": 1.723958719290033e-05, "loss": 0.3744, "num_tokens": 3790075724.0, "step": 5984 }, { "epoch": 0.7076977651649521, "grad_norm": 0.12751975655555725, "learning_rate": 1.7231192332377053e-05, "loss": 0.3385, "num_tokens": 3790706662.0, "step": 5985 }, { "epoch": 0.7078160104055812, "grad_norm": 0.11750368773937225, "learning_rate": 1.722279978473415e-05, "loss": 0.3345, "num_tokens": 3791344612.0, "step": 5986 }, { "epoch": 0.7079342556462103, "grad_norm": 0.13390833139419556, "learning_rate": 1.7214409551202615e-05, "loss": 0.3265, "num_tokens": 3791978909.0, "step": 5987 }, { "epoch": 0.7080525008868394, "grad_norm": 0.13542680442333221, "learning_rate": 1.7206021633013066e-05, "loss": 0.345, "num_tokens": 3792615782.0, "step": 5988 }, { "epoch": 0.7081707461274683, "grad_norm": 0.13035817444324493, "learning_rate": 1.7197636031395803e-05, "loss": 0.3558, "num_tokens": 3793248151.0, "step": 5989 }, { "epoch": 0.7082889913680974, "grad_norm": 0.13301821053028107, "learning_rate": 1.7189252747580763e-05, "loss": 0.3343, "num_tokens": 3793886041.0, "step": 5990 }, { "epoch": 0.7084072366087265, "grad_norm": 0.12952649593353271, "learning_rate": 1.7180871782797578e-05, "loss": 0.3529, "num_tokens": 3794518756.0, "step": 5991 }, { "epoch": 0.7085254818493556, "grad_norm": 0.1305188685655594, "learning_rate": 1.717249313827553e-05, "loss": 0.3363, "num_tokens": 3795154054.0, "step": 5992 }, { "epoch": 0.7086437270899846, "grad_norm": 0.12814801931381226, "learning_rate": 1.7164116815243517e-05, "loss": 0.3211, "num_tokens": 3795783539.0, "step": 5993 }, { "epoch": 0.7087619723306137, "grad_norm": 0.12616711854934692, "learning_rate": 1.7155742814930162e-05, "loss": 0.3298, "num_tokens": 3796421156.0, "step": 5994 }, { "epoch": 0.7088802175712428, "grad_norm": 0.1270228773355484, "learning_rate": 1.71473711385637e-05, "loss": 0.2962, "num_tokens": 3797057780.0, "step": 5995 }, { "epoch": 0.7089984628118718, "grad_norm": 0.12999464571475983, "learning_rate": 1.713900178737205e-05, "loss": 0.304, "num_tokens": 3797692417.0, "step": 5996 }, { "epoch": 0.7091167080525009, "grad_norm": 0.13378968834877014, "learning_rate": 1.7130634762582774e-05, "loss": 0.342, "num_tokens": 3798330739.0, "step": 5997 }, { "epoch": 0.7092349532931299, "grad_norm": 0.136545330286026, "learning_rate": 1.7122270065423102e-05, "loss": 0.3563, "num_tokens": 3798965079.0, "step": 5998 }, { "epoch": 0.709353198533759, "grad_norm": 0.13541528582572937, "learning_rate": 1.711390769711993e-05, "loss": 0.3334, "num_tokens": 3799597928.0, "step": 5999 }, { "epoch": 0.7094714437743881, "grad_norm": 0.1404547095298767, "learning_rate": 1.71055476588998e-05, "loss": 0.3563, "num_tokens": 3800206921.0, "step": 6000 }, { "epoch": 0.7095896890150172, "grad_norm": 0.12705805897712708, "learning_rate": 1.709718995198891e-05, "loss": 0.333, "num_tokens": 3800804021.0, "step": 6001 }, { "epoch": 0.7097079342556462, "grad_norm": 0.12843723595142365, "learning_rate": 1.708883457761312e-05, "loss": 0.3673, "num_tokens": 3801439688.0, "step": 6002 }, { "epoch": 0.7098261794962752, "grad_norm": 0.15162979066371918, "learning_rate": 1.7080481536997955e-05, "loss": 0.3256, "num_tokens": 3802071499.0, "step": 6003 }, { "epoch": 0.7099444247369043, "grad_norm": 0.11769632995128632, "learning_rate": 1.7072130831368585e-05, "loss": 0.273, "num_tokens": 3802711256.0, "step": 6004 }, { "epoch": 0.7100626699775334, "grad_norm": 0.13175168633460999, "learning_rate": 1.7063782461949863e-05, "loss": 0.3611, "num_tokens": 3803347451.0, "step": 6005 }, { "epoch": 0.7101809152181625, "grad_norm": 0.12978686392307281, "learning_rate": 1.7055436429966268e-05, "loss": 0.3143, "num_tokens": 3803983366.0, "step": 6006 }, { "epoch": 0.7102991604587915, "grad_norm": 0.12876322865486145, "learning_rate": 1.7047092736641935e-05, "loss": 0.3045, "num_tokens": 3804612871.0, "step": 6007 }, { "epoch": 0.7104174056994206, "grad_norm": 0.12701863050460815, "learning_rate": 1.7038751383200693e-05, "loss": 0.3037, "num_tokens": 3805247136.0, "step": 6008 }, { "epoch": 0.7105356509400497, "grad_norm": 0.113589346408844, "learning_rate": 1.7030412370866005e-05, "loss": 0.3069, "num_tokens": 3805880119.0, "step": 6009 }, { "epoch": 0.7106538961806788, "grad_norm": 0.138833686709404, "learning_rate": 1.7022075700860982e-05, "loss": 0.3276, "num_tokens": 3806516329.0, "step": 6010 }, { "epoch": 0.7107721414213078, "grad_norm": 0.13270586729049683, "learning_rate": 1.7013741374408388e-05, "loss": 0.3115, "num_tokens": 3807155361.0, "step": 6011 }, { "epoch": 0.7108903866619368, "grad_norm": 0.1339854747056961, "learning_rate": 1.7005409392730668e-05, "loss": 0.3777, "num_tokens": 3807793382.0, "step": 6012 }, { "epoch": 0.7110086319025659, "grad_norm": 0.1354489028453827, "learning_rate": 1.6997079757049923e-05, "loss": 0.3634, "num_tokens": 3808429517.0, "step": 6013 }, { "epoch": 0.711126877143195, "grad_norm": 0.12649747729301453, "learning_rate": 1.698875246858788e-05, "loss": 0.3313, "num_tokens": 3809038122.0, "step": 6014 }, { "epoch": 0.7112451223838241, "grad_norm": 0.11848926544189453, "learning_rate": 1.6980427528565952e-05, "loss": 0.311, "num_tokens": 3809674347.0, "step": 6015 }, { "epoch": 0.7113633676244531, "grad_norm": 0.12262213230133057, "learning_rate": 1.697210493820517e-05, "loss": 0.3384, "num_tokens": 3810308340.0, "step": 6016 }, { "epoch": 0.7114816128650822, "grad_norm": 0.13652683794498444, "learning_rate": 1.696378469872627e-05, "loss": 0.328, "num_tokens": 3810941040.0, "step": 6017 }, { "epoch": 0.7115998581057112, "grad_norm": 0.15048272907733917, "learning_rate": 1.6955466811349615e-05, "loss": 0.3173, "num_tokens": 3811578279.0, "step": 6018 }, { "epoch": 0.7117181033463403, "grad_norm": 0.12327592819929123, "learning_rate": 1.6947151277295204e-05, "loss": 0.3224, "num_tokens": 3812208907.0, "step": 6019 }, { "epoch": 0.7118363485869694, "grad_norm": 0.13084293901920319, "learning_rate": 1.6938838097782746e-05, "loss": 0.3389, "num_tokens": 3812848145.0, "step": 6020 }, { "epoch": 0.7119545938275984, "grad_norm": 0.13727179169654846, "learning_rate": 1.6930527274031537e-05, "loss": 0.3593, "num_tokens": 3813480523.0, "step": 6021 }, { "epoch": 0.7120728390682275, "grad_norm": 0.13184747099876404, "learning_rate": 1.6922218807260605e-05, "loss": 0.3415, "num_tokens": 3814116555.0, "step": 6022 }, { "epoch": 0.7121910843088566, "grad_norm": 0.12516222894191742, "learning_rate": 1.6913912698688547e-05, "loss": 0.2964, "num_tokens": 3814753948.0, "step": 6023 }, { "epoch": 0.7123093295494857, "grad_norm": 0.1447058618068695, "learning_rate": 1.6905608949533683e-05, "loss": 0.3752, "num_tokens": 3815391416.0, "step": 6024 }, { "epoch": 0.7124275747901146, "grad_norm": 0.13148553669452667, "learning_rate": 1.6897307561013937e-05, "loss": 0.3504, "num_tokens": 3816023721.0, "step": 6025 }, { "epoch": 0.7125458200307437, "grad_norm": 0.1292978823184967, "learning_rate": 1.6889008534346942e-05, "loss": 0.3611, "num_tokens": 3816651591.0, "step": 6026 }, { "epoch": 0.7126640652713728, "grad_norm": 0.13025899231433868, "learning_rate": 1.6880711870749927e-05, "loss": 0.3281, "num_tokens": 3817285896.0, "step": 6027 }, { "epoch": 0.7127823105120019, "grad_norm": 0.12341047078371048, "learning_rate": 1.6872417571439808e-05, "loss": 0.3123, "num_tokens": 3817925576.0, "step": 6028 }, { "epoch": 0.712900555752631, "grad_norm": 0.1301572620868683, "learning_rate": 1.6864125637633153e-05, "loss": 0.3404, "num_tokens": 3818556127.0, "step": 6029 }, { "epoch": 0.71301880099326, "grad_norm": 0.12500491738319397, "learning_rate": 1.6855836070546163e-05, "loss": 0.3278, "num_tokens": 3819194603.0, "step": 6030 }, { "epoch": 0.7131370462338891, "grad_norm": 0.12935614585876465, "learning_rate": 1.6847548871394725e-05, "loss": 0.3115, "num_tokens": 3819820405.0, "step": 6031 }, { "epoch": 0.7132552914745182, "grad_norm": 0.12786328792572021, "learning_rate": 1.6839264041394325e-05, "loss": 0.311, "num_tokens": 3820454450.0, "step": 6032 }, { "epoch": 0.7133735367151472, "grad_norm": 0.11960144340991974, "learning_rate": 1.6830981581760165e-05, "loss": 0.3176, "num_tokens": 3821093231.0, "step": 6033 }, { "epoch": 0.7134917819557762, "grad_norm": 0.1327783167362213, "learning_rate": 1.682270149370707e-05, "loss": 0.3534, "num_tokens": 3821724242.0, "step": 6034 }, { "epoch": 0.7136100271964053, "grad_norm": 0.12353923916816711, "learning_rate": 1.681442377844951e-05, "loss": 0.3444, "num_tokens": 3822359698.0, "step": 6035 }, { "epoch": 0.7137282724370344, "grad_norm": 0.11986614763736725, "learning_rate": 1.6806148437201612e-05, "loss": 0.3272, "num_tokens": 3822995801.0, "step": 6036 }, { "epoch": 0.7138465176776635, "grad_norm": 0.13518020510673523, "learning_rate": 1.6797875471177143e-05, "loss": 0.3323, "num_tokens": 3823625205.0, "step": 6037 }, { "epoch": 0.7139647629182926, "grad_norm": 0.12930041551589966, "learning_rate": 1.678960488158956e-05, "loss": 0.3317, "num_tokens": 3824256248.0, "step": 6038 }, { "epoch": 0.7140830081589216, "grad_norm": 0.11744073778390884, "learning_rate": 1.6781336669651942e-05, "loss": 0.2869, "num_tokens": 3824892992.0, "step": 6039 }, { "epoch": 0.7142012533995507, "grad_norm": 0.12493520230054855, "learning_rate": 1.6773070836577e-05, "loss": 0.326, "num_tokens": 3825530430.0, "step": 6040 }, { "epoch": 0.7143194986401797, "grad_norm": 0.1294877976179123, "learning_rate": 1.676480738357716e-05, "loss": 0.3381, "num_tokens": 3826167633.0, "step": 6041 }, { "epoch": 0.7144377438808088, "grad_norm": 0.13321548700332642, "learning_rate": 1.675654631186442e-05, "loss": 0.3231, "num_tokens": 3826802441.0, "step": 6042 }, { "epoch": 0.7145559891214378, "grad_norm": 0.1270340532064438, "learning_rate": 1.6748287622650506e-05, "loss": 0.3461, "num_tokens": 3827432488.0, "step": 6043 }, { "epoch": 0.7146742343620669, "grad_norm": 0.1370619237422943, "learning_rate": 1.6740031317146717e-05, "loss": 0.334, "num_tokens": 3828068572.0, "step": 6044 }, { "epoch": 0.714792479602696, "grad_norm": 0.1405697911977768, "learning_rate": 1.6731777396564073e-05, "loss": 0.3601, "num_tokens": 3828705449.0, "step": 6045 }, { "epoch": 0.7149107248433251, "grad_norm": 0.13506433367729187, "learning_rate": 1.6723525862113185e-05, "loss": 0.3394, "num_tokens": 3829343316.0, "step": 6046 }, { "epoch": 0.7150289700839542, "grad_norm": 0.13099293410778046, "learning_rate": 1.671527671500437e-05, "loss": 0.3375, "num_tokens": 3829978164.0, "step": 6047 }, { "epoch": 0.7151472153245831, "grad_norm": 0.1234850138425827, "learning_rate": 1.670702995644755e-05, "loss": 0.306, "num_tokens": 3830607202.0, "step": 6048 }, { "epoch": 0.7152654605652122, "grad_norm": 0.1304810494184494, "learning_rate": 1.669878558765231e-05, "loss": 0.3343, "num_tokens": 3831240913.0, "step": 6049 }, { "epoch": 0.7153837058058413, "grad_norm": 0.13107362389564514, "learning_rate": 1.6690543609827906e-05, "loss": 0.3583, "num_tokens": 3831876968.0, "step": 6050 }, { "epoch": 0.7155019510464704, "grad_norm": 0.11993814259767532, "learning_rate": 1.66823040241832e-05, "loss": 0.317, "num_tokens": 3832511179.0, "step": 6051 }, { "epoch": 0.7156201962870995, "grad_norm": 0.12485232204198837, "learning_rate": 1.6674066831926762e-05, "loss": 0.3115, "num_tokens": 3833143289.0, "step": 6052 }, { "epoch": 0.7157384415277285, "grad_norm": 0.12179189175367355, "learning_rate": 1.666583203426674e-05, "loss": 0.3066, "num_tokens": 3833776002.0, "step": 6053 }, { "epoch": 0.7158566867683576, "grad_norm": 0.12375129759311676, "learning_rate": 1.6657599632410982e-05, "loss": 0.3121, "num_tokens": 3834401255.0, "step": 6054 }, { "epoch": 0.7159749320089867, "grad_norm": 0.127711683511734, "learning_rate": 1.6649369627566994e-05, "loss": 0.3354, "num_tokens": 3835037873.0, "step": 6055 }, { "epoch": 0.7160931772496157, "grad_norm": 0.12034992128610611, "learning_rate": 1.6641142020941874e-05, "loss": 0.3163, "num_tokens": 3835671501.0, "step": 6056 }, { "epoch": 0.7162114224902447, "grad_norm": 0.13101376593112946, "learning_rate": 1.663291681374242e-05, "loss": 0.3511, "num_tokens": 3836309245.0, "step": 6057 }, { "epoch": 0.7163296677308738, "grad_norm": 0.1385481059551239, "learning_rate": 1.6624694007175044e-05, "loss": 0.3368, "num_tokens": 3836947116.0, "step": 6058 }, { "epoch": 0.7164479129715029, "grad_norm": 0.1270703822374344, "learning_rate": 1.6616473602445843e-05, "loss": 0.2975, "num_tokens": 3837581929.0, "step": 6059 }, { "epoch": 0.716566158212132, "grad_norm": 0.1373627781867981, "learning_rate": 1.6608255600760522e-05, "loss": 0.3166, "num_tokens": 3838219899.0, "step": 6060 }, { "epoch": 0.7166844034527611, "grad_norm": 0.13698303699493408, "learning_rate": 1.6600040003324455e-05, "loss": 0.3011, "num_tokens": 3838852961.0, "step": 6061 }, { "epoch": 0.71680264869339, "grad_norm": 0.12557640671730042, "learning_rate": 1.6591826811342662e-05, "loss": 0.3369, "num_tokens": 3839481418.0, "step": 6062 }, { "epoch": 0.7169208939340191, "grad_norm": 0.13589897751808167, "learning_rate": 1.6583616026019804e-05, "loss": 0.3294, "num_tokens": 3840108767.0, "step": 6063 }, { "epoch": 0.7170391391746482, "grad_norm": 0.1353003978729248, "learning_rate": 1.657540764856021e-05, "loss": 0.3287, "num_tokens": 3840743104.0, "step": 6064 }, { "epoch": 0.7171573844152773, "grad_norm": 0.13242493569850922, "learning_rate": 1.656720168016782e-05, "loss": 0.3531, "num_tokens": 3841377682.0, "step": 6065 }, { "epoch": 0.7172756296559063, "grad_norm": 0.13715659081935883, "learning_rate": 1.655899812204625e-05, "loss": 0.3524, "num_tokens": 3842015758.0, "step": 6066 }, { "epoch": 0.7173938748965354, "grad_norm": 0.13119187951087952, "learning_rate": 1.6550796975398734e-05, "loss": 0.331, "num_tokens": 3842652820.0, "step": 6067 }, { "epoch": 0.7175121201371645, "grad_norm": 0.13550859689712524, "learning_rate": 1.6542598241428193e-05, "loss": 0.3373, "num_tokens": 3843283182.0, "step": 6068 }, { "epoch": 0.7176303653777936, "grad_norm": 0.13566794991493225, "learning_rate": 1.6534401921337162e-05, "loss": 0.3503, "num_tokens": 3843920822.0, "step": 6069 }, { "epoch": 0.7177486106184227, "grad_norm": 0.12714403867721558, "learning_rate": 1.652620801632782e-05, "loss": 0.3381, "num_tokens": 3844557143.0, "step": 6070 }, { "epoch": 0.7178668558590516, "grad_norm": 0.13178937137126923, "learning_rate": 1.6518016527602027e-05, "loss": 0.3308, "num_tokens": 3845189799.0, "step": 6071 }, { "epoch": 0.7179851010996807, "grad_norm": 0.1254439800977707, "learning_rate": 1.6509827456361243e-05, "loss": 0.3315, "num_tokens": 3845802905.0, "step": 6072 }, { "epoch": 0.7181033463403098, "grad_norm": 0.12423785775899887, "learning_rate": 1.650164080380662e-05, "loss": 0.3183, "num_tokens": 3846438310.0, "step": 6073 }, { "epoch": 0.7182215915809389, "grad_norm": 0.1260284036397934, "learning_rate": 1.6493456571138897e-05, "loss": 0.3118, "num_tokens": 3847075383.0, "step": 6074 }, { "epoch": 0.7183398368215679, "grad_norm": 0.13681896030902863, "learning_rate": 1.648527475955852e-05, "loss": 0.3307, "num_tokens": 3847706484.0, "step": 6075 }, { "epoch": 0.718458082062197, "grad_norm": 0.13870859146118164, "learning_rate": 1.647709537026553e-05, "loss": 0.3259, "num_tokens": 3848337746.0, "step": 6076 }, { "epoch": 0.7185763273028261, "grad_norm": 0.13207878172397614, "learning_rate": 1.6468918404459655e-05, "loss": 0.2965, "num_tokens": 3848968704.0, "step": 6077 }, { "epoch": 0.7186945725434551, "grad_norm": 0.12258992344141006, "learning_rate": 1.646074386334023e-05, "loss": 0.2907, "num_tokens": 3849597183.0, "step": 6078 }, { "epoch": 0.7188128177840842, "grad_norm": 0.13320890069007874, "learning_rate": 1.6452571748106255e-05, "loss": 0.3533, "num_tokens": 3850234509.0, "step": 6079 }, { "epoch": 0.7189310630247132, "grad_norm": 0.12006750702857971, "learning_rate": 1.6444402059956377e-05, "loss": 0.2957, "num_tokens": 3850872891.0, "step": 6080 }, { "epoch": 0.7190493082653423, "grad_norm": 0.14596199989318848, "learning_rate": 1.6436234800088874e-05, "loss": 0.3364, "num_tokens": 3851505906.0, "step": 6081 }, { "epoch": 0.7191675535059714, "grad_norm": 0.13312076032161713, "learning_rate": 1.6428069969701673e-05, "loss": 0.3439, "num_tokens": 3852144699.0, "step": 6082 }, { "epoch": 0.7192857987466005, "grad_norm": 0.12495353817939758, "learning_rate": 1.6419907569992344e-05, "loss": 0.3192, "num_tokens": 3852782643.0, "step": 6083 }, { "epoch": 0.7194040439872296, "grad_norm": 0.1316165179014206, "learning_rate": 1.6411747602158097e-05, "loss": 0.326, "num_tokens": 3853396284.0, "step": 6084 }, { "epoch": 0.7195222892278585, "grad_norm": 0.12691926956176758, "learning_rate": 1.6403590067395816e-05, "loss": 0.3314, "num_tokens": 3854033249.0, "step": 6085 }, { "epoch": 0.7196405344684876, "grad_norm": 0.13803555071353912, "learning_rate": 1.639543496690198e-05, "loss": 0.3412, "num_tokens": 3854661798.0, "step": 6086 }, { "epoch": 0.7197587797091167, "grad_norm": 0.1362668126821518, "learning_rate": 1.638728230187274e-05, "loss": 0.2966, "num_tokens": 3855295269.0, "step": 6087 }, { "epoch": 0.7198770249497458, "grad_norm": 0.13456842303276062, "learning_rate": 1.6379132073503874e-05, "loss": 0.3383, "num_tokens": 3855930332.0, "step": 6088 }, { "epoch": 0.7199952701903748, "grad_norm": 0.1361512392759323, "learning_rate": 1.6370984282990827e-05, "loss": 0.3337, "num_tokens": 3856566702.0, "step": 6089 }, { "epoch": 0.7201135154310039, "grad_norm": 0.13726729154586792, "learning_rate": 1.636283893152867e-05, "loss": 0.3629, "num_tokens": 3857197492.0, "step": 6090 }, { "epoch": 0.720231760671633, "grad_norm": 0.13791437447071075, "learning_rate": 1.6354696020312094e-05, "loss": 0.3264, "num_tokens": 3857836959.0, "step": 6091 }, { "epoch": 0.7203500059122621, "grad_norm": 0.1272287368774414, "learning_rate": 1.634655555053549e-05, "loss": 0.3471, "num_tokens": 3858472794.0, "step": 6092 }, { "epoch": 0.7204682511528911, "grad_norm": 0.13668733835220337, "learning_rate": 1.6338417523392825e-05, "loss": 0.3472, "num_tokens": 3859110919.0, "step": 6093 }, { "epoch": 0.7205864963935201, "grad_norm": 0.12841157615184784, "learning_rate": 1.6330281940077765e-05, "loss": 0.3056, "num_tokens": 3859742095.0, "step": 6094 }, { "epoch": 0.7207047416341492, "grad_norm": 0.12074586749076843, "learning_rate": 1.632214880178358e-05, "loss": 0.3245, "num_tokens": 3860378910.0, "step": 6095 }, { "epoch": 0.7208229868747783, "grad_norm": 0.14361248910427094, "learning_rate": 1.6314018109703194e-05, "loss": 0.3685, "num_tokens": 3861010799.0, "step": 6096 }, { "epoch": 0.7209412321154074, "grad_norm": 0.12311781942844391, "learning_rate": 1.6305889865029164e-05, "loss": 0.2903, "num_tokens": 3861643178.0, "step": 6097 }, { "epoch": 0.7210594773560364, "grad_norm": 0.13464349508285522, "learning_rate": 1.629776406895371e-05, "loss": 0.3357, "num_tokens": 3862275945.0, "step": 6098 }, { "epoch": 0.7211777225966655, "grad_norm": 0.1363033950328827, "learning_rate": 1.6289640722668673e-05, "loss": 0.3022, "num_tokens": 3862908906.0, "step": 6099 }, { "epoch": 0.7212959678372945, "grad_norm": 0.13351857662200928, "learning_rate": 1.6281519827365525e-05, "loss": 0.3178, "num_tokens": 3863541866.0, "step": 6100 }, { "epoch": 0.7214142130779236, "grad_norm": 0.12255244702100754, "learning_rate": 1.6273401384235417e-05, "loss": 0.3292, "num_tokens": 3864173909.0, "step": 6101 }, { "epoch": 0.7215324583185527, "grad_norm": 0.13806664943695068, "learning_rate": 1.62652853944691e-05, "loss": 0.3486, "num_tokens": 3864805578.0, "step": 6102 }, { "epoch": 0.7216507035591817, "grad_norm": 0.12245506793260574, "learning_rate": 1.6257171859257003e-05, "loss": 0.2742, "num_tokens": 3865438768.0, "step": 6103 }, { "epoch": 0.7217689487998108, "grad_norm": 0.13733242452144623, "learning_rate": 1.6249060779789143e-05, "loss": 0.3757, "num_tokens": 3866074778.0, "step": 6104 }, { "epoch": 0.7218871940404399, "grad_norm": 0.12061509490013123, "learning_rate": 1.6240952157255225e-05, "loss": 0.3035, "num_tokens": 3866699776.0, "step": 6105 }, { "epoch": 0.722005439281069, "grad_norm": 0.12005076557397842, "learning_rate": 1.6232845992844578e-05, "loss": 0.2869, "num_tokens": 3867335923.0, "step": 6106 }, { "epoch": 0.722123684521698, "grad_norm": 0.12384641170501709, "learning_rate": 1.6224742287746168e-05, "loss": 0.3135, "num_tokens": 3867964851.0, "step": 6107 }, { "epoch": 0.722241929762327, "grad_norm": 0.146280899643898, "learning_rate": 1.6216641043148602e-05, "loss": 0.3613, "num_tokens": 3868600754.0, "step": 6108 }, { "epoch": 0.7223601750029561, "grad_norm": 0.13016770780086517, "learning_rate": 1.620854226024011e-05, "loss": 0.355, "num_tokens": 3869236408.0, "step": 6109 }, { "epoch": 0.7224784202435852, "grad_norm": 0.13559098541736603, "learning_rate": 1.62004459402086e-05, "loss": 0.3569, "num_tokens": 3869871110.0, "step": 6110 }, { "epoch": 0.7225966654842143, "grad_norm": 0.1311751902103424, "learning_rate": 1.619235208424158e-05, "loss": 0.3364, "num_tokens": 3870502249.0, "step": 6111 }, { "epoch": 0.7227149107248433, "grad_norm": 0.13205233216285706, "learning_rate": 1.6184260693526204e-05, "loss": 0.3205, "num_tokens": 3871134569.0, "step": 6112 }, { "epoch": 0.7228331559654724, "grad_norm": 0.1315503865480423, "learning_rate": 1.6176171769249295e-05, "loss": 0.336, "num_tokens": 3871767770.0, "step": 6113 }, { "epoch": 0.7229514012061015, "grad_norm": 0.13976052403450012, "learning_rate": 1.6168085312597273e-05, "loss": 0.356, "num_tokens": 3872404097.0, "step": 6114 }, { "epoch": 0.7230696464467306, "grad_norm": 0.12567362189292908, "learning_rate": 1.6160001324756217e-05, "loss": 0.318, "num_tokens": 3873037573.0, "step": 6115 }, { "epoch": 0.7231878916873595, "grad_norm": 0.13047868013381958, "learning_rate": 1.615191980691185e-05, "loss": 0.2896, "num_tokens": 3873669180.0, "step": 6116 }, { "epoch": 0.7233061369279886, "grad_norm": 0.12425858527421951, "learning_rate": 1.614384076024952e-05, "loss": 0.3281, "num_tokens": 3874308722.0, "step": 6117 }, { "epoch": 0.7234243821686177, "grad_norm": 0.1273704618215561, "learning_rate": 1.6135764185954205e-05, "loss": 0.311, "num_tokens": 3874940245.0, "step": 6118 }, { "epoch": 0.7235426274092468, "grad_norm": 0.13584624230861664, "learning_rate": 1.6127690085210536e-05, "loss": 0.3346, "num_tokens": 3875573164.0, "step": 6119 }, { "epoch": 0.7236608726498759, "grad_norm": 0.1298605054616928, "learning_rate": 1.61196184592028e-05, "loss": 0.3192, "num_tokens": 3876207638.0, "step": 6120 }, { "epoch": 0.7237791178905049, "grad_norm": 0.13019943237304688, "learning_rate": 1.6111549309114866e-05, "loss": 0.3454, "num_tokens": 3876841973.0, "step": 6121 }, { "epoch": 0.723897363131134, "grad_norm": 0.1151416078209877, "learning_rate": 1.6103482636130288e-05, "loss": 0.2948, "num_tokens": 3877480984.0, "step": 6122 }, { "epoch": 0.724015608371763, "grad_norm": 0.131477490067482, "learning_rate": 1.609541844143223e-05, "loss": 0.3259, "num_tokens": 3878112518.0, "step": 6123 }, { "epoch": 0.7241338536123921, "grad_norm": 0.13814376294612885, "learning_rate": 1.6087356726203525e-05, "loss": 0.3392, "num_tokens": 3878745065.0, "step": 6124 }, { "epoch": 0.7242520988530212, "grad_norm": 0.12700913846492767, "learning_rate": 1.60792974916266e-05, "loss": 0.3521, "num_tokens": 3879382050.0, "step": 6125 }, { "epoch": 0.7243703440936502, "grad_norm": 0.13182440400123596, "learning_rate": 1.607124073888353e-05, "loss": 0.3188, "num_tokens": 3880013816.0, "step": 6126 }, { "epoch": 0.7244885893342793, "grad_norm": 0.11933727562427521, "learning_rate": 1.6063186469156066e-05, "loss": 0.3253, "num_tokens": 3880644118.0, "step": 6127 }, { "epoch": 0.7246068345749084, "grad_norm": 0.12544769048690796, "learning_rate": 1.6055134683625544e-05, "loss": 0.3014, "num_tokens": 3881280459.0, "step": 6128 }, { "epoch": 0.7247250798155375, "grad_norm": 0.1281972974538803, "learning_rate": 1.604708538347295e-05, "loss": 0.3306, "num_tokens": 3881911919.0, "step": 6129 }, { "epoch": 0.7248433250561664, "grad_norm": 0.14001010358333588, "learning_rate": 1.603903856987891e-05, "loss": 0.3519, "num_tokens": 3882543160.0, "step": 6130 }, { "epoch": 0.7249615702967955, "grad_norm": 0.1271282136440277, "learning_rate": 1.6030994244023704e-05, "loss": 0.3041, "num_tokens": 3883172915.0, "step": 6131 }, { "epoch": 0.7250798155374246, "grad_norm": 0.14336012303829193, "learning_rate": 1.602295240708721e-05, "loss": 0.3634, "num_tokens": 3883775934.0, "step": 6132 }, { "epoch": 0.7251980607780537, "grad_norm": 0.1305043250322342, "learning_rate": 1.601491306024897e-05, "loss": 0.3409, "num_tokens": 3884410069.0, "step": 6133 }, { "epoch": 0.7253163060186828, "grad_norm": 0.1264205276966095, "learning_rate": 1.6006876204688146e-05, "loss": 0.3263, "num_tokens": 3885042603.0, "step": 6134 }, { "epoch": 0.7254345512593118, "grad_norm": 0.1255791038274765, "learning_rate": 1.5998841841583533e-05, "loss": 0.2981, "num_tokens": 3885673232.0, "step": 6135 }, { "epoch": 0.7255527964999409, "grad_norm": 0.14094240963459015, "learning_rate": 1.5990809972113585e-05, "loss": 0.3796, "num_tokens": 3886304918.0, "step": 6136 }, { "epoch": 0.72567104174057, "grad_norm": 0.13971509039402008, "learning_rate": 1.5982780597456357e-05, "loss": 0.3557, "num_tokens": 3886938302.0, "step": 6137 }, { "epoch": 0.725789286981199, "grad_norm": 0.14893625676631927, "learning_rate": 1.5974753718789553e-05, "loss": 0.3788, "num_tokens": 3887574200.0, "step": 6138 }, { "epoch": 0.725907532221828, "grad_norm": 0.12753871083259583, "learning_rate": 1.5966729337290502e-05, "loss": 0.3388, "num_tokens": 3888209260.0, "step": 6139 }, { "epoch": 0.7260257774624571, "grad_norm": 0.13912983238697052, "learning_rate": 1.5958707454136196e-05, "loss": 0.3563, "num_tokens": 3888841438.0, "step": 6140 }, { "epoch": 0.7261440227030862, "grad_norm": 0.12111813575029373, "learning_rate": 1.5950688070503238e-05, "loss": 0.3042, "num_tokens": 3889473494.0, "step": 6141 }, { "epoch": 0.7262622679437153, "grad_norm": 0.13433906435966492, "learning_rate": 1.5942671187567843e-05, "loss": 0.3163, "num_tokens": 3890109667.0, "step": 6142 }, { "epoch": 0.7263805131843444, "grad_norm": 0.13713018596172333, "learning_rate": 1.593465680650591e-05, "loss": 0.3041, "num_tokens": 3890738647.0, "step": 6143 }, { "epoch": 0.7264987584249734, "grad_norm": 0.12481521815061569, "learning_rate": 1.592664492849292e-05, "loss": 0.3349, "num_tokens": 3891373024.0, "step": 6144 }, { "epoch": 0.7266170036656024, "grad_norm": 0.13748455047607422, "learning_rate": 1.591863555470403e-05, "loss": 0.3719, "num_tokens": 3892007005.0, "step": 6145 }, { "epoch": 0.7267352489062315, "grad_norm": 0.1268884688615799, "learning_rate": 1.5910628686314002e-05, "loss": 0.3135, "num_tokens": 3892646520.0, "step": 6146 }, { "epoch": 0.7268534941468606, "grad_norm": 0.13290156424045563, "learning_rate": 1.5902624324497243e-05, "loss": 0.3129, "num_tokens": 3893281050.0, "step": 6147 }, { "epoch": 0.7269717393874896, "grad_norm": 0.1231759637594223, "learning_rate": 1.5894622470427773e-05, "loss": 0.3338, "num_tokens": 3893919112.0, "step": 6148 }, { "epoch": 0.7270899846281187, "grad_norm": 0.12487541884183884, "learning_rate": 1.588662312527927e-05, "loss": 0.3324, "num_tokens": 3894558381.0, "step": 6149 }, { "epoch": 0.7272082298687478, "grad_norm": 0.14467141032218933, "learning_rate": 1.5878626290225043e-05, "loss": 0.3313, "num_tokens": 3895188741.0, "step": 6150 }, { "epoch": 0.7273264751093769, "grad_norm": 0.13332237303256989, "learning_rate": 1.5870631966437998e-05, "loss": 0.3471, "num_tokens": 3895824459.0, "step": 6151 }, { "epoch": 0.727444720350006, "grad_norm": 0.12890537083148956, "learning_rate": 1.586264015509073e-05, "loss": 0.3115, "num_tokens": 3896457399.0, "step": 6152 }, { "epoch": 0.7275629655906349, "grad_norm": 0.1350763738155365, "learning_rate": 1.5854650857355398e-05, "loss": 0.3392, "num_tokens": 3897089705.0, "step": 6153 }, { "epoch": 0.727681210831264, "grad_norm": 0.13021527230739594, "learning_rate": 1.5846664074403855e-05, "loss": 0.3464, "num_tokens": 3897729282.0, "step": 6154 }, { "epoch": 0.7277994560718931, "grad_norm": 0.1422225683927536, "learning_rate": 1.5838679807407547e-05, "loss": 0.3513, "num_tokens": 3898363818.0, "step": 6155 }, { "epoch": 0.7279177013125222, "grad_norm": 0.12866514921188354, "learning_rate": 1.5830698057537554e-05, "loss": 0.32, "num_tokens": 3898994519.0, "step": 6156 }, { "epoch": 0.7280359465531512, "grad_norm": 0.1337915062904358, "learning_rate": 1.582271882596461e-05, "loss": 0.3453, "num_tokens": 3899630560.0, "step": 6157 }, { "epoch": 0.7281541917937803, "grad_norm": 0.12983328104019165, "learning_rate": 1.581474211385906e-05, "loss": 0.2954, "num_tokens": 3900267713.0, "step": 6158 }, { "epoch": 0.7282724370344094, "grad_norm": 0.13861139118671417, "learning_rate": 1.5806767922390876e-05, "loss": 0.3311, "num_tokens": 3900901945.0, "step": 6159 }, { "epoch": 0.7283906822750384, "grad_norm": 0.13541215658187866, "learning_rate": 1.5798796252729665e-05, "loss": 0.3592, "num_tokens": 3901534990.0, "step": 6160 }, { "epoch": 0.7285089275156675, "grad_norm": 0.13034571707248688, "learning_rate": 1.579082710604468e-05, "loss": 0.3161, "num_tokens": 3902170628.0, "step": 6161 }, { "epoch": 0.7286271727562965, "grad_norm": 0.13059161603450775, "learning_rate": 1.578286048350478e-05, "loss": 0.293, "num_tokens": 3902806955.0, "step": 6162 }, { "epoch": 0.7287454179969256, "grad_norm": 0.1309465616941452, "learning_rate": 1.5774896386278475e-05, "loss": 0.3385, "num_tokens": 3903444327.0, "step": 6163 }, { "epoch": 0.7288636632375547, "grad_norm": 0.1258264034986496, "learning_rate": 1.5766934815533892e-05, "loss": 0.3346, "num_tokens": 3904061301.0, "step": 6164 }, { "epoch": 0.7289819084781838, "grad_norm": 0.12857311964035034, "learning_rate": 1.5758975772438768e-05, "loss": 0.3397, "num_tokens": 3904691037.0, "step": 6165 }, { "epoch": 0.7291001537188129, "grad_norm": 0.12759630382061005, "learning_rate": 1.5751019258160525e-05, "loss": 0.3126, "num_tokens": 3905309714.0, "step": 6166 }, { "epoch": 0.7292183989594418, "grad_norm": 0.1303693652153015, "learning_rate": 1.574306527386616e-05, "loss": 0.3347, "num_tokens": 3905947199.0, "step": 6167 }, { "epoch": 0.7293366442000709, "grad_norm": 0.13050566613674164, "learning_rate": 1.573511382072232e-05, "loss": 0.3359, "num_tokens": 3906585532.0, "step": 6168 }, { "epoch": 0.7294548894407, "grad_norm": 0.1291007101535797, "learning_rate": 1.572716489989528e-05, "loss": 0.3239, "num_tokens": 3907222228.0, "step": 6169 }, { "epoch": 0.7295731346813291, "grad_norm": 0.12608124315738678, "learning_rate": 1.5719218512550942e-05, "loss": 0.3001, "num_tokens": 3907860007.0, "step": 6170 }, { "epoch": 0.7296913799219581, "grad_norm": 0.11933233588933945, "learning_rate": 1.5711274659854856e-05, "loss": 0.3077, "num_tokens": 3908484854.0, "step": 6171 }, { "epoch": 0.7298096251625872, "grad_norm": 0.1255209892988205, "learning_rate": 1.5703333342972147e-05, "loss": 0.3305, "num_tokens": 3909119776.0, "step": 6172 }, { "epoch": 0.7299278704032163, "grad_norm": 0.1359107792377472, "learning_rate": 1.5695394563067634e-05, "loss": 0.3452, "num_tokens": 3909750248.0, "step": 6173 }, { "epoch": 0.7300461156438454, "grad_norm": 0.13161510229110718, "learning_rate": 1.5687458321305713e-05, "loss": 0.3257, "num_tokens": 3910386692.0, "step": 6174 }, { "epoch": 0.7301643608844745, "grad_norm": 0.130403533577919, "learning_rate": 1.5679524618850444e-05, "loss": 0.3033, "num_tokens": 3911020007.0, "step": 6175 }, { "epoch": 0.7302826061251034, "grad_norm": 0.13608427345752716, "learning_rate": 1.567159345686549e-05, "loss": 0.3274, "num_tokens": 3911655752.0, "step": 6176 }, { "epoch": 0.7304008513657325, "grad_norm": 0.12190147489309311, "learning_rate": 1.5663664836514142e-05, "loss": 0.2967, "num_tokens": 3912294802.0, "step": 6177 }, { "epoch": 0.7305190966063616, "grad_norm": 0.12499073147773743, "learning_rate": 1.5655738758959343e-05, "loss": 0.3196, "num_tokens": 3912924530.0, "step": 6178 }, { "epoch": 0.7306373418469907, "grad_norm": 0.12456873804330826, "learning_rate": 1.5647815225363623e-05, "loss": 0.3322, "num_tokens": 3913558025.0, "step": 6179 }, { "epoch": 0.7307555870876197, "grad_norm": 0.13154719769954681, "learning_rate": 1.563989423688919e-05, "loss": 0.3427, "num_tokens": 3914193616.0, "step": 6180 }, { "epoch": 0.7308738323282488, "grad_norm": 0.14462383091449738, "learning_rate": 1.5631975794697814e-05, "loss": 0.3439, "num_tokens": 3914826750.0, "step": 6181 }, { "epoch": 0.7309920775688779, "grad_norm": 0.1283525824546814, "learning_rate": 1.562405989995096e-05, "loss": 0.3207, "num_tokens": 3915463810.0, "step": 6182 }, { "epoch": 0.7311103228095069, "grad_norm": 0.11666427552700043, "learning_rate": 1.5616146553809667e-05, "loss": 0.3146, "num_tokens": 3916096362.0, "step": 6183 }, { "epoch": 0.731228568050136, "grad_norm": 0.13701771199703217, "learning_rate": 1.5608235757434635e-05, "loss": 0.346, "num_tokens": 3916729552.0, "step": 6184 }, { "epoch": 0.731346813290765, "grad_norm": 0.13610383868217468, "learning_rate": 1.5600327511986176e-05, "loss": 0.3104, "num_tokens": 3917357083.0, "step": 6185 }, { "epoch": 0.7314650585313941, "grad_norm": 0.14188465476036072, "learning_rate": 1.5592421818624205e-05, "loss": 0.3312, "num_tokens": 3917992745.0, "step": 6186 }, { "epoch": 0.7315833037720232, "grad_norm": 0.11991411447525024, "learning_rate": 1.5584518678508308e-05, "loss": 0.3088, "num_tokens": 3918626406.0, "step": 6187 }, { "epoch": 0.7317015490126523, "grad_norm": 0.13681086897850037, "learning_rate": 1.557661809279767e-05, "loss": 0.3309, "num_tokens": 3919258320.0, "step": 6188 }, { "epoch": 0.7318197942532813, "grad_norm": 0.13197654485702515, "learning_rate": 1.556872006265109e-05, "loss": 0.3309, "num_tokens": 3919892210.0, "step": 6189 }, { "epoch": 0.7319380394939103, "grad_norm": 0.13383439183235168, "learning_rate": 1.5560824589227012e-05, "loss": 0.3111, "num_tokens": 3920530860.0, "step": 6190 }, { "epoch": 0.7320562847345394, "grad_norm": 0.12728272378444672, "learning_rate": 1.5552931673683504e-05, "loss": 0.2861, "num_tokens": 3921162878.0, "step": 6191 }, { "epoch": 0.7321745299751685, "grad_norm": 0.12665778398513794, "learning_rate": 1.5545041317178267e-05, "loss": 0.2895, "num_tokens": 3921797699.0, "step": 6192 }, { "epoch": 0.7322927752157976, "grad_norm": 0.14602062106132507, "learning_rate": 1.55371535208686e-05, "loss": 0.2971, "num_tokens": 3922424635.0, "step": 6193 }, { "epoch": 0.7324110204564266, "grad_norm": 0.14087772369384766, "learning_rate": 1.5529268285911438e-05, "loss": 0.33, "num_tokens": 3923062407.0, "step": 6194 }, { "epoch": 0.7325292656970557, "grad_norm": 0.14423184096813202, "learning_rate": 1.5521385613463344e-05, "loss": 0.3492, "num_tokens": 3923665004.0, "step": 6195 }, { "epoch": 0.7326475109376848, "grad_norm": 0.12282237410545349, "learning_rate": 1.551350550468052e-05, "loss": 0.3098, "num_tokens": 3924303752.0, "step": 6196 }, { "epoch": 0.7327657561783139, "grad_norm": 0.1353834867477417, "learning_rate": 1.5505627960718755e-05, "loss": 0.3269, "num_tokens": 3924935450.0, "step": 6197 }, { "epoch": 0.7328840014189429, "grad_norm": 0.13085924088954926, "learning_rate": 1.549775298273349e-05, "loss": 0.2901, "num_tokens": 3925572313.0, "step": 6198 }, { "epoch": 0.7330022466595719, "grad_norm": 0.1435791254043579, "learning_rate": 1.5489880571879798e-05, "loss": 0.3182, "num_tokens": 3926206132.0, "step": 6199 }, { "epoch": 0.733120491900201, "grad_norm": 0.13186600804328918, "learning_rate": 1.548201072931233e-05, "loss": 0.3799, "num_tokens": 3926837321.0, "step": 6200 }, { "epoch": 0.7332387371408301, "grad_norm": 0.14109022915363312, "learning_rate": 1.5474143456185432e-05, "loss": 0.3279, "num_tokens": 3927472059.0, "step": 6201 }, { "epoch": 0.7333569823814592, "grad_norm": 0.1412018984556198, "learning_rate": 1.5466278753652987e-05, "loss": 0.3318, "num_tokens": 3928104781.0, "step": 6202 }, { "epoch": 0.7334752276220882, "grad_norm": 0.13326172530651093, "learning_rate": 1.5458416622868573e-05, "loss": 0.326, "num_tokens": 3928729945.0, "step": 6203 }, { "epoch": 0.7335934728627173, "grad_norm": 0.13381119072437286, "learning_rate": 1.545055706498535e-05, "loss": 0.3646, "num_tokens": 3929368804.0, "step": 6204 }, { "epoch": 0.7337117181033463, "grad_norm": 0.1273198276758194, "learning_rate": 1.5442700081156124e-05, "loss": 0.3378, "num_tokens": 3930007701.0, "step": 6205 }, { "epoch": 0.7338299633439754, "grad_norm": 0.12683889269828796, "learning_rate": 1.5434845672533314e-05, "loss": 0.3298, "num_tokens": 3930639355.0, "step": 6206 }, { "epoch": 0.7339482085846045, "grad_norm": 0.13914772868156433, "learning_rate": 1.542699384026895e-05, "loss": 0.3112, "num_tokens": 3931273383.0, "step": 6207 }, { "epoch": 0.7340664538252335, "grad_norm": 0.12066660821437836, "learning_rate": 1.5419144585514703e-05, "loss": 0.3413, "num_tokens": 3931905274.0, "step": 6208 }, { "epoch": 0.7341846990658626, "grad_norm": 0.13515877723693848, "learning_rate": 1.541129790942186e-05, "loss": 0.3267, "num_tokens": 3932537025.0, "step": 6209 }, { "epoch": 0.7343029443064917, "grad_norm": 0.13162940740585327, "learning_rate": 1.540345381314132e-05, "loss": 0.3442, "num_tokens": 3933176499.0, "step": 6210 }, { "epoch": 0.7344211895471208, "grad_norm": 0.13487784564495087, "learning_rate": 1.5395612297823606e-05, "loss": 0.3682, "num_tokens": 3933815124.0, "step": 6211 }, { "epoch": 0.7345394347877497, "grad_norm": 0.13747508823871613, "learning_rate": 1.5387773364618876e-05, "loss": 0.3369, "num_tokens": 3934452610.0, "step": 6212 }, { "epoch": 0.7346576800283788, "grad_norm": 0.12950122356414795, "learning_rate": 1.537993701467691e-05, "loss": 0.3217, "num_tokens": 3935089077.0, "step": 6213 }, { "epoch": 0.7347759252690079, "grad_norm": 0.13174621760845184, "learning_rate": 1.5372103249147087e-05, "loss": 0.3094, "num_tokens": 3935724900.0, "step": 6214 }, { "epoch": 0.734894170509637, "grad_norm": 0.12965109944343567, "learning_rate": 1.5364272069178426e-05, "loss": 0.3112, "num_tokens": 3936355805.0, "step": 6215 }, { "epoch": 0.7350124157502661, "grad_norm": 0.12451507896184921, "learning_rate": 1.5356443475919548e-05, "loss": 0.3138, "num_tokens": 3936984257.0, "step": 6216 }, { "epoch": 0.7351306609908951, "grad_norm": 0.13147974014282227, "learning_rate": 1.534861747051873e-05, "loss": 0.3204, "num_tokens": 3937619128.0, "step": 6217 }, { "epoch": 0.7352489062315242, "grad_norm": 0.12056206911802292, "learning_rate": 1.534079405412383e-05, "loss": 0.2883, "num_tokens": 3938249113.0, "step": 6218 }, { "epoch": 0.7353671514721533, "grad_norm": 0.15774895250797272, "learning_rate": 1.5332973227882342e-05, "loss": 0.3917, "num_tokens": 3938887425.0, "step": 6219 }, { "epoch": 0.7354853967127823, "grad_norm": 0.12618249654769897, "learning_rate": 1.5325154992941387e-05, "loss": 0.3262, "num_tokens": 3939520747.0, "step": 6220 }, { "epoch": 0.7356036419534113, "grad_norm": 0.13265381753444672, "learning_rate": 1.5317339350447695e-05, "loss": 0.3143, "num_tokens": 3940154562.0, "step": 6221 }, { "epoch": 0.7357218871940404, "grad_norm": 0.13884097337722778, "learning_rate": 1.5309526301547643e-05, "loss": 0.3192, "num_tokens": 3940786619.0, "step": 6222 }, { "epoch": 0.7358401324346695, "grad_norm": 0.1287054866552353, "learning_rate": 1.530171584738716e-05, "loss": 0.3011, "num_tokens": 3941420347.0, "step": 6223 }, { "epoch": 0.7359583776752986, "grad_norm": 0.1296381801366806, "learning_rate": 1.5293907989111887e-05, "loss": 0.3189, "num_tokens": 3942051799.0, "step": 6224 }, { "epoch": 0.7360766229159277, "grad_norm": 0.13189168274402618, "learning_rate": 1.5286102727867e-05, "loss": 0.3337, "num_tokens": 3942683650.0, "step": 6225 }, { "epoch": 0.7361948681565567, "grad_norm": 0.135064497590065, "learning_rate": 1.5278300064797355e-05, "loss": 0.3308, "num_tokens": 3943321277.0, "step": 6226 }, { "epoch": 0.7363131133971857, "grad_norm": 0.13632231950759888, "learning_rate": 1.5270500001047396e-05, "loss": 0.3036, "num_tokens": 3943958981.0, "step": 6227 }, { "epoch": 0.7364313586378148, "grad_norm": 0.11653760820627213, "learning_rate": 1.526270253776119e-05, "loss": 0.2999, "num_tokens": 3944591798.0, "step": 6228 }, { "epoch": 0.7365496038784439, "grad_norm": 0.12295464426279068, "learning_rate": 1.5254907676082428e-05, "loss": 0.3437, "num_tokens": 3945222921.0, "step": 6229 }, { "epoch": 0.7366678491190729, "grad_norm": 0.12821029126644135, "learning_rate": 1.5247115417154407e-05, "loss": 0.3134, "num_tokens": 3945856718.0, "step": 6230 }, { "epoch": 0.736786094359702, "grad_norm": 0.12589962780475616, "learning_rate": 1.523932576212008e-05, "loss": 0.3183, "num_tokens": 3946492314.0, "step": 6231 }, { "epoch": 0.7369043396003311, "grad_norm": 0.12805122137069702, "learning_rate": 1.5231538712121953e-05, "loss": 0.2667, "num_tokens": 3947121972.0, "step": 6232 }, { "epoch": 0.7370225848409602, "grad_norm": 0.13580147922039032, "learning_rate": 1.5223754268302212e-05, "loss": 0.3557, "num_tokens": 3947756273.0, "step": 6233 }, { "epoch": 0.7371408300815893, "grad_norm": 0.14945286512374878, "learning_rate": 1.5215972431802619e-05, "loss": 0.3545, "num_tokens": 3948387269.0, "step": 6234 }, { "epoch": 0.7372590753222182, "grad_norm": 0.11914674192667007, "learning_rate": 1.5208193203764584e-05, "loss": 0.3312, "num_tokens": 3949021220.0, "step": 6235 }, { "epoch": 0.7373773205628473, "grad_norm": 0.13755962252616882, "learning_rate": 1.5200416585329118e-05, "loss": 0.3115, "num_tokens": 3949659968.0, "step": 6236 }, { "epoch": 0.7374955658034764, "grad_norm": 0.1345282346010208, "learning_rate": 1.5192642577636847e-05, "loss": 0.3184, "num_tokens": 3950297561.0, "step": 6237 }, { "epoch": 0.7376138110441055, "grad_norm": 0.13085973262786865, "learning_rate": 1.5184871181828024e-05, "loss": 0.3315, "num_tokens": 3950930505.0, "step": 6238 }, { "epoch": 0.7377320562847346, "grad_norm": 0.12818896770477295, "learning_rate": 1.5177102399042513e-05, "loss": 0.3126, "num_tokens": 3951564438.0, "step": 6239 }, { "epoch": 0.7378503015253636, "grad_norm": 0.12317423522472382, "learning_rate": 1.5169336230419795e-05, "loss": 0.2817, "num_tokens": 3952198467.0, "step": 6240 }, { "epoch": 0.7379685467659927, "grad_norm": 0.12942329049110413, "learning_rate": 1.5161572677098952e-05, "loss": 0.3298, "num_tokens": 3952834295.0, "step": 6241 }, { "epoch": 0.7380867920066218, "grad_norm": 0.12274757027626038, "learning_rate": 1.5153811740218719e-05, "loss": 0.2983, "num_tokens": 3953462384.0, "step": 6242 }, { "epoch": 0.7382050372472508, "grad_norm": 0.1431964486837387, "learning_rate": 1.5146053420917428e-05, "loss": 0.3548, "num_tokens": 3954099306.0, "step": 6243 }, { "epoch": 0.7383232824878798, "grad_norm": 0.11678394675254822, "learning_rate": 1.5138297720333023e-05, "loss": 0.2928, "num_tokens": 3954738248.0, "step": 6244 }, { "epoch": 0.7384415277285089, "grad_norm": 0.12951810657978058, "learning_rate": 1.5130544639603056e-05, "loss": 0.2994, "num_tokens": 3955375531.0, "step": 6245 }, { "epoch": 0.738559772969138, "grad_norm": 0.12030979245901108, "learning_rate": 1.5122794179864706e-05, "loss": 0.3156, "num_tokens": 3956014579.0, "step": 6246 }, { "epoch": 0.7386780182097671, "grad_norm": 0.14473631978034973, "learning_rate": 1.5115046342254785e-05, "loss": 0.3569, "num_tokens": 3956647075.0, "step": 6247 }, { "epoch": 0.7387962634503962, "grad_norm": 0.12666530907154083, "learning_rate": 1.5107301127909686e-05, "loss": 0.3208, "num_tokens": 3957282160.0, "step": 6248 }, { "epoch": 0.7389145086910252, "grad_norm": 0.12993764877319336, "learning_rate": 1.5099558537965431e-05, "loss": 0.3384, "num_tokens": 3957914297.0, "step": 6249 }, { "epoch": 0.7390327539316542, "grad_norm": 0.12225433439016342, "learning_rate": 1.5091818573557677e-05, "loss": 0.3294, "num_tokens": 3958552830.0, "step": 6250 }, { "epoch": 0.7391509991722833, "grad_norm": 0.1233525201678276, "learning_rate": 1.508408123582166e-05, "loss": 0.3389, "num_tokens": 3959187383.0, "step": 6251 }, { "epoch": 0.7392692444129124, "grad_norm": 0.12150568515062332, "learning_rate": 1.5076346525892273e-05, "loss": 0.3032, "num_tokens": 3959818570.0, "step": 6252 }, { "epoch": 0.7393874896535414, "grad_norm": 0.12467543035745621, "learning_rate": 1.5068614444903969e-05, "loss": 0.308, "num_tokens": 3960455425.0, "step": 6253 }, { "epoch": 0.7395057348941705, "grad_norm": 0.13604149222373962, "learning_rate": 1.5060884993990871e-05, "loss": 0.3026, "num_tokens": 3961089771.0, "step": 6254 }, { "epoch": 0.7396239801347996, "grad_norm": 0.14430995285511017, "learning_rate": 1.5053158174286677e-05, "loss": 0.3989, "num_tokens": 3961725909.0, "step": 6255 }, { "epoch": 0.7397422253754287, "grad_norm": 0.13741986453533173, "learning_rate": 1.504543398692472e-05, "loss": 0.33, "num_tokens": 3962339034.0, "step": 6256 }, { "epoch": 0.7398604706160578, "grad_norm": 0.13323070108890533, "learning_rate": 1.5037712433037947e-05, "loss": 0.333, "num_tokens": 3962977359.0, "step": 6257 }, { "epoch": 0.7399787158566867, "grad_norm": 0.1385607272386551, "learning_rate": 1.5029993513758892e-05, "loss": 0.3415, "num_tokens": 3963612088.0, "step": 6258 }, { "epoch": 0.7400969610973158, "grad_norm": 0.12201206386089325, "learning_rate": 1.5022277230219749e-05, "loss": 0.3107, "num_tokens": 3964238937.0, "step": 6259 }, { "epoch": 0.7402152063379449, "grad_norm": 0.12822695076465607, "learning_rate": 1.5014563583552279e-05, "loss": 0.3307, "num_tokens": 3964873509.0, "step": 6260 }, { "epoch": 0.740333451578574, "grad_norm": 0.14252492785453796, "learning_rate": 1.5006852574887894e-05, "loss": 0.3638, "num_tokens": 3965506457.0, "step": 6261 }, { "epoch": 0.740451696819203, "grad_norm": 0.12283393740653992, "learning_rate": 1.4999144205357577e-05, "loss": 0.2883, "num_tokens": 3966144482.0, "step": 6262 }, { "epoch": 0.7405699420598321, "grad_norm": 0.1314791589975357, "learning_rate": 1.4991438476091964e-05, "loss": 0.3241, "num_tokens": 3966778551.0, "step": 6263 }, { "epoch": 0.7406881873004612, "grad_norm": 0.1286136955022812, "learning_rate": 1.4983735388221294e-05, "loss": 0.3127, "num_tokens": 3967410361.0, "step": 6264 }, { "epoch": 0.7408064325410902, "grad_norm": 0.12521271407604218, "learning_rate": 1.4976034942875406e-05, "loss": 0.2903, "num_tokens": 3968044410.0, "step": 6265 }, { "epoch": 0.7409246777817193, "grad_norm": 0.1309756487607956, "learning_rate": 1.4968337141183762e-05, "loss": 0.2751, "num_tokens": 3968680661.0, "step": 6266 }, { "epoch": 0.7410429230223483, "grad_norm": 0.1260569542646408, "learning_rate": 1.4960641984275415e-05, "loss": 0.3152, "num_tokens": 3969319800.0, "step": 6267 }, { "epoch": 0.7411611682629774, "grad_norm": 0.12850767374038696, "learning_rate": 1.4952949473279071e-05, "loss": 0.2859, "num_tokens": 3969952614.0, "step": 6268 }, { "epoch": 0.7412794135036065, "grad_norm": 0.12208041548728943, "learning_rate": 1.4945259609323017e-05, "loss": 0.3229, "num_tokens": 3970580054.0, "step": 6269 }, { "epoch": 0.7413976587442356, "grad_norm": 0.12738503515720367, "learning_rate": 1.4937572393535145e-05, "loss": 0.3108, "num_tokens": 3971206587.0, "step": 6270 }, { "epoch": 0.7415159039848646, "grad_norm": 0.13081125915050507, "learning_rate": 1.4929887827042994e-05, "loss": 0.329, "num_tokens": 3971842253.0, "step": 6271 }, { "epoch": 0.7416341492254936, "grad_norm": 0.1250012218952179, "learning_rate": 1.4922205910973673e-05, "loss": 0.3548, "num_tokens": 3972474879.0, "step": 6272 }, { "epoch": 0.7417523944661227, "grad_norm": 0.1316950023174286, "learning_rate": 1.4914526646453943e-05, "loss": 0.289, "num_tokens": 3973109225.0, "step": 6273 }, { "epoch": 0.7418706397067518, "grad_norm": 0.13429328799247742, "learning_rate": 1.4906850034610145e-05, "loss": 0.2934, "num_tokens": 3973746099.0, "step": 6274 }, { "epoch": 0.7419888849473809, "grad_norm": 0.12727922201156616, "learning_rate": 1.4899176076568244e-05, "loss": 0.3372, "num_tokens": 3974381101.0, "step": 6275 }, { "epoch": 0.7421071301880099, "grad_norm": 0.13950154185295105, "learning_rate": 1.48915047734538e-05, "loss": 0.3572, "num_tokens": 3975018900.0, "step": 6276 }, { "epoch": 0.742225375428639, "grad_norm": 0.13971056044101715, "learning_rate": 1.4883836126392017e-05, "loss": 0.3477, "num_tokens": 3975657139.0, "step": 6277 }, { "epoch": 0.7423436206692681, "grad_norm": 0.12806862592697144, "learning_rate": 1.487617013650768e-05, "loss": 0.3056, "num_tokens": 3976293877.0, "step": 6278 }, { "epoch": 0.7424618659098972, "grad_norm": 0.12458652257919312, "learning_rate": 1.4868506804925182e-05, "loss": 0.283, "num_tokens": 3976925857.0, "step": 6279 }, { "epoch": 0.7425801111505262, "grad_norm": 0.13677279651165009, "learning_rate": 1.4860846132768561e-05, "loss": 0.3354, "num_tokens": 3977559288.0, "step": 6280 }, { "epoch": 0.7426983563911552, "grad_norm": 0.12468330562114716, "learning_rate": 1.485318812116142e-05, "loss": 0.3177, "num_tokens": 3978195902.0, "step": 6281 }, { "epoch": 0.7428166016317843, "grad_norm": 0.1417744904756546, "learning_rate": 1.4845532771227023e-05, "loss": 0.3683, "num_tokens": 3978834868.0, "step": 6282 }, { "epoch": 0.7429348468724134, "grad_norm": 0.12670855224132538, "learning_rate": 1.4837880084088172e-05, "loss": 0.3217, "num_tokens": 3979471022.0, "step": 6283 }, { "epoch": 0.7430530921130425, "grad_norm": 0.13195553421974182, "learning_rate": 1.4830230060867343e-05, "loss": 0.3146, "num_tokens": 3980098049.0, "step": 6284 }, { "epoch": 0.7431713373536715, "grad_norm": 0.13289175927639008, "learning_rate": 1.4822582702686605e-05, "loss": 0.3499, "num_tokens": 3980731844.0, "step": 6285 }, { "epoch": 0.7432895825943006, "grad_norm": 0.12356553226709366, "learning_rate": 1.4814938010667623e-05, "loss": 0.318, "num_tokens": 3981369283.0, "step": 6286 }, { "epoch": 0.7434078278349296, "grad_norm": 0.1188426986336708, "learning_rate": 1.4807295985931678e-05, "loss": 0.3177, "num_tokens": 3982002439.0, "step": 6287 }, { "epoch": 0.7435260730755587, "grad_norm": 0.12725141644477844, "learning_rate": 1.4799656629599649e-05, "loss": 0.308, "num_tokens": 3982638029.0, "step": 6288 }, { "epoch": 0.7436443183161878, "grad_norm": 0.12166880816221237, "learning_rate": 1.4792019942792053e-05, "loss": 0.2854, "num_tokens": 3983266705.0, "step": 6289 }, { "epoch": 0.7437625635568168, "grad_norm": 0.1421353965997696, "learning_rate": 1.4784385926628983e-05, "loss": 0.3584, "num_tokens": 3983902324.0, "step": 6290 }, { "epoch": 0.7438808087974459, "grad_norm": 0.13187626004219055, "learning_rate": 1.4776754582230151e-05, "loss": 0.3415, "num_tokens": 3984538288.0, "step": 6291 }, { "epoch": 0.743999054038075, "grad_norm": 0.12808012962341309, "learning_rate": 1.4769125910714894e-05, "loss": 0.3373, "num_tokens": 3985175214.0, "step": 6292 }, { "epoch": 0.7441172992787041, "grad_norm": 0.1361730396747589, "learning_rate": 1.4761499913202126e-05, "loss": 0.3774, "num_tokens": 3985804927.0, "step": 6293 }, { "epoch": 0.744235544519333, "grad_norm": 0.13194234669208527, "learning_rate": 1.4753876590810404e-05, "loss": 0.3428, "num_tokens": 3986442558.0, "step": 6294 }, { "epoch": 0.7443537897599621, "grad_norm": 0.1427699476480484, "learning_rate": 1.4746255944657863e-05, "loss": 0.3327, "num_tokens": 3987080155.0, "step": 6295 }, { "epoch": 0.7444720350005912, "grad_norm": 0.13032644987106323, "learning_rate": 1.473863797586226e-05, "loss": 0.3505, "num_tokens": 3987719393.0, "step": 6296 }, { "epoch": 0.7445902802412203, "grad_norm": 0.12654797732830048, "learning_rate": 1.4731022685540944e-05, "loss": 0.3397, "num_tokens": 3988356779.0, "step": 6297 }, { "epoch": 0.7447085254818494, "grad_norm": 0.13076362013816833, "learning_rate": 1.4723410074810895e-05, "loss": 0.3585, "num_tokens": 3988990536.0, "step": 6298 }, { "epoch": 0.7448267707224784, "grad_norm": 0.1324608325958252, "learning_rate": 1.4715800144788702e-05, "loss": 0.3045, "num_tokens": 3989629453.0, "step": 6299 }, { "epoch": 0.7449450159631075, "grad_norm": 0.1215270385146141, "learning_rate": 1.4708192896590518e-05, "loss": 0.3317, "num_tokens": 3990265446.0, "step": 6300 }, { "epoch": 0.7450632612037366, "grad_norm": 0.130240797996521, "learning_rate": 1.4700588331332153e-05, "loss": 0.2766, "num_tokens": 3990890233.0, "step": 6301 }, { "epoch": 0.7451815064443656, "grad_norm": 0.14984826743602753, "learning_rate": 1.4692986450128983e-05, "loss": 0.3323, "num_tokens": 3991518645.0, "step": 6302 }, { "epoch": 0.7452997516849946, "grad_norm": 0.13923399150371552, "learning_rate": 1.4685387254096031e-05, "loss": 0.3554, "num_tokens": 3992157354.0, "step": 6303 }, { "epoch": 0.7454179969256237, "grad_norm": 0.1187940314412117, "learning_rate": 1.4677790744347897e-05, "loss": 0.2962, "num_tokens": 3992790889.0, "step": 6304 }, { "epoch": 0.7455362421662528, "grad_norm": 0.12926797568798065, "learning_rate": 1.4670196921998778e-05, "loss": 0.3206, "num_tokens": 3993419764.0, "step": 6305 }, { "epoch": 0.7456544874068819, "grad_norm": 0.13464480638504028, "learning_rate": 1.4662605788162515e-05, "loss": 0.2989, "num_tokens": 3994055036.0, "step": 6306 }, { "epoch": 0.745772732647511, "grad_norm": 0.13763250410556793, "learning_rate": 1.4655017343952527e-05, "loss": 0.3345, "num_tokens": 3994680964.0, "step": 6307 }, { "epoch": 0.74589097788814, "grad_norm": 0.13265223801136017, "learning_rate": 1.4647431590481844e-05, "loss": 0.3296, "num_tokens": 3995314321.0, "step": 6308 }, { "epoch": 0.746009223128769, "grad_norm": 0.12473204731941223, "learning_rate": 1.4639848528863087e-05, "loss": 0.3174, "num_tokens": 3995952672.0, "step": 6309 }, { "epoch": 0.7461274683693981, "grad_norm": 0.13630880415439606, "learning_rate": 1.4632268160208523e-05, "loss": 0.3464, "num_tokens": 3996582783.0, "step": 6310 }, { "epoch": 0.7462457136100272, "grad_norm": 0.14538772404193878, "learning_rate": 1.4624690485629976e-05, "loss": 0.3479, "num_tokens": 3997219906.0, "step": 6311 }, { "epoch": 0.7463639588506562, "grad_norm": 0.14005494117736816, "learning_rate": 1.4617115506238921e-05, "loss": 0.3178, "num_tokens": 3997852528.0, "step": 6312 }, { "epoch": 0.7464822040912853, "grad_norm": 0.13737818598747253, "learning_rate": 1.4609543223146382e-05, "loss": 0.3406, "num_tokens": 3998487056.0, "step": 6313 }, { "epoch": 0.7466004493319144, "grad_norm": 0.12919875979423523, "learning_rate": 1.4601973637463038e-05, "loss": 0.3432, "num_tokens": 3999123304.0, "step": 6314 }, { "epoch": 0.7467186945725435, "grad_norm": 0.1412971019744873, "learning_rate": 1.4594406750299159e-05, "loss": 0.3488, "num_tokens": 3999755181.0, "step": 6315 }, { "epoch": 0.7468369398131726, "grad_norm": 0.13803736865520477, "learning_rate": 1.4586842562764603e-05, "loss": 0.3516, "num_tokens": 4000390780.0, "step": 6316 }, { "epoch": 0.7469551850538015, "grad_norm": 0.1353667825460434, "learning_rate": 1.4579281075968848e-05, "loss": 0.3615, "num_tokens": 4001027165.0, "step": 6317 }, { "epoch": 0.7470734302944306, "grad_norm": 0.12890100479125977, "learning_rate": 1.457172229102096e-05, "loss": 0.3295, "num_tokens": 4001665768.0, "step": 6318 }, { "epoch": 0.7471916755350597, "grad_norm": 0.13418051600456238, "learning_rate": 1.4564166209029633e-05, "loss": 0.3262, "num_tokens": 4002303955.0, "step": 6319 }, { "epoch": 0.7473099207756888, "grad_norm": 0.12977677583694458, "learning_rate": 1.4556612831103146e-05, "loss": 0.3336, "num_tokens": 4002942725.0, "step": 6320 }, { "epoch": 0.7474281660163179, "grad_norm": 0.12483970820903778, "learning_rate": 1.454906215834938e-05, "loss": 0.3147, "num_tokens": 4003578850.0, "step": 6321 }, { "epoch": 0.7475464112569469, "grad_norm": 0.1339177042245865, "learning_rate": 1.4541514191875834e-05, "loss": 0.3539, "num_tokens": 4004214544.0, "step": 6322 }, { "epoch": 0.747664656497576, "grad_norm": 0.13127344846725464, "learning_rate": 1.4533968932789591e-05, "loss": 0.3049, "num_tokens": 4004843829.0, "step": 6323 }, { "epoch": 0.747782901738205, "grad_norm": 0.13374678790569305, "learning_rate": 1.4526426382197363e-05, "loss": 0.3442, "num_tokens": 4005483071.0, "step": 6324 }, { "epoch": 0.7479011469788341, "grad_norm": 0.13894295692443848, "learning_rate": 1.4518886541205441e-05, "loss": 0.3231, "num_tokens": 4006119761.0, "step": 6325 }, { "epoch": 0.7480193922194631, "grad_norm": 0.12840433418750763, "learning_rate": 1.4511349410919725e-05, "loss": 0.3372, "num_tokens": 4006753264.0, "step": 6326 }, { "epoch": 0.7481376374600922, "grad_norm": 0.1261133849620819, "learning_rate": 1.450381499244571e-05, "loss": 0.3428, "num_tokens": 4007389007.0, "step": 6327 }, { "epoch": 0.7482558827007213, "grad_norm": 0.13152338564395905, "learning_rate": 1.4496283286888515e-05, "loss": 0.2676, "num_tokens": 4008022876.0, "step": 6328 }, { "epoch": 0.7483741279413504, "grad_norm": 0.12405335158109665, "learning_rate": 1.4488754295352862e-05, "loss": 0.3224, "num_tokens": 4008655483.0, "step": 6329 }, { "epoch": 0.7484923731819795, "grad_norm": 0.12768690288066864, "learning_rate": 1.4481228018943027e-05, "loss": 0.3096, "num_tokens": 4009289710.0, "step": 6330 }, { "epoch": 0.7486106184226085, "grad_norm": 0.1258234828710556, "learning_rate": 1.4473704458762952e-05, "loss": 0.3305, "num_tokens": 4009924147.0, "step": 6331 }, { "epoch": 0.7487288636632375, "grad_norm": 0.1341477334499359, "learning_rate": 1.4466183615916125e-05, "loss": 0.3528, "num_tokens": 4010554613.0, "step": 6332 }, { "epoch": 0.7488471089038666, "grad_norm": 0.12396214157342911, "learning_rate": 1.445866549150569e-05, "loss": 0.3074, "num_tokens": 4011189715.0, "step": 6333 }, { "epoch": 0.7489653541444957, "grad_norm": 0.1343856304883957, "learning_rate": 1.4451150086634345e-05, "loss": 0.3447, "num_tokens": 4011824944.0, "step": 6334 }, { "epoch": 0.7490835993851247, "grad_norm": 0.11184556782245636, "learning_rate": 1.44436374024044e-05, "loss": 0.3043, "num_tokens": 4012462908.0, "step": 6335 }, { "epoch": 0.7492018446257538, "grad_norm": 0.1331510990858078, "learning_rate": 1.4436127439917796e-05, "loss": 0.3384, "num_tokens": 4013102123.0, "step": 6336 }, { "epoch": 0.7493200898663829, "grad_norm": 0.12113068997859955, "learning_rate": 1.4428620200276039e-05, "loss": 0.3264, "num_tokens": 4013740264.0, "step": 6337 }, { "epoch": 0.749438335107012, "grad_norm": 0.13321010768413544, "learning_rate": 1.442111568458025e-05, "loss": 0.3492, "num_tokens": 4014376708.0, "step": 6338 }, { "epoch": 0.749556580347641, "grad_norm": 0.12650921940803528, "learning_rate": 1.4413613893931141e-05, "loss": 0.2988, "num_tokens": 4015016038.0, "step": 6339 }, { "epoch": 0.74967482558827, "grad_norm": 0.1296134740114212, "learning_rate": 1.4406114829429049e-05, "loss": 0.3357, "num_tokens": 4015655315.0, "step": 6340 }, { "epoch": 0.7497930708288991, "grad_norm": 0.12937311828136444, "learning_rate": 1.4398618492173882e-05, "loss": 0.3258, "num_tokens": 4016282816.0, "step": 6341 }, { "epoch": 0.7499113160695282, "grad_norm": 0.13592471182346344, "learning_rate": 1.4391124883265174e-05, "loss": 0.328, "num_tokens": 4016908865.0, "step": 6342 }, { "epoch": 0.7500295613101573, "grad_norm": 0.12953752279281616, "learning_rate": 1.4383634003802033e-05, "loss": 0.3223, "num_tokens": 4017545438.0, "step": 6343 }, { "epoch": 0.7501478065507863, "grad_norm": 0.13860835134983063, "learning_rate": 1.4376145854883177e-05, "loss": 0.3664, "num_tokens": 4018178360.0, "step": 6344 }, { "epoch": 0.7502660517914154, "grad_norm": 0.13040651381015778, "learning_rate": 1.4368660437606944e-05, "loss": 0.3234, "num_tokens": 4018812028.0, "step": 6345 }, { "epoch": 0.7503842970320445, "grad_norm": 0.12715373933315277, "learning_rate": 1.4361177753071236e-05, "loss": 0.3463, "num_tokens": 4019450593.0, "step": 6346 }, { "epoch": 0.7505025422726735, "grad_norm": 0.12946541607379913, "learning_rate": 1.435369780237358e-05, "loss": 0.3074, "num_tokens": 4020081571.0, "step": 6347 }, { "epoch": 0.7506207875133026, "grad_norm": 0.13193261623382568, "learning_rate": 1.4346220586611083e-05, "loss": 0.338, "num_tokens": 4020712657.0, "step": 6348 }, { "epoch": 0.7507390327539316, "grad_norm": 0.13188154995441437, "learning_rate": 1.4338746106880465e-05, "loss": 0.2969, "num_tokens": 4021347678.0, "step": 6349 }, { "epoch": 0.7508572779945607, "grad_norm": 0.12901121377944946, "learning_rate": 1.4331274364278062e-05, "loss": 0.3165, "num_tokens": 4021978894.0, "step": 6350 }, { "epoch": 0.7509755232351898, "grad_norm": 0.119459368288517, "learning_rate": 1.4323805359899754e-05, "loss": 0.3145, "num_tokens": 4022611192.0, "step": 6351 }, { "epoch": 0.7510937684758189, "grad_norm": 0.139527827501297, "learning_rate": 1.4316339094841075e-05, "loss": 0.3525, "num_tokens": 4023246052.0, "step": 6352 }, { "epoch": 0.751212013716448, "grad_norm": 0.1243899017572403, "learning_rate": 1.4308875570197118e-05, "loss": 0.3269, "num_tokens": 4023882629.0, "step": 6353 }, { "epoch": 0.751330258957077, "grad_norm": 0.12390729039907455, "learning_rate": 1.4301414787062611e-05, "loss": 0.2937, "num_tokens": 4024514698.0, "step": 6354 }, { "epoch": 0.751448504197706, "grad_norm": 0.11872659623622894, "learning_rate": 1.429395674653185e-05, "loss": 0.3052, "num_tokens": 4025149395.0, "step": 6355 }, { "epoch": 0.7515667494383351, "grad_norm": 0.13059251010417938, "learning_rate": 1.428650144969873e-05, "loss": 0.3391, "num_tokens": 4025788015.0, "step": 6356 }, { "epoch": 0.7516849946789642, "grad_norm": 0.14135579764842987, "learning_rate": 1.4279048897656767e-05, "loss": 0.3278, "num_tokens": 4026425811.0, "step": 6357 }, { "epoch": 0.7518032399195932, "grad_norm": 0.9473778009414673, "learning_rate": 1.4271599091499055e-05, "loss": 0.3615, "num_tokens": 4027028086.0, "step": 6358 }, { "epoch": 0.7519214851602223, "grad_norm": 0.13267070055007935, "learning_rate": 1.4264152032318288e-05, "loss": 0.3325, "num_tokens": 4027660025.0, "step": 6359 }, { "epoch": 0.7520397304008514, "grad_norm": 0.13721176981925964, "learning_rate": 1.425670772120675e-05, "loss": 0.3187, "num_tokens": 4028296736.0, "step": 6360 }, { "epoch": 0.7521579756414805, "grad_norm": 0.14144429564476013, "learning_rate": 1.424926615925635e-05, "loss": 0.3396, "num_tokens": 4028936051.0, "step": 6361 }, { "epoch": 0.7522762208821095, "grad_norm": 0.13820460438728333, "learning_rate": 1.4241827347558554e-05, "loss": 0.3375, "num_tokens": 4029571938.0, "step": 6362 }, { "epoch": 0.7523944661227385, "grad_norm": 0.12828329205513, "learning_rate": 1.4234391287204463e-05, "loss": 0.3036, "num_tokens": 4030210930.0, "step": 6363 }, { "epoch": 0.7525127113633676, "grad_norm": 0.14028991758823395, "learning_rate": 1.4226957979284753e-05, "loss": 0.3574, "num_tokens": 4030846482.0, "step": 6364 }, { "epoch": 0.7526309566039967, "grad_norm": 0.13354851305484772, "learning_rate": 1.4219527424889686e-05, "loss": 0.3105, "num_tokens": 4031482767.0, "step": 6365 }, { "epoch": 0.7527492018446258, "grad_norm": 0.12479114532470703, "learning_rate": 1.4212099625109153e-05, "loss": 0.3096, "num_tokens": 4032107560.0, "step": 6366 }, { "epoch": 0.7528674470852548, "grad_norm": 0.13806438446044922, "learning_rate": 1.420467458103261e-05, "loss": 0.3243, "num_tokens": 4032742815.0, "step": 6367 }, { "epoch": 0.7529856923258839, "grad_norm": 0.13623502850532532, "learning_rate": 1.419725229374913e-05, "loss": 0.3358, "num_tokens": 4033377013.0, "step": 6368 }, { "epoch": 0.753103937566513, "grad_norm": 0.15420502424240112, "learning_rate": 1.4189832764347353e-05, "loss": 0.357, "num_tokens": 4034012782.0, "step": 6369 }, { "epoch": 0.753222182807142, "grad_norm": 0.13054127991199493, "learning_rate": 1.4182415993915551e-05, "loss": 0.3177, "num_tokens": 4034648264.0, "step": 6370 }, { "epoch": 0.7533404280477711, "grad_norm": 0.13466407358646393, "learning_rate": 1.4175001983541583e-05, "loss": 0.3511, "num_tokens": 4035285526.0, "step": 6371 }, { "epoch": 0.7534586732884001, "grad_norm": 0.12561380863189697, "learning_rate": 1.416759073431288e-05, "loss": 0.2941, "num_tokens": 4035919834.0, "step": 6372 }, { "epoch": 0.7535769185290292, "grad_norm": 0.1307155042886734, "learning_rate": 1.4160182247316485e-05, "loss": 0.2984, "num_tokens": 4036553096.0, "step": 6373 }, { "epoch": 0.7536951637696583, "grad_norm": 0.14000757038593292, "learning_rate": 1.4152776523639024e-05, "loss": 0.3133, "num_tokens": 4037189828.0, "step": 6374 }, { "epoch": 0.7538134090102874, "grad_norm": 0.13248521089553833, "learning_rate": 1.4145373564366744e-05, "loss": 0.342, "num_tokens": 4037829472.0, "step": 6375 }, { "epoch": 0.7539316542509163, "grad_norm": 0.12606137990951538, "learning_rate": 1.4137973370585466e-05, "loss": 0.3237, "num_tokens": 4038460775.0, "step": 6376 }, { "epoch": 0.7540498994915454, "grad_norm": 0.14519831538200378, "learning_rate": 1.4130575943380595e-05, "loss": 0.3331, "num_tokens": 4039095468.0, "step": 6377 }, { "epoch": 0.7541681447321745, "grad_norm": 0.14479799568653107, "learning_rate": 1.4123181283837163e-05, "loss": 0.317, "num_tokens": 4039734527.0, "step": 6378 }, { "epoch": 0.7542863899728036, "grad_norm": 0.1462600976228714, "learning_rate": 1.4115789393039762e-05, "loss": 0.3524, "num_tokens": 4040367356.0, "step": 6379 }, { "epoch": 0.7544046352134327, "grad_norm": 0.13336458802223206, "learning_rate": 1.4108400272072616e-05, "loss": 0.3517, "num_tokens": 4041001791.0, "step": 6380 }, { "epoch": 0.7545228804540617, "grad_norm": 0.13587038218975067, "learning_rate": 1.4101013922019488e-05, "loss": 0.2877, "num_tokens": 4041628700.0, "step": 6381 }, { "epoch": 0.7546411256946908, "grad_norm": 0.16557584702968597, "learning_rate": 1.4093630343963787e-05, "loss": 0.3715, "num_tokens": 4042257507.0, "step": 6382 }, { "epoch": 0.7547593709353199, "grad_norm": 0.14298979938030243, "learning_rate": 1.4086249538988484e-05, "loss": 0.3279, "num_tokens": 4042888675.0, "step": 6383 }, { "epoch": 0.754877616175949, "grad_norm": 0.1334739625453949, "learning_rate": 1.4078871508176169e-05, "loss": 0.3591, "num_tokens": 4043501460.0, "step": 6384 }, { "epoch": 0.7549958614165779, "grad_norm": 0.1261744201183319, "learning_rate": 1.4071496252609003e-05, "loss": 0.3031, "num_tokens": 4044134420.0, "step": 6385 }, { "epoch": 0.755114106657207, "grad_norm": 0.12436582148075104, "learning_rate": 1.4064123773368732e-05, "loss": 0.3065, "num_tokens": 4044770097.0, "step": 6386 }, { "epoch": 0.7552323518978361, "grad_norm": 0.15137693285942078, "learning_rate": 1.4056754071536737e-05, "loss": 0.3615, "num_tokens": 4045401885.0, "step": 6387 }, { "epoch": 0.7553505971384652, "grad_norm": 0.13955606520175934, "learning_rate": 1.4049387148193948e-05, "loss": 0.3091, "num_tokens": 4046041606.0, "step": 6388 }, { "epoch": 0.7554688423790943, "grad_norm": 0.12993229925632477, "learning_rate": 1.4042023004420907e-05, "loss": 0.3425, "num_tokens": 4046674293.0, "step": 6389 }, { "epoch": 0.7555870876197233, "grad_norm": 0.1239859014749527, "learning_rate": 1.403466164129774e-05, "loss": 0.3406, "num_tokens": 4047309143.0, "step": 6390 }, { "epoch": 0.7557053328603524, "grad_norm": 0.15420521795749664, "learning_rate": 1.4027303059904183e-05, "loss": 0.3853, "num_tokens": 4047944757.0, "step": 6391 }, { "epoch": 0.7558235781009814, "grad_norm": 0.14931181073188782, "learning_rate": 1.4019947261319535e-05, "loss": 0.3636, "num_tokens": 4048582512.0, "step": 6392 }, { "epoch": 0.7559418233416105, "grad_norm": 0.1406264454126358, "learning_rate": 1.4012594246622725e-05, "loss": 0.3617, "num_tokens": 4049222154.0, "step": 6393 }, { "epoch": 0.7560600685822396, "grad_norm": 0.1218009665608406, "learning_rate": 1.4005244016892237e-05, "loss": 0.3323, "num_tokens": 4049860650.0, "step": 6394 }, { "epoch": 0.7561783138228686, "grad_norm": 0.13991829752922058, "learning_rate": 1.3997896573206156e-05, "loss": 0.3797, "num_tokens": 4050496257.0, "step": 6395 }, { "epoch": 0.7562965590634977, "grad_norm": 0.13074196875095367, "learning_rate": 1.3990551916642183e-05, "loss": 0.3348, "num_tokens": 4051127346.0, "step": 6396 }, { "epoch": 0.7564148043041268, "grad_norm": 0.13540786504745483, "learning_rate": 1.398321004827758e-05, "loss": 0.325, "num_tokens": 4051763659.0, "step": 6397 }, { "epoch": 0.7565330495447559, "grad_norm": 0.12810228765010834, "learning_rate": 1.3975870969189209e-05, "loss": 0.3297, "num_tokens": 4052402864.0, "step": 6398 }, { "epoch": 0.7566512947853848, "grad_norm": 0.13195978105068207, "learning_rate": 1.3968534680453522e-05, "loss": 0.3255, "num_tokens": 4053036417.0, "step": 6399 }, { "epoch": 0.7567695400260139, "grad_norm": 0.13362343609333038, "learning_rate": 1.3961201183146569e-05, "loss": 0.3279, "num_tokens": 4053668454.0, "step": 6400 }, { "epoch": 0.756887785266643, "grad_norm": 0.13807988166809082, "learning_rate": 1.3953870478343996e-05, "loss": 0.3351, "num_tokens": 4054305283.0, "step": 6401 }, { "epoch": 0.7570060305072721, "grad_norm": 0.1456075757741928, "learning_rate": 1.3946542567121023e-05, "loss": 0.3771, "num_tokens": 4054938222.0, "step": 6402 }, { "epoch": 0.7571242757479012, "grad_norm": 0.1321442872285843, "learning_rate": 1.3939217450552463e-05, "loss": 0.3107, "num_tokens": 4055570811.0, "step": 6403 }, { "epoch": 0.7572425209885302, "grad_norm": 0.1252087503671646, "learning_rate": 1.3931895129712719e-05, "loss": 0.3179, "num_tokens": 4056209090.0, "step": 6404 }, { "epoch": 0.7573607662291593, "grad_norm": 0.13521023094654083, "learning_rate": 1.39245756056758e-05, "loss": 0.2979, "num_tokens": 4056845867.0, "step": 6405 }, { "epoch": 0.7574790114697884, "grad_norm": 0.12604564428329468, "learning_rate": 1.391725887951529e-05, "loss": 0.3257, "num_tokens": 4057482308.0, "step": 6406 }, { "epoch": 0.7575972567104174, "grad_norm": 0.13495126366615295, "learning_rate": 1.3909944952304353e-05, "loss": 0.3144, "num_tokens": 4058117862.0, "step": 6407 }, { "epoch": 0.7577155019510464, "grad_norm": 0.13791398704051971, "learning_rate": 1.3902633825115773e-05, "loss": 0.325, "num_tokens": 4058747888.0, "step": 6408 }, { "epoch": 0.7578337471916755, "grad_norm": 0.13360974192619324, "learning_rate": 1.3895325499021889e-05, "loss": 0.3258, "num_tokens": 4059385527.0, "step": 6409 }, { "epoch": 0.7579519924323046, "grad_norm": 0.12751787900924683, "learning_rate": 1.388801997509467e-05, "loss": 0.2825, "num_tokens": 4060024254.0, "step": 6410 }, { "epoch": 0.7580702376729337, "grad_norm": 0.13527333736419678, "learning_rate": 1.3880717254405614e-05, "loss": 0.3351, "num_tokens": 4060655571.0, "step": 6411 }, { "epoch": 0.7581884829135628, "grad_norm": 0.1366226077079773, "learning_rate": 1.3873417338025872e-05, "loss": 0.348, "num_tokens": 4061291023.0, "step": 6412 }, { "epoch": 0.7583067281541918, "grad_norm": 0.13378849625587463, "learning_rate": 1.3866120227026133e-05, "loss": 0.3165, "num_tokens": 4061922899.0, "step": 6413 }, { "epoch": 0.7584249733948208, "grad_norm": 0.13467103242874146, "learning_rate": 1.3858825922476719e-05, "loss": 0.3469, "num_tokens": 4062560847.0, "step": 6414 }, { "epoch": 0.7585432186354499, "grad_norm": 0.1344882845878601, "learning_rate": 1.3851534425447506e-05, "loss": 0.3541, "num_tokens": 4063195668.0, "step": 6415 }, { "epoch": 0.758661463876079, "grad_norm": 0.1270541101694107, "learning_rate": 1.3844245737007958e-05, "loss": 0.3209, "num_tokens": 4063832654.0, "step": 6416 }, { "epoch": 0.758779709116708, "grad_norm": 0.1255628913640976, "learning_rate": 1.3836959858227165e-05, "loss": 0.3094, "num_tokens": 4064469152.0, "step": 6417 }, { "epoch": 0.7588979543573371, "grad_norm": 0.12652434408664703, "learning_rate": 1.3829676790173764e-05, "loss": 0.3047, "num_tokens": 4065102169.0, "step": 6418 }, { "epoch": 0.7590161995979662, "grad_norm": 0.13536076247692108, "learning_rate": 1.3822396533915996e-05, "loss": 0.3259, "num_tokens": 4065741079.0, "step": 6419 }, { "epoch": 0.7591344448385953, "grad_norm": 0.13197050988674164, "learning_rate": 1.3815119090521679e-05, "loss": 0.3733, "num_tokens": 4066373459.0, "step": 6420 }, { "epoch": 0.7592526900792244, "grad_norm": 0.13276909291744232, "learning_rate": 1.3807844461058235e-05, "loss": 0.3243, "num_tokens": 4067008039.0, "step": 6421 }, { "epoch": 0.7593709353198533, "grad_norm": 0.12172029912471771, "learning_rate": 1.3800572646592678e-05, "loss": 0.3222, "num_tokens": 4067645324.0, "step": 6422 }, { "epoch": 0.7594891805604824, "grad_norm": 0.1513986736536026, "learning_rate": 1.3793303648191589e-05, "loss": 0.3729, "num_tokens": 4068276908.0, "step": 6423 }, { "epoch": 0.7596074258011115, "grad_norm": 0.12842394411563873, "learning_rate": 1.3786037466921138e-05, "loss": 0.3154, "num_tokens": 4068913698.0, "step": 6424 }, { "epoch": 0.7597256710417406, "grad_norm": 0.14461806416511536, "learning_rate": 1.3778774103847083e-05, "loss": 0.3152, "num_tokens": 4069551138.0, "step": 6425 }, { "epoch": 0.7598439162823696, "grad_norm": 0.13353879749774933, "learning_rate": 1.3771513560034794e-05, "loss": 0.3368, "num_tokens": 4070189879.0, "step": 6426 }, { "epoch": 0.7599621615229987, "grad_norm": 0.13277386128902435, "learning_rate": 1.376425583654919e-05, "loss": 0.3182, "num_tokens": 4070824838.0, "step": 6427 }, { "epoch": 0.7600804067636278, "grad_norm": 0.1313123255968094, "learning_rate": 1.3757000934454793e-05, "loss": 0.358, "num_tokens": 4071457241.0, "step": 6428 }, { "epoch": 0.7601986520042568, "grad_norm": 0.13354401290416718, "learning_rate": 1.3749748854815724e-05, "loss": 0.3279, "num_tokens": 4072090426.0, "step": 6429 }, { "epoch": 0.7603168972448859, "grad_norm": 0.1188952848315239, "learning_rate": 1.374249959869566e-05, "loss": 0.3076, "num_tokens": 4072726398.0, "step": 6430 }, { "epoch": 0.7604351424855149, "grad_norm": 0.13922974467277527, "learning_rate": 1.373525316715791e-05, "loss": 0.2928, "num_tokens": 4073365872.0, "step": 6431 }, { "epoch": 0.760553387726144, "grad_norm": 0.14626488089561462, "learning_rate": 1.3728009561265304e-05, "loss": 0.3498, "num_tokens": 4074005104.0, "step": 6432 }, { "epoch": 0.7606716329667731, "grad_norm": 0.12952566146850586, "learning_rate": 1.372076878208032e-05, "loss": 0.3412, "num_tokens": 4074643822.0, "step": 6433 }, { "epoch": 0.7607898782074022, "grad_norm": 0.12660372257232666, "learning_rate": 1.3713530830664973e-05, "loss": 0.2904, "num_tokens": 4075278024.0, "step": 6434 }, { "epoch": 0.7609081234480313, "grad_norm": 0.14246653020381927, "learning_rate": 1.3706295708080908e-05, "loss": 0.3059, "num_tokens": 4075908565.0, "step": 6435 }, { "epoch": 0.7610263686886602, "grad_norm": 0.1313161998987198, "learning_rate": 1.3699063415389326e-05, "loss": 0.328, "num_tokens": 4076503608.0, "step": 6436 }, { "epoch": 0.7611446139292893, "grad_norm": 0.12725292146205902, "learning_rate": 1.3691833953651e-05, "loss": 0.343, "num_tokens": 4077139070.0, "step": 6437 }, { "epoch": 0.7612628591699184, "grad_norm": 0.12543800473213196, "learning_rate": 1.3684607323926335e-05, "loss": 0.3237, "num_tokens": 4077776412.0, "step": 6438 }, { "epoch": 0.7613811044105475, "grad_norm": 0.1353425234556198, "learning_rate": 1.3677383527275268e-05, "loss": 0.3373, "num_tokens": 4078411020.0, "step": 6439 }, { "epoch": 0.7614993496511765, "grad_norm": 0.14656561613082886, "learning_rate": 1.3670162564757377e-05, "loss": 0.3431, "num_tokens": 4079047490.0, "step": 6440 }, { "epoch": 0.7616175948918056, "grad_norm": 0.12219518423080444, "learning_rate": 1.3662944437431752e-05, "loss": 0.3139, "num_tokens": 4079678060.0, "step": 6441 }, { "epoch": 0.7617358401324347, "grad_norm": 0.12115050852298737, "learning_rate": 1.3655729146357132e-05, "loss": 0.3268, "num_tokens": 4080315350.0, "step": 6442 }, { "epoch": 0.7618540853730638, "grad_norm": 0.13752250373363495, "learning_rate": 1.3648516692591816e-05, "loss": 0.351, "num_tokens": 4080947479.0, "step": 6443 }, { "epoch": 0.7619723306136928, "grad_norm": 0.13786648213863373, "learning_rate": 1.3641307077193684e-05, "loss": 0.3378, "num_tokens": 4081582089.0, "step": 6444 }, { "epoch": 0.7620905758543218, "grad_norm": 0.1298142522573471, "learning_rate": 1.3634100301220194e-05, "loss": 0.3145, "num_tokens": 4082221450.0, "step": 6445 }, { "epoch": 0.7622088210949509, "grad_norm": 0.1347825527191162, "learning_rate": 1.3626896365728397e-05, "loss": 0.3325, "num_tokens": 4082850211.0, "step": 6446 }, { "epoch": 0.76232706633558, "grad_norm": 0.13452841341495514, "learning_rate": 1.3619695271774938e-05, "loss": 0.3383, "num_tokens": 4083479851.0, "step": 6447 }, { "epoch": 0.7624453115762091, "grad_norm": 0.13618136942386627, "learning_rate": 1.3612497020416026e-05, "loss": 0.316, "num_tokens": 4084118672.0, "step": 6448 }, { "epoch": 0.7625635568168381, "grad_norm": 0.13410931825637817, "learning_rate": 1.3605301612707446e-05, "loss": 0.3306, "num_tokens": 4084751445.0, "step": 6449 }, { "epoch": 0.7626818020574672, "grad_norm": 0.1349155753850937, "learning_rate": 1.3598109049704606e-05, "loss": 0.3185, "num_tokens": 4085381935.0, "step": 6450 }, { "epoch": 0.7628000472980963, "grad_norm": 0.13292716443538666, "learning_rate": 1.3590919332462447e-05, "loss": 0.3392, "num_tokens": 4086015248.0, "step": 6451 }, { "epoch": 0.7629182925387253, "grad_norm": 0.1195073276758194, "learning_rate": 1.3583732462035537e-05, "loss": 0.3013, "num_tokens": 4086650425.0, "step": 6452 }, { "epoch": 0.7630365377793544, "grad_norm": 0.1277841478586197, "learning_rate": 1.3576548439477996e-05, "loss": 0.3018, "num_tokens": 4087287140.0, "step": 6453 }, { "epoch": 0.7631547830199834, "grad_norm": 0.12954823672771454, "learning_rate": 1.3569367265843536e-05, "loss": 0.3362, "num_tokens": 4087921704.0, "step": 6454 }, { "epoch": 0.7632730282606125, "grad_norm": 0.14170196652412415, "learning_rate": 1.3562188942185448e-05, "loss": 0.353, "num_tokens": 4088555107.0, "step": 6455 }, { "epoch": 0.7633912735012416, "grad_norm": 0.13179020583629608, "learning_rate": 1.3555013469556614e-05, "loss": 0.3309, "num_tokens": 4089185426.0, "step": 6456 }, { "epoch": 0.7635095187418707, "grad_norm": 0.1239769384264946, "learning_rate": 1.3547840849009494e-05, "loss": 0.3265, "num_tokens": 4089823367.0, "step": 6457 }, { "epoch": 0.7636277639824997, "grad_norm": 0.13024766743183136, "learning_rate": 1.3540671081596117e-05, "loss": 0.3352, "num_tokens": 4090457003.0, "step": 6458 }, { "epoch": 0.7637460092231287, "grad_norm": 0.12898169457912445, "learning_rate": 1.3533504168368116e-05, "loss": 0.3225, "num_tokens": 4091087251.0, "step": 6459 }, { "epoch": 0.7638642544637578, "grad_norm": 0.12652570009231567, "learning_rate": 1.3526340110376684e-05, "loss": 0.3055, "num_tokens": 4091722086.0, "step": 6460 }, { "epoch": 0.7639824997043869, "grad_norm": 0.13400301337242126, "learning_rate": 1.3519178908672627e-05, "loss": 0.3278, "num_tokens": 4092360787.0, "step": 6461 }, { "epoch": 0.764100744945016, "grad_norm": 0.12218889594078064, "learning_rate": 1.3512020564306275e-05, "loss": 0.3119, "num_tokens": 4092999710.0, "step": 6462 }, { "epoch": 0.764218990185645, "grad_norm": 0.13713176548480988, "learning_rate": 1.350486507832759e-05, "loss": 0.3376, "num_tokens": 4093637654.0, "step": 6463 }, { "epoch": 0.7643372354262741, "grad_norm": 0.1368236392736435, "learning_rate": 1.3497712451786113e-05, "loss": 0.3457, "num_tokens": 4094277270.0, "step": 6464 }, { "epoch": 0.7644554806669032, "grad_norm": 0.13153311610221863, "learning_rate": 1.3490562685730935e-05, "loss": 0.3279, "num_tokens": 4094916324.0, "step": 6465 }, { "epoch": 0.7645737259075323, "grad_norm": 0.12887689471244812, "learning_rate": 1.3483415781210748e-05, "loss": 0.3292, "num_tokens": 4095554779.0, "step": 6466 }, { "epoch": 0.7646919711481612, "grad_norm": 0.14095818996429443, "learning_rate": 1.3476271739273815e-05, "loss": 0.3638, "num_tokens": 4096186636.0, "step": 6467 }, { "epoch": 0.7648102163887903, "grad_norm": 0.11750029027462006, "learning_rate": 1.3469130560967993e-05, "loss": 0.3016, "num_tokens": 4096819923.0, "step": 6468 }, { "epoch": 0.7649284616294194, "grad_norm": 0.12166915833950043, "learning_rate": 1.3461992247340696e-05, "loss": 0.3331, "num_tokens": 4097456694.0, "step": 6469 }, { "epoch": 0.7650467068700485, "grad_norm": 0.13126549124717712, "learning_rate": 1.3454856799438962e-05, "loss": 0.3115, "num_tokens": 4098093549.0, "step": 6470 }, { "epoch": 0.7651649521106776, "grad_norm": 0.13642987608909607, "learning_rate": 1.3447724218309338e-05, "loss": 0.3339, "num_tokens": 4098731765.0, "step": 6471 }, { "epoch": 0.7652831973513066, "grad_norm": 0.13020724058151245, "learning_rate": 1.3440594504998009e-05, "loss": 0.3323, "num_tokens": 4099364707.0, "step": 6472 }, { "epoch": 0.7654014425919357, "grad_norm": 0.12966817617416382, "learning_rate": 1.3433467660550735e-05, "loss": 0.3383, "num_tokens": 4100002435.0, "step": 6473 }, { "epoch": 0.7655196878325647, "grad_norm": 0.12649047374725342, "learning_rate": 1.3426343686012826e-05, "loss": 0.2939, "num_tokens": 4100635211.0, "step": 6474 }, { "epoch": 0.7656379330731938, "grad_norm": 0.12525750696659088, "learning_rate": 1.3419222582429196e-05, "loss": 0.3085, "num_tokens": 4101272758.0, "step": 6475 }, { "epoch": 0.7657561783138229, "grad_norm": 0.14212287962436676, "learning_rate": 1.341210435084431e-05, "loss": 0.3732, "num_tokens": 4101903988.0, "step": 6476 }, { "epoch": 0.7658744235544519, "grad_norm": 0.1322428286075592, "learning_rate": 1.3404988992302256e-05, "loss": 0.303, "num_tokens": 4102539583.0, "step": 6477 }, { "epoch": 0.765992668795081, "grad_norm": 0.13280171155929565, "learning_rate": 1.339787650784666e-05, "loss": 0.3319, "num_tokens": 4103175345.0, "step": 6478 }, { "epoch": 0.7661109140357101, "grad_norm": 0.1495521366596222, "learning_rate": 1.3390766898520735e-05, "loss": 0.3696, "num_tokens": 4103812460.0, "step": 6479 }, { "epoch": 0.7662291592763392, "grad_norm": 0.13558423519134521, "learning_rate": 1.3383660165367297e-05, "loss": 0.3265, "num_tokens": 4104444995.0, "step": 6480 }, { "epoch": 0.7663474045169681, "grad_norm": 0.1287766546010971, "learning_rate": 1.3376556309428704e-05, "loss": 0.3013, "num_tokens": 4105081298.0, "step": 6481 }, { "epoch": 0.7664656497575972, "grad_norm": 0.1373738944530487, "learning_rate": 1.3369455331746925e-05, "loss": 0.3488, "num_tokens": 4105716486.0, "step": 6482 }, { "epoch": 0.7665838949982263, "grad_norm": 0.13724499940872192, "learning_rate": 1.3362357233363488e-05, "loss": 0.3073, "num_tokens": 4106342707.0, "step": 6483 }, { "epoch": 0.7667021402388554, "grad_norm": 0.1436561495065689, "learning_rate": 1.3355262015319492e-05, "loss": 0.357, "num_tokens": 4106979382.0, "step": 6484 }, { "epoch": 0.7668203854794845, "grad_norm": 0.12658269703388214, "learning_rate": 1.3348169678655629e-05, "loss": 0.3242, "num_tokens": 4107614840.0, "step": 6485 }, { "epoch": 0.7669386307201135, "grad_norm": 0.13492389023303986, "learning_rate": 1.3341080224412169e-05, "loss": 0.3129, "num_tokens": 4108253270.0, "step": 6486 }, { "epoch": 0.7670568759607426, "grad_norm": 0.1417853832244873, "learning_rate": 1.3333993653628954e-05, "loss": 0.3677, "num_tokens": 4108887957.0, "step": 6487 }, { "epoch": 0.7671751212013717, "grad_norm": 0.11957691609859467, "learning_rate": 1.3326909967345386e-05, "loss": 0.2923, "num_tokens": 4109523418.0, "step": 6488 }, { "epoch": 0.7672933664420007, "grad_norm": 0.13651590049266815, "learning_rate": 1.3319829166600486e-05, "loss": 0.3287, "num_tokens": 4110162235.0, "step": 6489 }, { "epoch": 0.7674116116826297, "grad_norm": 0.13523732125759125, "learning_rate": 1.33127512524328e-05, "loss": 0.3442, "num_tokens": 4110794725.0, "step": 6490 }, { "epoch": 0.7675298569232588, "grad_norm": 0.1339389979839325, "learning_rate": 1.3305676225880505e-05, "loss": 0.3229, "num_tokens": 4111426903.0, "step": 6491 }, { "epoch": 0.7676481021638879, "grad_norm": 0.12181495130062103, "learning_rate": 1.3298604087981295e-05, "loss": 0.3149, "num_tokens": 4112063575.0, "step": 6492 }, { "epoch": 0.767766347404517, "grad_norm": 0.12000451982021332, "learning_rate": 1.3291534839772489e-05, "loss": 0.2885, "num_tokens": 4112702637.0, "step": 6493 }, { "epoch": 0.7678845926451461, "grad_norm": 0.1375793218612671, "learning_rate": 1.328446848229097e-05, "loss": 0.3424, "num_tokens": 4113339172.0, "step": 6494 }, { "epoch": 0.7680028378857751, "grad_norm": 0.12924163043498993, "learning_rate": 1.3277405016573185e-05, "loss": 0.3149, "num_tokens": 4113969347.0, "step": 6495 }, { "epoch": 0.7681210831264041, "grad_norm": 0.1480138897895813, "learning_rate": 1.3270344443655166e-05, "loss": 0.3438, "num_tokens": 4114602248.0, "step": 6496 }, { "epoch": 0.7682393283670332, "grad_norm": 0.12992379069328308, "learning_rate": 1.3263286764572503e-05, "loss": 0.2876, "num_tokens": 4115238707.0, "step": 6497 }, { "epoch": 0.7683575736076623, "grad_norm": 0.14970946311950684, "learning_rate": 1.32562319803604e-05, "loss": 0.3917, "num_tokens": 4115869418.0, "step": 6498 }, { "epoch": 0.7684758188482913, "grad_norm": 0.1371813416481018, "learning_rate": 1.3249180092053603e-05, "loss": 0.3439, "num_tokens": 4116501606.0, "step": 6499 }, { "epoch": 0.7685940640889204, "grad_norm": 0.1402600258588791, "learning_rate": 1.3242131100686435e-05, "loss": 0.3624, "num_tokens": 4117141180.0, "step": 6500 }, { "epoch": 0.7687123093295495, "grad_norm": 0.11886383593082428, "learning_rate": 1.3235085007292817e-05, "loss": 0.3066, "num_tokens": 4117772639.0, "step": 6501 }, { "epoch": 0.7688305545701786, "grad_norm": 0.13559088110923767, "learning_rate": 1.3228041812906217e-05, "loss": 0.3112, "num_tokens": 4118408775.0, "step": 6502 }, { "epoch": 0.7689487998108077, "grad_norm": 0.1388697624206543, "learning_rate": 1.3221001518559705e-05, "loss": 0.3776, "num_tokens": 4119039774.0, "step": 6503 }, { "epoch": 0.7690670450514366, "grad_norm": 0.1492912322282791, "learning_rate": 1.3213964125285908e-05, "loss": 0.3702, "num_tokens": 4119671059.0, "step": 6504 }, { "epoch": 0.7691852902920657, "grad_norm": 0.14128649234771729, "learning_rate": 1.3206929634117028e-05, "loss": 0.3734, "num_tokens": 4120306638.0, "step": 6505 }, { "epoch": 0.7693035355326948, "grad_norm": 0.1293901652097702, "learning_rate": 1.3199898046084837e-05, "loss": 0.3516, "num_tokens": 4120935518.0, "step": 6506 }, { "epoch": 0.7694217807733239, "grad_norm": 0.13435833156108856, "learning_rate": 1.31928693622207e-05, "loss": 0.3353, "num_tokens": 4121562911.0, "step": 6507 }, { "epoch": 0.769540026013953, "grad_norm": 0.14658649265766144, "learning_rate": 1.3185843583555557e-05, "loss": 0.3136, "num_tokens": 4122197444.0, "step": 6508 }, { "epoch": 0.769658271254582, "grad_norm": 0.1245918720960617, "learning_rate": 1.317882071111988e-05, "loss": 0.3303, "num_tokens": 4122831786.0, "step": 6509 }, { "epoch": 0.7697765164952111, "grad_norm": 0.13819052278995514, "learning_rate": 1.3171800745943768e-05, "loss": 0.3311, "num_tokens": 4123465044.0, "step": 6510 }, { "epoch": 0.7698947617358401, "grad_norm": 0.13310480117797852, "learning_rate": 1.3164783689056856e-05, "loss": 0.3041, "num_tokens": 4124103735.0, "step": 6511 }, { "epoch": 0.7700130069764692, "grad_norm": 0.1290964037179947, "learning_rate": 1.3157769541488378e-05, "loss": 0.3144, "num_tokens": 4124740204.0, "step": 6512 }, { "epoch": 0.7701312522170982, "grad_norm": 0.13239192962646484, "learning_rate": 1.3150758304267123e-05, "loss": 0.3298, "num_tokens": 4125375240.0, "step": 6513 }, { "epoch": 0.7702494974577273, "grad_norm": 0.13205744326114655, "learning_rate": 1.3143749978421457e-05, "loss": 0.3259, "num_tokens": 4126005988.0, "step": 6514 }, { "epoch": 0.7703677426983564, "grad_norm": 0.14972741901874542, "learning_rate": 1.3136744564979334e-05, "loss": 0.3195, "num_tokens": 4126635008.0, "step": 6515 }, { "epoch": 0.7704859879389855, "grad_norm": 0.13701266050338745, "learning_rate": 1.3129742064968263e-05, "loss": 0.3376, "num_tokens": 4127272526.0, "step": 6516 }, { "epoch": 0.7706042331796146, "grad_norm": 0.14855630695819855, "learning_rate": 1.3122742479415328e-05, "loss": 0.3522, "num_tokens": 4127909209.0, "step": 6517 }, { "epoch": 0.7707224784202436, "grad_norm": 0.13413570821285248, "learning_rate": 1.3115745809347181e-05, "loss": 0.313, "num_tokens": 4128544561.0, "step": 6518 }, { "epoch": 0.7708407236608726, "grad_norm": 0.13415676355361938, "learning_rate": 1.3108752055790078e-05, "loss": 0.3279, "num_tokens": 4129169927.0, "step": 6519 }, { "epoch": 0.7709589689015017, "grad_norm": 0.14937016367912292, "learning_rate": 1.3101761219769798e-05, "loss": 0.4069, "num_tokens": 4129806969.0, "step": 6520 }, { "epoch": 0.7710772141421308, "grad_norm": 0.14602042734622955, "learning_rate": 1.3094773302311739e-05, "loss": 0.3171, "num_tokens": 4130440809.0, "step": 6521 }, { "epoch": 0.7711954593827598, "grad_norm": 0.15677742660045624, "learning_rate": 1.3087788304440839e-05, "loss": 0.376, "num_tokens": 4131075682.0, "step": 6522 }, { "epoch": 0.7713137046233889, "grad_norm": 0.13661722838878632, "learning_rate": 1.3080806227181614e-05, "loss": 0.3369, "num_tokens": 4131710514.0, "step": 6523 }, { "epoch": 0.771431949864018, "grad_norm": 0.1363690197467804, "learning_rate": 1.3073827071558167e-05, "loss": 0.349, "num_tokens": 4132342682.0, "step": 6524 }, { "epoch": 0.7715501951046471, "grad_norm": 0.12217701226472855, "learning_rate": 1.3066850838594156e-05, "loss": 0.2928, "num_tokens": 4132971210.0, "step": 6525 }, { "epoch": 0.7716684403452762, "grad_norm": 0.13643693923950195, "learning_rate": 1.3059877529312815e-05, "loss": 0.3314, "num_tokens": 4133606529.0, "step": 6526 }, { "epoch": 0.7717866855859051, "grad_norm": 0.1349450945854187, "learning_rate": 1.3052907144736947e-05, "loss": 0.3518, "num_tokens": 4134242555.0, "step": 6527 }, { "epoch": 0.7719049308265342, "grad_norm": 0.13883955776691437, "learning_rate": 1.304593968588893e-05, "loss": 0.3233, "num_tokens": 4134875521.0, "step": 6528 }, { "epoch": 0.7720231760671633, "grad_norm": 0.13877476751804352, "learning_rate": 1.303897515379073e-05, "loss": 0.36, "num_tokens": 4135510567.0, "step": 6529 }, { "epoch": 0.7721414213077924, "grad_norm": 0.1310817450284958, "learning_rate": 1.3032013549463834e-05, "loss": 0.3332, "num_tokens": 4136144577.0, "step": 6530 }, { "epoch": 0.7722596665484214, "grad_norm": 0.1335369199514389, "learning_rate": 1.3025054873929358e-05, "loss": 0.3142, "num_tokens": 4136743373.0, "step": 6531 }, { "epoch": 0.7723779117890505, "grad_norm": 0.13638007640838623, "learning_rate": 1.301809912820794e-05, "loss": 0.3239, "num_tokens": 4137378337.0, "step": 6532 }, { "epoch": 0.7724961570296796, "grad_norm": 0.12828032672405243, "learning_rate": 1.3011146313319829e-05, "loss": 0.327, "num_tokens": 4138014299.0, "step": 6533 }, { "epoch": 0.7726144022703086, "grad_norm": 0.12944236397743225, "learning_rate": 1.3004196430284818e-05, "loss": 0.3013, "num_tokens": 4138644042.0, "step": 6534 }, { "epoch": 0.7727326475109377, "grad_norm": 0.12254159897565842, "learning_rate": 1.2997249480122266e-05, "loss": 0.3074, "num_tokens": 4139283125.0, "step": 6535 }, { "epoch": 0.7728508927515667, "grad_norm": 0.14838692545890808, "learning_rate": 1.2990305463851134e-05, "loss": 0.3589, "num_tokens": 4139909731.0, "step": 6536 }, { "epoch": 0.7729691379921958, "grad_norm": 0.13178987801074982, "learning_rate": 1.2983364382489918e-05, "loss": 0.3124, "num_tokens": 4140539855.0, "step": 6537 }, { "epoch": 0.7730873832328249, "grad_norm": 0.12813882529735565, "learning_rate": 1.2976426237056693e-05, "loss": 0.3063, "num_tokens": 4141173874.0, "step": 6538 }, { "epoch": 0.773205628473454, "grad_norm": 0.134090855717659, "learning_rate": 1.296949102856911e-05, "loss": 0.3157, "num_tokens": 4141807418.0, "step": 6539 }, { "epoch": 0.773323873714083, "grad_norm": 0.12326796352863312, "learning_rate": 1.2962558758044395e-05, "loss": 0.2963, "num_tokens": 4142443811.0, "step": 6540 }, { "epoch": 0.773442118954712, "grad_norm": 0.13039334118366241, "learning_rate": 1.2955629426499325e-05, "loss": 0.3244, "num_tokens": 4143075229.0, "step": 6541 }, { "epoch": 0.7735603641953411, "grad_norm": 0.13126014173030853, "learning_rate": 1.2948703034950265e-05, "loss": 0.3303, "num_tokens": 4143707699.0, "step": 6542 }, { "epoch": 0.7736786094359702, "grad_norm": 0.14167049527168274, "learning_rate": 1.2941779584413134e-05, "loss": 0.366, "num_tokens": 4144339514.0, "step": 6543 }, { "epoch": 0.7737968546765993, "grad_norm": 0.13731129467487335, "learning_rate": 1.2934859075903415e-05, "loss": 0.3409, "num_tokens": 4144976033.0, "step": 6544 }, { "epoch": 0.7739150999172283, "grad_norm": 0.1393587440252304, "learning_rate": 1.2927941510436194e-05, "loss": 0.361, "num_tokens": 4145613471.0, "step": 6545 }, { "epoch": 0.7740333451578574, "grad_norm": 0.1342426836490631, "learning_rate": 1.292102688902608e-05, "loss": 0.3375, "num_tokens": 4146246274.0, "step": 6546 }, { "epoch": 0.7741515903984865, "grad_norm": 0.13014402985572815, "learning_rate": 1.2914115212687278e-05, "loss": 0.3368, "num_tokens": 4146875458.0, "step": 6547 }, { "epoch": 0.7742698356391156, "grad_norm": 0.1428433358669281, "learning_rate": 1.2907206482433543e-05, "loss": 0.3585, "num_tokens": 4147513285.0, "step": 6548 }, { "epoch": 0.7743880808797446, "grad_norm": 0.13273289799690247, "learning_rate": 1.2900300699278222e-05, "loss": 0.3535, "num_tokens": 4148146724.0, "step": 6549 }, { "epoch": 0.7745063261203736, "grad_norm": 0.14193230867385864, "learning_rate": 1.289339786423422e-05, "loss": 0.3658, "num_tokens": 4148783495.0, "step": 6550 }, { "epoch": 0.7746245713610027, "grad_norm": 0.12787675857543945, "learning_rate": 1.2886497978313999e-05, "loss": 0.3398, "num_tokens": 4149416783.0, "step": 6551 }, { "epoch": 0.7747428166016318, "grad_norm": 0.12942427396774292, "learning_rate": 1.2879601042529598e-05, "loss": 0.3429, "num_tokens": 4150052684.0, "step": 6552 }, { "epoch": 0.7748610618422609, "grad_norm": 0.13521602749824524, "learning_rate": 1.287270705789261e-05, "loss": 0.3409, "num_tokens": 4150688832.0, "step": 6553 }, { "epoch": 0.7749793070828899, "grad_norm": 0.11793643236160278, "learning_rate": 1.2865816025414223e-05, "loss": 0.2936, "num_tokens": 4151323159.0, "step": 6554 }, { "epoch": 0.775097552323519, "grad_norm": 0.13392284512519836, "learning_rate": 1.2858927946105165e-05, "loss": 0.2924, "num_tokens": 4151951624.0, "step": 6555 }, { "epoch": 0.775215797564148, "grad_norm": 0.12879620492458344, "learning_rate": 1.2852042820975745e-05, "loss": 0.3055, "num_tokens": 4152582652.0, "step": 6556 }, { "epoch": 0.7753340428047771, "grad_norm": 0.12826897203922272, "learning_rate": 1.284516065103583e-05, "loss": 0.3057, "num_tokens": 4153216083.0, "step": 6557 }, { "epoch": 0.7754522880454062, "grad_norm": 0.1376877725124359, "learning_rate": 1.2838281437294852e-05, "loss": 0.3032, "num_tokens": 4153849204.0, "step": 6558 }, { "epoch": 0.7755705332860352, "grad_norm": 0.12771490216255188, "learning_rate": 1.2831405180761848e-05, "loss": 0.3221, "num_tokens": 4154486331.0, "step": 6559 }, { "epoch": 0.7756887785266643, "grad_norm": 0.12381000816822052, "learning_rate": 1.2824531882445344e-05, "loss": 0.3207, "num_tokens": 4155119415.0, "step": 6560 }, { "epoch": 0.7758070237672934, "grad_norm": 0.13436369597911835, "learning_rate": 1.2817661543353507e-05, "loss": 0.3272, "num_tokens": 4155730084.0, "step": 6561 }, { "epoch": 0.7759252690079225, "grad_norm": 0.13786770403385162, "learning_rate": 1.2810794164494022e-05, "loss": 0.3253, "num_tokens": 4156365401.0, "step": 6562 }, { "epoch": 0.7760435142485514, "grad_norm": 0.1350763589143753, "learning_rate": 1.2803929746874178e-05, "loss": 0.3293, "num_tokens": 4156993327.0, "step": 6563 }, { "epoch": 0.7761617594891805, "grad_norm": 0.1325332075357437, "learning_rate": 1.2797068291500797e-05, "loss": 0.3147, "num_tokens": 4157632341.0, "step": 6564 }, { "epoch": 0.7762800047298096, "grad_norm": 0.1236170157790184, "learning_rate": 1.2790209799380269e-05, "loss": 0.3055, "num_tokens": 4158268689.0, "step": 6565 }, { "epoch": 0.7763982499704387, "grad_norm": 0.12852966785430908, "learning_rate": 1.2783354271518579e-05, "loss": 0.2952, "num_tokens": 4158905745.0, "step": 6566 }, { "epoch": 0.7765164952110678, "grad_norm": 0.134352445602417, "learning_rate": 1.277650170892125e-05, "loss": 0.3348, "num_tokens": 4159543045.0, "step": 6567 }, { "epoch": 0.7766347404516968, "grad_norm": 0.12936130166053772, "learning_rate": 1.2769652112593374e-05, "loss": 0.3123, "num_tokens": 4160173592.0, "step": 6568 }, { "epoch": 0.7767529856923259, "grad_norm": 0.1302395462989807, "learning_rate": 1.2762805483539606e-05, "loss": 0.3281, "num_tokens": 4160807881.0, "step": 6569 }, { "epoch": 0.776871230932955, "grad_norm": 0.1378278285264969, "learning_rate": 1.2755961822764189e-05, "loss": 0.3391, "num_tokens": 4161443968.0, "step": 6570 }, { "epoch": 0.776989476173584, "grad_norm": 0.12971808016300201, "learning_rate": 1.2749121131270891e-05, "loss": 0.2856, "num_tokens": 4162080991.0, "step": 6571 }, { "epoch": 0.777107721414213, "grad_norm": 0.1357884556055069, "learning_rate": 1.2742283410063087e-05, "loss": 0.3196, "num_tokens": 4162716694.0, "step": 6572 }, { "epoch": 0.7772259666548421, "grad_norm": 0.1429767906665802, "learning_rate": 1.2735448660143686e-05, "loss": 0.3311, "num_tokens": 4163353210.0, "step": 6573 }, { "epoch": 0.7773442118954712, "grad_norm": 0.12729798257350922, "learning_rate": 1.2728616882515163e-05, "loss": 0.3004, "num_tokens": 4163986382.0, "step": 6574 }, { "epoch": 0.7774624571361003, "grad_norm": 0.13208231329917908, "learning_rate": 1.2721788078179582e-05, "loss": 0.3238, "num_tokens": 4164620310.0, "step": 6575 }, { "epoch": 0.7775807023767294, "grad_norm": 0.12411850690841675, "learning_rate": 1.2714962248138541e-05, "loss": 0.2794, "num_tokens": 4165225969.0, "step": 6576 }, { "epoch": 0.7776989476173584, "grad_norm": 0.12088257074356079, "learning_rate": 1.2708139393393222e-05, "loss": 0.3262, "num_tokens": 4165858468.0, "step": 6577 }, { "epoch": 0.7778171928579874, "grad_norm": 0.12698878347873688, "learning_rate": 1.2701319514944351e-05, "loss": 0.3011, "num_tokens": 4166492153.0, "step": 6578 }, { "epoch": 0.7779354380986165, "grad_norm": 0.13170303404331207, "learning_rate": 1.2694502613792238e-05, "loss": 0.3582, "num_tokens": 4167129070.0, "step": 6579 }, { "epoch": 0.7780536833392456, "grad_norm": 0.13701705634593964, "learning_rate": 1.2687688690936757e-05, "loss": 0.3266, "num_tokens": 4167762823.0, "step": 6580 }, { "epoch": 0.7781719285798746, "grad_norm": 0.13320361077785492, "learning_rate": 1.2680877747377323e-05, "loss": 0.3296, "num_tokens": 4168393382.0, "step": 6581 }, { "epoch": 0.7782901738205037, "grad_norm": 0.13682259619235992, "learning_rate": 1.2674069784112932e-05, "loss": 0.3075, "num_tokens": 4169025698.0, "step": 6582 }, { "epoch": 0.7784084190611328, "grad_norm": 0.1349308341741562, "learning_rate": 1.266726480214213e-05, "loss": 0.364, "num_tokens": 4169655310.0, "step": 6583 }, { "epoch": 0.7785266643017619, "grad_norm": 0.13383999466896057, "learning_rate": 1.266046280246305e-05, "loss": 0.3162, "num_tokens": 4170287535.0, "step": 6584 }, { "epoch": 0.778644909542391, "grad_norm": 0.13930389285087585, "learning_rate": 1.2653663786073361e-05, "loss": 0.3947, "num_tokens": 4170924863.0, "step": 6585 }, { "epoch": 0.7787631547830199, "grad_norm": 0.13718664646148682, "learning_rate": 1.2646867753970295e-05, "loss": 0.3361, "num_tokens": 4171560959.0, "step": 6586 }, { "epoch": 0.778881400023649, "grad_norm": 0.12550300359725952, "learning_rate": 1.2640074707150675e-05, "loss": 0.31, "num_tokens": 4172192982.0, "step": 6587 }, { "epoch": 0.7789996452642781, "grad_norm": 0.12574522197246552, "learning_rate": 1.2633284646610853e-05, "loss": 0.3408, "num_tokens": 4172806927.0, "step": 6588 }, { "epoch": 0.7791178905049072, "grad_norm": 0.1262744814157486, "learning_rate": 1.2626497573346778e-05, "loss": 0.3145, "num_tokens": 4173440528.0, "step": 6589 }, { "epoch": 0.7792361357455363, "grad_norm": 0.13033445179462433, "learning_rate": 1.2619713488353904e-05, "loss": 0.3481, "num_tokens": 4174074007.0, "step": 6590 }, { "epoch": 0.7793543809861653, "grad_norm": 0.13193373382091522, "learning_rate": 1.2612932392627317e-05, "loss": 0.3343, "num_tokens": 4174713741.0, "step": 6591 }, { "epoch": 0.7794726262267944, "grad_norm": 0.13726097345352173, "learning_rate": 1.2606154287161605e-05, "loss": 0.3312, "num_tokens": 4175342630.0, "step": 6592 }, { "epoch": 0.7795908714674235, "grad_norm": 0.12534412741661072, "learning_rate": 1.2599379172950959e-05, "loss": 0.2891, "num_tokens": 4175974484.0, "step": 6593 }, { "epoch": 0.7797091167080525, "grad_norm": 0.1277114301919937, "learning_rate": 1.2592607050989113e-05, "loss": 0.3506, "num_tokens": 4176609032.0, "step": 6594 }, { "epoch": 0.7798273619486815, "grad_norm": 0.13964439928531647, "learning_rate": 1.2585837922269353e-05, "loss": 0.3422, "num_tokens": 4177248388.0, "step": 6595 }, { "epoch": 0.7799456071893106, "grad_norm": 0.12254644930362701, "learning_rate": 1.2579071787784548e-05, "loss": 0.3111, "num_tokens": 4177879555.0, "step": 6596 }, { "epoch": 0.7800638524299397, "grad_norm": 0.13458816707134247, "learning_rate": 1.2572308648527114e-05, "loss": 0.3204, "num_tokens": 4178514133.0, "step": 6597 }, { "epoch": 0.7801820976705688, "grad_norm": 0.12728287279605865, "learning_rate": 1.256554850548903e-05, "loss": 0.3157, "num_tokens": 4179148572.0, "step": 6598 }, { "epoch": 0.7803003429111979, "grad_norm": 0.12975001335144043, "learning_rate": 1.2558791359661832e-05, "loss": 0.3331, "num_tokens": 4179782937.0, "step": 6599 }, { "epoch": 0.7804185881518269, "grad_norm": 0.13789628446102142, "learning_rate": 1.2552037212036618e-05, "loss": 0.3719, "num_tokens": 4180419620.0, "step": 6600 }, { "epoch": 0.7805368333924559, "grad_norm": 0.12502458691596985, "learning_rate": 1.2545286063604061e-05, "loss": 0.3193, "num_tokens": 4181056326.0, "step": 6601 }, { "epoch": 0.780655078633085, "grad_norm": 0.1280224770307541, "learning_rate": 1.2538537915354377e-05, "loss": 0.3245, "num_tokens": 4181691989.0, "step": 6602 }, { "epoch": 0.7807733238737141, "grad_norm": 0.12822864949703217, "learning_rate": 1.2531792768277347e-05, "loss": 0.3127, "num_tokens": 4182322198.0, "step": 6603 }, { "epoch": 0.7808915691143431, "grad_norm": 0.13187287747859955, "learning_rate": 1.2525050623362294e-05, "loss": 0.3446, "num_tokens": 4182958041.0, "step": 6604 }, { "epoch": 0.7810098143549722, "grad_norm": 0.13443109393119812, "learning_rate": 1.2518311481598148e-05, "loss": 0.3807, "num_tokens": 4183592709.0, "step": 6605 }, { "epoch": 0.7811280595956013, "grad_norm": 0.11832693964242935, "learning_rate": 1.2511575343973348e-05, "loss": 0.2722, "num_tokens": 4184230491.0, "step": 6606 }, { "epoch": 0.7812463048362304, "grad_norm": 0.13518041372299194, "learning_rate": 1.2504842211475911e-05, "loss": 0.3726, "num_tokens": 4184867347.0, "step": 6607 }, { "epoch": 0.7813645500768595, "grad_norm": 0.13651204109191895, "learning_rate": 1.2498112085093432e-05, "loss": 0.3472, "num_tokens": 4185505848.0, "step": 6608 }, { "epoch": 0.7814827953174884, "grad_norm": 0.13595373928546906, "learning_rate": 1.2491384965813031e-05, "loss": 0.3295, "num_tokens": 4186137944.0, "step": 6609 }, { "epoch": 0.7816010405581175, "grad_norm": 0.12474144995212555, "learning_rate": 1.2484660854621424e-05, "loss": 0.3278, "num_tokens": 4186767214.0, "step": 6610 }, { "epoch": 0.7817192857987466, "grad_norm": 0.1370088756084442, "learning_rate": 1.247793975250484e-05, "loss": 0.3253, "num_tokens": 4187402957.0, "step": 6611 }, { "epoch": 0.7818375310393757, "grad_norm": 0.1365833729505539, "learning_rate": 1.2471221660449115e-05, "loss": 0.3512, "num_tokens": 4188039006.0, "step": 6612 }, { "epoch": 0.7819557762800047, "grad_norm": 0.12678934633731842, "learning_rate": 1.2464506579439603e-05, "loss": 0.3237, "num_tokens": 4188674073.0, "step": 6613 }, { "epoch": 0.7820740215206338, "grad_norm": 0.13371823728084564, "learning_rate": 1.2457794510461256e-05, "loss": 0.3428, "num_tokens": 4189310836.0, "step": 6614 }, { "epoch": 0.7821922667612629, "grad_norm": 0.13772156834602356, "learning_rate": 1.245108545449855e-05, "loss": 0.3359, "num_tokens": 4189950386.0, "step": 6615 }, { "epoch": 0.782310512001892, "grad_norm": 0.15061122179031372, "learning_rate": 1.2444379412535518e-05, "loss": 0.4275, "num_tokens": 4190556573.0, "step": 6616 }, { "epoch": 0.782428757242521, "grad_norm": 0.12954388558864594, "learning_rate": 1.2437676385555791e-05, "loss": 0.3453, "num_tokens": 4191192119.0, "step": 6617 }, { "epoch": 0.78254700248315, "grad_norm": 0.11874725669622421, "learning_rate": 1.2430976374542514e-05, "loss": 0.3239, "num_tokens": 4191829716.0, "step": 6618 }, { "epoch": 0.7826652477237791, "grad_norm": 0.1369546353816986, "learning_rate": 1.2424279380478424e-05, "loss": 0.3312, "num_tokens": 4192468884.0, "step": 6619 }, { "epoch": 0.7827834929644082, "grad_norm": 0.13208581507205963, "learning_rate": 1.2417585404345774e-05, "loss": 0.3388, "num_tokens": 4193103930.0, "step": 6620 }, { "epoch": 0.7829017382050373, "grad_norm": 0.13341876864433289, "learning_rate": 1.2410894447126411e-05, "loss": 0.3322, "num_tokens": 4193730657.0, "step": 6621 }, { "epoch": 0.7830199834456663, "grad_norm": 0.137414813041687, "learning_rate": 1.2404206509801735e-05, "loss": 0.3397, "num_tokens": 4194365850.0, "step": 6622 }, { "epoch": 0.7831382286862953, "grad_norm": 0.13275554776191711, "learning_rate": 1.2397521593352692e-05, "loss": 0.363, "num_tokens": 4194997859.0, "step": 6623 }, { "epoch": 0.7832564739269244, "grad_norm": 0.14042964577674866, "learning_rate": 1.2390839698759784e-05, "loss": 0.3253, "num_tokens": 4195628612.0, "step": 6624 }, { "epoch": 0.7833747191675535, "grad_norm": 0.12221132218837738, "learning_rate": 1.2384160827003066e-05, "loss": 0.2797, "num_tokens": 4196254817.0, "step": 6625 }, { "epoch": 0.7834929644081826, "grad_norm": 0.1324293613433838, "learning_rate": 1.2377484979062173e-05, "loss": 0.3075, "num_tokens": 4196884626.0, "step": 6626 }, { "epoch": 0.7836112096488116, "grad_norm": 0.14059056341648102, "learning_rate": 1.2370812155916273e-05, "loss": 0.3593, "num_tokens": 4197520993.0, "step": 6627 }, { "epoch": 0.7837294548894407, "grad_norm": 0.1357766091823578, "learning_rate": 1.2364142358544092e-05, "loss": 0.3041, "num_tokens": 4198150336.0, "step": 6628 }, { "epoch": 0.7838477001300698, "grad_norm": 0.12363699078559875, "learning_rate": 1.235747558792393e-05, "loss": 0.3129, "num_tokens": 4198784665.0, "step": 6629 }, { "epoch": 0.7839659453706989, "grad_norm": 0.13997557759284973, "learning_rate": 1.2350811845033626e-05, "loss": 0.3126, "num_tokens": 4199394114.0, "step": 6630 }, { "epoch": 0.784084190611328, "grad_norm": 0.1346777379512787, "learning_rate": 1.2344151130850587e-05, "loss": 0.3357, "num_tokens": 4200033599.0, "step": 6631 }, { "epoch": 0.7842024358519569, "grad_norm": 0.12473587691783905, "learning_rate": 1.2337493446351763e-05, "loss": 0.3333, "num_tokens": 4200666151.0, "step": 6632 }, { "epoch": 0.784320681092586, "grad_norm": 0.12280646711587906, "learning_rate": 1.2330838792513668e-05, "loss": 0.3237, "num_tokens": 4201302622.0, "step": 6633 }, { "epoch": 0.7844389263332151, "grad_norm": 0.1328110694885254, "learning_rate": 1.2324187170312358e-05, "loss": 0.3114, "num_tokens": 4201937592.0, "step": 6634 }, { "epoch": 0.7845571715738442, "grad_norm": 0.14145715534687042, "learning_rate": 1.2317538580723477e-05, "loss": 0.3429, "num_tokens": 4202568582.0, "step": 6635 }, { "epoch": 0.7846754168144732, "grad_norm": 0.12837430834770203, "learning_rate": 1.231089302472219e-05, "loss": 0.3161, "num_tokens": 4203200487.0, "step": 6636 }, { "epoch": 0.7847936620551023, "grad_norm": 0.125211700797081, "learning_rate": 1.2304250503283225e-05, "loss": 0.3187, "num_tokens": 4203831728.0, "step": 6637 }, { "epoch": 0.7849119072957313, "grad_norm": 0.13689924776554108, "learning_rate": 1.2297611017380883e-05, "loss": 0.3126, "num_tokens": 4204467688.0, "step": 6638 }, { "epoch": 0.7850301525363604, "grad_norm": 0.14216765761375427, "learning_rate": 1.2290974567988993e-05, "loss": 0.3634, "num_tokens": 4205092425.0, "step": 6639 }, { "epoch": 0.7851483977769895, "grad_norm": 0.1324181705713272, "learning_rate": 1.2284341156080975e-05, "loss": 0.3341, "num_tokens": 4205729817.0, "step": 6640 }, { "epoch": 0.7852666430176185, "grad_norm": 0.13135167956352234, "learning_rate": 1.2277710782629751e-05, "loss": 0.3597, "num_tokens": 4206360496.0, "step": 6641 }, { "epoch": 0.7853848882582476, "grad_norm": 0.13654138147830963, "learning_rate": 1.2271083448607848e-05, "loss": 0.3566, "num_tokens": 4206997953.0, "step": 6642 }, { "epoch": 0.7855031334988767, "grad_norm": 0.12891986966133118, "learning_rate": 1.2264459154987309e-05, "loss": 0.3076, "num_tokens": 4207634315.0, "step": 6643 }, { "epoch": 0.7856213787395058, "grad_norm": 0.12963853776454926, "learning_rate": 1.2257837902739771e-05, "loss": 0.323, "num_tokens": 4208269454.0, "step": 6644 }, { "epoch": 0.7857396239801347, "grad_norm": 0.12281721085309982, "learning_rate": 1.2251219692836387e-05, "loss": 0.3443, "num_tokens": 4208906534.0, "step": 6645 }, { "epoch": 0.7858578692207638, "grad_norm": 0.12218178063631058, "learning_rate": 1.2244604526247874e-05, "loss": 0.3217, "num_tokens": 4209545361.0, "step": 6646 }, { "epoch": 0.7859761144613929, "grad_norm": 0.12039627134799957, "learning_rate": 1.2237992403944526e-05, "loss": 0.2949, "num_tokens": 4210184605.0, "step": 6647 }, { "epoch": 0.786094359702022, "grad_norm": 0.14068880677223206, "learning_rate": 1.223138332689615e-05, "loss": 0.3459, "num_tokens": 4210819108.0, "step": 6648 }, { "epoch": 0.7862126049426511, "grad_norm": 0.1349094957113266, "learning_rate": 1.2224777296072158e-05, "loss": 0.3506, "num_tokens": 4211453992.0, "step": 6649 }, { "epoch": 0.7863308501832801, "grad_norm": 0.12969303131103516, "learning_rate": 1.2218174312441452e-05, "loss": 0.2808, "num_tokens": 4212082172.0, "step": 6650 }, { "epoch": 0.7864490954239092, "grad_norm": 0.13625085353851318, "learning_rate": 1.2211574376972532e-05, "loss": 0.364, "num_tokens": 4212717907.0, "step": 6651 }, { "epoch": 0.7865673406645383, "grad_norm": 0.1349954754114151, "learning_rate": 1.2204977490633456e-05, "loss": 0.3024, "num_tokens": 4213355661.0, "step": 6652 }, { "epoch": 0.7866855859051674, "grad_norm": 0.1442887783050537, "learning_rate": 1.2198383654391804e-05, "loss": 0.336, "num_tokens": 4213983432.0, "step": 6653 }, { "epoch": 0.7868038311457963, "grad_norm": 0.1336517035961151, "learning_rate": 1.2191792869214726e-05, "loss": 0.3421, "num_tokens": 4214614981.0, "step": 6654 }, { "epoch": 0.7869220763864254, "grad_norm": 0.13299278914928436, "learning_rate": 1.2185205136068913e-05, "loss": 0.3279, "num_tokens": 4215251657.0, "step": 6655 }, { "epoch": 0.7870403216270545, "grad_norm": 0.12926867604255676, "learning_rate": 1.2178620455920629e-05, "loss": 0.3216, "num_tokens": 4215885564.0, "step": 6656 }, { "epoch": 0.7871585668676836, "grad_norm": 0.13137231767177582, "learning_rate": 1.2172038829735674e-05, "loss": 0.3275, "num_tokens": 4216522242.0, "step": 6657 }, { "epoch": 0.7872768121083127, "grad_norm": 0.1489289253950119, "learning_rate": 1.2165460258479398e-05, "loss": 0.3622, "num_tokens": 4217149332.0, "step": 6658 }, { "epoch": 0.7873950573489417, "grad_norm": 0.12757886946201324, "learning_rate": 1.2158884743116722e-05, "loss": 0.3395, "num_tokens": 4217782023.0, "step": 6659 }, { "epoch": 0.7875133025895708, "grad_norm": 0.12925079464912415, "learning_rate": 1.2152312284612091e-05, "loss": 0.3566, "num_tokens": 4218420167.0, "step": 6660 }, { "epoch": 0.7876315478301998, "grad_norm": 0.1311197280883789, "learning_rate": 1.2145742883929527e-05, "loss": 0.3392, "num_tokens": 4219049520.0, "step": 6661 }, { "epoch": 0.7877497930708289, "grad_norm": 0.13208074867725372, "learning_rate": 1.2139176542032592e-05, "loss": 0.3408, "num_tokens": 4219685724.0, "step": 6662 }, { "epoch": 0.787868038311458, "grad_norm": 0.12992621958255768, "learning_rate": 1.2132613259884398e-05, "loss": 0.3471, "num_tokens": 4220325492.0, "step": 6663 }, { "epoch": 0.787986283552087, "grad_norm": 0.12366601824760437, "learning_rate": 1.21260530384476e-05, "loss": 0.3063, "num_tokens": 4220956160.0, "step": 6664 }, { "epoch": 0.7881045287927161, "grad_norm": 0.12272733449935913, "learning_rate": 1.2119495878684435e-05, "loss": 0.2902, "num_tokens": 4221595482.0, "step": 6665 }, { "epoch": 0.7882227740333452, "grad_norm": 0.12762926518917084, "learning_rate": 1.2112941781556663e-05, "loss": 0.3309, "num_tokens": 4222230229.0, "step": 6666 }, { "epoch": 0.7883410192739743, "grad_norm": 0.12239900976419449, "learning_rate": 1.2106390748025586e-05, "loss": 0.2983, "num_tokens": 4222862635.0, "step": 6667 }, { "epoch": 0.7884592645146032, "grad_norm": 0.12676723301410675, "learning_rate": 1.2099842779052096e-05, "loss": 0.3041, "num_tokens": 4223494400.0, "step": 6668 }, { "epoch": 0.7885775097552323, "grad_norm": 0.12831851840019226, "learning_rate": 1.2093297875596592e-05, "loss": 0.2996, "num_tokens": 4224127836.0, "step": 6669 }, { "epoch": 0.7886957549958614, "grad_norm": 0.14281047880649567, "learning_rate": 1.2086756038619075e-05, "loss": 0.3174, "num_tokens": 4224762872.0, "step": 6670 }, { "epoch": 0.7888140002364905, "grad_norm": 0.12541504204273224, "learning_rate": 1.2080217269079026e-05, "loss": 0.3046, "num_tokens": 4225395949.0, "step": 6671 }, { "epoch": 0.7889322454771196, "grad_norm": 0.13400118052959442, "learning_rate": 1.2073681567935536e-05, "loss": 0.3066, "num_tokens": 4226031441.0, "step": 6672 }, { "epoch": 0.7890504907177486, "grad_norm": 0.1298825442790985, "learning_rate": 1.2067148936147225e-05, "loss": 0.3179, "num_tokens": 4226665682.0, "step": 6673 }, { "epoch": 0.7891687359583777, "grad_norm": 0.12998205423355103, "learning_rate": 1.2060619374672264e-05, "loss": 0.2838, "num_tokens": 4227296693.0, "step": 6674 }, { "epoch": 0.7892869811990068, "grad_norm": 0.12419440597295761, "learning_rate": 1.2054092884468368e-05, "loss": 0.3163, "num_tokens": 4227935774.0, "step": 6675 }, { "epoch": 0.7894052264396358, "grad_norm": 0.125247985124588, "learning_rate": 1.2047569466492801e-05, "loss": 0.3213, "num_tokens": 4228572418.0, "step": 6676 }, { "epoch": 0.7895234716802648, "grad_norm": 0.12730289995670319, "learning_rate": 1.2041049121702395e-05, "loss": 0.3009, "num_tokens": 4229199807.0, "step": 6677 }, { "epoch": 0.7896417169208939, "grad_norm": 0.13793793320655823, "learning_rate": 1.2034531851053509e-05, "loss": 0.303, "num_tokens": 4229833698.0, "step": 6678 }, { "epoch": 0.789759962161523, "grad_norm": 0.12345176935195923, "learning_rate": 1.2028017655502051e-05, "loss": 0.2845, "num_tokens": 4230462871.0, "step": 6679 }, { "epoch": 0.7898782074021521, "grad_norm": 0.11980018764734268, "learning_rate": 1.202150653600351e-05, "loss": 0.2783, "num_tokens": 4231071014.0, "step": 6680 }, { "epoch": 0.7899964526427812, "grad_norm": 0.12897971272468567, "learning_rate": 1.2014998493512873e-05, "loss": 0.3454, "num_tokens": 4231705385.0, "step": 6681 }, { "epoch": 0.7901146978834102, "grad_norm": 0.138078972697258, "learning_rate": 1.2008493528984726e-05, "loss": 0.3181, "num_tokens": 4232344674.0, "step": 6682 }, { "epoch": 0.7902329431240392, "grad_norm": 0.13283474743366241, "learning_rate": 1.2001991643373177e-05, "loss": 0.3251, "num_tokens": 4232954600.0, "step": 6683 }, { "epoch": 0.7903511883646683, "grad_norm": 0.13478408753871918, "learning_rate": 1.1995492837631876e-05, "loss": 0.3176, "num_tokens": 4233586342.0, "step": 6684 }, { "epoch": 0.7904694336052974, "grad_norm": 0.1265101134777069, "learning_rate": 1.198899711271403e-05, "loss": 0.304, "num_tokens": 4234219743.0, "step": 6685 }, { "epoch": 0.7905876788459264, "grad_norm": 0.11715853959321976, "learning_rate": 1.1982504469572405e-05, "loss": 0.2928, "num_tokens": 4234849904.0, "step": 6686 }, { "epoch": 0.7907059240865555, "grad_norm": 0.13857711851596832, "learning_rate": 1.1976014909159314e-05, "loss": 0.3274, "num_tokens": 4235455899.0, "step": 6687 }, { "epoch": 0.7908241693271846, "grad_norm": 0.14449654519557953, "learning_rate": 1.1969528432426587e-05, "loss": 0.329, "num_tokens": 4236089798.0, "step": 6688 }, { "epoch": 0.7909424145678137, "grad_norm": 0.13133209943771362, "learning_rate": 1.1963045040325638e-05, "loss": 0.3593, "num_tokens": 4236725420.0, "step": 6689 }, { "epoch": 0.7910606598084428, "grad_norm": 0.128089040517807, "learning_rate": 1.1956564733807407e-05, "loss": 0.3177, "num_tokens": 4237358719.0, "step": 6690 }, { "epoch": 0.7911789050490717, "grad_norm": 0.11863856017589569, "learning_rate": 1.1950087513822399e-05, "loss": 0.314, "num_tokens": 4237994763.0, "step": 6691 }, { "epoch": 0.7912971502897008, "grad_norm": 0.12749670445919037, "learning_rate": 1.1943613381320654e-05, "loss": 0.3288, "num_tokens": 4238629527.0, "step": 6692 }, { "epoch": 0.7914153955303299, "grad_norm": 0.1438407152891159, "learning_rate": 1.1937142337251751e-05, "loss": 0.3793, "num_tokens": 4239262799.0, "step": 6693 }, { "epoch": 0.791533640770959, "grad_norm": 0.1268959939479828, "learning_rate": 1.1930674382564841e-05, "loss": 0.3251, "num_tokens": 4239895663.0, "step": 6694 }, { "epoch": 0.791651886011588, "grad_norm": 0.13384437561035156, "learning_rate": 1.1924209518208599e-05, "loss": 0.3354, "num_tokens": 4240530404.0, "step": 6695 }, { "epoch": 0.7917701312522171, "grad_norm": 0.1265680491924286, "learning_rate": 1.191774774513126e-05, "loss": 0.3503, "num_tokens": 4241169996.0, "step": 6696 }, { "epoch": 0.7918883764928462, "grad_norm": 0.14053857326507568, "learning_rate": 1.1911289064280586e-05, "loss": 0.3368, "num_tokens": 4241797886.0, "step": 6697 }, { "epoch": 0.7920066217334752, "grad_norm": 0.1445091813802719, "learning_rate": 1.1904833476603922e-05, "loss": 0.353, "num_tokens": 4242435515.0, "step": 6698 }, { "epoch": 0.7921248669741043, "grad_norm": 0.13693879544734955, "learning_rate": 1.1898380983048115e-05, "loss": 0.3393, "num_tokens": 4243067439.0, "step": 6699 }, { "epoch": 0.7922431122147333, "grad_norm": 0.12296474725008011, "learning_rate": 1.1891931584559604e-05, "loss": 0.3008, "num_tokens": 4243702931.0, "step": 6700 }, { "epoch": 0.7923613574553624, "grad_norm": 0.12820537388324738, "learning_rate": 1.1885485282084337e-05, "loss": 0.2976, "num_tokens": 4244328704.0, "step": 6701 }, { "epoch": 0.7924796026959915, "grad_norm": 0.1410311460494995, "learning_rate": 1.1879042076567817e-05, "loss": 0.3553, "num_tokens": 4244967861.0, "step": 6702 }, { "epoch": 0.7925978479366206, "grad_norm": 0.12284179031848907, "learning_rate": 1.1872601968955112e-05, "loss": 0.2936, "num_tokens": 4245601697.0, "step": 6703 }, { "epoch": 0.7927160931772497, "grad_norm": 0.13498979806900024, "learning_rate": 1.1866164960190813e-05, "loss": 0.345, "num_tokens": 4246231177.0, "step": 6704 }, { "epoch": 0.7928343384178786, "grad_norm": 0.13380733132362366, "learning_rate": 1.1859731051219063e-05, "loss": 0.3198, "num_tokens": 4246868305.0, "step": 6705 }, { "epoch": 0.7929525836585077, "grad_norm": 0.12958002090454102, "learning_rate": 1.1853300242983546e-05, "loss": 0.3027, "num_tokens": 4247506215.0, "step": 6706 }, { "epoch": 0.7930708288991368, "grad_norm": 0.14349056780338287, "learning_rate": 1.1846872536427504e-05, "loss": 0.3477, "num_tokens": 4248140439.0, "step": 6707 }, { "epoch": 0.7931890741397659, "grad_norm": 0.13324420154094696, "learning_rate": 1.1840447932493731e-05, "loss": 0.3262, "num_tokens": 4248778659.0, "step": 6708 }, { "epoch": 0.7933073193803949, "grad_norm": 0.12882651388645172, "learning_rate": 1.1834026432124526e-05, "loss": 0.2905, "num_tokens": 4249412944.0, "step": 6709 }, { "epoch": 0.793425564621024, "grad_norm": 0.14018158614635468, "learning_rate": 1.1827608036261776e-05, "loss": 0.3367, "num_tokens": 4250049635.0, "step": 6710 }, { "epoch": 0.7935438098616531, "grad_norm": 0.15278246998786926, "learning_rate": 1.1821192745846884e-05, "loss": 0.3575, "num_tokens": 4250680592.0, "step": 6711 }, { "epoch": 0.7936620551022822, "grad_norm": 0.13466882705688477, "learning_rate": 1.1814780561820817e-05, "loss": 0.3357, "num_tokens": 4251317643.0, "step": 6712 }, { "epoch": 0.7937803003429112, "grad_norm": 0.11932344734668732, "learning_rate": 1.1808371485124081e-05, "loss": 0.2996, "num_tokens": 4251940575.0, "step": 6713 }, { "epoch": 0.7938985455835402, "grad_norm": 0.11888457089662552, "learning_rate": 1.1801965516696717e-05, "loss": 0.3111, "num_tokens": 4252580227.0, "step": 6714 }, { "epoch": 0.7940167908241693, "grad_norm": 0.12513116002082825, "learning_rate": 1.1795562657478312e-05, "loss": 0.3418, "num_tokens": 4253216129.0, "step": 6715 }, { "epoch": 0.7941350360647984, "grad_norm": 0.13809475302696228, "learning_rate": 1.1789162908408004e-05, "loss": 0.3517, "num_tokens": 4253848061.0, "step": 6716 }, { "epoch": 0.7942532813054275, "grad_norm": 0.13425400853157043, "learning_rate": 1.1782766270424495e-05, "loss": 0.2883, "num_tokens": 4254475796.0, "step": 6717 }, { "epoch": 0.7943715265460565, "grad_norm": 0.12420428544282913, "learning_rate": 1.1776372744465971e-05, "loss": 0.2993, "num_tokens": 4255109299.0, "step": 6718 }, { "epoch": 0.7944897717866856, "grad_norm": 0.1295495331287384, "learning_rate": 1.1769982331470226e-05, "loss": 0.3144, "num_tokens": 4255737642.0, "step": 6719 }, { "epoch": 0.7946080170273147, "grad_norm": 0.14244148135185242, "learning_rate": 1.1763595032374551e-05, "loss": 0.3394, "num_tokens": 4256366464.0, "step": 6720 }, { "epoch": 0.7947262622679437, "grad_norm": 0.12949158251285553, "learning_rate": 1.1757210848115818e-05, "loss": 0.3244, "num_tokens": 4256993981.0, "step": 6721 }, { "epoch": 0.7948445075085728, "grad_norm": 0.14394862949848175, "learning_rate": 1.1750829779630416e-05, "loss": 0.3388, "num_tokens": 4257631116.0, "step": 6722 }, { "epoch": 0.7949627527492018, "grad_norm": 0.1196734756231308, "learning_rate": 1.1744451827854271e-05, "loss": 0.3005, "num_tokens": 4258262016.0, "step": 6723 }, { "epoch": 0.7950809979898309, "grad_norm": 0.13311626017093658, "learning_rate": 1.1738076993722887e-05, "loss": 0.338, "num_tokens": 4258895681.0, "step": 6724 }, { "epoch": 0.79519924323046, "grad_norm": 0.11979721486568451, "learning_rate": 1.1731705278171275e-05, "loss": 0.2859, "num_tokens": 4259532168.0, "step": 6725 }, { "epoch": 0.7953174884710891, "grad_norm": 0.13220149278640747, "learning_rate": 1.172533668213401e-05, "loss": 0.3239, "num_tokens": 4260168784.0, "step": 6726 }, { "epoch": 0.795435733711718, "grad_norm": 0.13634072244167328, "learning_rate": 1.171897120654519e-05, "loss": 0.3251, "num_tokens": 4260806288.0, "step": 6727 }, { "epoch": 0.7955539789523471, "grad_norm": 0.11889393627643585, "learning_rate": 1.1712608852338483e-05, "loss": 0.3272, "num_tokens": 4261441684.0, "step": 6728 }, { "epoch": 0.7956722241929762, "grad_norm": 0.12919805943965912, "learning_rate": 1.1706249620447067e-05, "loss": 0.3145, "num_tokens": 4262077091.0, "step": 6729 }, { "epoch": 0.7957904694336053, "grad_norm": 0.1344483345746994, "learning_rate": 1.1699893511803697e-05, "loss": 0.3394, "num_tokens": 4262710514.0, "step": 6730 }, { "epoch": 0.7959087146742344, "grad_norm": 0.13782218098640442, "learning_rate": 1.1693540527340641e-05, "loss": 0.3129, "num_tokens": 4263348211.0, "step": 6731 }, { "epoch": 0.7960269599148634, "grad_norm": 0.13229118287563324, "learning_rate": 1.1687190667989716e-05, "loss": 0.3308, "num_tokens": 4263987753.0, "step": 6732 }, { "epoch": 0.7961452051554925, "grad_norm": 0.13276167213916779, "learning_rate": 1.168084393468229e-05, "loss": 0.3088, "num_tokens": 4264618372.0, "step": 6733 }, { "epoch": 0.7962634503961216, "grad_norm": 0.13791856169700623, "learning_rate": 1.1674500328349267e-05, "loss": 0.3532, "num_tokens": 4265255543.0, "step": 6734 }, { "epoch": 0.7963816956367507, "grad_norm": 0.1375613808631897, "learning_rate": 1.1668159849921091e-05, "loss": 0.3262, "num_tokens": 4265885542.0, "step": 6735 }, { "epoch": 0.7964999408773796, "grad_norm": 0.1188935711979866, "learning_rate": 1.1661822500327735e-05, "loss": 0.3442, "num_tokens": 4266521822.0, "step": 6736 }, { "epoch": 0.7966181861180087, "grad_norm": 0.12968024611473083, "learning_rate": 1.1655488280498736e-05, "loss": 0.3431, "num_tokens": 4267158737.0, "step": 6737 }, { "epoch": 0.7967364313586378, "grad_norm": 0.13209010660648346, "learning_rate": 1.1649157191363179e-05, "loss": 0.3255, "num_tokens": 4267791774.0, "step": 6738 }, { "epoch": 0.7968546765992669, "grad_norm": 0.1251404583454132, "learning_rate": 1.1642829233849643e-05, "loss": 0.2665, "num_tokens": 4268427757.0, "step": 6739 }, { "epoch": 0.796972921839896, "grad_norm": 0.13582515716552734, "learning_rate": 1.1636504408886297e-05, "loss": 0.3186, "num_tokens": 4269052883.0, "step": 6740 }, { "epoch": 0.797091167080525, "grad_norm": 0.12943227589130402, "learning_rate": 1.1630182717400818e-05, "loss": 0.3193, "num_tokens": 4269690920.0, "step": 6741 }, { "epoch": 0.797209412321154, "grad_norm": 0.14120297133922577, "learning_rate": 1.1623864160320449e-05, "loss": 0.3861, "num_tokens": 4270330404.0, "step": 6742 }, { "epoch": 0.7973276575617831, "grad_norm": 0.12900996208190918, "learning_rate": 1.1617548738571955e-05, "loss": 0.3406, "num_tokens": 4270963180.0, "step": 6743 }, { "epoch": 0.7974459028024122, "grad_norm": 0.1217140182852745, "learning_rate": 1.1611236453081639e-05, "loss": 0.3283, "num_tokens": 4271595487.0, "step": 6744 }, { "epoch": 0.7975641480430413, "grad_norm": 0.12386002391576767, "learning_rate": 1.1604927304775365e-05, "loss": 0.3077, "num_tokens": 4272234471.0, "step": 6745 }, { "epoch": 0.7976823932836703, "grad_norm": 0.14184410870075226, "learning_rate": 1.1598621294578518e-05, "loss": 0.3491, "num_tokens": 4272873961.0, "step": 6746 }, { "epoch": 0.7978006385242994, "grad_norm": 0.1319027692079544, "learning_rate": 1.1592318423416027e-05, "loss": 0.353, "num_tokens": 4273506677.0, "step": 6747 }, { "epoch": 0.7979188837649285, "grad_norm": 0.12823180854320526, "learning_rate": 1.1586018692212358e-05, "loss": 0.3286, "num_tokens": 4274133427.0, "step": 6748 }, { "epoch": 0.7980371290055576, "grad_norm": 0.12162072211503983, "learning_rate": 1.1579722101891528e-05, "loss": 0.2834, "num_tokens": 4274765399.0, "step": 6749 }, { "epoch": 0.7981553742461865, "grad_norm": 0.13067074120044708, "learning_rate": 1.1573428653377077e-05, "loss": 0.337, "num_tokens": 4275399492.0, "step": 6750 }, { "epoch": 0.7982736194868156, "grad_norm": 0.13258789479732513, "learning_rate": 1.1567138347592107e-05, "loss": 0.3436, "num_tokens": 4276035133.0, "step": 6751 }, { "epoch": 0.7983918647274447, "grad_norm": 0.12288425862789154, "learning_rate": 1.1560851185459233e-05, "loss": 0.2811, "num_tokens": 4276662066.0, "step": 6752 }, { "epoch": 0.7985101099680738, "grad_norm": 0.12311696261167526, "learning_rate": 1.1554567167900615e-05, "loss": 0.2961, "num_tokens": 4277298663.0, "step": 6753 }, { "epoch": 0.7986283552087029, "grad_norm": 0.1340690702199936, "learning_rate": 1.1548286295837977e-05, "loss": 0.3503, "num_tokens": 4277933025.0, "step": 6754 }, { "epoch": 0.7987466004493319, "grad_norm": 0.13709501922130585, "learning_rate": 1.154200857019255e-05, "loss": 0.3378, "num_tokens": 4278560065.0, "step": 6755 }, { "epoch": 0.798864845689961, "grad_norm": 0.11939229816198349, "learning_rate": 1.1535733991885116e-05, "loss": 0.2891, "num_tokens": 4279163975.0, "step": 6756 }, { "epoch": 0.7989830909305901, "grad_norm": 0.12087669968605042, "learning_rate": 1.1529462561835991e-05, "loss": 0.3139, "num_tokens": 4279794066.0, "step": 6757 }, { "epoch": 0.7991013361712191, "grad_norm": 0.12726303935050964, "learning_rate": 1.1523194280965035e-05, "loss": 0.3092, "num_tokens": 4280426160.0, "step": 6758 }, { "epoch": 0.7992195814118481, "grad_norm": 0.12691663205623627, "learning_rate": 1.1516929150191658e-05, "loss": 0.3241, "num_tokens": 4281062441.0, "step": 6759 }, { "epoch": 0.7993378266524772, "grad_norm": 0.1300065815448761, "learning_rate": 1.1510667170434784e-05, "loss": 0.3107, "num_tokens": 4281701336.0, "step": 6760 }, { "epoch": 0.7994560718931063, "grad_norm": 0.12202689796686172, "learning_rate": 1.1504408342612882e-05, "loss": 0.3017, "num_tokens": 4282337842.0, "step": 6761 }, { "epoch": 0.7995743171337354, "grad_norm": 0.12771499156951904, "learning_rate": 1.1498152667643959e-05, "loss": 0.3, "num_tokens": 4282970685.0, "step": 6762 }, { "epoch": 0.7996925623743645, "grad_norm": 0.12563097476959229, "learning_rate": 1.1491900146445575e-05, "loss": 0.2991, "num_tokens": 4283610033.0, "step": 6763 }, { "epoch": 0.7998108076149935, "grad_norm": 0.12822340428829193, "learning_rate": 1.1485650779934808e-05, "loss": 0.3397, "num_tokens": 4284245399.0, "step": 6764 }, { "epoch": 0.7999290528556225, "grad_norm": 0.1307930052280426, "learning_rate": 1.1479404569028272e-05, "loss": 0.3314, "num_tokens": 4284884531.0, "step": 6765 }, { "epoch": 0.8000472980962516, "grad_norm": 0.13377854228019714, "learning_rate": 1.1473161514642144e-05, "loss": 0.3229, "num_tokens": 4285521355.0, "step": 6766 }, { "epoch": 0.8001655433368807, "grad_norm": 0.12605473399162292, "learning_rate": 1.1466921617692098e-05, "loss": 0.3287, "num_tokens": 4286154837.0, "step": 6767 }, { "epoch": 0.8002837885775097, "grad_norm": 0.1334364414215088, "learning_rate": 1.1460684879093397e-05, "loss": 0.3256, "num_tokens": 4286781925.0, "step": 6768 }, { "epoch": 0.8004020338181388, "grad_norm": 0.13179868459701538, "learning_rate": 1.145445129976078e-05, "loss": 0.337, "num_tokens": 4287414544.0, "step": 6769 }, { "epoch": 0.8005202790587679, "grad_norm": 0.12082202732563019, "learning_rate": 1.1448220880608572e-05, "loss": 0.3346, "num_tokens": 4288047747.0, "step": 6770 }, { "epoch": 0.800638524299397, "grad_norm": 0.13065968453884125, "learning_rate": 1.1441993622550604e-05, "loss": 0.2966, "num_tokens": 4288679987.0, "step": 6771 }, { "epoch": 0.8007567695400261, "grad_norm": 0.13685201108455658, "learning_rate": 1.1435769526500269e-05, "loss": 0.3124, "num_tokens": 4289316203.0, "step": 6772 }, { "epoch": 0.800875014780655, "grad_norm": 0.13627590239048004, "learning_rate": 1.1429548593370479e-05, "loss": 0.3427, "num_tokens": 4289945821.0, "step": 6773 }, { "epoch": 0.8009932600212841, "grad_norm": 0.12003438919782639, "learning_rate": 1.1423330824073674e-05, "loss": 0.3145, "num_tokens": 4290583989.0, "step": 6774 }, { "epoch": 0.8011115052619132, "grad_norm": 0.13457068800926208, "learning_rate": 1.1417116219521854e-05, "loss": 0.3367, "num_tokens": 4291222193.0, "step": 6775 }, { "epoch": 0.8012297505025423, "grad_norm": 0.16540616750717163, "learning_rate": 1.1410904780626544e-05, "loss": 0.3404, "num_tokens": 4291861614.0, "step": 6776 }, { "epoch": 0.8013479957431713, "grad_norm": 0.13471604883670807, "learning_rate": 1.1404696508298792e-05, "loss": 0.3397, "num_tokens": 4292495759.0, "step": 6777 }, { "epoch": 0.8014662409838004, "grad_norm": 0.1430043876171112, "learning_rate": 1.1398491403449192e-05, "loss": 0.3524, "num_tokens": 4293135120.0, "step": 6778 }, { "epoch": 0.8015844862244295, "grad_norm": 0.12264858931303024, "learning_rate": 1.139228946698788e-05, "loss": 0.2886, "num_tokens": 4293746128.0, "step": 6779 }, { "epoch": 0.8017027314650585, "grad_norm": 0.13857302069664001, "learning_rate": 1.1386090699824534e-05, "loss": 0.3348, "num_tokens": 4294384384.0, "step": 6780 }, { "epoch": 0.8018209767056876, "grad_norm": 0.5322644710540771, "learning_rate": 1.1379895102868335e-05, "loss": 0.3869, "num_tokens": 4294988338.0, "step": 6781 }, { "epoch": 0.8019392219463166, "grad_norm": 0.1570366472005844, "learning_rate": 1.1373702677028027e-05, "loss": 0.3512, "num_tokens": 4295621840.0, "step": 6782 }, { "epoch": 0.8020574671869457, "grad_norm": 0.14444446563720703, "learning_rate": 1.1367513423211874e-05, "loss": 0.3239, "num_tokens": 4296255870.0, "step": 6783 }, { "epoch": 0.8021757124275748, "grad_norm": 0.15278539061546326, "learning_rate": 1.136132734232769e-05, "loss": 0.3306, "num_tokens": 4296891996.0, "step": 6784 }, { "epoch": 0.8022939576682039, "grad_norm": 0.16689155995845795, "learning_rate": 1.135514443528281e-05, "loss": 0.3868, "num_tokens": 4297522577.0, "step": 6785 }, { "epoch": 0.802412202908833, "grad_norm": 0.13586340844631195, "learning_rate": 1.1348964702984104e-05, "loss": 0.3063, "num_tokens": 4298158973.0, "step": 6786 }, { "epoch": 0.802530448149462, "grad_norm": 0.14038440585136414, "learning_rate": 1.134278814633799e-05, "loss": 0.326, "num_tokens": 4298789261.0, "step": 6787 }, { "epoch": 0.802648693390091, "grad_norm": 0.12696698307991028, "learning_rate": 1.1336614766250398e-05, "loss": 0.3194, "num_tokens": 4299426113.0, "step": 6788 }, { "epoch": 0.8027669386307201, "grad_norm": 0.15314222872257233, "learning_rate": 1.1330444563626822e-05, "loss": 0.3567, "num_tokens": 4300059747.0, "step": 6789 }, { "epoch": 0.8028851838713492, "grad_norm": 0.13312454521656036, "learning_rate": 1.1324277539372264e-05, "loss": 0.3469, "num_tokens": 4300688230.0, "step": 6790 }, { "epoch": 0.8030034291119782, "grad_norm": 0.1352563053369522, "learning_rate": 1.1318113694391267e-05, "loss": 0.3292, "num_tokens": 4301325624.0, "step": 6791 }, { "epoch": 0.8031216743526073, "grad_norm": 0.1312970519065857, "learning_rate": 1.1311953029587902e-05, "loss": 0.3096, "num_tokens": 4301961460.0, "step": 6792 }, { "epoch": 0.8032399195932364, "grad_norm": 0.15305566787719727, "learning_rate": 1.1305795545865798e-05, "loss": 0.3255, "num_tokens": 4302600039.0, "step": 6793 }, { "epoch": 0.8033581648338655, "grad_norm": 0.13634087145328522, "learning_rate": 1.129964124412809e-05, "loss": 0.3092, "num_tokens": 4303232683.0, "step": 6794 }, { "epoch": 0.8034764100744946, "grad_norm": 0.12676748633384705, "learning_rate": 1.129349012527745e-05, "loss": 0.3118, "num_tokens": 4303866737.0, "step": 6795 }, { "epoch": 0.8035946553151235, "grad_norm": 0.13482049107551575, "learning_rate": 1.1287342190216106e-05, "loss": 0.3327, "num_tokens": 4304505592.0, "step": 6796 }, { "epoch": 0.8037129005557526, "grad_norm": 0.13460466265678406, "learning_rate": 1.1281197439845786e-05, "loss": 0.363, "num_tokens": 4305143425.0, "step": 6797 }, { "epoch": 0.8038311457963817, "grad_norm": 0.13617762923240662, "learning_rate": 1.127505587506779e-05, "loss": 0.3227, "num_tokens": 4305780205.0, "step": 6798 }, { "epoch": 0.8039493910370108, "grad_norm": 0.14505332708358765, "learning_rate": 1.1268917496782904e-05, "loss": 0.3597, "num_tokens": 4306413952.0, "step": 6799 }, { "epoch": 0.8040676362776398, "grad_norm": 0.13393472135066986, "learning_rate": 1.1262782305891482e-05, "loss": 0.3145, "num_tokens": 4307050531.0, "step": 6800 }, { "epoch": 0.8041858815182689, "grad_norm": 0.1354353278875351, "learning_rate": 1.1256650303293394e-05, "loss": 0.3727, "num_tokens": 4307684737.0, "step": 6801 }, { "epoch": 0.804304126758898, "grad_norm": 0.13390904664993286, "learning_rate": 1.1250521489888059e-05, "loss": 0.2989, "num_tokens": 4308318527.0, "step": 6802 }, { "epoch": 0.804422371999527, "grad_norm": 0.1283448189496994, "learning_rate": 1.1244395866574409e-05, "loss": 0.3509, "num_tokens": 4308957589.0, "step": 6803 }, { "epoch": 0.8045406172401561, "grad_norm": 0.13245630264282227, "learning_rate": 1.123827343425091e-05, "loss": 0.3041, "num_tokens": 4309596932.0, "step": 6804 }, { "epoch": 0.8046588624807851, "grad_norm": 0.13183650374412537, "learning_rate": 1.1232154193815582e-05, "loss": 0.356, "num_tokens": 4310232273.0, "step": 6805 }, { "epoch": 0.8047771077214142, "grad_norm": 0.12155691534280777, "learning_rate": 1.1226038146165953e-05, "loss": 0.3167, "num_tokens": 4310871668.0, "step": 6806 }, { "epoch": 0.8048953529620433, "grad_norm": 0.12470170110464096, "learning_rate": 1.1219925292199088e-05, "loss": 0.3236, "num_tokens": 4311501617.0, "step": 6807 }, { "epoch": 0.8050135982026724, "grad_norm": 0.12215010821819305, "learning_rate": 1.1213815632811581e-05, "loss": 0.3008, "num_tokens": 4312135045.0, "step": 6808 }, { "epoch": 0.8051318434433014, "grad_norm": 0.13781476020812988, "learning_rate": 1.120770916889957e-05, "loss": 0.3248, "num_tokens": 4312738452.0, "step": 6809 }, { "epoch": 0.8052500886839304, "grad_norm": 0.13332971930503845, "learning_rate": 1.1201605901358725e-05, "loss": 0.3204, "num_tokens": 4313375340.0, "step": 6810 }, { "epoch": 0.8053683339245595, "grad_norm": 0.12441720813512802, "learning_rate": 1.1195505831084228e-05, "loss": 0.3444, "num_tokens": 4314012975.0, "step": 6811 }, { "epoch": 0.8054865791651886, "grad_norm": 0.13373014330863953, "learning_rate": 1.1189408958970808e-05, "loss": 0.3278, "num_tokens": 4314652456.0, "step": 6812 }, { "epoch": 0.8056048244058177, "grad_norm": 0.11907578259706497, "learning_rate": 1.1183315285912713e-05, "loss": 0.2826, "num_tokens": 4315288371.0, "step": 6813 }, { "epoch": 0.8057230696464467, "grad_norm": 0.12480556219816208, "learning_rate": 1.1177224812803739e-05, "loss": 0.3119, "num_tokens": 4315927699.0, "step": 6814 }, { "epoch": 0.8058413148870758, "grad_norm": 0.13963443040847778, "learning_rate": 1.1171137540537197e-05, "loss": 0.3287, "num_tokens": 4316560418.0, "step": 6815 }, { "epoch": 0.8059595601277049, "grad_norm": 0.13323993980884552, "learning_rate": 1.1165053470005927e-05, "loss": 0.338, "num_tokens": 4317194973.0, "step": 6816 }, { "epoch": 0.806077805368334, "grad_norm": 0.1312451809644699, "learning_rate": 1.115897260210232e-05, "loss": 0.2947, "num_tokens": 4317834337.0, "step": 6817 }, { "epoch": 0.806196050608963, "grad_norm": 0.12980644404888153, "learning_rate": 1.115289493771827e-05, "loss": 0.2932, "num_tokens": 4318443414.0, "step": 6818 }, { "epoch": 0.806314295849592, "grad_norm": 0.125754252076149, "learning_rate": 1.1146820477745237e-05, "loss": 0.3256, "num_tokens": 4319080363.0, "step": 6819 }, { "epoch": 0.8064325410902211, "grad_norm": 0.14098496735095978, "learning_rate": 1.114074922307416e-05, "loss": 0.3409, "num_tokens": 4319714004.0, "step": 6820 }, { "epoch": 0.8065507863308502, "grad_norm": 0.12824328243732452, "learning_rate": 1.1134681174595556e-05, "loss": 0.3316, "num_tokens": 4320351875.0, "step": 6821 }, { "epoch": 0.8066690315714793, "grad_norm": 0.12480524927377701, "learning_rate": 1.112861633319944e-05, "loss": 0.3057, "num_tokens": 4320966097.0, "step": 6822 }, { "epoch": 0.8067872768121083, "grad_norm": 0.12011347711086273, "learning_rate": 1.1122554699775383e-05, "loss": 0.3393, "num_tokens": 4321602609.0, "step": 6823 }, { "epoch": 0.8069055220527374, "grad_norm": 0.1204599067568779, "learning_rate": 1.1116496275212467e-05, "loss": 0.287, "num_tokens": 4322226994.0, "step": 6824 }, { "epoch": 0.8070237672933664, "grad_norm": 0.12607181072235107, "learning_rate": 1.1110441060399295e-05, "loss": 0.3201, "num_tokens": 4322858458.0, "step": 6825 }, { "epoch": 0.8071420125339955, "grad_norm": 0.11706461012363434, "learning_rate": 1.1104389056224031e-05, "loss": 0.2948, "num_tokens": 4323497362.0, "step": 6826 }, { "epoch": 0.8072602577746246, "grad_norm": 0.13219009339809418, "learning_rate": 1.109834026357433e-05, "loss": 0.3374, "num_tokens": 4324133237.0, "step": 6827 }, { "epoch": 0.8073785030152536, "grad_norm": 0.1382984220981598, "learning_rate": 1.109229468333742e-05, "loss": 0.3292, "num_tokens": 4324770723.0, "step": 6828 }, { "epoch": 0.8074967482558827, "grad_norm": 0.12480422854423523, "learning_rate": 1.1086252316400006e-05, "loss": 0.3154, "num_tokens": 4325404948.0, "step": 6829 }, { "epoch": 0.8076149934965118, "grad_norm": 0.12917253375053406, "learning_rate": 1.1080213163648361e-05, "loss": 0.3117, "num_tokens": 4326040896.0, "step": 6830 }, { "epoch": 0.8077332387371409, "grad_norm": 0.12349945306777954, "learning_rate": 1.1074177225968277e-05, "loss": 0.2967, "num_tokens": 4326670778.0, "step": 6831 }, { "epoch": 0.8078514839777698, "grad_norm": 0.1269787698984146, "learning_rate": 1.1068144504245072e-05, "loss": 0.3215, "num_tokens": 4327310226.0, "step": 6832 }, { "epoch": 0.8079697292183989, "grad_norm": 0.1271209716796875, "learning_rate": 1.1062114999363587e-05, "loss": 0.3312, "num_tokens": 4327948760.0, "step": 6833 }, { "epoch": 0.808087974459028, "grad_norm": 0.13086101412773132, "learning_rate": 1.1056088712208188e-05, "loss": 0.3185, "num_tokens": 4328584922.0, "step": 6834 }, { "epoch": 0.8082062196996571, "grad_norm": 0.13608062267303467, "learning_rate": 1.1050065643662795e-05, "loss": 0.3275, "num_tokens": 4329221753.0, "step": 6835 }, { "epoch": 0.8083244649402862, "grad_norm": 0.13829180598258972, "learning_rate": 1.1044045794610826e-05, "loss": 0.3079, "num_tokens": 4329859919.0, "step": 6836 }, { "epoch": 0.8084427101809152, "grad_norm": 0.12535732984542847, "learning_rate": 1.1038029165935238e-05, "loss": 0.3281, "num_tokens": 4330496431.0, "step": 6837 }, { "epoch": 0.8085609554215443, "grad_norm": 0.129340261220932, "learning_rate": 1.1032015758518524e-05, "loss": 0.2928, "num_tokens": 4331133708.0, "step": 6838 }, { "epoch": 0.8086792006621734, "grad_norm": 0.13255037367343903, "learning_rate": 1.1026005573242686e-05, "loss": 0.3648, "num_tokens": 4331765402.0, "step": 6839 }, { "epoch": 0.8087974459028024, "grad_norm": 0.1298629343509674, "learning_rate": 1.101999861098928e-05, "loss": 0.3267, "num_tokens": 4332378101.0, "step": 6840 }, { "epoch": 0.8089156911434314, "grad_norm": 0.13153283298015594, "learning_rate": 1.1013994872639367e-05, "loss": 0.3196, "num_tokens": 4333014320.0, "step": 6841 }, { "epoch": 0.8090339363840605, "grad_norm": 0.13549868762493134, "learning_rate": 1.1007994359073535e-05, "loss": 0.3558, "num_tokens": 4333642114.0, "step": 6842 }, { "epoch": 0.8091521816246896, "grad_norm": 0.13596881926059723, "learning_rate": 1.1001997071171907e-05, "loss": 0.3327, "num_tokens": 4334248417.0, "step": 6843 }, { "epoch": 0.8092704268653187, "grad_norm": 0.1366199553012848, "learning_rate": 1.099600300981414e-05, "loss": 0.3563, "num_tokens": 4334881071.0, "step": 6844 }, { "epoch": 0.8093886721059478, "grad_norm": 0.1383516937494278, "learning_rate": 1.0990012175879406e-05, "loss": 0.3369, "num_tokens": 4335516662.0, "step": 6845 }, { "epoch": 0.8095069173465768, "grad_norm": 0.148209348320961, "learning_rate": 1.0984024570246398e-05, "loss": 0.3264, "num_tokens": 4336155487.0, "step": 6846 }, { "epoch": 0.8096251625872058, "grad_norm": 0.13350160419940948, "learning_rate": 1.0978040193793354e-05, "loss": 0.3498, "num_tokens": 4336790613.0, "step": 6847 }, { "epoch": 0.8097434078278349, "grad_norm": 0.15024593472480774, "learning_rate": 1.0972059047398025e-05, "loss": 0.3392, "num_tokens": 4337426918.0, "step": 6848 }, { "epoch": 0.809861653068464, "grad_norm": 0.12504619359970093, "learning_rate": 1.0966081131937707e-05, "loss": 0.34, "num_tokens": 4338065055.0, "step": 6849 }, { "epoch": 0.809979898309093, "grad_norm": 0.13454103469848633, "learning_rate": 1.0960106448289177e-05, "loss": 0.3217, "num_tokens": 4338700212.0, "step": 6850 }, { "epoch": 0.8100981435497221, "grad_norm": 0.13337554037570953, "learning_rate": 1.0954134997328787e-05, "loss": 0.3224, "num_tokens": 4339337159.0, "step": 6851 }, { "epoch": 0.8102163887903512, "grad_norm": 0.14232316613197327, "learning_rate": 1.09481667799324e-05, "loss": 0.3347, "num_tokens": 4339964152.0, "step": 6852 }, { "epoch": 0.8103346340309803, "grad_norm": 0.149149090051651, "learning_rate": 1.0942201796975392e-05, "loss": 0.3394, "num_tokens": 4340598059.0, "step": 6853 }, { "epoch": 0.8104528792716094, "grad_norm": 0.14447303116321564, "learning_rate": 1.093624004933268e-05, "loss": 0.3082, "num_tokens": 4341232830.0, "step": 6854 }, { "epoch": 0.8105711245122383, "grad_norm": 0.14366254210472107, "learning_rate": 1.0930281537878685e-05, "loss": 0.3622, "num_tokens": 4341865062.0, "step": 6855 }, { "epoch": 0.8106893697528674, "grad_norm": 0.12971043586730957, "learning_rate": 1.092432626348739e-05, "loss": 0.2866, "num_tokens": 4342497535.0, "step": 6856 }, { "epoch": 0.8108076149934965, "grad_norm": 0.11838717013597488, "learning_rate": 1.091837422703226e-05, "loss": 0.2702, "num_tokens": 4343132334.0, "step": 6857 }, { "epoch": 0.8109258602341256, "grad_norm": 0.13142304122447968, "learning_rate": 1.0912425429386323e-05, "loss": 0.3149, "num_tokens": 4343768575.0, "step": 6858 }, { "epoch": 0.8110441054747547, "grad_norm": 0.12500078976154327, "learning_rate": 1.090647987142211e-05, "loss": 0.2901, "num_tokens": 4344396955.0, "step": 6859 }, { "epoch": 0.8111623507153837, "grad_norm": 0.1716040074825287, "learning_rate": 1.0900537554011672e-05, "loss": 0.345, "num_tokens": 4345035239.0, "step": 6860 }, { "epoch": 0.8112805959560128, "grad_norm": 0.14008885622024536, "learning_rate": 1.0894598478026613e-05, "loss": 0.3553, "num_tokens": 4345669475.0, "step": 6861 }, { "epoch": 0.8113988411966419, "grad_norm": 0.11963013559579849, "learning_rate": 1.0888662644338031e-05, "loss": 0.3074, "num_tokens": 4346305025.0, "step": 6862 }, { "epoch": 0.8115170864372709, "grad_norm": 0.1295543611049652, "learning_rate": 1.0882730053816567e-05, "loss": 0.335, "num_tokens": 4346927142.0, "step": 6863 }, { "epoch": 0.8116353316778999, "grad_norm": 0.13707797229290009, "learning_rate": 1.0876800707332368e-05, "loss": 0.2997, "num_tokens": 4347562454.0, "step": 6864 }, { "epoch": 0.811753576918529, "grad_norm": 0.14086633920669556, "learning_rate": 1.0870874605755128e-05, "loss": 0.3506, "num_tokens": 4348196585.0, "step": 6865 }, { "epoch": 0.8118718221591581, "grad_norm": 0.13375811278820038, "learning_rate": 1.0864951749954064e-05, "loss": 0.3152, "num_tokens": 4348828139.0, "step": 6866 }, { "epoch": 0.8119900673997872, "grad_norm": 0.14243444800376892, "learning_rate": 1.0859032140797887e-05, "loss": 0.3094, "num_tokens": 4349460883.0, "step": 6867 }, { "epoch": 0.8121083126404163, "grad_norm": 0.13729111850261688, "learning_rate": 1.0853115779154863e-05, "loss": 0.337, "num_tokens": 4350096885.0, "step": 6868 }, { "epoch": 0.8122265578810453, "grad_norm": 0.13893480598926544, "learning_rate": 1.0847202665892763e-05, "loss": 0.3474, "num_tokens": 4350732796.0, "step": 6869 }, { "epoch": 0.8123448031216743, "grad_norm": 0.13317711651325226, "learning_rate": 1.0841292801878904e-05, "loss": 0.3095, "num_tokens": 4351362514.0, "step": 6870 }, { "epoch": 0.8124630483623034, "grad_norm": 0.13587036728858948, "learning_rate": 1.08353861879801e-05, "loss": 0.3391, "num_tokens": 4351996237.0, "step": 6871 }, { "epoch": 0.8125812936029325, "grad_norm": 0.1372128427028656, "learning_rate": 1.0829482825062697e-05, "loss": 0.3277, "num_tokens": 4352633227.0, "step": 6872 }, { "epoch": 0.8126995388435615, "grad_norm": 0.1305849254131317, "learning_rate": 1.0823582713992584e-05, "loss": 0.3121, "num_tokens": 4353270707.0, "step": 6873 }, { "epoch": 0.8128177840841906, "grad_norm": 0.12497754395008087, "learning_rate": 1.081768585563514e-05, "loss": 0.3234, "num_tokens": 4353897472.0, "step": 6874 }, { "epoch": 0.8129360293248197, "grad_norm": 0.1376192718744278, "learning_rate": 1.081179225085529e-05, "loss": 0.3394, "num_tokens": 4354504074.0, "step": 6875 }, { "epoch": 0.8130542745654488, "grad_norm": 0.12711191177368164, "learning_rate": 1.080590190051747e-05, "loss": 0.3382, "num_tokens": 4355140019.0, "step": 6876 }, { "epoch": 0.8131725198060779, "grad_norm": 0.13178183138370514, "learning_rate": 1.0800014805485654e-05, "loss": 0.3248, "num_tokens": 4355779066.0, "step": 6877 }, { "epoch": 0.8132907650467068, "grad_norm": 0.13197104632854462, "learning_rate": 1.079413096662331e-05, "loss": 0.3315, "num_tokens": 4356412197.0, "step": 6878 }, { "epoch": 0.8134090102873359, "grad_norm": 0.12861086428165436, "learning_rate": 1.0788250384793476e-05, "loss": 0.2952, "num_tokens": 4357048566.0, "step": 6879 }, { "epoch": 0.813527255527965, "grad_norm": 0.13625408709049225, "learning_rate": 1.078237306085865e-05, "loss": 0.3421, "num_tokens": 4357687228.0, "step": 6880 }, { "epoch": 0.8136455007685941, "grad_norm": 0.13308465480804443, "learning_rate": 1.0776498995680898e-05, "loss": 0.3217, "num_tokens": 4358312904.0, "step": 6881 }, { "epoch": 0.8137637460092231, "grad_norm": 0.11896278709173203, "learning_rate": 1.0770628190121803e-05, "loss": 0.3255, "num_tokens": 4358946508.0, "step": 6882 }, { "epoch": 0.8138819912498522, "grad_norm": 0.1287333071231842, "learning_rate": 1.0764760645042457e-05, "loss": 0.3068, "num_tokens": 4359584742.0, "step": 6883 }, { "epoch": 0.8140002364904813, "grad_norm": 0.13051587343215942, "learning_rate": 1.0758896361303476e-05, "loss": 0.3153, "num_tokens": 4360217477.0, "step": 6884 }, { "epoch": 0.8141184817311103, "grad_norm": 0.1487097442150116, "learning_rate": 1.0753035339764996e-05, "loss": 0.3539, "num_tokens": 4360851638.0, "step": 6885 }, { "epoch": 0.8142367269717394, "grad_norm": 0.13634216785430908, "learning_rate": 1.0747177581286689e-05, "loss": 0.3246, "num_tokens": 4361485035.0, "step": 6886 }, { "epoch": 0.8143549722123684, "grad_norm": 0.1374128758907318, "learning_rate": 1.0741323086727735e-05, "loss": 0.3311, "num_tokens": 4362120881.0, "step": 6887 }, { "epoch": 0.8144732174529975, "grad_norm": 0.12480004876852036, "learning_rate": 1.073547185694683e-05, "loss": 0.2851, "num_tokens": 4362758832.0, "step": 6888 }, { "epoch": 0.8145914626936266, "grad_norm": 0.14233165979385376, "learning_rate": 1.0729623892802211e-05, "loss": 0.3365, "num_tokens": 4363395007.0, "step": 6889 }, { "epoch": 0.8147097079342557, "grad_norm": 0.12106978893280029, "learning_rate": 1.0723779195151619e-05, "loss": 0.3094, "num_tokens": 4364033947.0, "step": 6890 }, { "epoch": 0.8148279531748847, "grad_norm": 0.13249273598194122, "learning_rate": 1.0717937764852324e-05, "loss": 0.3384, "num_tokens": 4364673197.0, "step": 6891 }, { "epoch": 0.8149461984155137, "grad_norm": 0.13309593498706818, "learning_rate": 1.0712099602761113e-05, "loss": 0.3142, "num_tokens": 4365306277.0, "step": 6892 }, { "epoch": 0.8150644436561428, "grad_norm": 0.12649299204349518, "learning_rate": 1.0706264709734298e-05, "loss": 0.3041, "num_tokens": 4365936085.0, "step": 6893 }, { "epoch": 0.8151826888967719, "grad_norm": 0.1330874264240265, "learning_rate": 1.0700433086627695e-05, "loss": 0.3211, "num_tokens": 4366573483.0, "step": 6894 }, { "epoch": 0.815300934137401, "grad_norm": 0.12766484916210175, "learning_rate": 1.0694604734296667e-05, "loss": 0.2958, "num_tokens": 4367196524.0, "step": 6895 }, { "epoch": 0.81541917937803, "grad_norm": 0.14484108984470367, "learning_rate": 1.0688779653596093e-05, "loss": 0.3769, "num_tokens": 4367830862.0, "step": 6896 }, { "epoch": 0.8155374246186591, "grad_norm": 0.1286381632089615, "learning_rate": 1.068295784538034e-05, "loss": 0.3516, "num_tokens": 4368462412.0, "step": 6897 }, { "epoch": 0.8156556698592882, "grad_norm": 0.13974528014659882, "learning_rate": 1.0677139310503334e-05, "loss": 0.3098, "num_tokens": 4369099529.0, "step": 6898 }, { "epoch": 0.8157739150999173, "grad_norm": 0.12610778212547302, "learning_rate": 1.0671324049818497e-05, "loss": 0.2984, "num_tokens": 4369730950.0, "step": 6899 }, { "epoch": 0.8158921603405463, "grad_norm": 0.13385972380638123, "learning_rate": 1.0665512064178784e-05, "loss": 0.3266, "num_tokens": 4370367413.0, "step": 6900 }, { "epoch": 0.8160104055811753, "grad_norm": 0.12983761727809906, "learning_rate": 1.0659703354436665e-05, "loss": 0.3593, "num_tokens": 4371001616.0, "step": 6901 }, { "epoch": 0.8161286508218044, "grad_norm": 0.13404297828674316, "learning_rate": 1.065389792144412e-05, "loss": 0.3024, "num_tokens": 4371635457.0, "step": 6902 }, { "epoch": 0.8162468960624335, "grad_norm": 0.12769095599651337, "learning_rate": 1.0648095766052676e-05, "loss": 0.3387, "num_tokens": 4372270817.0, "step": 6903 }, { "epoch": 0.8163651413030626, "grad_norm": 0.12777194380760193, "learning_rate": 1.0642296889113345e-05, "loss": 0.3153, "num_tokens": 4372904256.0, "step": 6904 }, { "epoch": 0.8164833865436916, "grad_norm": 0.13482600450515747, "learning_rate": 1.0636501291476678e-05, "loss": 0.3063, "num_tokens": 4373537629.0, "step": 6905 }, { "epoch": 0.8166016317843207, "grad_norm": 0.12904362380504608, "learning_rate": 1.0630708973992735e-05, "loss": 0.3202, "num_tokens": 4374169947.0, "step": 6906 }, { "epoch": 0.8167198770249497, "grad_norm": 0.10246802121400833, "learning_rate": 1.0624919937511114e-05, "loss": 0.2306, "num_tokens": 4374807128.0, "step": 6907 }, { "epoch": 0.8168381222655788, "grad_norm": 0.131879523396492, "learning_rate": 1.0619134182880901e-05, "loss": 0.3484, "num_tokens": 4375445842.0, "step": 6908 }, { "epoch": 0.8169563675062079, "grad_norm": 0.13426567614078522, "learning_rate": 1.0613351710950738e-05, "loss": 0.3118, "num_tokens": 4376081954.0, "step": 6909 }, { "epoch": 0.8170746127468369, "grad_norm": 0.12608924508094788, "learning_rate": 1.0607572522568752e-05, "loss": 0.2874, "num_tokens": 4376714280.0, "step": 6910 }, { "epoch": 0.817192857987466, "grad_norm": 0.12884655594825745, "learning_rate": 1.06017966185826e-05, "loss": 0.3311, "num_tokens": 4377352215.0, "step": 6911 }, { "epoch": 0.8173111032280951, "grad_norm": 0.1256176382303238, "learning_rate": 1.0596023999839473e-05, "loss": 0.2993, "num_tokens": 4377987004.0, "step": 6912 }, { "epoch": 0.8174293484687242, "grad_norm": 0.1319318413734436, "learning_rate": 1.0590254667186055e-05, "loss": 0.3198, "num_tokens": 4378620080.0, "step": 6913 }, { "epoch": 0.8175475937093531, "grad_norm": 0.12693314254283905, "learning_rate": 1.0584488621468567e-05, "loss": 0.3129, "num_tokens": 4379253370.0, "step": 6914 }, { "epoch": 0.8176658389499822, "grad_norm": 0.1308133602142334, "learning_rate": 1.0578725863532725e-05, "loss": 0.3255, "num_tokens": 4379885347.0, "step": 6915 }, { "epoch": 0.8177840841906113, "grad_norm": 0.12960676848888397, "learning_rate": 1.0572966394223791e-05, "loss": 0.3228, "num_tokens": 4380520238.0, "step": 6916 }, { "epoch": 0.8179023294312404, "grad_norm": 0.1407497376203537, "learning_rate": 1.056721021438654e-05, "loss": 0.3323, "num_tokens": 4381154009.0, "step": 6917 }, { "epoch": 0.8180205746718695, "grad_norm": 0.12255080044269562, "learning_rate": 1.0561457324865236e-05, "loss": 0.3186, "num_tokens": 4381786237.0, "step": 6918 }, { "epoch": 0.8181388199124985, "grad_norm": 0.12103301286697388, "learning_rate": 1.0555707726503695e-05, "loss": 0.2921, "num_tokens": 4382424592.0, "step": 6919 }, { "epoch": 0.8182570651531276, "grad_norm": 0.12041692435741425, "learning_rate": 1.0549961420145227e-05, "loss": 0.3096, "num_tokens": 4383060569.0, "step": 6920 }, { "epoch": 0.8183753103937567, "grad_norm": 0.1365186870098114, "learning_rate": 1.0544218406632675e-05, "loss": 0.3486, "num_tokens": 4383699974.0, "step": 6921 }, { "epoch": 0.8184935556343857, "grad_norm": 0.13032984733581543, "learning_rate": 1.0538478686808394e-05, "loss": 0.3268, "num_tokens": 4384337661.0, "step": 6922 }, { "epoch": 0.8186118008750147, "grad_norm": 0.12145069986581802, "learning_rate": 1.053274226151424e-05, "loss": 0.2997, "num_tokens": 4384971731.0, "step": 6923 }, { "epoch": 0.8187300461156438, "grad_norm": 0.12032490223646164, "learning_rate": 1.0527009131591616e-05, "loss": 0.3187, "num_tokens": 4385606859.0, "step": 6924 }, { "epoch": 0.8188482913562729, "grad_norm": 0.14125335216522217, "learning_rate": 1.0521279297881418e-05, "loss": 0.3495, "num_tokens": 4386235852.0, "step": 6925 }, { "epoch": 0.818966536596902, "grad_norm": 0.1400439590215683, "learning_rate": 1.0515552761224068e-05, "loss": 0.3167, "num_tokens": 4386867858.0, "step": 6926 }, { "epoch": 0.8190847818375311, "grad_norm": 0.12653221189975739, "learning_rate": 1.0509829522459492e-05, "loss": 0.3449, "num_tokens": 4387498625.0, "step": 6927 }, { "epoch": 0.8192030270781601, "grad_norm": 0.13755187392234802, "learning_rate": 1.0504109582427159e-05, "loss": 0.3632, "num_tokens": 4388132685.0, "step": 6928 }, { "epoch": 0.8193212723187892, "grad_norm": 0.1429261416196823, "learning_rate": 1.0498392941966022e-05, "loss": 0.345, "num_tokens": 4388768963.0, "step": 6929 }, { "epoch": 0.8194395175594182, "grad_norm": 0.13054898381233215, "learning_rate": 1.0492679601914582e-05, "loss": 0.3362, "num_tokens": 4389408084.0, "step": 6930 }, { "epoch": 0.8195577628000473, "grad_norm": 0.12281018495559692, "learning_rate": 1.0486969563110832e-05, "loss": 0.3265, "num_tokens": 4390044552.0, "step": 6931 }, { "epoch": 0.8196760080406764, "grad_norm": 0.12718763947486877, "learning_rate": 1.0481262826392279e-05, "loss": 0.3492, "num_tokens": 4390681171.0, "step": 6932 }, { "epoch": 0.8197942532813054, "grad_norm": 0.13880717754364014, "learning_rate": 1.0475559392595974e-05, "loss": 0.3651, "num_tokens": 4391319574.0, "step": 6933 }, { "epoch": 0.8199124985219345, "grad_norm": 0.13593849539756775, "learning_rate": 1.0469859262558452e-05, "loss": 0.3367, "num_tokens": 4391951938.0, "step": 6934 }, { "epoch": 0.8200307437625636, "grad_norm": 0.13602429628372192, "learning_rate": 1.0464162437115779e-05, "loss": 0.352, "num_tokens": 4392590273.0, "step": 6935 }, { "epoch": 0.8201489890031927, "grad_norm": 0.12128621339797974, "learning_rate": 1.045846891710353e-05, "loss": 0.3043, "num_tokens": 4393229022.0, "step": 6936 }, { "epoch": 0.8202672342438216, "grad_norm": 0.13451804220676422, "learning_rate": 1.04527787033568e-05, "loss": 0.3451, "num_tokens": 4393866112.0, "step": 6937 }, { "epoch": 0.8203854794844507, "grad_norm": 0.12010449171066284, "learning_rate": 1.0447091796710206e-05, "loss": 0.2911, "num_tokens": 4394501814.0, "step": 6938 }, { "epoch": 0.8205037247250798, "grad_norm": 0.1159784197807312, "learning_rate": 1.0441408197997866e-05, "loss": 0.3073, "num_tokens": 4395137283.0, "step": 6939 }, { "epoch": 0.8206219699657089, "grad_norm": 0.12376555800437927, "learning_rate": 1.0435727908053419e-05, "loss": 0.317, "num_tokens": 4395770994.0, "step": 6940 }, { "epoch": 0.820740215206338, "grad_norm": 0.12201710790395737, "learning_rate": 1.043005092771001e-05, "loss": 0.2924, "num_tokens": 4396409998.0, "step": 6941 }, { "epoch": 0.820858460446967, "grad_norm": 0.1268424391746521, "learning_rate": 1.042437725780032e-05, "loss": 0.296, "num_tokens": 4397044209.0, "step": 6942 }, { "epoch": 0.8209767056875961, "grad_norm": 0.1280297338962555, "learning_rate": 1.0418706899156526e-05, "loss": 0.308, "num_tokens": 4397675640.0, "step": 6943 }, { "epoch": 0.8210949509282252, "grad_norm": 0.13739904761314392, "learning_rate": 1.0413039852610314e-05, "loss": 0.3173, "num_tokens": 4398275610.0, "step": 6944 }, { "epoch": 0.8212131961688542, "grad_norm": 0.12016229331493378, "learning_rate": 1.040737611899291e-05, "loss": 0.2795, "num_tokens": 4398905586.0, "step": 6945 }, { "epoch": 0.8213314414094832, "grad_norm": 0.12547269463539124, "learning_rate": 1.040171569913503e-05, "loss": 0.3346, "num_tokens": 4399538178.0, "step": 6946 }, { "epoch": 0.8214496866501123, "grad_norm": 0.12396407872438431, "learning_rate": 1.0396058593866926e-05, "loss": 0.3316, "num_tokens": 4400169490.0, "step": 6947 }, { "epoch": 0.8215679318907414, "grad_norm": 0.14115570485591888, "learning_rate": 1.0390404804018328e-05, "loss": 0.3264, "num_tokens": 4400806455.0, "step": 6948 }, { "epoch": 0.8216861771313705, "grad_norm": 0.12673403322696686, "learning_rate": 1.0384754330418522e-05, "loss": 0.3154, "num_tokens": 4401439353.0, "step": 6949 }, { "epoch": 0.8218044223719996, "grad_norm": 0.13858287036418915, "learning_rate": 1.037910717389627e-05, "loss": 0.3381, "num_tokens": 4402078530.0, "step": 6950 }, { "epoch": 0.8219226676126286, "grad_norm": 0.13771620392799377, "learning_rate": 1.037346333527988e-05, "loss": 0.3442, "num_tokens": 4402708597.0, "step": 6951 }, { "epoch": 0.8220409128532576, "grad_norm": 0.13348931074142456, "learning_rate": 1.0367822815397157e-05, "loss": 0.3369, "num_tokens": 4403347625.0, "step": 6952 }, { "epoch": 0.8221591580938867, "grad_norm": 0.13662835955619812, "learning_rate": 1.0362185615075414e-05, "loss": 0.355, "num_tokens": 4403979066.0, "step": 6953 }, { "epoch": 0.8222774033345158, "grad_norm": 0.12310732901096344, "learning_rate": 1.0356551735141488e-05, "loss": 0.3096, "num_tokens": 4404616728.0, "step": 6954 }, { "epoch": 0.8223956485751448, "grad_norm": 0.13748744130134583, "learning_rate": 1.0350921176421728e-05, "loss": 0.3455, "num_tokens": 4405249271.0, "step": 6955 }, { "epoch": 0.8225138938157739, "grad_norm": 0.13093668222427368, "learning_rate": 1.034529393974199e-05, "loss": 0.3395, "num_tokens": 4405888614.0, "step": 6956 }, { "epoch": 0.822632139056403, "grad_norm": 0.13501234352588654, "learning_rate": 1.0339670025927634e-05, "loss": 0.3358, "num_tokens": 4406525831.0, "step": 6957 }, { "epoch": 0.8227503842970321, "grad_norm": 0.13727807998657227, "learning_rate": 1.0334049435803566e-05, "loss": 0.3413, "num_tokens": 4407157966.0, "step": 6958 }, { "epoch": 0.8228686295376612, "grad_norm": 0.12039442360401154, "learning_rate": 1.0328432170194162e-05, "loss": 0.3055, "num_tokens": 4407784770.0, "step": 6959 }, { "epoch": 0.8229868747782901, "grad_norm": 0.13567985594272614, "learning_rate": 1.0322818229923353e-05, "loss": 0.3463, "num_tokens": 4408421012.0, "step": 6960 }, { "epoch": 0.8231051200189192, "grad_norm": 0.1332547813653946, "learning_rate": 1.0317207615814543e-05, "loss": 0.3333, "num_tokens": 4409057749.0, "step": 6961 }, { "epoch": 0.8232233652595483, "grad_norm": 0.12814700603485107, "learning_rate": 1.0311600328690666e-05, "loss": 0.322, "num_tokens": 4409690865.0, "step": 6962 }, { "epoch": 0.8233416105001774, "grad_norm": 0.13320544362068176, "learning_rate": 1.0305996369374178e-05, "loss": 0.3365, "num_tokens": 4410318085.0, "step": 6963 }, { "epoch": 0.8234598557408064, "grad_norm": 0.13311906158924103, "learning_rate": 1.0300395738687032e-05, "loss": 0.3207, "num_tokens": 4410955550.0, "step": 6964 }, { "epoch": 0.8235781009814355, "grad_norm": 0.13316766917705536, "learning_rate": 1.0294798437450694e-05, "loss": 0.3345, "num_tokens": 4411586042.0, "step": 6965 }, { "epoch": 0.8236963462220646, "grad_norm": 0.12943097949028015, "learning_rate": 1.0289204466486142e-05, "loss": 0.3273, "num_tokens": 4412221646.0, "step": 6966 }, { "epoch": 0.8238145914626936, "grad_norm": 0.11482536047697067, "learning_rate": 1.0283613826613868e-05, "loss": 0.2739, "num_tokens": 4412856032.0, "step": 6967 }, { "epoch": 0.8239328367033227, "grad_norm": 0.13856425881385803, "learning_rate": 1.027802651865389e-05, "loss": 0.31, "num_tokens": 4413488812.0, "step": 6968 }, { "epoch": 0.8240510819439517, "grad_norm": 0.13650935888290405, "learning_rate": 1.0272442543425717e-05, "loss": 0.2983, "num_tokens": 4414127240.0, "step": 6969 }, { "epoch": 0.8241693271845808, "grad_norm": 0.13210488855838776, "learning_rate": 1.0266861901748367e-05, "loss": 0.3135, "num_tokens": 4414742635.0, "step": 6970 }, { "epoch": 0.8242875724252099, "grad_norm": 0.12820784747600555, "learning_rate": 1.0261284594440374e-05, "loss": 0.3141, "num_tokens": 4415374188.0, "step": 6971 }, { "epoch": 0.824405817665839, "grad_norm": 0.12312807142734528, "learning_rate": 1.0255710622319805e-05, "loss": 0.3066, "num_tokens": 4416004206.0, "step": 6972 }, { "epoch": 0.8245240629064681, "grad_norm": 0.13242612779140472, "learning_rate": 1.0250139986204206e-05, "loss": 0.309, "num_tokens": 4416639907.0, "step": 6973 }, { "epoch": 0.824642308147097, "grad_norm": 0.12344435602426529, "learning_rate": 1.0244572686910645e-05, "loss": 0.3225, "num_tokens": 4417272373.0, "step": 6974 }, { "epoch": 0.8247605533877261, "grad_norm": 0.11853478103876114, "learning_rate": 1.023900872525571e-05, "loss": 0.3209, "num_tokens": 4417909258.0, "step": 6975 }, { "epoch": 0.8248787986283552, "grad_norm": 0.1251377910375595, "learning_rate": 1.023344810205548e-05, "loss": 0.3281, "num_tokens": 4418545207.0, "step": 6976 }, { "epoch": 0.8249970438689843, "grad_norm": 0.13958625495433807, "learning_rate": 1.0227890818125581e-05, "loss": 0.3406, "num_tokens": 4419178641.0, "step": 6977 }, { "epoch": 0.8251152891096133, "grad_norm": 0.12283563613891602, "learning_rate": 1.0222336874281091e-05, "loss": 0.2951, "num_tokens": 4419810192.0, "step": 6978 }, { "epoch": 0.8252335343502424, "grad_norm": 0.12869255244731903, "learning_rate": 1.0216786271336656e-05, "loss": 0.3328, "num_tokens": 4420446364.0, "step": 6979 }, { "epoch": 0.8253517795908715, "grad_norm": 0.12980833649635315, "learning_rate": 1.021123901010639e-05, "loss": 0.3299, "num_tokens": 4421084685.0, "step": 6980 }, { "epoch": 0.8254700248315006, "grad_norm": 0.12704814970493317, "learning_rate": 1.020569509140395e-05, "loss": 0.3322, "num_tokens": 4421717674.0, "step": 6981 }, { "epoch": 0.8255882700721296, "grad_norm": 0.11776134371757507, "learning_rate": 1.0200154516042484e-05, "loss": 0.309, "num_tokens": 4422354899.0, "step": 6982 }, { "epoch": 0.8257065153127586, "grad_norm": 0.12467045336961746, "learning_rate": 1.019461728483464e-05, "loss": 0.3118, "num_tokens": 4422987333.0, "step": 6983 }, { "epoch": 0.8258247605533877, "grad_norm": 0.12929213047027588, "learning_rate": 1.0189083398592603e-05, "loss": 0.3209, "num_tokens": 4423619530.0, "step": 6984 }, { "epoch": 0.8259430057940168, "grad_norm": 0.1488913595676422, "learning_rate": 1.0183552858128046e-05, "loss": 0.3182, "num_tokens": 4424253987.0, "step": 6985 }, { "epoch": 0.8260612510346459, "grad_norm": 0.12114198505878448, "learning_rate": 1.0178025664252156e-05, "loss": 0.2916, "num_tokens": 4424892198.0, "step": 6986 }, { "epoch": 0.8261794962752749, "grad_norm": 0.12019616365432739, "learning_rate": 1.0172501817775627e-05, "loss": 0.3159, "num_tokens": 4425524124.0, "step": 6987 }, { "epoch": 0.826297741515904, "grad_norm": 0.13098520040512085, "learning_rate": 1.0166981319508672e-05, "loss": 0.3449, "num_tokens": 4426158260.0, "step": 6988 }, { "epoch": 0.826415986756533, "grad_norm": 0.13804885745048523, "learning_rate": 1.0161464170261018e-05, "loss": 0.3058, "num_tokens": 4426795062.0, "step": 6989 }, { "epoch": 0.8265342319971621, "grad_norm": 0.12351163476705551, "learning_rate": 1.015595037084187e-05, "loss": 0.2902, "num_tokens": 4427426279.0, "step": 6990 }, { "epoch": 0.8266524772377912, "grad_norm": 0.13641834259033203, "learning_rate": 1.0150439922059972e-05, "loss": 0.3024, "num_tokens": 4428060000.0, "step": 6991 }, { "epoch": 0.8267707224784202, "grad_norm": 0.12863858044147491, "learning_rate": 1.0144932824723557e-05, "loss": 0.2896, "num_tokens": 4428693118.0, "step": 6992 }, { "epoch": 0.8268889677190493, "grad_norm": 0.12639905512332916, "learning_rate": 1.0139429079640388e-05, "loss": 0.3043, "num_tokens": 4429327365.0, "step": 6993 }, { "epoch": 0.8270072129596784, "grad_norm": 0.13044053316116333, "learning_rate": 1.0133928687617717e-05, "loss": 0.3046, "num_tokens": 4429966117.0, "step": 6994 }, { "epoch": 0.8271254582003075, "grad_norm": 0.1363041251897812, "learning_rate": 1.0128431649462305e-05, "loss": 0.3033, "num_tokens": 4430596303.0, "step": 6995 }, { "epoch": 0.8272437034409365, "grad_norm": 0.13156291842460632, "learning_rate": 1.0122937965980433e-05, "loss": 0.3124, "num_tokens": 4431230548.0, "step": 6996 }, { "epoch": 0.8273619486815655, "grad_norm": 0.12527573108673096, "learning_rate": 1.011744763797788e-05, "loss": 0.326, "num_tokens": 4431869907.0, "step": 6997 }, { "epoch": 0.8274801939221946, "grad_norm": 0.13661998510360718, "learning_rate": 1.0111960666259948e-05, "loss": 0.3522, "num_tokens": 4432505132.0, "step": 6998 }, { "epoch": 0.8275984391628237, "grad_norm": 0.1315530687570572, "learning_rate": 1.0106477051631423e-05, "loss": 0.3371, "num_tokens": 4433138336.0, "step": 6999 }, { "epoch": 0.8277166844034528, "grad_norm": 0.1366676241159439, "learning_rate": 1.0100996794896614e-05, "loss": 0.338, "num_tokens": 4433774027.0, "step": 7000 }, { "epoch": 0.8278349296440818, "grad_norm": 0.4847141206264496, "learning_rate": 1.0095519896859327e-05, "loss": 0.3645, "num_tokens": 4434380973.0, "step": 7001 }, { "epoch": 0.8279531748847109, "grad_norm": 0.13495613634586334, "learning_rate": 1.00900463583229e-05, "loss": 0.3088, "num_tokens": 4435011781.0, "step": 7002 }, { "epoch": 0.82807142012534, "grad_norm": 0.15035969018936157, "learning_rate": 1.0084576180090151e-05, "loss": 0.2968, "num_tokens": 4435648489.0, "step": 7003 }, { "epoch": 0.828189665365969, "grad_norm": 0.1590014100074768, "learning_rate": 1.0079109362963409e-05, "loss": 0.3027, "num_tokens": 4436281451.0, "step": 7004 }, { "epoch": 0.828307910606598, "grad_norm": 0.18805038928985596, "learning_rate": 1.007364590774453e-05, "loss": 0.3339, "num_tokens": 4436920896.0, "step": 7005 }, { "epoch": 0.8284261558472271, "grad_norm": 0.15904654562473297, "learning_rate": 1.006818581523485e-05, "loss": 0.3075, "num_tokens": 4437556174.0, "step": 7006 }, { "epoch": 0.8285444010878562, "grad_norm": 0.16447430849075317, "learning_rate": 1.0062729086235242e-05, "loss": 0.3642, "num_tokens": 4438193093.0, "step": 7007 }, { "epoch": 0.8286626463284853, "grad_norm": 0.14326852560043335, "learning_rate": 1.0057275721546044e-05, "loss": 0.3071, "num_tokens": 4438796069.0, "step": 7008 }, { "epoch": 0.8287808915691144, "grad_norm": 0.13491412997245789, "learning_rate": 1.0051825721967138e-05, "loss": 0.3345, "num_tokens": 4439426991.0, "step": 7009 }, { "epoch": 0.8288991368097434, "grad_norm": 0.14281600713729858, "learning_rate": 1.0046379088297906e-05, "loss": 0.3657, "num_tokens": 4440064125.0, "step": 7010 }, { "epoch": 0.8290173820503725, "grad_norm": 0.1473141759634018, "learning_rate": 1.0040935821337223e-05, "loss": 0.3362, "num_tokens": 4440697927.0, "step": 7011 }, { "epoch": 0.8291356272910015, "grad_norm": 0.14701202511787415, "learning_rate": 1.0035495921883477e-05, "loss": 0.3154, "num_tokens": 4441333416.0, "step": 7012 }, { "epoch": 0.8292538725316306, "grad_norm": 0.13879792392253876, "learning_rate": 1.0030059390734557e-05, "loss": 0.326, "num_tokens": 4441963797.0, "step": 7013 }, { "epoch": 0.8293721177722597, "grad_norm": 0.13159531354904175, "learning_rate": 1.002462622868787e-05, "loss": 0.3003, "num_tokens": 4442601779.0, "step": 7014 }, { "epoch": 0.8294903630128887, "grad_norm": 0.14054906368255615, "learning_rate": 1.0019196436540323e-05, "loss": 0.3286, "num_tokens": 4443239520.0, "step": 7015 }, { "epoch": 0.8296086082535178, "grad_norm": 0.13479922711849213, "learning_rate": 1.0013770015088318e-05, "loss": 0.3257, "num_tokens": 4443876309.0, "step": 7016 }, { "epoch": 0.8297268534941469, "grad_norm": 0.1242513507604599, "learning_rate": 1.0008346965127783e-05, "loss": 0.3115, "num_tokens": 4444508329.0, "step": 7017 }, { "epoch": 0.829845098734776, "grad_norm": 0.1332220733165741, "learning_rate": 1.0002927287454133e-05, "loss": 0.3207, "num_tokens": 4445141984.0, "step": 7018 }, { "epoch": 0.8299633439754049, "grad_norm": 0.13581399619579315, "learning_rate": 9.997510982862301e-06, "loss": 0.3148, "num_tokens": 4445779174.0, "step": 7019 }, { "epoch": 0.830081589216034, "grad_norm": 0.12733280658721924, "learning_rate": 9.992098052146721e-06, "loss": 0.3444, "num_tokens": 4446391617.0, "step": 7020 }, { "epoch": 0.8301998344566631, "grad_norm": 0.1289529949426651, "learning_rate": 9.986688496101327e-06, "loss": 0.3263, "num_tokens": 4447016924.0, "step": 7021 }, { "epoch": 0.8303180796972922, "grad_norm": 0.13775686919689178, "learning_rate": 9.981282315519559e-06, "loss": 0.3497, "num_tokens": 4447652697.0, "step": 7022 }, { "epoch": 0.8304363249379213, "grad_norm": 0.1255083829164505, "learning_rate": 9.975879511194377e-06, "loss": 0.3055, "num_tokens": 4448288885.0, "step": 7023 }, { "epoch": 0.8305545701785503, "grad_norm": 0.13641126453876495, "learning_rate": 9.970480083918223e-06, "loss": 0.3468, "num_tokens": 4448919746.0, "step": 7024 }, { "epoch": 0.8306728154191794, "grad_norm": 0.1272544264793396, "learning_rate": 9.965084034483054e-06, "loss": 0.34, "num_tokens": 4449551430.0, "step": 7025 }, { "epoch": 0.8307910606598085, "grad_norm": 0.1310655027627945, "learning_rate": 9.959691363680348e-06, "loss": 0.3323, "num_tokens": 4450175666.0, "step": 7026 }, { "epoch": 0.8309093059004375, "grad_norm": 0.140006884932518, "learning_rate": 9.954302072301051e-06, "loss": 0.3541, "num_tokens": 4450808819.0, "step": 7027 }, { "epoch": 0.8310275511410665, "grad_norm": 0.13747349381446838, "learning_rate": 9.948916161135656e-06, "loss": 0.3416, "num_tokens": 4451437104.0, "step": 7028 }, { "epoch": 0.8311457963816956, "grad_norm": 0.13058564066886902, "learning_rate": 9.943533630974117e-06, "loss": 0.3331, "num_tokens": 4452073640.0, "step": 7029 }, { "epoch": 0.8312640416223247, "grad_norm": 0.13791827857494354, "learning_rate": 9.938154482605919e-06, "loss": 0.3148, "num_tokens": 4452706971.0, "step": 7030 }, { "epoch": 0.8313822868629538, "grad_norm": 0.12259772419929504, "learning_rate": 9.932778716820055e-06, "loss": 0.3135, "num_tokens": 4453342438.0, "step": 7031 }, { "epoch": 0.8315005321035829, "grad_norm": 0.1398756355047226, "learning_rate": 9.92740633440501e-06, "loss": 0.3703, "num_tokens": 4453967841.0, "step": 7032 }, { "epoch": 0.8316187773442119, "grad_norm": 0.12728799879550934, "learning_rate": 9.92203733614877e-06, "loss": 0.3293, "num_tokens": 4454607071.0, "step": 7033 }, { "epoch": 0.831737022584841, "grad_norm": 0.1403788924217224, "learning_rate": 9.916671722838822e-06, "loss": 0.335, "num_tokens": 4455244661.0, "step": 7034 }, { "epoch": 0.83185526782547, "grad_norm": 0.13908174633979797, "learning_rate": 9.911309495262183e-06, "loss": 0.3367, "num_tokens": 4455883538.0, "step": 7035 }, { "epoch": 0.8319735130660991, "grad_norm": 0.14030687510967255, "learning_rate": 9.905950654205334e-06, "loss": 0.3073, "num_tokens": 4456519641.0, "step": 7036 }, { "epoch": 0.8320917583067281, "grad_norm": 0.13774533569812775, "learning_rate": 9.900595200454305e-06, "loss": 0.2825, "num_tokens": 4457157012.0, "step": 7037 }, { "epoch": 0.8322100035473572, "grad_norm": 0.12591855227947235, "learning_rate": 9.895243134794581e-06, "loss": 0.3191, "num_tokens": 4457785494.0, "step": 7038 }, { "epoch": 0.8323282487879863, "grad_norm": 0.14697271585464478, "learning_rate": 9.889894458011176e-06, "loss": 0.3627, "num_tokens": 4458422191.0, "step": 7039 }, { "epoch": 0.8324464940286154, "grad_norm": 0.15229783952236176, "learning_rate": 9.884549170888615e-06, "loss": 0.3707, "num_tokens": 4459057614.0, "step": 7040 }, { "epoch": 0.8325647392692445, "grad_norm": 0.13496211171150208, "learning_rate": 9.87920727421091e-06, "loss": 0.3394, "num_tokens": 4459691308.0, "step": 7041 }, { "epoch": 0.8326829845098734, "grad_norm": 0.12923437356948853, "learning_rate": 9.873868768761583e-06, "loss": 0.3383, "num_tokens": 4460327603.0, "step": 7042 }, { "epoch": 0.8328012297505025, "grad_norm": 0.12715297937393188, "learning_rate": 9.86853365532364e-06, "loss": 0.3108, "num_tokens": 4460965283.0, "step": 7043 }, { "epoch": 0.8329194749911316, "grad_norm": 0.14025896787643433, "learning_rate": 9.863201934679627e-06, "loss": 0.3307, "num_tokens": 4461599589.0, "step": 7044 }, { "epoch": 0.8330377202317607, "grad_norm": 0.12608030438423157, "learning_rate": 9.857873607611565e-06, "loss": 0.3255, "num_tokens": 4462236953.0, "step": 7045 }, { "epoch": 0.8331559654723897, "grad_norm": 0.12822584807872772, "learning_rate": 9.852548674900969e-06, "loss": 0.3052, "num_tokens": 4462871544.0, "step": 7046 }, { "epoch": 0.8332742107130188, "grad_norm": 0.1227455660700798, "learning_rate": 9.847227137328893e-06, "loss": 0.2971, "num_tokens": 4463502487.0, "step": 7047 }, { "epoch": 0.8333924559536479, "grad_norm": 0.12604041397571564, "learning_rate": 9.841908995675849e-06, "loss": 0.3042, "num_tokens": 4464140896.0, "step": 7048 }, { "epoch": 0.833510701194277, "grad_norm": 0.12976115942001343, "learning_rate": 9.836594250721886e-06, "loss": 0.3372, "num_tokens": 4464773850.0, "step": 7049 }, { "epoch": 0.833628946434906, "grad_norm": 0.13636842370033264, "learning_rate": 9.831282903246542e-06, "loss": 0.3212, "num_tokens": 4465402902.0, "step": 7050 }, { "epoch": 0.833747191675535, "grad_norm": 0.1339859515428543, "learning_rate": 9.825974954028847e-06, "loss": 0.2993, "num_tokens": 4466041317.0, "step": 7051 }, { "epoch": 0.8338654369161641, "grad_norm": 0.13316503167152405, "learning_rate": 9.820670403847342e-06, "loss": 0.3329, "num_tokens": 4466679293.0, "step": 7052 }, { "epoch": 0.8339836821567932, "grad_norm": 0.13833902776241302, "learning_rate": 9.815369253480074e-06, "loss": 0.36, "num_tokens": 4467298543.0, "step": 7053 }, { "epoch": 0.8341019273974223, "grad_norm": 0.13205204904079437, "learning_rate": 9.810071503704588e-06, "loss": 0.3017, "num_tokens": 4467928366.0, "step": 7054 }, { "epoch": 0.8342201726380514, "grad_norm": 0.14704376459121704, "learning_rate": 9.804777155297916e-06, "loss": 0.3678, "num_tokens": 4468555839.0, "step": 7055 }, { "epoch": 0.8343384178786803, "grad_norm": 0.1333952099084854, "learning_rate": 9.799486209036615e-06, "loss": 0.3453, "num_tokens": 4469194447.0, "step": 7056 }, { "epoch": 0.8344566631193094, "grad_norm": 0.12962020933628082, "learning_rate": 9.794198665696727e-06, "loss": 0.3361, "num_tokens": 4469830555.0, "step": 7057 }, { "epoch": 0.8345749083599385, "grad_norm": 0.132342129945755, "learning_rate": 9.78891452605381e-06, "loss": 0.3461, "num_tokens": 4470441259.0, "step": 7058 }, { "epoch": 0.8346931536005676, "grad_norm": 0.1255841851234436, "learning_rate": 9.78363379088289e-06, "loss": 0.2876, "num_tokens": 4471069048.0, "step": 7059 }, { "epoch": 0.8348113988411966, "grad_norm": 0.13289158046245575, "learning_rate": 9.778356460958526e-06, "loss": 0.325, "num_tokens": 4471703924.0, "step": 7060 }, { "epoch": 0.8349296440818257, "grad_norm": 0.12093044072389603, "learning_rate": 9.773082537054778e-06, "loss": 0.2855, "num_tokens": 4472340047.0, "step": 7061 }, { "epoch": 0.8350478893224548, "grad_norm": 0.135834738612175, "learning_rate": 9.76781201994518e-06, "loss": 0.3322, "num_tokens": 4472967521.0, "step": 7062 }, { "epoch": 0.8351661345630839, "grad_norm": 0.13100504875183105, "learning_rate": 9.762544910402794e-06, "loss": 0.3238, "num_tokens": 4473602160.0, "step": 7063 }, { "epoch": 0.835284379803713, "grad_norm": 0.12545250356197357, "learning_rate": 9.757281209200157e-06, "loss": 0.3151, "num_tokens": 4474235707.0, "step": 7064 }, { "epoch": 0.8354026250443419, "grad_norm": 0.13326075673103333, "learning_rate": 9.752020917109334e-06, "loss": 0.3522, "num_tokens": 4474870352.0, "step": 7065 }, { "epoch": 0.835520870284971, "grad_norm": 0.12726525962352753, "learning_rate": 9.746764034901865e-06, "loss": 0.3337, "num_tokens": 4475505174.0, "step": 7066 }, { "epoch": 0.8356391155256001, "grad_norm": 0.13166958093643188, "learning_rate": 9.741510563348796e-06, "loss": 0.3099, "num_tokens": 4476139236.0, "step": 7067 }, { "epoch": 0.8357573607662292, "grad_norm": 0.12311326712369919, "learning_rate": 9.736260503220696e-06, "loss": 0.3339, "num_tokens": 4476775186.0, "step": 7068 }, { "epoch": 0.8358756060068582, "grad_norm": 0.13996779918670654, "learning_rate": 9.731013855287591e-06, "loss": 0.3693, "num_tokens": 4477413255.0, "step": 7069 }, { "epoch": 0.8359938512474873, "grad_norm": 0.1390828788280487, "learning_rate": 9.725770620319048e-06, "loss": 0.3501, "num_tokens": 4478049483.0, "step": 7070 }, { "epoch": 0.8361120964881164, "grad_norm": 0.13005998730659485, "learning_rate": 9.720530799084111e-06, "loss": 0.2747, "num_tokens": 4478683817.0, "step": 7071 }, { "epoch": 0.8362303417287454, "grad_norm": 0.12443861365318298, "learning_rate": 9.715294392351323e-06, "loss": 0.2964, "num_tokens": 4479317604.0, "step": 7072 }, { "epoch": 0.8363485869693745, "grad_norm": 0.13666385412216187, "learning_rate": 9.710061400888727e-06, "loss": 0.2894, "num_tokens": 4479950072.0, "step": 7073 }, { "epoch": 0.8364668322100035, "grad_norm": 0.14085330069065094, "learning_rate": 9.704831825463874e-06, "loss": 0.304, "num_tokens": 4480588785.0, "step": 7074 }, { "epoch": 0.8365850774506326, "grad_norm": 0.13553518056869507, "learning_rate": 9.699605666843823e-06, "loss": 0.3495, "num_tokens": 4481220493.0, "step": 7075 }, { "epoch": 0.8367033226912617, "grad_norm": 0.13244237005710602, "learning_rate": 9.69438292579509e-06, "loss": 0.3676, "num_tokens": 4481856237.0, "step": 7076 }, { "epoch": 0.8368215679318908, "grad_norm": 0.12273471057415009, "learning_rate": 9.689163603083739e-06, "loss": 0.293, "num_tokens": 4482491215.0, "step": 7077 }, { "epoch": 0.8369398131725198, "grad_norm": 0.13491939008235931, "learning_rate": 9.683947699475297e-06, "loss": 0.3511, "num_tokens": 4483119922.0, "step": 7078 }, { "epoch": 0.8370580584131488, "grad_norm": 0.13203509151935577, "learning_rate": 9.678735215734816e-06, "loss": 0.3212, "num_tokens": 4483749299.0, "step": 7079 }, { "epoch": 0.8371763036537779, "grad_norm": 0.13690520823001862, "learning_rate": 9.67352615262683e-06, "loss": 0.3682, "num_tokens": 4484386302.0, "step": 7080 }, { "epoch": 0.837294548894407, "grad_norm": 0.11699734628200531, "learning_rate": 9.668320510915366e-06, "loss": 0.2801, "num_tokens": 4485019184.0, "step": 7081 }, { "epoch": 0.8374127941350361, "grad_norm": 0.13321839272975922, "learning_rate": 9.66311829136397e-06, "loss": 0.3787, "num_tokens": 4485655027.0, "step": 7082 }, { "epoch": 0.8375310393756651, "grad_norm": 0.13589242100715637, "learning_rate": 9.657919494735674e-06, "loss": 0.3067, "num_tokens": 4486292016.0, "step": 7083 }, { "epoch": 0.8376492846162942, "grad_norm": 0.12463857233524323, "learning_rate": 9.652724121793005e-06, "loss": 0.3242, "num_tokens": 4486925465.0, "step": 7084 }, { "epoch": 0.8377675298569233, "grad_norm": 0.13206177949905396, "learning_rate": 9.647532173297979e-06, "loss": 0.3452, "num_tokens": 4487561503.0, "step": 7085 }, { "epoch": 0.8378857750975524, "grad_norm": 0.13667753338813782, "learning_rate": 9.642343650012144e-06, "loss": 0.3054, "num_tokens": 4488198991.0, "step": 7086 }, { "epoch": 0.8380040203381814, "grad_norm": 0.13884276151657104, "learning_rate": 9.637158552696504e-06, "loss": 0.3431, "num_tokens": 4488838374.0, "step": 7087 }, { "epoch": 0.8381222655788104, "grad_norm": 0.1320859044790268, "learning_rate": 9.631976882111597e-06, "loss": 0.2966, "num_tokens": 4489476287.0, "step": 7088 }, { "epoch": 0.8382405108194395, "grad_norm": 0.1382514387369156, "learning_rate": 9.626798639017434e-06, "loss": 0.3293, "num_tokens": 4490107168.0, "step": 7089 }, { "epoch": 0.8383587560600686, "grad_norm": 0.13261498510837555, "learning_rate": 9.621623824173524e-06, "loss": 0.3323, "num_tokens": 4490743071.0, "step": 7090 }, { "epoch": 0.8384770013006977, "grad_norm": 0.12951701879501343, "learning_rate": 9.61645243833889e-06, "loss": 0.2892, "num_tokens": 4491371208.0, "step": 7091 }, { "epoch": 0.8385952465413267, "grad_norm": 0.12372345477342606, "learning_rate": 9.611284482272042e-06, "loss": 0.3013, "num_tokens": 4492005437.0, "step": 7092 }, { "epoch": 0.8387134917819558, "grad_norm": 0.12435108423233032, "learning_rate": 9.60611995673098e-06, "loss": 0.3223, "num_tokens": 4492630116.0, "step": 7093 }, { "epoch": 0.8388317370225848, "grad_norm": 0.14237402379512787, "learning_rate": 9.600958862473208e-06, "loss": 0.3447, "num_tokens": 4493267367.0, "step": 7094 }, { "epoch": 0.8389499822632139, "grad_norm": 0.13444408774375916, "learning_rate": 9.595801200255726e-06, "loss": 0.3162, "num_tokens": 4493906896.0, "step": 7095 }, { "epoch": 0.839068227503843, "grad_norm": 0.1275249719619751, "learning_rate": 9.590646970835046e-06, "loss": 0.3466, "num_tokens": 4494545662.0, "step": 7096 }, { "epoch": 0.839186472744472, "grad_norm": 0.12761040031909943, "learning_rate": 9.585496174967136e-06, "loss": 0.3176, "num_tokens": 4495180809.0, "step": 7097 }, { "epoch": 0.8393047179851011, "grad_norm": 0.12629230320453644, "learning_rate": 9.580348813407508e-06, "loss": 0.3042, "num_tokens": 4495816867.0, "step": 7098 }, { "epoch": 0.8394229632257302, "grad_norm": 0.1393793672323227, "learning_rate": 9.575204886911132e-06, "loss": 0.3623, "num_tokens": 4496446000.0, "step": 7099 }, { "epoch": 0.8395412084663593, "grad_norm": 0.12923385202884674, "learning_rate": 9.570064396232506e-06, "loss": 0.3226, "num_tokens": 4497081305.0, "step": 7100 }, { "epoch": 0.8396594537069882, "grad_norm": 0.14000561833381653, "learning_rate": 9.564927342125599e-06, "loss": 0.3539, "num_tokens": 4497720186.0, "step": 7101 }, { "epoch": 0.8397776989476173, "grad_norm": 0.1363879293203354, "learning_rate": 9.55979372534388e-06, "loss": 0.3221, "num_tokens": 4498356795.0, "step": 7102 }, { "epoch": 0.8398959441882464, "grad_norm": 0.12067767977714539, "learning_rate": 9.554663546640333e-06, "loss": 0.3032, "num_tokens": 4498992721.0, "step": 7103 }, { "epoch": 0.8400141894288755, "grad_norm": 0.132247656583786, "learning_rate": 9.549536806767409e-06, "loss": 0.3002, "num_tokens": 4499627129.0, "step": 7104 }, { "epoch": 0.8401324346695046, "grad_norm": 0.14545109868049622, "learning_rate": 9.544413506477091e-06, "loss": 0.3427, "num_tokens": 4500259265.0, "step": 7105 }, { "epoch": 0.8402506799101336, "grad_norm": 0.1251271814107895, "learning_rate": 9.539293646520807e-06, "loss": 0.321, "num_tokens": 4500892436.0, "step": 7106 }, { "epoch": 0.8403689251507627, "grad_norm": 0.1322944313287735, "learning_rate": 9.534177227649532e-06, "loss": 0.3165, "num_tokens": 4501527595.0, "step": 7107 }, { "epoch": 0.8404871703913918, "grad_norm": 0.15019232034683228, "learning_rate": 9.5290642506137e-06, "loss": 0.3408, "num_tokens": 4502160388.0, "step": 7108 }, { "epoch": 0.8406054156320208, "grad_norm": 0.14302685856819153, "learning_rate": 9.523954716163267e-06, "loss": 0.3582, "num_tokens": 4502795243.0, "step": 7109 }, { "epoch": 0.8407236608726498, "grad_norm": 0.13720358908176422, "learning_rate": 9.51884862504766e-06, "loss": 0.3199, "num_tokens": 4503432478.0, "step": 7110 }, { "epoch": 0.8408419061132789, "grad_norm": 0.1326688975095749, "learning_rate": 9.513745978015815e-06, "loss": 0.3295, "num_tokens": 4504066403.0, "step": 7111 }, { "epoch": 0.840960151353908, "grad_norm": 0.1378660500049591, "learning_rate": 9.508646775816164e-06, "loss": 0.3481, "num_tokens": 4504700752.0, "step": 7112 }, { "epoch": 0.8410783965945371, "grad_norm": 0.11691122502088547, "learning_rate": 9.503551019196625e-06, "loss": 0.2726, "num_tokens": 4505337499.0, "step": 7113 }, { "epoch": 0.8411966418351662, "grad_norm": 0.13642503321170807, "learning_rate": 9.498458708904617e-06, "loss": 0.3428, "num_tokens": 4505970202.0, "step": 7114 }, { "epoch": 0.8413148870757952, "grad_norm": 0.13534903526306152, "learning_rate": 9.493369845687044e-06, "loss": 0.329, "num_tokens": 4506574051.0, "step": 7115 }, { "epoch": 0.8414331323164242, "grad_norm": 0.14487239718437195, "learning_rate": 9.488284430290318e-06, "loss": 0.3326, "num_tokens": 4507204637.0, "step": 7116 }, { "epoch": 0.8415513775570533, "grad_norm": 0.12477918714284897, "learning_rate": 9.483202463460347e-06, "loss": 0.286, "num_tokens": 4507837626.0, "step": 7117 }, { "epoch": 0.8416696227976824, "grad_norm": 0.1250980645418167, "learning_rate": 9.478123945942517e-06, "loss": 0.3014, "num_tokens": 4508465737.0, "step": 7118 }, { "epoch": 0.8417878680383114, "grad_norm": 0.127792090177536, "learning_rate": 9.473048878481717e-06, "loss": 0.3187, "num_tokens": 4509098607.0, "step": 7119 }, { "epoch": 0.8419061132789405, "grad_norm": 0.12725280225276947, "learning_rate": 9.467977261822326e-06, "loss": 0.3173, "num_tokens": 4509734301.0, "step": 7120 }, { "epoch": 0.8420243585195696, "grad_norm": 0.13612040877342224, "learning_rate": 9.462909096708227e-06, "loss": 0.2993, "num_tokens": 4510367147.0, "step": 7121 }, { "epoch": 0.8421426037601987, "grad_norm": 0.1329042762517929, "learning_rate": 9.457844383882789e-06, "loss": 0.3162, "num_tokens": 4510995661.0, "step": 7122 }, { "epoch": 0.8422608490008278, "grad_norm": 0.12048596143722534, "learning_rate": 9.452783124088875e-06, "loss": 0.3522, "num_tokens": 4511628772.0, "step": 7123 }, { "epoch": 0.8423790942414567, "grad_norm": 0.1302330046892166, "learning_rate": 9.447725318068833e-06, "loss": 0.3102, "num_tokens": 4512264978.0, "step": 7124 }, { "epoch": 0.8424973394820858, "grad_norm": 0.13247917592525482, "learning_rate": 9.442670966564526e-06, "loss": 0.3342, "num_tokens": 4512901616.0, "step": 7125 }, { "epoch": 0.8426155847227149, "grad_norm": 0.1444173902273178, "learning_rate": 9.437620070317301e-06, "loss": 0.348, "num_tokens": 4513535803.0, "step": 7126 }, { "epoch": 0.842733829963344, "grad_norm": 0.13277506828308105, "learning_rate": 9.432572630067979e-06, "loss": 0.333, "num_tokens": 4514170584.0, "step": 7127 }, { "epoch": 0.8428520752039731, "grad_norm": 0.1325593888759613, "learning_rate": 9.427528646556905e-06, "loss": 0.3231, "num_tokens": 4514807806.0, "step": 7128 }, { "epoch": 0.8429703204446021, "grad_norm": 0.14087067544460297, "learning_rate": 9.422488120523892e-06, "loss": 0.3405, "num_tokens": 4515444884.0, "step": 7129 }, { "epoch": 0.8430885656852312, "grad_norm": 0.13521364331245422, "learning_rate": 9.417451052708263e-06, "loss": 0.3629, "num_tokens": 4516081185.0, "step": 7130 }, { "epoch": 0.8432068109258603, "grad_norm": 0.13940604031085968, "learning_rate": 9.412417443848828e-06, "loss": 0.3464, "num_tokens": 4516717838.0, "step": 7131 }, { "epoch": 0.8433250561664893, "grad_norm": 0.12416967749595642, "learning_rate": 9.407387294683877e-06, "loss": 0.3157, "num_tokens": 4517351443.0, "step": 7132 }, { "epoch": 0.8434433014071183, "grad_norm": 0.13257424533367157, "learning_rate": 9.40236060595122e-06, "loss": 0.3552, "num_tokens": 4517988135.0, "step": 7133 }, { "epoch": 0.8435615466477474, "grad_norm": 0.13307605683803558, "learning_rate": 9.397337378388134e-06, "loss": 0.3103, "num_tokens": 4518625260.0, "step": 7134 }, { "epoch": 0.8436797918883765, "grad_norm": 0.1309300661087036, "learning_rate": 9.392317612731404e-06, "loss": 0.2997, "num_tokens": 4519219760.0, "step": 7135 }, { "epoch": 0.8437980371290056, "grad_norm": 0.12352846562862396, "learning_rate": 9.387301309717289e-06, "loss": 0.3016, "num_tokens": 4519853322.0, "step": 7136 }, { "epoch": 0.8439162823696347, "grad_norm": 0.13084536790847778, "learning_rate": 9.382288470081561e-06, "loss": 0.3357, "num_tokens": 4520492562.0, "step": 7137 }, { "epoch": 0.8440345276102637, "grad_norm": 0.12281008064746857, "learning_rate": 9.377279094559473e-06, "loss": 0.3217, "num_tokens": 4521123767.0, "step": 7138 }, { "epoch": 0.8441527728508927, "grad_norm": 0.13392004370689392, "learning_rate": 9.37227318388578e-06, "loss": 0.3423, "num_tokens": 4521755399.0, "step": 7139 }, { "epoch": 0.8442710180915218, "grad_norm": 0.12812504172325134, "learning_rate": 9.36727073879471e-06, "loss": 0.3368, "num_tokens": 4522392195.0, "step": 7140 }, { "epoch": 0.8443892633321509, "grad_norm": 0.11858201026916504, "learning_rate": 9.362271760019998e-06, "loss": 0.2892, "num_tokens": 4523028483.0, "step": 7141 }, { "epoch": 0.8445075085727799, "grad_norm": 0.13369633257389069, "learning_rate": 9.357276248294868e-06, "loss": 0.3337, "num_tokens": 4523664800.0, "step": 7142 }, { "epoch": 0.844625753813409, "grad_norm": 0.13482701778411865, "learning_rate": 9.352284204352035e-06, "loss": 0.3365, "num_tokens": 4524299112.0, "step": 7143 }, { "epoch": 0.8447439990540381, "grad_norm": 0.13787882030010223, "learning_rate": 9.347295628923699e-06, "loss": 0.3177, "num_tokens": 4524928216.0, "step": 7144 }, { "epoch": 0.8448622442946672, "grad_norm": 0.1249622255563736, "learning_rate": 9.342310522741551e-06, "loss": 0.298, "num_tokens": 4525567797.0, "step": 7145 }, { "epoch": 0.8449804895352963, "grad_norm": 0.13548579812049866, "learning_rate": 9.337328886536788e-06, "loss": 0.3173, "num_tokens": 4526194942.0, "step": 7146 }, { "epoch": 0.8450987347759252, "grad_norm": 0.14656218886375427, "learning_rate": 9.332350721040094e-06, "loss": 0.3557, "num_tokens": 4526823500.0, "step": 7147 }, { "epoch": 0.8452169800165543, "grad_norm": 0.12304695695638657, "learning_rate": 9.327376026981626e-06, "loss": 0.2962, "num_tokens": 4527457550.0, "step": 7148 }, { "epoch": 0.8453352252571834, "grad_norm": 0.12992353737354279, "learning_rate": 9.322404805091049e-06, "loss": 0.3067, "num_tokens": 4528091796.0, "step": 7149 }, { "epoch": 0.8454534704978125, "grad_norm": 0.13473378121852875, "learning_rate": 9.317437056097513e-06, "loss": 0.2945, "num_tokens": 4528730138.0, "step": 7150 }, { "epoch": 0.8455717157384415, "grad_norm": 0.1282382756471634, "learning_rate": 9.312472780729662e-06, "loss": 0.3109, "num_tokens": 4529356127.0, "step": 7151 }, { "epoch": 0.8456899609790706, "grad_norm": 0.11882302910089493, "learning_rate": 9.307511979715627e-06, "loss": 0.3314, "num_tokens": 4529991916.0, "step": 7152 }, { "epoch": 0.8458082062196997, "grad_norm": 0.13600005209445953, "learning_rate": 9.302554653783026e-06, "loss": 0.3421, "num_tokens": 4530630403.0, "step": 7153 }, { "epoch": 0.8459264514603287, "grad_norm": 0.12627819180488586, "learning_rate": 9.297600803658979e-06, "loss": 0.3322, "num_tokens": 4531260614.0, "step": 7154 }, { "epoch": 0.8460446967009578, "grad_norm": 0.11851624399423599, "learning_rate": 9.292650430070081e-06, "loss": 0.2725, "num_tokens": 4531898911.0, "step": 7155 }, { "epoch": 0.8461629419415868, "grad_norm": 0.12654262781143188, "learning_rate": 9.287703533742444e-06, "loss": 0.2905, "num_tokens": 4532530755.0, "step": 7156 }, { "epoch": 0.8462811871822159, "grad_norm": 0.12850095331668854, "learning_rate": 9.282760115401625e-06, "loss": 0.3412, "num_tokens": 4533162147.0, "step": 7157 }, { "epoch": 0.846399432422845, "grad_norm": 0.1434197574853897, "learning_rate": 9.277820175772712e-06, "loss": 0.3736, "num_tokens": 4533792715.0, "step": 7158 }, { "epoch": 0.8465176776634741, "grad_norm": 0.12604695558547974, "learning_rate": 9.27288371558026e-06, "loss": 0.3299, "num_tokens": 4534426163.0, "step": 7159 }, { "epoch": 0.846635922904103, "grad_norm": 0.12612684071063995, "learning_rate": 9.267950735548332e-06, "loss": 0.3352, "num_tokens": 4535062191.0, "step": 7160 }, { "epoch": 0.8467541681447321, "grad_norm": 0.1355384737253189, "learning_rate": 9.263021236400463e-06, "loss": 0.3289, "num_tokens": 4535701620.0, "step": 7161 }, { "epoch": 0.8468724133853612, "grad_norm": 0.1359146237373352, "learning_rate": 9.258095218859681e-06, "loss": 0.3622, "num_tokens": 4536334759.0, "step": 7162 }, { "epoch": 0.8469906586259903, "grad_norm": 0.12060156464576721, "learning_rate": 9.253172683648516e-06, "loss": 0.3181, "num_tokens": 4536971137.0, "step": 7163 }, { "epoch": 0.8471089038666194, "grad_norm": 0.12857480347156525, "learning_rate": 9.248253631488974e-06, "loss": 0.3041, "num_tokens": 4537605571.0, "step": 7164 }, { "epoch": 0.8472271491072484, "grad_norm": 0.12609156966209412, "learning_rate": 9.243338063102551e-06, "loss": 0.3043, "num_tokens": 4538242421.0, "step": 7165 }, { "epoch": 0.8473453943478775, "grad_norm": 0.1324903815984726, "learning_rate": 9.238425979210231e-06, "loss": 0.291, "num_tokens": 4538871868.0, "step": 7166 }, { "epoch": 0.8474636395885066, "grad_norm": 0.1421802043914795, "learning_rate": 9.233517380532496e-06, "loss": 0.3813, "num_tokens": 4539508490.0, "step": 7167 }, { "epoch": 0.8475818848291357, "grad_norm": 0.12589183449745178, "learning_rate": 9.22861226778932e-06, "loss": 0.3085, "num_tokens": 4540137221.0, "step": 7168 }, { "epoch": 0.8477001300697647, "grad_norm": 0.13519439101219177, "learning_rate": 9.223710641700148e-06, "loss": 0.3478, "num_tokens": 4540770590.0, "step": 7169 }, { "epoch": 0.8478183753103937, "grad_norm": 0.12688513100147247, "learning_rate": 9.218812502983924e-06, "loss": 0.2847, "num_tokens": 4541400856.0, "step": 7170 }, { "epoch": 0.8479366205510228, "grad_norm": 0.1258152425289154, "learning_rate": 9.213917852359077e-06, "loss": 0.2935, "num_tokens": 4542038294.0, "step": 7171 }, { "epoch": 0.8480548657916519, "grad_norm": 0.12759941816329956, "learning_rate": 9.209026690543538e-06, "loss": 0.2674, "num_tokens": 4542672403.0, "step": 7172 }, { "epoch": 0.848173111032281, "grad_norm": 0.12690867483615875, "learning_rate": 9.204139018254702e-06, "loss": 0.3229, "num_tokens": 4543297740.0, "step": 7173 }, { "epoch": 0.84829135627291, "grad_norm": 0.13337557017803192, "learning_rate": 9.199254836209471e-06, "loss": 0.2935, "num_tokens": 4543932265.0, "step": 7174 }, { "epoch": 0.8484096015135391, "grad_norm": 0.12593312561511993, "learning_rate": 9.194374145124232e-06, "loss": 0.3171, "num_tokens": 4544558561.0, "step": 7175 }, { "epoch": 0.8485278467541681, "grad_norm": 0.13391803205013275, "learning_rate": 9.189496945714852e-06, "loss": 0.3214, "num_tokens": 4545195171.0, "step": 7176 }, { "epoch": 0.8486460919947972, "grad_norm": 0.1298227310180664, "learning_rate": 9.184623238696698e-06, "loss": 0.3409, "num_tokens": 4545833242.0, "step": 7177 }, { "epoch": 0.8487643372354263, "grad_norm": 0.1289445459842682, "learning_rate": 9.179753024784612e-06, "loss": 0.3262, "num_tokens": 4546469560.0, "step": 7178 }, { "epoch": 0.8488825824760553, "grad_norm": 0.13720116019248962, "learning_rate": 9.174886304692936e-06, "loss": 0.343, "num_tokens": 4547105799.0, "step": 7179 }, { "epoch": 0.8490008277166844, "grad_norm": 0.12136892974376678, "learning_rate": 9.170023079135484e-06, "loss": 0.3041, "num_tokens": 4547741771.0, "step": 7180 }, { "epoch": 0.8491190729573135, "grad_norm": 0.14014802873134613, "learning_rate": 9.165163348825573e-06, "loss": 0.3422, "num_tokens": 4548371742.0, "step": 7181 }, { "epoch": 0.8492373181979426, "grad_norm": 0.1272333562374115, "learning_rate": 9.160307114476003e-06, "loss": 0.2906, "num_tokens": 4549002012.0, "step": 7182 }, { "epoch": 0.8493555634385715, "grad_norm": 0.13158828020095825, "learning_rate": 9.15545437679905e-06, "loss": 0.3272, "num_tokens": 4549637735.0, "step": 7183 }, { "epoch": 0.8494738086792006, "grad_norm": 0.12674345076084137, "learning_rate": 9.1506051365065e-06, "loss": 0.3364, "num_tokens": 4550274163.0, "step": 7184 }, { "epoch": 0.8495920539198297, "grad_norm": 0.12989653646945953, "learning_rate": 9.145759394309598e-06, "loss": 0.317, "num_tokens": 4550901143.0, "step": 7185 }, { "epoch": 0.8497102991604588, "grad_norm": 0.12057428807020187, "learning_rate": 9.140917150919107e-06, "loss": 0.2937, "num_tokens": 4551529135.0, "step": 7186 }, { "epoch": 0.8498285444010879, "grad_norm": 0.1287367045879364, "learning_rate": 9.136078407045239e-06, "loss": 0.3516, "num_tokens": 4552164724.0, "step": 7187 }, { "epoch": 0.8499467896417169, "grad_norm": 0.13454125821590424, "learning_rate": 9.131243163397732e-06, "loss": 0.3295, "num_tokens": 4552797925.0, "step": 7188 }, { "epoch": 0.850065034882346, "grad_norm": 0.1298028975725174, "learning_rate": 9.126411420685782e-06, "loss": 0.3652, "num_tokens": 4553427195.0, "step": 7189 }, { "epoch": 0.8501832801229751, "grad_norm": 0.1396150439977646, "learning_rate": 9.121583179618091e-06, "loss": 0.3665, "num_tokens": 4554053459.0, "step": 7190 }, { "epoch": 0.8503015253636041, "grad_norm": 0.13265785574913025, "learning_rate": 9.116758440902832e-06, "loss": 0.3181, "num_tokens": 4554688583.0, "step": 7191 }, { "epoch": 0.8504197706042331, "grad_norm": 0.120753712952137, "learning_rate": 9.111937205247665e-06, "loss": 0.3121, "num_tokens": 4555327375.0, "step": 7192 }, { "epoch": 0.8505380158448622, "grad_norm": 0.13688598573207855, "learning_rate": 9.107119473359758e-06, "loss": 0.3397, "num_tokens": 4555954338.0, "step": 7193 }, { "epoch": 0.8506562610854913, "grad_norm": 0.12465313076972961, "learning_rate": 9.102305245945735e-06, "loss": 0.2977, "num_tokens": 4556587586.0, "step": 7194 }, { "epoch": 0.8507745063261204, "grad_norm": 0.13854438066482544, "learning_rate": 9.097494523711722e-06, "loss": 0.3489, "num_tokens": 4557219538.0, "step": 7195 }, { "epoch": 0.8508927515667495, "grad_norm": 0.1196054220199585, "learning_rate": 9.092687307363336e-06, "loss": 0.3031, "num_tokens": 4557851792.0, "step": 7196 }, { "epoch": 0.8510109968073785, "grad_norm": 0.12531518936157227, "learning_rate": 9.08788359760566e-06, "loss": 0.3094, "num_tokens": 4558489267.0, "step": 7197 }, { "epoch": 0.8511292420480076, "grad_norm": 0.1412142664194107, "learning_rate": 9.083083395143292e-06, "loss": 0.3062, "num_tokens": 4559127633.0, "step": 7198 }, { "epoch": 0.8512474872886366, "grad_norm": 0.13349416851997375, "learning_rate": 9.078286700680286e-06, "loss": 0.3598, "num_tokens": 4559764630.0, "step": 7199 }, { "epoch": 0.8513657325292657, "grad_norm": 0.11842963844537735, "learning_rate": 9.073493514920202e-06, "loss": 0.3149, "num_tokens": 4560401386.0, "step": 7200 }, { "epoch": 0.8514839777698947, "grad_norm": 0.12795104086399078, "learning_rate": 9.068703838566067e-06, "loss": 0.2944, "num_tokens": 4561034014.0, "step": 7201 }, { "epoch": 0.8516022230105238, "grad_norm": 0.1296541690826416, "learning_rate": 9.063917672320414e-06, "loss": 0.34, "num_tokens": 4561670430.0, "step": 7202 }, { "epoch": 0.8517204682511529, "grad_norm": 0.1275460124015808, "learning_rate": 9.059135016885251e-06, "loss": 0.342, "num_tokens": 4562297602.0, "step": 7203 }, { "epoch": 0.851838713491782, "grad_norm": 0.1247381865978241, "learning_rate": 9.054355872962058e-06, "loss": 0.3077, "num_tokens": 4562927672.0, "step": 7204 }, { "epoch": 0.8519569587324111, "grad_norm": 0.13478238880634308, "learning_rate": 9.04958024125183e-06, "loss": 0.3276, "num_tokens": 4563566020.0, "step": 7205 }, { "epoch": 0.85207520397304, "grad_norm": 0.129652202129364, "learning_rate": 9.04480812245502e-06, "loss": 0.3237, "num_tokens": 4564196968.0, "step": 7206 }, { "epoch": 0.8521934492136691, "grad_norm": 0.1414068043231964, "learning_rate": 9.04003951727159e-06, "loss": 0.3643, "num_tokens": 4564833723.0, "step": 7207 }, { "epoch": 0.8523116944542982, "grad_norm": 0.13696357607841492, "learning_rate": 9.035274426400945e-06, "loss": 0.3329, "num_tokens": 4565467161.0, "step": 7208 }, { "epoch": 0.8524299396949273, "grad_norm": 0.13971206545829773, "learning_rate": 9.030512850542027e-06, "loss": 0.3411, "num_tokens": 4566097443.0, "step": 7209 }, { "epoch": 0.8525481849355564, "grad_norm": 0.13369347155094147, "learning_rate": 9.02575479039322e-06, "loss": 0.3826, "num_tokens": 4566736577.0, "step": 7210 }, { "epoch": 0.8526664301761854, "grad_norm": 0.12694859504699707, "learning_rate": 9.021000246652422e-06, "loss": 0.3491, "num_tokens": 4567364774.0, "step": 7211 }, { "epoch": 0.8527846754168145, "grad_norm": 0.13170109689235687, "learning_rate": 9.016249220017002e-06, "loss": 0.3363, "num_tokens": 4567999141.0, "step": 7212 }, { "epoch": 0.8529029206574436, "grad_norm": 0.14641666412353516, "learning_rate": 9.011501711183802e-06, "loss": 0.3348, "num_tokens": 4568633911.0, "step": 7213 }, { "epoch": 0.8530211658980726, "grad_norm": 0.1281093806028366, "learning_rate": 9.006757720849174e-06, "loss": 0.3033, "num_tokens": 4569271050.0, "step": 7214 }, { "epoch": 0.8531394111387016, "grad_norm": 0.12395498901605606, "learning_rate": 9.00201724970893e-06, "loss": 0.3124, "num_tokens": 4569904783.0, "step": 7215 }, { "epoch": 0.8532576563793307, "grad_norm": 0.1255914568901062, "learning_rate": 8.997280298458391e-06, "loss": 0.3112, "num_tokens": 4570542639.0, "step": 7216 }, { "epoch": 0.8533759016199598, "grad_norm": 0.13013947010040283, "learning_rate": 8.992546867792322e-06, "loss": 0.3116, "num_tokens": 4571172988.0, "step": 7217 }, { "epoch": 0.8534941468605889, "grad_norm": 0.1306982785463333, "learning_rate": 8.987816958405013e-06, "loss": 0.3423, "num_tokens": 4571805636.0, "step": 7218 }, { "epoch": 0.853612392101218, "grad_norm": 0.1364063024520874, "learning_rate": 8.98309057099022e-06, "loss": 0.3547, "num_tokens": 4572443472.0, "step": 7219 }, { "epoch": 0.853730637341847, "grad_norm": 0.12869755923748016, "learning_rate": 8.97836770624118e-06, "loss": 0.303, "num_tokens": 4573082675.0, "step": 7220 }, { "epoch": 0.853848882582476, "grad_norm": 0.11801758408546448, "learning_rate": 8.973648364850619e-06, "loss": 0.2968, "num_tokens": 4573719370.0, "step": 7221 }, { "epoch": 0.8539671278231051, "grad_norm": 0.13483695685863495, "learning_rate": 8.968932547510734e-06, "loss": 0.326, "num_tokens": 4574353900.0, "step": 7222 }, { "epoch": 0.8540853730637342, "grad_norm": 0.19094835221767426, "learning_rate": 8.964220254913227e-06, "loss": 0.3474, "num_tokens": 4574962953.0, "step": 7223 }, { "epoch": 0.8542036183043632, "grad_norm": 0.1266838014125824, "learning_rate": 8.959511487749267e-06, "loss": 0.3129, "num_tokens": 4575593128.0, "step": 7224 }, { "epoch": 0.8543218635449923, "grad_norm": 0.12750506401062012, "learning_rate": 8.9548062467095e-06, "loss": 0.3267, "num_tokens": 4576229909.0, "step": 7225 }, { "epoch": 0.8544401087856214, "grad_norm": 0.13043497502803802, "learning_rate": 8.950104532484083e-06, "loss": 0.3528, "num_tokens": 4576863460.0, "step": 7226 }, { "epoch": 0.8545583540262505, "grad_norm": 0.13396504521369934, "learning_rate": 8.945406345762616e-06, "loss": 0.3295, "num_tokens": 4577495230.0, "step": 7227 }, { "epoch": 0.8546765992668796, "grad_norm": 0.12963752448558807, "learning_rate": 8.94071168723422e-06, "loss": 0.3295, "num_tokens": 4578134722.0, "step": 7228 }, { "epoch": 0.8547948445075085, "grad_norm": 0.12389910221099854, "learning_rate": 8.936020557587475e-06, "loss": 0.3132, "num_tokens": 4578770812.0, "step": 7229 }, { "epoch": 0.8549130897481376, "grad_norm": 0.12220277637243271, "learning_rate": 8.931332957510446e-06, "loss": 0.3178, "num_tokens": 4579407323.0, "step": 7230 }, { "epoch": 0.8550313349887667, "grad_norm": 0.1211710125207901, "learning_rate": 8.92664888769069e-06, "loss": 0.3054, "num_tokens": 4580045594.0, "step": 7231 }, { "epoch": 0.8551495802293958, "grad_norm": 0.11857175081968307, "learning_rate": 8.921968348815235e-06, "loss": 0.2927, "num_tokens": 4580679103.0, "step": 7232 }, { "epoch": 0.8552678254700248, "grad_norm": 0.14043863117694855, "learning_rate": 8.917291341570599e-06, "loss": 0.3477, "num_tokens": 4581310937.0, "step": 7233 }, { "epoch": 0.8553860707106539, "grad_norm": 0.13112251460552216, "learning_rate": 8.912617866642776e-06, "loss": 0.3247, "num_tokens": 4581948606.0, "step": 7234 }, { "epoch": 0.855504315951283, "grad_norm": 0.11836176365613937, "learning_rate": 8.907947924717253e-06, "loss": 0.3034, "num_tokens": 4582586167.0, "step": 7235 }, { "epoch": 0.855622561191912, "grad_norm": 0.12779119610786438, "learning_rate": 8.90328151647898e-06, "loss": 0.3198, "num_tokens": 4583220971.0, "step": 7236 }, { "epoch": 0.8557408064325411, "grad_norm": 0.13929222524166107, "learning_rate": 8.898618642612412e-06, "loss": 0.3324, "num_tokens": 4583819587.0, "step": 7237 }, { "epoch": 0.8558590516731701, "grad_norm": 0.12641964852809906, "learning_rate": 8.89395930380146e-06, "loss": 0.3353, "num_tokens": 4584453513.0, "step": 7238 }, { "epoch": 0.8559772969137992, "grad_norm": 0.12840180099010468, "learning_rate": 8.889303500729537e-06, "loss": 0.3249, "num_tokens": 4585089536.0, "step": 7239 }, { "epoch": 0.8560955421544283, "grad_norm": 0.12757714092731476, "learning_rate": 8.884651234079531e-06, "loss": 0.3034, "num_tokens": 4585723625.0, "step": 7240 }, { "epoch": 0.8562137873950574, "grad_norm": 0.12758047878742218, "learning_rate": 8.880002504533811e-06, "loss": 0.3212, "num_tokens": 4586362739.0, "step": 7241 }, { "epoch": 0.8563320326356865, "grad_norm": 0.13377583026885986, "learning_rate": 8.875357312774224e-06, "loss": 0.316, "num_tokens": 4586998822.0, "step": 7242 }, { "epoch": 0.8564502778763154, "grad_norm": 0.12812921404838562, "learning_rate": 8.870715659482096e-06, "loss": 0.3422, "num_tokens": 4587622781.0, "step": 7243 }, { "epoch": 0.8565685231169445, "grad_norm": 0.13080938160419464, "learning_rate": 8.866077545338246e-06, "loss": 0.297, "num_tokens": 4588254808.0, "step": 7244 }, { "epoch": 0.8566867683575736, "grad_norm": 0.12945681810379028, "learning_rate": 8.861442971022964e-06, "loss": 0.3072, "num_tokens": 4588889591.0, "step": 7245 }, { "epoch": 0.8568050135982027, "grad_norm": 0.13848505914211273, "learning_rate": 8.856811937216028e-06, "loss": 0.3323, "num_tokens": 4589520925.0, "step": 7246 }, { "epoch": 0.8569232588388317, "grad_norm": 0.1341945379972458, "learning_rate": 8.852184444596685e-06, "loss": 0.347, "num_tokens": 4590160037.0, "step": 7247 }, { "epoch": 0.8570415040794608, "grad_norm": 0.12497184425592422, "learning_rate": 8.84756049384367e-06, "loss": 0.3165, "num_tokens": 4590795682.0, "step": 7248 }, { "epoch": 0.8571597493200899, "grad_norm": 0.12216004729270935, "learning_rate": 8.842940085635206e-06, "loss": 0.3026, "num_tokens": 4591418828.0, "step": 7249 }, { "epoch": 0.857277994560719, "grad_norm": 0.14526574313640594, "learning_rate": 8.838323220648985e-06, "loss": 0.3702, "num_tokens": 4592048164.0, "step": 7250 }, { "epoch": 0.857396239801348, "grad_norm": 0.12391407042741776, "learning_rate": 8.833709899562178e-06, "loss": 0.3013, "num_tokens": 4592681910.0, "step": 7251 }, { "epoch": 0.857514485041977, "grad_norm": 0.132630854845047, "learning_rate": 8.82910012305144e-06, "loss": 0.3163, "num_tokens": 4593315365.0, "step": 7252 }, { "epoch": 0.8576327302826061, "grad_norm": 0.12389779090881348, "learning_rate": 8.824493891792912e-06, "loss": 0.3188, "num_tokens": 4593949148.0, "step": 7253 }, { "epoch": 0.8577509755232352, "grad_norm": 0.1248469203710556, "learning_rate": 8.81989120646222e-06, "loss": 0.2622, "num_tokens": 4594584750.0, "step": 7254 }, { "epoch": 0.8578692207638643, "grad_norm": 0.13290776312351227, "learning_rate": 8.81529206773444e-06, "loss": 0.2853, "num_tokens": 4595218296.0, "step": 7255 }, { "epoch": 0.8579874660044933, "grad_norm": 0.12840574979782104, "learning_rate": 8.810696476284159e-06, "loss": 0.2885, "num_tokens": 4595856768.0, "step": 7256 }, { "epoch": 0.8581057112451224, "grad_norm": 0.1488383710384369, "learning_rate": 8.806104432785433e-06, "loss": 0.3504, "num_tokens": 4596486324.0, "step": 7257 }, { "epoch": 0.8582239564857514, "grad_norm": 0.13568533957004547, "learning_rate": 8.801515937911796e-06, "loss": 0.3212, "num_tokens": 4597119872.0, "step": 7258 }, { "epoch": 0.8583422017263805, "grad_norm": 0.12779875099658966, "learning_rate": 8.796930992336263e-06, "loss": 0.3035, "num_tokens": 4597753771.0, "step": 7259 }, { "epoch": 0.8584604469670096, "grad_norm": 0.13668116927146912, "learning_rate": 8.792349596731324e-06, "loss": 0.3526, "num_tokens": 4598388392.0, "step": 7260 }, { "epoch": 0.8585786922076386, "grad_norm": 0.1242004856467247, "learning_rate": 8.78777175176896e-06, "loss": 0.309, "num_tokens": 4599021399.0, "step": 7261 }, { "epoch": 0.8586969374482677, "grad_norm": 0.1311301440000534, "learning_rate": 8.78319745812062e-06, "loss": 0.3251, "num_tokens": 4599658672.0, "step": 7262 }, { "epoch": 0.8588151826888968, "grad_norm": 0.13225553929805756, "learning_rate": 8.778626716457234e-06, "loss": 0.3076, "num_tokens": 4600291998.0, "step": 7263 }, { "epoch": 0.8589334279295259, "grad_norm": 0.12186132371425629, "learning_rate": 8.774059527449211e-06, "loss": 0.3025, "num_tokens": 4600921895.0, "step": 7264 }, { "epoch": 0.8590516731701549, "grad_norm": 0.13786593079566956, "learning_rate": 8.76949589176645e-06, "loss": 0.3792, "num_tokens": 4601556391.0, "step": 7265 }, { "epoch": 0.8591699184107839, "grad_norm": 0.136198028922081, "learning_rate": 8.764935810078305e-06, "loss": 0.3203, "num_tokens": 4602191753.0, "step": 7266 }, { "epoch": 0.859288163651413, "grad_norm": 0.12084567546844482, "learning_rate": 8.760379283053638e-06, "loss": 0.2999, "num_tokens": 4602828645.0, "step": 7267 }, { "epoch": 0.8594064088920421, "grad_norm": 0.1384076476097107, "learning_rate": 8.755826311360768e-06, "loss": 0.3499, "num_tokens": 4603466366.0, "step": 7268 }, { "epoch": 0.8595246541326712, "grad_norm": 0.12600135803222656, "learning_rate": 8.751276895667495e-06, "loss": 0.3242, "num_tokens": 4604103174.0, "step": 7269 }, { "epoch": 0.8596428993733002, "grad_norm": 0.11840786784887314, "learning_rate": 8.74673103664111e-06, "loss": 0.2893, "num_tokens": 4604734286.0, "step": 7270 }, { "epoch": 0.8597611446139293, "grad_norm": 0.13508251309394836, "learning_rate": 8.742188734948377e-06, "loss": 0.3577, "num_tokens": 4605370934.0, "step": 7271 }, { "epoch": 0.8598793898545584, "grad_norm": 0.12492526322603226, "learning_rate": 8.737649991255524e-06, "loss": 0.2958, "num_tokens": 4606001316.0, "step": 7272 }, { "epoch": 0.8599976350951875, "grad_norm": 0.1201426163315773, "learning_rate": 8.73311480622827e-06, "loss": 0.2975, "num_tokens": 4606638551.0, "step": 7273 }, { "epoch": 0.8601158803358164, "grad_norm": 0.1265263557434082, "learning_rate": 8.728583180531813e-06, "loss": 0.3345, "num_tokens": 4607272167.0, "step": 7274 }, { "epoch": 0.8602341255764455, "grad_norm": 0.13572274148464203, "learning_rate": 8.724055114830837e-06, "loss": 0.3335, "num_tokens": 4607899529.0, "step": 7275 }, { "epoch": 0.8603523708170746, "grad_norm": 0.12243066728115082, "learning_rate": 8.719530609789477e-06, "loss": 0.3237, "num_tokens": 4608535200.0, "step": 7276 }, { "epoch": 0.8604706160577037, "grad_norm": 0.11803536117076874, "learning_rate": 8.715009666071375e-06, "loss": 0.2975, "num_tokens": 4609171160.0, "step": 7277 }, { "epoch": 0.8605888612983328, "grad_norm": 0.13681021332740784, "learning_rate": 8.710492284339623e-06, "loss": 0.3519, "num_tokens": 4609808133.0, "step": 7278 }, { "epoch": 0.8607071065389618, "grad_norm": 0.13088729977607727, "learning_rate": 8.705978465256822e-06, "loss": 0.3174, "num_tokens": 4610433005.0, "step": 7279 }, { "epoch": 0.8608253517795909, "grad_norm": 0.12524840235710144, "learning_rate": 8.701468209485024e-06, "loss": 0.3131, "num_tokens": 4611068954.0, "step": 7280 }, { "epoch": 0.8609435970202199, "grad_norm": 0.13051484525203705, "learning_rate": 8.696961517685768e-06, "loss": 0.3129, "num_tokens": 4611707697.0, "step": 7281 }, { "epoch": 0.861061842260849, "grad_norm": 0.12407872080802917, "learning_rate": 8.692458390520068e-06, "loss": 0.2964, "num_tokens": 4612346335.0, "step": 7282 }, { "epoch": 0.8611800875014781, "grad_norm": 0.11662879586219788, "learning_rate": 8.687958828648425e-06, "loss": 0.294, "num_tokens": 4612981568.0, "step": 7283 }, { "epoch": 0.8612983327421071, "grad_norm": 0.1281258910894394, "learning_rate": 8.683462832730813e-06, "loss": 0.3214, "num_tokens": 4613618626.0, "step": 7284 }, { "epoch": 0.8614165779827362, "grad_norm": 0.1212584599852562, "learning_rate": 8.678970403426665e-06, "loss": 0.3201, "num_tokens": 4614253229.0, "step": 7285 }, { "epoch": 0.8615348232233653, "grad_norm": 0.12880779802799225, "learning_rate": 8.674481541394916e-06, "loss": 0.2947, "num_tokens": 4614871774.0, "step": 7286 }, { "epoch": 0.8616530684639944, "grad_norm": 0.13165755569934845, "learning_rate": 8.669996247293958e-06, "loss": 0.3321, "num_tokens": 4615506568.0, "step": 7287 }, { "epoch": 0.8617713137046233, "grad_norm": 0.13202685117721558, "learning_rate": 8.665514521781683e-06, "loss": 0.3503, "num_tokens": 4616135725.0, "step": 7288 }, { "epoch": 0.8618895589452524, "grad_norm": 0.11507639288902283, "learning_rate": 8.661036365515433e-06, "loss": 0.3141, "num_tokens": 4616774721.0, "step": 7289 }, { "epoch": 0.8620078041858815, "grad_norm": 0.11586082726716995, "learning_rate": 8.656561779152041e-06, "loss": 0.3111, "num_tokens": 4617405549.0, "step": 7290 }, { "epoch": 0.8621260494265106, "grad_norm": 0.12472735345363617, "learning_rate": 8.652090763347822e-06, "loss": 0.3088, "num_tokens": 4618037728.0, "step": 7291 }, { "epoch": 0.8622442946671397, "grad_norm": 0.13152234256267548, "learning_rate": 8.647623318758555e-06, "loss": 0.3245, "num_tokens": 4618668724.0, "step": 7292 }, { "epoch": 0.8623625399077687, "grad_norm": 0.12133363634347916, "learning_rate": 8.643159446039496e-06, "loss": 0.3163, "num_tokens": 4619304425.0, "step": 7293 }, { "epoch": 0.8624807851483978, "grad_norm": 0.13580965995788574, "learning_rate": 8.638699145845378e-06, "loss": 0.2863, "num_tokens": 4619939492.0, "step": 7294 }, { "epoch": 0.8625990303890269, "grad_norm": 0.12819118797779083, "learning_rate": 8.634242418830426e-06, "loss": 0.2898, "num_tokens": 4620574473.0, "step": 7295 }, { "epoch": 0.8627172756296559, "grad_norm": 0.1317034661769867, "learning_rate": 8.629789265648318e-06, "loss": 0.3132, "num_tokens": 4621214098.0, "step": 7296 }, { "epoch": 0.8628355208702849, "grad_norm": 0.1261962205171585, "learning_rate": 8.62533968695222e-06, "loss": 0.2933, "num_tokens": 4621844896.0, "step": 7297 }, { "epoch": 0.862953766110914, "grad_norm": 0.12493127584457397, "learning_rate": 8.620893683394772e-06, "loss": 0.3281, "num_tokens": 4622480165.0, "step": 7298 }, { "epoch": 0.8630720113515431, "grad_norm": 0.1250608116388321, "learning_rate": 8.616451255628081e-06, "loss": 0.3165, "num_tokens": 4623118156.0, "step": 7299 }, { "epoch": 0.8631902565921722, "grad_norm": 0.1323828399181366, "learning_rate": 8.612012404303753e-06, "loss": 0.347, "num_tokens": 4623753159.0, "step": 7300 }, { "epoch": 0.8633085018328013, "grad_norm": 0.12320327758789062, "learning_rate": 8.607577130072843e-06, "loss": 0.2838, "num_tokens": 4624388586.0, "step": 7301 }, { "epoch": 0.8634267470734303, "grad_norm": 0.13055746257305145, "learning_rate": 8.603145433585895e-06, "loss": 0.3009, "num_tokens": 4625023321.0, "step": 7302 }, { "epoch": 0.8635449923140593, "grad_norm": 0.1371437907218933, "learning_rate": 8.598717315492918e-06, "loss": 0.3492, "num_tokens": 4625662229.0, "step": 7303 }, { "epoch": 0.8636632375546884, "grad_norm": 0.12503276765346527, "learning_rate": 8.594292776443413e-06, "loss": 0.2947, "num_tokens": 4626298841.0, "step": 7304 }, { "epoch": 0.8637814827953175, "grad_norm": 0.13779622316360474, "learning_rate": 8.589871817086353e-06, "loss": 0.3197, "num_tokens": 4626936733.0, "step": 7305 }, { "epoch": 0.8638997280359465, "grad_norm": 0.13313865661621094, "learning_rate": 8.585454438070162e-06, "loss": 0.3374, "num_tokens": 4627564520.0, "step": 7306 }, { "epoch": 0.8640179732765756, "grad_norm": 0.13896046578884125, "learning_rate": 8.581040640042768e-06, "loss": 0.3618, "num_tokens": 4628198430.0, "step": 7307 }, { "epoch": 0.8641362185172047, "grad_norm": 0.13296565413475037, "learning_rate": 8.576630423651557e-06, "loss": 0.3364, "num_tokens": 4628828911.0, "step": 7308 }, { "epoch": 0.8642544637578338, "grad_norm": 0.13878871500492096, "learning_rate": 8.5722237895434e-06, "loss": 0.3196, "num_tokens": 4629465009.0, "step": 7309 }, { "epoch": 0.8643727089984629, "grad_norm": 0.12160852551460266, "learning_rate": 8.567820738364633e-06, "loss": 0.2979, "num_tokens": 4630104051.0, "step": 7310 }, { "epoch": 0.8644909542390918, "grad_norm": 0.12265296280384064, "learning_rate": 8.563421270761069e-06, "loss": 0.3217, "num_tokens": 4630735675.0, "step": 7311 }, { "epoch": 0.8646091994797209, "grad_norm": 0.14521615207195282, "learning_rate": 8.559025387378005e-06, "loss": 0.3262, "num_tokens": 4631363819.0, "step": 7312 }, { "epoch": 0.86472744472035, "grad_norm": 0.11895240843296051, "learning_rate": 8.5546330888602e-06, "loss": 0.303, "num_tokens": 4632003502.0, "step": 7313 }, { "epoch": 0.8648456899609791, "grad_norm": 0.1364382803440094, "learning_rate": 8.550244375851892e-06, "loss": 0.3146, "num_tokens": 4632633664.0, "step": 7314 }, { "epoch": 0.8649639352016081, "grad_norm": 0.12675973773002625, "learning_rate": 8.545859248996787e-06, "loss": 0.3281, "num_tokens": 4633270159.0, "step": 7315 }, { "epoch": 0.8650821804422372, "grad_norm": 0.13340766727924347, "learning_rate": 8.541477708938083e-06, "loss": 0.3488, "num_tokens": 4633902407.0, "step": 7316 }, { "epoch": 0.8652004256828663, "grad_norm": 0.12668859958648682, "learning_rate": 8.537099756318426e-06, "loss": 0.3286, "num_tokens": 4634532008.0, "step": 7317 }, { "epoch": 0.8653186709234953, "grad_norm": 0.13295981287956238, "learning_rate": 8.532725391779964e-06, "loss": 0.3353, "num_tokens": 4635162649.0, "step": 7318 }, { "epoch": 0.8654369161641244, "grad_norm": 0.12789876759052277, "learning_rate": 8.528354615964296e-06, "loss": 0.3292, "num_tokens": 4635790915.0, "step": 7319 }, { "epoch": 0.8655551614047534, "grad_norm": 0.1302398294210434, "learning_rate": 8.5239874295125e-06, "loss": 0.3553, "num_tokens": 4636429883.0, "step": 7320 }, { "epoch": 0.8656734066453825, "grad_norm": 0.12465158849954605, "learning_rate": 8.51962383306514e-06, "loss": 0.2861, "num_tokens": 4637066463.0, "step": 7321 }, { "epoch": 0.8657916518860116, "grad_norm": 0.14367921650409698, "learning_rate": 8.515263827262234e-06, "loss": 0.3341, "num_tokens": 4637702641.0, "step": 7322 }, { "epoch": 0.8659098971266407, "grad_norm": 0.12562347948551178, "learning_rate": 8.510907412743293e-06, "loss": 0.3041, "num_tokens": 4638336988.0, "step": 7323 }, { "epoch": 0.8660281423672698, "grad_norm": 0.12337248027324677, "learning_rate": 8.506554590147282e-06, "loss": 0.2859, "num_tokens": 4638971571.0, "step": 7324 }, { "epoch": 0.8661463876078987, "grad_norm": 0.12582571804523468, "learning_rate": 8.502205360112647e-06, "loss": 0.3171, "num_tokens": 4639584476.0, "step": 7325 }, { "epoch": 0.8662646328485278, "grad_norm": 0.1342136114835739, "learning_rate": 8.497859723277322e-06, "loss": 0.3226, "num_tokens": 4640217722.0, "step": 7326 }, { "epoch": 0.8663828780891569, "grad_norm": 0.139348566532135, "learning_rate": 8.493517680278695e-06, "loss": 0.3521, "num_tokens": 4640851714.0, "step": 7327 }, { "epoch": 0.866501123329786, "grad_norm": 0.13658316433429718, "learning_rate": 8.489179231753626e-06, "loss": 0.3445, "num_tokens": 4641484963.0, "step": 7328 }, { "epoch": 0.866619368570415, "grad_norm": 0.11705240607261658, "learning_rate": 8.484844378338458e-06, "loss": 0.2965, "num_tokens": 4642119846.0, "step": 7329 }, { "epoch": 0.8667376138110441, "grad_norm": 0.12827494740486145, "learning_rate": 8.48051312066901e-06, "loss": 0.3215, "num_tokens": 4642751078.0, "step": 7330 }, { "epoch": 0.8668558590516732, "grad_norm": 0.13348063826560974, "learning_rate": 8.476185459380556e-06, "loss": 0.3347, "num_tokens": 4643386442.0, "step": 7331 }, { "epoch": 0.8669741042923023, "grad_norm": 0.13791252672672272, "learning_rate": 8.471861395107855e-06, "loss": 0.3512, "num_tokens": 4644020850.0, "step": 7332 }, { "epoch": 0.8670923495329314, "grad_norm": 0.12756404280662537, "learning_rate": 8.467540928485146e-06, "loss": 0.3102, "num_tokens": 4644655819.0, "step": 7333 }, { "epoch": 0.8672105947735603, "grad_norm": 0.1312532126903534, "learning_rate": 8.463224060146117e-06, "loss": 0.326, "num_tokens": 4645292040.0, "step": 7334 }, { "epoch": 0.8673288400141894, "grad_norm": 0.12358541786670685, "learning_rate": 8.45891079072396e-06, "loss": 0.3122, "num_tokens": 4645922692.0, "step": 7335 }, { "epoch": 0.8674470852548185, "grad_norm": 0.1270725429058075, "learning_rate": 8.454601120851301e-06, "loss": 0.2871, "num_tokens": 4646559455.0, "step": 7336 }, { "epoch": 0.8675653304954476, "grad_norm": 0.13504837453365326, "learning_rate": 8.450295051160272e-06, "loss": 0.343, "num_tokens": 4647190529.0, "step": 7337 }, { "epoch": 0.8676835757360766, "grad_norm": 0.1354321539402008, "learning_rate": 8.445992582282452e-06, "loss": 0.3195, "num_tokens": 4647825682.0, "step": 7338 }, { "epoch": 0.8678018209767057, "grad_norm": 0.1233978345990181, "learning_rate": 8.441693714848917e-06, "loss": 0.2842, "num_tokens": 4648461177.0, "step": 7339 }, { "epoch": 0.8679200662173348, "grad_norm": 0.12888219952583313, "learning_rate": 8.437398449490193e-06, "loss": 0.3328, "num_tokens": 4649100485.0, "step": 7340 }, { "epoch": 0.8680383114579638, "grad_norm": 0.13651101291179657, "learning_rate": 8.433106786836281e-06, "loss": 0.3328, "num_tokens": 4649731797.0, "step": 7341 }, { "epoch": 0.8681565566985929, "grad_norm": 0.13357298076152802, "learning_rate": 8.428818727516667e-06, "loss": 0.2917, "num_tokens": 4650365239.0, "step": 7342 }, { "epoch": 0.8682748019392219, "grad_norm": 0.12341143935918808, "learning_rate": 8.424534272160297e-06, "loss": 0.3107, "num_tokens": 4650988318.0, "step": 7343 }, { "epoch": 0.868393047179851, "grad_norm": 0.12544292211532593, "learning_rate": 8.42025342139559e-06, "loss": 0.3176, "num_tokens": 4651625914.0, "step": 7344 }, { "epoch": 0.8685112924204801, "grad_norm": 0.12296797335147858, "learning_rate": 8.415976175850431e-06, "loss": 0.2958, "num_tokens": 4652264745.0, "step": 7345 }, { "epoch": 0.8686295376611092, "grad_norm": 0.14044329524040222, "learning_rate": 8.411702536152187e-06, "loss": 0.3101, "num_tokens": 4652896497.0, "step": 7346 }, { "epoch": 0.8687477829017382, "grad_norm": 0.13456258177757263, "learning_rate": 8.4074325029277e-06, "loss": 0.3602, "num_tokens": 4653515550.0, "step": 7347 }, { "epoch": 0.8688660281423672, "grad_norm": 0.13084480166435242, "learning_rate": 8.403166076803267e-06, "loss": 0.3179, "num_tokens": 4654153177.0, "step": 7348 }, { "epoch": 0.8689842733829963, "grad_norm": 0.12971945106983185, "learning_rate": 8.398903258404661e-06, "loss": 0.3217, "num_tokens": 4654792317.0, "step": 7349 }, { "epoch": 0.8691025186236254, "grad_norm": 0.11858166754245758, "learning_rate": 8.39464404835713e-06, "loss": 0.3034, "num_tokens": 4655426642.0, "step": 7350 }, { "epoch": 0.8692207638642545, "grad_norm": 0.13607199490070343, "learning_rate": 8.390388447285394e-06, "loss": 0.3263, "num_tokens": 4656064178.0, "step": 7351 }, { "epoch": 0.8693390091048835, "grad_norm": 0.12614083290100098, "learning_rate": 8.38613645581364e-06, "loss": 0.3203, "num_tokens": 4656691325.0, "step": 7352 }, { "epoch": 0.8694572543455126, "grad_norm": 0.13703525066375732, "learning_rate": 8.38188807456552e-06, "loss": 0.2986, "num_tokens": 4657327393.0, "step": 7353 }, { "epoch": 0.8695754995861417, "grad_norm": 0.13876773416996002, "learning_rate": 8.377643304164175e-06, "loss": 0.3569, "num_tokens": 4657959279.0, "step": 7354 }, { "epoch": 0.8696937448267708, "grad_norm": 0.11807417869567871, "learning_rate": 8.37340214523219e-06, "loss": 0.289, "num_tokens": 4658594978.0, "step": 7355 }, { "epoch": 0.8698119900673997, "grad_norm": 0.11857373267412186, "learning_rate": 8.369164598391648e-06, "loss": 0.2716, "num_tokens": 4659228052.0, "step": 7356 }, { "epoch": 0.8699302353080288, "grad_norm": 0.1296607106924057, "learning_rate": 8.36493066426408e-06, "loss": 0.337, "num_tokens": 4659862666.0, "step": 7357 }, { "epoch": 0.8700484805486579, "grad_norm": 0.13987882435321808, "learning_rate": 8.360700343470501e-06, "loss": 0.3504, "num_tokens": 4660496346.0, "step": 7358 }, { "epoch": 0.870166725789287, "grad_norm": 0.13722029328346252, "learning_rate": 8.356473636631381e-06, "loss": 0.3367, "num_tokens": 4661099299.0, "step": 7359 }, { "epoch": 0.8702849710299161, "grad_norm": 0.13666458427906036, "learning_rate": 8.35225054436668e-06, "loss": 0.304, "num_tokens": 4661737307.0, "step": 7360 }, { "epoch": 0.8704032162705451, "grad_norm": 0.1245967373251915, "learning_rate": 8.348031067295812e-06, "loss": 0.3282, "num_tokens": 4662367043.0, "step": 7361 }, { "epoch": 0.8705214615111742, "grad_norm": 0.13049601018428802, "learning_rate": 8.343815206037668e-06, "loss": 0.3658, "num_tokens": 4663002600.0, "step": 7362 }, { "epoch": 0.8706397067518032, "grad_norm": 0.12612499296665192, "learning_rate": 8.339602961210611e-06, "loss": 0.2969, "num_tokens": 4663639312.0, "step": 7363 }, { "epoch": 0.8707579519924323, "grad_norm": 0.13818828761577606, "learning_rate": 8.33539433343246e-06, "loss": 0.3252, "num_tokens": 4664277199.0, "step": 7364 }, { "epoch": 0.8708761972330614, "grad_norm": 0.13346362113952637, "learning_rate": 8.331189323320529e-06, "loss": 0.3523, "num_tokens": 4664911134.0, "step": 7365 }, { "epoch": 0.8709944424736904, "grad_norm": 0.12053103744983673, "learning_rate": 8.326987931491562e-06, "loss": 0.3181, "num_tokens": 4665545840.0, "step": 7366 }, { "epoch": 0.8711126877143195, "grad_norm": 0.15090353786945343, "learning_rate": 8.322790158561817e-06, "loss": 0.3301, "num_tokens": 4666179111.0, "step": 7367 }, { "epoch": 0.8712309329549486, "grad_norm": 0.13050545752048492, "learning_rate": 8.318596005146985e-06, "loss": 0.3237, "num_tokens": 4666815938.0, "step": 7368 }, { "epoch": 0.8713491781955777, "grad_norm": 0.1311125010251999, "learning_rate": 8.314405471862253e-06, "loss": 0.3511, "num_tokens": 4667453180.0, "step": 7369 }, { "epoch": 0.8714674234362066, "grad_norm": 0.12433575093746185, "learning_rate": 8.310218559322255e-06, "loss": 0.3123, "num_tokens": 4668082013.0, "step": 7370 }, { "epoch": 0.8715856686768357, "grad_norm": 0.1305399239063263, "learning_rate": 8.306035268141107e-06, "loss": 0.3096, "num_tokens": 4668718852.0, "step": 7371 }, { "epoch": 0.8717039139174648, "grad_norm": 0.13478243350982666, "learning_rate": 8.301855598932397e-06, "loss": 0.3466, "num_tokens": 4669357326.0, "step": 7372 }, { "epoch": 0.8718221591580939, "grad_norm": 0.11530677229166031, "learning_rate": 8.297679552309168e-06, "loss": 0.2755, "num_tokens": 4669994149.0, "step": 7373 }, { "epoch": 0.871940404398723, "grad_norm": 0.12142017483711243, "learning_rate": 8.293507128883942e-06, "loss": 0.3073, "num_tokens": 4670629272.0, "step": 7374 }, { "epoch": 0.872058649639352, "grad_norm": 0.13734005391597748, "learning_rate": 8.289338329268704e-06, "loss": 0.3389, "num_tokens": 4671268789.0, "step": 7375 }, { "epoch": 0.8721768948799811, "grad_norm": 0.12476067990064621, "learning_rate": 8.285173154074908e-06, "loss": 0.2977, "num_tokens": 4671906862.0, "step": 7376 }, { "epoch": 0.8722951401206102, "grad_norm": 0.12040780484676361, "learning_rate": 8.281011603913488e-06, "loss": 0.3015, "num_tokens": 4672540687.0, "step": 7377 }, { "epoch": 0.8724133853612392, "grad_norm": 0.13068737089633942, "learning_rate": 8.276853679394834e-06, "loss": 0.3245, "num_tokens": 4673177432.0, "step": 7378 }, { "epoch": 0.8725316306018682, "grad_norm": 0.12849844992160797, "learning_rate": 8.272699381128804e-06, "loss": 0.3343, "num_tokens": 4673815582.0, "step": 7379 }, { "epoch": 0.8726498758424973, "grad_norm": 0.1435433328151703, "learning_rate": 8.268548709724722e-06, "loss": 0.3307, "num_tokens": 4674451307.0, "step": 7380 }, { "epoch": 0.8727681210831264, "grad_norm": 0.1280987411737442, "learning_rate": 8.264401665791396e-06, "loss": 0.2939, "num_tokens": 4675083298.0, "step": 7381 }, { "epoch": 0.8728863663237555, "grad_norm": 0.12293042242527008, "learning_rate": 8.260258249937088e-06, "loss": 0.3198, "num_tokens": 4675718989.0, "step": 7382 }, { "epoch": 0.8730046115643846, "grad_norm": 0.1238991841673851, "learning_rate": 8.256118462769523e-06, "loss": 0.2691, "num_tokens": 4676356410.0, "step": 7383 }, { "epoch": 0.8731228568050136, "grad_norm": 0.13109853863716125, "learning_rate": 8.251982304895915e-06, "loss": 0.3295, "num_tokens": 4676986162.0, "step": 7384 }, { "epoch": 0.8732411020456426, "grad_norm": 0.12667487561702728, "learning_rate": 8.247849776922919e-06, "loss": 0.325, "num_tokens": 4677622981.0, "step": 7385 }, { "epoch": 0.8733593472862717, "grad_norm": 0.14220283925533295, "learning_rate": 8.243720879456684e-06, "loss": 0.3492, "num_tokens": 4678258931.0, "step": 7386 }, { "epoch": 0.8734775925269008, "grad_norm": 0.13203589618206024, "learning_rate": 8.239595613102806e-06, "loss": 0.3225, "num_tokens": 4678879059.0, "step": 7387 }, { "epoch": 0.8735958377675298, "grad_norm": 0.1360780894756317, "learning_rate": 8.235473978466356e-06, "loss": 0.3141, "num_tokens": 4679504771.0, "step": 7388 }, { "epoch": 0.8737140830081589, "grad_norm": 0.13149577379226685, "learning_rate": 8.231355976151872e-06, "loss": 0.3137, "num_tokens": 4680140271.0, "step": 7389 }, { "epoch": 0.873832328248788, "grad_norm": 0.12946726381778717, "learning_rate": 8.227241606763365e-06, "loss": 0.3368, "num_tokens": 4680775980.0, "step": 7390 }, { "epoch": 0.8739505734894171, "grad_norm": 0.12854436039924622, "learning_rate": 8.2231308709043e-06, "loss": 0.2975, "num_tokens": 4681410472.0, "step": 7391 }, { "epoch": 0.8740688187300462, "grad_norm": 0.1343127191066742, "learning_rate": 8.21902376917762e-06, "loss": 0.3072, "num_tokens": 4682043257.0, "step": 7392 }, { "epoch": 0.8741870639706751, "grad_norm": 0.1246611550450325, "learning_rate": 8.214920302185736e-06, "loss": 0.2904, "num_tokens": 4682671492.0, "step": 7393 }, { "epoch": 0.8743053092113042, "grad_norm": 0.13046614825725555, "learning_rate": 8.21082047053051e-06, "loss": 0.3398, "num_tokens": 4683307419.0, "step": 7394 }, { "epoch": 0.8744235544519333, "grad_norm": 0.13420316576957703, "learning_rate": 8.206724274813304e-06, "loss": 0.2936, "num_tokens": 4683938964.0, "step": 7395 }, { "epoch": 0.8745417996925624, "grad_norm": 0.14131011068820953, "learning_rate": 8.202631715634896e-06, "loss": 0.332, "num_tokens": 4684577274.0, "step": 7396 }, { "epoch": 0.8746600449331915, "grad_norm": 0.13107536733150482, "learning_rate": 8.19854279359558e-06, "loss": 0.3405, "num_tokens": 4685211909.0, "step": 7397 }, { "epoch": 0.8747782901738205, "grad_norm": 0.1342563033103943, "learning_rate": 8.19445750929509e-06, "loss": 0.3118, "num_tokens": 4685848340.0, "step": 7398 }, { "epoch": 0.8748965354144496, "grad_norm": 0.12048318237066269, "learning_rate": 8.190375863332638e-06, "loss": 0.3085, "num_tokens": 4686485124.0, "step": 7399 }, { "epoch": 0.8750147806550787, "grad_norm": 0.12943416833877563, "learning_rate": 8.186297856306892e-06, "loss": 0.298, "num_tokens": 4687114900.0, "step": 7400 }, { "epoch": 0.8751330258957077, "grad_norm": 0.12201087921857834, "learning_rate": 8.182223488815986e-06, "loss": 0.299, "num_tokens": 4687745137.0, "step": 7401 }, { "epoch": 0.8752512711363367, "grad_norm": 0.13657286763191223, "learning_rate": 8.178152761457535e-06, "loss": 0.3363, "num_tokens": 4688381613.0, "step": 7402 }, { "epoch": 0.8753695163769658, "grad_norm": 0.13109692931175232, "learning_rate": 8.174085674828607e-06, "loss": 0.3073, "num_tokens": 4689018699.0, "step": 7403 }, { "epoch": 0.8754877616175949, "grad_norm": 0.12412352859973907, "learning_rate": 8.170022229525734e-06, "loss": 0.2891, "num_tokens": 4689654478.0, "step": 7404 }, { "epoch": 0.875606006858224, "grad_norm": 0.12104927003383636, "learning_rate": 8.16596242614493e-06, "loss": 0.3298, "num_tokens": 4690292429.0, "step": 7405 }, { "epoch": 0.8757242520988531, "grad_norm": 0.1411617547273636, "learning_rate": 8.161906265281651e-06, "loss": 0.3511, "num_tokens": 4690929002.0, "step": 7406 }, { "epoch": 0.875842497339482, "grad_norm": 0.136392280459404, "learning_rate": 8.157853747530847e-06, "loss": 0.3336, "num_tokens": 4691565718.0, "step": 7407 }, { "epoch": 0.8759607425801111, "grad_norm": 0.11240213364362717, "learning_rate": 8.153804873486912e-06, "loss": 0.2867, "num_tokens": 4692203986.0, "step": 7408 }, { "epoch": 0.8760789878207402, "grad_norm": 0.12615922093391418, "learning_rate": 8.14975964374371e-06, "loss": 0.3054, "num_tokens": 4692842696.0, "step": 7409 }, { "epoch": 0.8761972330613693, "grad_norm": 0.127946138381958, "learning_rate": 8.145718058894572e-06, "loss": 0.3408, "num_tokens": 4693475833.0, "step": 7410 }, { "epoch": 0.8763154783019983, "grad_norm": 0.12757103145122528, "learning_rate": 8.141680119532297e-06, "loss": 0.319, "num_tokens": 4694113440.0, "step": 7411 }, { "epoch": 0.8764337235426274, "grad_norm": 0.12259481847286224, "learning_rate": 8.13764582624915e-06, "loss": 0.2943, "num_tokens": 4694751798.0, "step": 7412 }, { "epoch": 0.8765519687832565, "grad_norm": 0.12527643144130707, "learning_rate": 8.13361517963685e-06, "loss": 0.3098, "num_tokens": 4695374936.0, "step": 7413 }, { "epoch": 0.8766702140238856, "grad_norm": 0.13639235496520996, "learning_rate": 8.129588180286602e-06, "loss": 0.3253, "num_tokens": 4695995554.0, "step": 7414 }, { "epoch": 0.8767884592645147, "grad_norm": 0.1271868795156479, "learning_rate": 8.125564828789052e-06, "loss": 0.3115, "num_tokens": 4696634387.0, "step": 7415 }, { "epoch": 0.8769067045051436, "grad_norm": 0.1347718983888626, "learning_rate": 8.121545125734338e-06, "loss": 0.3118, "num_tokens": 4697262854.0, "step": 7416 }, { "epoch": 0.8770249497457727, "grad_norm": 0.13268549740314484, "learning_rate": 8.117529071712028e-06, "loss": 0.3255, "num_tokens": 4697897433.0, "step": 7417 }, { "epoch": 0.8771431949864018, "grad_norm": 0.1403394639492035, "learning_rate": 8.113516667311182e-06, "loss": 0.3621, "num_tokens": 4698531069.0, "step": 7418 }, { "epoch": 0.8772614402270309, "grad_norm": 0.13333502411842346, "learning_rate": 8.109507913120323e-06, "loss": 0.3021, "num_tokens": 4699165841.0, "step": 7419 }, { "epoch": 0.8773796854676599, "grad_norm": 0.12851974368095398, "learning_rate": 8.105502809727432e-06, "loss": 0.3553, "num_tokens": 4699802968.0, "step": 7420 }, { "epoch": 0.877497930708289, "grad_norm": 0.13427890837192535, "learning_rate": 8.101501357719947e-06, "loss": 0.3016, "num_tokens": 4700433674.0, "step": 7421 }, { "epoch": 0.877616175948918, "grad_norm": 0.15037135779857635, "learning_rate": 8.097503557684783e-06, "loss": 0.3936, "num_tokens": 4701068882.0, "step": 7422 }, { "epoch": 0.8777344211895471, "grad_norm": 0.12756402790546417, "learning_rate": 8.093509410208316e-06, "loss": 0.3104, "num_tokens": 4701700483.0, "step": 7423 }, { "epoch": 0.8778526664301762, "grad_norm": 0.1319846659898758, "learning_rate": 8.089518915876382e-06, "loss": 0.3316, "num_tokens": 4702336585.0, "step": 7424 }, { "epoch": 0.8779709116708052, "grad_norm": 0.12381576001644135, "learning_rate": 8.08553207527429e-06, "loss": 0.3055, "num_tokens": 4702969932.0, "step": 7425 }, { "epoch": 0.8780891569114343, "grad_norm": 0.13098204135894775, "learning_rate": 8.081548888986803e-06, "loss": 0.287, "num_tokens": 4703587146.0, "step": 7426 }, { "epoch": 0.8782074021520634, "grad_norm": 0.13268058001995087, "learning_rate": 8.077569357598153e-06, "loss": 0.319, "num_tokens": 4704222648.0, "step": 7427 }, { "epoch": 0.8783256473926925, "grad_norm": 0.1527712643146515, "learning_rate": 8.073593481692036e-06, "loss": 0.3262, "num_tokens": 4704856580.0, "step": 7428 }, { "epoch": 0.8784438926333215, "grad_norm": 0.13466379046440125, "learning_rate": 8.069621261851613e-06, "loss": 0.3239, "num_tokens": 4705493160.0, "step": 7429 }, { "epoch": 0.8785621378739505, "grad_norm": 0.12866663932800293, "learning_rate": 8.065652698659506e-06, "loss": 0.3346, "num_tokens": 4706122438.0, "step": 7430 }, { "epoch": 0.8786803831145796, "grad_norm": 0.14845037460327148, "learning_rate": 8.061687792697793e-06, "loss": 0.3653, "num_tokens": 4706754822.0, "step": 7431 }, { "epoch": 0.8787986283552087, "grad_norm": 0.1203131303191185, "learning_rate": 8.057726544548033e-06, "loss": 0.3088, "num_tokens": 4707389538.0, "step": 7432 }, { "epoch": 0.8789168735958378, "grad_norm": 0.1350252628326416, "learning_rate": 8.053768954791249e-06, "loss": 0.3198, "num_tokens": 4708023548.0, "step": 7433 }, { "epoch": 0.8790351188364668, "grad_norm": 0.13611410558223724, "learning_rate": 8.049815024007898e-06, "loss": 0.3304, "num_tokens": 4708659535.0, "step": 7434 }, { "epoch": 0.8791533640770959, "grad_norm": 0.13813966512680054, "learning_rate": 8.045864752777933e-06, "loss": 0.3376, "num_tokens": 4709295805.0, "step": 7435 }, { "epoch": 0.879271609317725, "grad_norm": 0.13462527096271515, "learning_rate": 8.04191814168075e-06, "loss": 0.3316, "num_tokens": 4709925538.0, "step": 7436 }, { "epoch": 0.8793898545583541, "grad_norm": 0.14537909626960754, "learning_rate": 8.037975191295229e-06, "loss": 0.3323, "num_tokens": 4710554278.0, "step": 7437 }, { "epoch": 0.8795080997989831, "grad_norm": 0.13323234021663666, "learning_rate": 8.03403590219969e-06, "loss": 0.3399, "num_tokens": 4711186520.0, "step": 7438 }, { "epoch": 0.8796263450396121, "grad_norm": 0.13171370327472687, "learning_rate": 8.03010027497192e-06, "loss": 0.3132, "num_tokens": 4711821366.0, "step": 7439 }, { "epoch": 0.8797445902802412, "grad_norm": 0.12776106595993042, "learning_rate": 8.026168310189186e-06, "loss": 0.3408, "num_tokens": 4712451795.0, "step": 7440 }, { "epoch": 0.8798628355208703, "grad_norm": 0.1433226615190506, "learning_rate": 8.022240008428209e-06, "loss": 0.3222, "num_tokens": 4713078108.0, "step": 7441 }, { "epoch": 0.8799810807614994, "grad_norm": 0.12453059107065201, "learning_rate": 8.018315370265157e-06, "loss": 0.3256, "num_tokens": 4713717175.0, "step": 7442 }, { "epoch": 0.8800993260021284, "grad_norm": 0.12242448329925537, "learning_rate": 8.014394396275678e-06, "loss": 0.2973, "num_tokens": 4714352853.0, "step": 7443 }, { "epoch": 0.8802175712427575, "grad_norm": 0.14058931171894073, "learning_rate": 8.010477087034886e-06, "loss": 0.3284, "num_tokens": 4714985931.0, "step": 7444 }, { "epoch": 0.8803358164833865, "grad_norm": 0.12712399661540985, "learning_rate": 8.006563443117342e-06, "loss": 0.2985, "num_tokens": 4715621200.0, "step": 7445 }, { "epoch": 0.8804540617240156, "grad_norm": 0.14320702850818634, "learning_rate": 8.00265346509709e-06, "loss": 0.303, "num_tokens": 4716257622.0, "step": 7446 }, { "epoch": 0.8805723069646447, "grad_norm": 0.1340249627828598, "learning_rate": 7.998747153547602e-06, "loss": 0.3108, "num_tokens": 4716894207.0, "step": 7447 }, { "epoch": 0.8806905522052737, "grad_norm": 0.12425673007965088, "learning_rate": 7.99484450904185e-06, "loss": 0.3076, "num_tokens": 4717530366.0, "step": 7448 }, { "epoch": 0.8808087974459028, "grad_norm": 0.1438559889793396, "learning_rate": 7.99094553215225e-06, "loss": 0.3449, "num_tokens": 4718167428.0, "step": 7449 }, { "epoch": 0.8809270426865319, "grad_norm": 0.12429283559322357, "learning_rate": 7.987050223450683e-06, "loss": 0.3046, "num_tokens": 4718805485.0, "step": 7450 }, { "epoch": 0.881045287927161, "grad_norm": 0.11720947921276093, "learning_rate": 7.983158583508487e-06, "loss": 0.2834, "num_tokens": 4719424642.0, "step": 7451 }, { "epoch": 0.88116353316779, "grad_norm": 0.1264176219701767, "learning_rate": 7.979270612896462e-06, "loss": 0.3069, "num_tokens": 4720062274.0, "step": 7452 }, { "epoch": 0.881281778408419, "grad_norm": 0.14376534521579742, "learning_rate": 7.975386312184885e-06, "loss": 0.3203, "num_tokens": 4720678915.0, "step": 7453 }, { "epoch": 0.8814000236490481, "grad_norm": 0.1319657862186432, "learning_rate": 7.971505681943477e-06, "loss": 0.3202, "num_tokens": 4721314366.0, "step": 7454 }, { "epoch": 0.8815182688896772, "grad_norm": 0.13153788447380066, "learning_rate": 7.967628722741424e-06, "loss": 0.341, "num_tokens": 4721948757.0, "step": 7455 }, { "epoch": 0.8816365141303063, "grad_norm": 0.1220145896077156, "learning_rate": 7.963755435147383e-06, "loss": 0.294, "num_tokens": 4722569359.0, "step": 7456 }, { "epoch": 0.8817547593709353, "grad_norm": 0.12624484300613403, "learning_rate": 7.95988581972946e-06, "loss": 0.324, "num_tokens": 4723176339.0, "step": 7457 }, { "epoch": 0.8818730046115644, "grad_norm": 0.15272369980812073, "learning_rate": 7.956019877055236e-06, "loss": 0.3479, "num_tokens": 4723809930.0, "step": 7458 }, { "epoch": 0.8819912498521935, "grad_norm": 0.12961986660957336, "learning_rate": 7.952157607691744e-06, "loss": 0.3044, "num_tokens": 4724448382.0, "step": 7459 }, { "epoch": 0.8821094950928225, "grad_norm": 0.13231904804706573, "learning_rate": 7.948299012205476e-06, "loss": 0.3362, "num_tokens": 4725075558.0, "step": 7460 }, { "epoch": 0.8822277403334515, "grad_norm": 0.14990440011024475, "learning_rate": 7.944444091162389e-06, "loss": 0.3721, "num_tokens": 4725713395.0, "step": 7461 }, { "epoch": 0.8823459855740806, "grad_norm": 0.12336690723896027, "learning_rate": 7.940592845127897e-06, "loss": 0.2794, "num_tokens": 4726346966.0, "step": 7462 }, { "epoch": 0.8824642308147097, "grad_norm": 0.12336976826190948, "learning_rate": 7.936745274666899e-06, "loss": 0.3116, "num_tokens": 4726985975.0, "step": 7463 }, { "epoch": 0.8825824760553388, "grad_norm": 0.12926356494426727, "learning_rate": 7.932901380343714e-06, "loss": 0.3067, "num_tokens": 4727616613.0, "step": 7464 }, { "epoch": 0.8827007212959679, "grad_norm": 0.12191840261220932, "learning_rate": 7.929061162722153e-06, "loss": 0.3233, "num_tokens": 4728254529.0, "step": 7465 }, { "epoch": 0.8828189665365969, "grad_norm": 0.1327754408121109, "learning_rate": 7.925224622365472e-06, "loss": 0.3268, "num_tokens": 4728891846.0, "step": 7466 }, { "epoch": 0.882937211777226, "grad_norm": 0.1283477395772934, "learning_rate": 7.921391759836396e-06, "loss": 0.3085, "num_tokens": 4729528306.0, "step": 7467 }, { "epoch": 0.883055457017855, "grad_norm": 0.11954344063997269, "learning_rate": 7.917562575697113e-06, "loss": 0.2883, "num_tokens": 4730166626.0, "step": 7468 }, { "epoch": 0.8831737022584841, "grad_norm": 0.14291800558567047, "learning_rate": 7.913737070509257e-06, "loss": 0.3406, "num_tokens": 4730803742.0, "step": 7469 }, { "epoch": 0.8832919474991131, "grad_norm": 0.1382201462984085, "learning_rate": 7.909915244833936e-06, "loss": 0.3494, "num_tokens": 4731438017.0, "step": 7470 }, { "epoch": 0.8834101927397422, "grad_norm": 0.13118039071559906, "learning_rate": 7.906097099231718e-06, "loss": 0.3187, "num_tokens": 4732076161.0, "step": 7471 }, { "epoch": 0.8835284379803713, "grad_norm": 0.12926723062992096, "learning_rate": 7.902282634262623e-06, "loss": 0.3221, "num_tokens": 4732708088.0, "step": 7472 }, { "epoch": 0.8836466832210004, "grad_norm": 0.12585487961769104, "learning_rate": 7.898471850486131e-06, "loss": 0.3004, "num_tokens": 4733341063.0, "step": 7473 }, { "epoch": 0.8837649284616295, "grad_norm": 0.1366075724363327, "learning_rate": 7.894664748461193e-06, "loss": 0.3167, "num_tokens": 4733976001.0, "step": 7474 }, { "epoch": 0.8838831737022584, "grad_norm": 0.13367706537246704, "learning_rate": 7.890861328746209e-06, "loss": 0.2926, "num_tokens": 4734612095.0, "step": 7475 }, { "epoch": 0.8840014189428875, "grad_norm": 0.12848959863185883, "learning_rate": 7.88706159189905e-06, "loss": 0.3068, "num_tokens": 4735245627.0, "step": 7476 }, { "epoch": 0.8841196641835166, "grad_norm": 0.13203521072864532, "learning_rate": 7.883265538477034e-06, "loss": 0.3319, "num_tokens": 4735873990.0, "step": 7477 }, { "epoch": 0.8842379094241457, "grad_norm": 0.13275663554668427, "learning_rate": 7.879473169036947e-06, "loss": 0.3278, "num_tokens": 4736508222.0, "step": 7478 }, { "epoch": 0.8843561546647748, "grad_norm": 0.12780584394931793, "learning_rate": 7.875684484135035e-06, "loss": 0.3097, "num_tokens": 4737142991.0, "step": 7479 }, { "epoch": 0.8844743999054038, "grad_norm": 0.13604597747325897, "learning_rate": 7.871899484326996e-06, "loss": 0.3146, "num_tokens": 4737780186.0, "step": 7480 }, { "epoch": 0.8845926451460329, "grad_norm": 0.1289207637310028, "learning_rate": 7.868118170167995e-06, "loss": 0.3221, "num_tokens": 4738415928.0, "step": 7481 }, { "epoch": 0.884710890386662, "grad_norm": 0.12091828882694244, "learning_rate": 7.864340542212652e-06, "loss": 0.3273, "num_tokens": 4739049737.0, "step": 7482 }, { "epoch": 0.884829135627291, "grad_norm": 0.13304975628852844, "learning_rate": 7.860566601015048e-06, "loss": 0.3192, "num_tokens": 4739685349.0, "step": 7483 }, { "epoch": 0.88494738086792, "grad_norm": 0.12730051577091217, "learning_rate": 7.856796347128733e-06, "loss": 0.2979, "num_tokens": 4740314215.0, "step": 7484 }, { "epoch": 0.8850656261085491, "grad_norm": 0.13531629741191864, "learning_rate": 7.853029781106694e-06, "loss": 0.332, "num_tokens": 4740950504.0, "step": 7485 }, { "epoch": 0.8851838713491782, "grad_norm": 0.1366368979215622, "learning_rate": 7.849266903501396e-06, "loss": 0.3152, "num_tokens": 4741587421.0, "step": 7486 }, { "epoch": 0.8853021165898073, "grad_norm": 0.12489424645900726, "learning_rate": 7.845507714864756e-06, "loss": 0.3126, "num_tokens": 4742226715.0, "step": 7487 }, { "epoch": 0.8854203618304364, "grad_norm": 0.12862485647201538, "learning_rate": 7.841752215748152e-06, "loss": 0.2867, "num_tokens": 4742847887.0, "step": 7488 }, { "epoch": 0.8855386070710654, "grad_norm": 0.1264260709285736, "learning_rate": 7.83800040670242e-06, "loss": 0.2948, "num_tokens": 4743486445.0, "step": 7489 }, { "epoch": 0.8856568523116944, "grad_norm": 0.14958328008651733, "learning_rate": 7.834252288277847e-06, "loss": 0.3562, "num_tokens": 4744096815.0, "step": 7490 }, { "epoch": 0.8857750975523235, "grad_norm": 0.1289801150560379, "learning_rate": 7.830507861024198e-06, "loss": 0.2928, "num_tokens": 4744735195.0, "step": 7491 }, { "epoch": 0.8858933427929526, "grad_norm": 0.12078237533569336, "learning_rate": 7.826767125490671e-06, "loss": 0.2761, "num_tokens": 4745371427.0, "step": 7492 }, { "epoch": 0.8860115880335816, "grad_norm": 0.12571115791797638, "learning_rate": 7.823030082225955e-06, "loss": 0.3082, "num_tokens": 4746005628.0, "step": 7493 }, { "epoch": 0.8861298332742107, "grad_norm": 0.14134135842323303, "learning_rate": 7.819296731778157e-06, "loss": 0.3466, "num_tokens": 4746639093.0, "step": 7494 }, { "epoch": 0.8862480785148398, "grad_norm": 0.13165181875228882, "learning_rate": 7.815567074694879e-06, "loss": 0.2836, "num_tokens": 4747277673.0, "step": 7495 }, { "epoch": 0.8863663237554689, "grad_norm": 0.14108553528785706, "learning_rate": 7.811841111523156e-06, "loss": 0.3356, "num_tokens": 4747901076.0, "step": 7496 }, { "epoch": 0.886484568996098, "grad_norm": 0.12647470831871033, "learning_rate": 7.808118842809502e-06, "loss": 0.295, "num_tokens": 4748536737.0, "step": 7497 }, { "epoch": 0.8866028142367269, "grad_norm": 0.13177908957004547, "learning_rate": 7.804400269099871e-06, "loss": 0.3317, "num_tokens": 4749175216.0, "step": 7498 }, { "epoch": 0.886721059477356, "grad_norm": 0.14289718866348267, "learning_rate": 7.800685390939683e-06, "loss": 0.3329, "num_tokens": 4749813431.0, "step": 7499 }, { "epoch": 0.8868393047179851, "grad_norm": 0.12484896928071976, "learning_rate": 7.796974208873821e-06, "loss": 0.2966, "num_tokens": 4750445193.0, "step": 7500 }, { "epoch": 0.8869575499586142, "grad_norm": 0.11420823633670807, "learning_rate": 7.793266723446616e-06, "loss": 0.2797, "num_tokens": 4751076788.0, "step": 7501 }, { "epoch": 0.8870757951992432, "grad_norm": 0.13908609747886658, "learning_rate": 7.789562935201862e-06, "loss": 0.3186, "num_tokens": 4751716165.0, "step": 7502 }, { "epoch": 0.8871940404398723, "grad_norm": 0.12479356676340103, "learning_rate": 7.785862844682806e-06, "loss": 0.3114, "num_tokens": 4752348009.0, "step": 7503 }, { "epoch": 0.8873122856805014, "grad_norm": 0.13153384625911713, "learning_rate": 7.78216645243216e-06, "loss": 0.3206, "num_tokens": 4752981584.0, "step": 7504 }, { "epoch": 0.8874305309211304, "grad_norm": 0.13432393968105316, "learning_rate": 7.778473758992092e-06, "loss": 0.3409, "num_tokens": 4753614657.0, "step": 7505 }, { "epoch": 0.8875487761617595, "grad_norm": 0.12809881567955017, "learning_rate": 7.774784764904224e-06, "loss": 0.3138, "num_tokens": 4754253340.0, "step": 7506 }, { "epoch": 0.8876670214023885, "grad_norm": 0.13760223984718323, "learning_rate": 7.77109947070964e-06, "loss": 0.3397, "num_tokens": 4754888606.0, "step": 7507 }, { "epoch": 0.8877852666430176, "grad_norm": 0.1337011456489563, "learning_rate": 7.767417876948871e-06, "loss": 0.3346, "num_tokens": 4755519093.0, "step": 7508 }, { "epoch": 0.8879035118836467, "grad_norm": 0.12700265645980835, "learning_rate": 7.763739984161918e-06, "loss": 0.3045, "num_tokens": 4756157009.0, "step": 7509 }, { "epoch": 0.8880217571242758, "grad_norm": 0.13527527451515198, "learning_rate": 7.760065792888236e-06, "loss": 0.342, "num_tokens": 4756791374.0, "step": 7510 }, { "epoch": 0.8881400023649049, "grad_norm": 0.13158980011940002, "learning_rate": 7.756395303666728e-06, "loss": 0.3027, "num_tokens": 4757427632.0, "step": 7511 }, { "epoch": 0.8882582476055338, "grad_norm": 0.13361196219921112, "learning_rate": 7.752728517035763e-06, "loss": 0.3163, "num_tokens": 4758065608.0, "step": 7512 }, { "epoch": 0.8883764928461629, "grad_norm": 0.14940723776817322, "learning_rate": 7.749065433533164e-06, "loss": 0.2997, "num_tokens": 4758702358.0, "step": 7513 }, { "epoch": 0.888494738086792, "grad_norm": 0.12615785002708435, "learning_rate": 7.745406053696226e-06, "loss": 0.291, "num_tokens": 4759337014.0, "step": 7514 }, { "epoch": 0.8886129833274211, "grad_norm": 0.13552004098892212, "learning_rate": 7.741750378061661e-06, "loss": 0.3331, "num_tokens": 4759968272.0, "step": 7515 }, { "epoch": 0.8887312285680501, "grad_norm": 0.12985658645629883, "learning_rate": 7.73809840716568e-06, "loss": 0.3397, "num_tokens": 4760605087.0, "step": 7516 }, { "epoch": 0.8888494738086792, "grad_norm": 0.12657774984836578, "learning_rate": 7.734450141543925e-06, "loss": 0.3285, "num_tokens": 4761242174.0, "step": 7517 }, { "epoch": 0.8889677190493083, "grad_norm": 0.12255483120679855, "learning_rate": 7.730805581731514e-06, "loss": 0.3136, "num_tokens": 4761878060.0, "step": 7518 }, { "epoch": 0.8890859642899374, "grad_norm": 0.14188222587108612, "learning_rate": 7.727164728263001e-06, "loss": 0.3579, "num_tokens": 4762509204.0, "step": 7519 }, { "epoch": 0.8892042095305664, "grad_norm": 0.13328786194324493, "learning_rate": 7.723527581672408e-06, "loss": 0.3303, "num_tokens": 4763148646.0, "step": 7520 }, { "epoch": 0.8893224547711954, "grad_norm": 0.14224758744239807, "learning_rate": 7.719894142493218e-06, "loss": 0.3318, "num_tokens": 4763784382.0, "step": 7521 }, { "epoch": 0.8894407000118245, "grad_norm": 0.12259330600500107, "learning_rate": 7.716264411258353e-06, "loss": 0.2813, "num_tokens": 4764423448.0, "step": 7522 }, { "epoch": 0.8895589452524536, "grad_norm": 0.13808777928352356, "learning_rate": 7.712638388500211e-06, "loss": 0.3141, "num_tokens": 4765059108.0, "step": 7523 }, { "epoch": 0.8896771904930827, "grad_norm": 0.15120714902877808, "learning_rate": 7.709016074750626e-06, "loss": 0.3527, "num_tokens": 4765695508.0, "step": 7524 }, { "epoch": 0.8897954357337117, "grad_norm": 0.1309562474489212, "learning_rate": 7.70539747054091e-06, "loss": 0.3698, "num_tokens": 4766330361.0, "step": 7525 }, { "epoch": 0.8899136809743408, "grad_norm": 0.11555121093988419, "learning_rate": 7.70178257640181e-06, "loss": 0.2921, "num_tokens": 4766967687.0, "step": 7526 }, { "epoch": 0.8900319262149698, "grad_norm": 0.13395486772060394, "learning_rate": 7.698171392863548e-06, "loss": 0.3112, "num_tokens": 4767582832.0, "step": 7527 }, { "epoch": 0.8901501714555989, "grad_norm": 0.13950461149215698, "learning_rate": 7.694563920455787e-06, "loss": 0.3268, "num_tokens": 4768209669.0, "step": 7528 }, { "epoch": 0.890268416696228, "grad_norm": 0.13009454309940338, "learning_rate": 7.690960159707647e-06, "loss": 0.3036, "num_tokens": 4768844516.0, "step": 7529 }, { "epoch": 0.890386661936857, "grad_norm": 0.13773077726364136, "learning_rate": 7.687360111147717e-06, "loss": 0.3016, "num_tokens": 4769478774.0, "step": 7530 }, { "epoch": 0.8905049071774861, "grad_norm": 0.13410943746566772, "learning_rate": 7.683763775304023e-06, "loss": 0.3258, "num_tokens": 4770112864.0, "step": 7531 }, { "epoch": 0.8906231524181152, "grad_norm": 0.13319005072116852, "learning_rate": 7.680171152704057e-06, "loss": 0.3447, "num_tokens": 4770746844.0, "step": 7532 }, { "epoch": 0.8907413976587443, "grad_norm": 0.13912373781204224, "learning_rate": 7.67658224387477e-06, "loss": 0.3626, "num_tokens": 4771383352.0, "step": 7533 }, { "epoch": 0.8908596428993732, "grad_norm": 0.12807194888591766, "learning_rate": 7.672997049342555e-06, "loss": 0.3207, "num_tokens": 4772017934.0, "step": 7534 }, { "epoch": 0.8909778881400023, "grad_norm": 0.12303610146045685, "learning_rate": 7.669415569633276e-06, "loss": 0.2836, "num_tokens": 4772652043.0, "step": 7535 }, { "epoch": 0.8910961333806314, "grad_norm": 0.13338883221149445, "learning_rate": 7.665837805272246e-06, "loss": 0.339, "num_tokens": 4773280070.0, "step": 7536 }, { "epoch": 0.8912143786212605, "grad_norm": 0.12898211181163788, "learning_rate": 7.662263756784223e-06, "loss": 0.2849, "num_tokens": 4773911582.0, "step": 7537 }, { "epoch": 0.8913326238618896, "grad_norm": 0.14062397181987762, "learning_rate": 7.658693424693429e-06, "loss": 0.3204, "num_tokens": 4774546170.0, "step": 7538 }, { "epoch": 0.8914508691025186, "grad_norm": 0.12361055612564087, "learning_rate": 7.655126809523547e-06, "loss": 0.3089, "num_tokens": 4775181764.0, "step": 7539 }, { "epoch": 0.8915691143431477, "grad_norm": 0.14267170429229736, "learning_rate": 7.651563911797707e-06, "loss": 0.3352, "num_tokens": 4775813896.0, "step": 7540 }, { "epoch": 0.8916873595837768, "grad_norm": 0.1348239779472351, "learning_rate": 7.648004732038488e-06, "loss": 0.3447, "num_tokens": 4776450917.0, "step": 7541 }, { "epoch": 0.8918056048244059, "grad_norm": 0.13880662620067596, "learning_rate": 7.644449270767937e-06, "loss": 0.354, "num_tokens": 4777089543.0, "step": 7542 }, { "epoch": 0.8919238500650348, "grad_norm": 0.12674029171466827, "learning_rate": 7.640897528507542e-06, "loss": 0.3163, "num_tokens": 4777725764.0, "step": 7543 }, { "epoch": 0.8920420953056639, "grad_norm": 0.12884581089019775, "learning_rate": 7.63734950577827e-06, "loss": 0.3362, "num_tokens": 4778353605.0, "step": 7544 }, { "epoch": 0.892160340546293, "grad_norm": 0.13674932718276978, "learning_rate": 7.633805203100501e-06, "loss": 0.3224, "num_tokens": 4778983546.0, "step": 7545 }, { "epoch": 0.8922785857869221, "grad_norm": 0.129029780626297, "learning_rate": 7.630264620994111e-06, "loss": 0.3301, "num_tokens": 4779604277.0, "step": 7546 }, { "epoch": 0.8923968310275512, "grad_norm": 0.13990245759487152, "learning_rate": 7.626727759978403e-06, "loss": 0.3382, "num_tokens": 4780240147.0, "step": 7547 }, { "epoch": 0.8925150762681802, "grad_norm": 0.12723326683044434, "learning_rate": 7.623194620572156e-06, "loss": 0.3106, "num_tokens": 4780879614.0, "step": 7548 }, { "epoch": 0.8926333215088093, "grad_norm": 0.13201068341732025, "learning_rate": 7.619665203293577e-06, "loss": 0.2926, "num_tokens": 4781508559.0, "step": 7549 }, { "epoch": 0.8927515667494383, "grad_norm": 0.13000346720218658, "learning_rate": 7.6161395086603464e-06, "loss": 0.3279, "num_tokens": 4782147044.0, "step": 7550 }, { "epoch": 0.8928698119900674, "grad_norm": 0.15604229271411896, "learning_rate": 7.612617537189601e-06, "loss": 0.3745, "num_tokens": 4782786323.0, "step": 7551 }, { "epoch": 0.8929880572306965, "grad_norm": 0.11812501400709152, "learning_rate": 7.609099289397915e-06, "loss": 0.3033, "num_tokens": 4783397211.0, "step": 7552 }, { "epoch": 0.8931063024713255, "grad_norm": 0.13686683773994446, "learning_rate": 7.605584765801323e-06, "loss": 0.348, "num_tokens": 4784029641.0, "step": 7553 }, { "epoch": 0.8932245477119546, "grad_norm": 0.12338482588529587, "learning_rate": 7.6020739669153235e-06, "loss": 0.3224, "num_tokens": 4784660451.0, "step": 7554 }, { "epoch": 0.8933427929525837, "grad_norm": 0.12992629408836365, "learning_rate": 7.598566893254853e-06, "loss": 0.3506, "num_tokens": 4785299895.0, "step": 7555 }, { "epoch": 0.8934610381932128, "grad_norm": 0.13132788240909576, "learning_rate": 7.595063545334319e-06, "loss": 0.3344, "num_tokens": 4785932049.0, "step": 7556 }, { "epoch": 0.8935792834338417, "grad_norm": 0.1254241019487381, "learning_rate": 7.591563923667568e-06, "loss": 0.3044, "num_tokens": 4786568988.0, "step": 7557 }, { "epoch": 0.8936975286744708, "grad_norm": 0.12378402054309845, "learning_rate": 7.588068028767903e-06, "loss": 0.3118, "num_tokens": 4787205169.0, "step": 7558 }, { "epoch": 0.8938157739150999, "grad_norm": 0.12277564406394958, "learning_rate": 7.584575861148081e-06, "loss": 0.3411, "num_tokens": 4787838311.0, "step": 7559 }, { "epoch": 0.893934019155729, "grad_norm": 0.12432721257209778, "learning_rate": 7.581087421320323e-06, "loss": 0.298, "num_tokens": 4788471889.0, "step": 7560 }, { "epoch": 0.8940522643963581, "grad_norm": 0.14251331984996796, "learning_rate": 7.577602709796284e-06, "loss": 0.3506, "num_tokens": 4789109649.0, "step": 7561 }, { "epoch": 0.8941705096369871, "grad_norm": 0.12038671970367432, "learning_rate": 7.574121727087084e-06, "loss": 0.2959, "num_tokens": 4789745763.0, "step": 7562 }, { "epoch": 0.8942887548776162, "grad_norm": 0.12742824852466583, "learning_rate": 7.570644473703298e-06, "loss": 0.3105, "num_tokens": 4790382788.0, "step": 7563 }, { "epoch": 0.8944070001182453, "grad_norm": 0.13998104631900787, "learning_rate": 7.567170950154941e-06, "loss": 0.3438, "num_tokens": 4791017140.0, "step": 7564 }, { "epoch": 0.8945252453588743, "grad_norm": 0.1374569535255432, "learning_rate": 7.563701156951504e-06, "loss": 0.3431, "num_tokens": 4791645651.0, "step": 7565 }, { "epoch": 0.8946434905995033, "grad_norm": 0.12699221074581146, "learning_rate": 7.560235094601908e-06, "loss": 0.3168, "num_tokens": 4792253709.0, "step": 7566 }, { "epoch": 0.8947617358401324, "grad_norm": 0.13432075083255768, "learning_rate": 7.5567727636145374e-06, "loss": 0.3293, "num_tokens": 4792890293.0, "step": 7567 }, { "epoch": 0.8948799810807615, "grad_norm": 0.12112536281347275, "learning_rate": 7.553314164497223e-06, "loss": 0.2933, "num_tokens": 4793525946.0, "step": 7568 }, { "epoch": 0.8949982263213906, "grad_norm": 0.14106158912181854, "learning_rate": 7.54985929775726e-06, "loss": 0.3603, "num_tokens": 4794125093.0, "step": 7569 }, { "epoch": 0.8951164715620197, "grad_norm": 0.1437697559595108, "learning_rate": 7.546408163901386e-06, "loss": 0.3555, "num_tokens": 4794755901.0, "step": 7570 }, { "epoch": 0.8952347168026487, "grad_norm": 0.12793433666229248, "learning_rate": 7.54296076343579e-06, "loss": 0.3163, "num_tokens": 4795391114.0, "step": 7571 }, { "epoch": 0.8953529620432777, "grad_norm": 0.13170817494392395, "learning_rate": 7.539517096866124e-06, "loss": 0.3099, "num_tokens": 4796025318.0, "step": 7572 }, { "epoch": 0.8954712072839068, "grad_norm": 0.13433825969696045, "learning_rate": 7.5360771646974794e-06, "loss": 0.3507, "num_tokens": 4796657911.0, "step": 7573 }, { "epoch": 0.8955894525245359, "grad_norm": 0.12844164669513702, "learning_rate": 7.5326409674344175e-06, "loss": 0.3372, "num_tokens": 4797294464.0, "step": 7574 }, { "epoch": 0.8957076977651649, "grad_norm": 0.13758932054042816, "learning_rate": 7.529208505580923e-06, "loss": 0.3238, "num_tokens": 4797931935.0, "step": 7575 }, { "epoch": 0.895825943005794, "grad_norm": 0.13102081418037415, "learning_rate": 7.525779779640464e-06, "loss": 0.3228, "num_tokens": 4798571089.0, "step": 7576 }, { "epoch": 0.8959441882464231, "grad_norm": 0.14692145586013794, "learning_rate": 7.522354790115943e-06, "loss": 0.3254, "num_tokens": 4799199597.0, "step": 7577 }, { "epoch": 0.8960624334870522, "grad_norm": 0.1381709724664688, "learning_rate": 7.518933537509717e-06, "loss": 0.3323, "num_tokens": 4799837055.0, "step": 7578 }, { "epoch": 0.8961806787276813, "grad_norm": 0.12692712247371674, "learning_rate": 7.515516022323598e-06, "loss": 0.2761, "num_tokens": 4800468706.0, "step": 7579 }, { "epoch": 0.8962989239683102, "grad_norm": 0.1308266967535019, "learning_rate": 7.512102245058845e-06, "loss": 0.2884, "num_tokens": 4801104966.0, "step": 7580 }, { "epoch": 0.8964171692089393, "grad_norm": 0.15506123006343842, "learning_rate": 7.508692206216175e-06, "loss": 0.3656, "num_tokens": 4801743125.0, "step": 7581 }, { "epoch": 0.8965354144495684, "grad_norm": 0.12983110547065735, "learning_rate": 7.505285906295753e-06, "loss": 0.3015, "num_tokens": 4802371241.0, "step": 7582 }, { "epoch": 0.8966536596901975, "grad_norm": 0.1310785561800003, "learning_rate": 7.501883345797191e-06, "loss": 0.3288, "num_tokens": 4803007336.0, "step": 7583 }, { "epoch": 0.8967719049308265, "grad_norm": 0.1388908326625824, "learning_rate": 7.498484525219565e-06, "loss": 0.312, "num_tokens": 4803640755.0, "step": 7584 }, { "epoch": 0.8968901501714556, "grad_norm": 0.1302836686372757, "learning_rate": 7.495089445061389e-06, "loss": 0.2941, "num_tokens": 4804279083.0, "step": 7585 }, { "epoch": 0.8970083954120847, "grad_norm": 0.12853863835334778, "learning_rate": 7.4916981058206386e-06, "loss": 0.2834, "num_tokens": 4804918558.0, "step": 7586 }, { "epoch": 0.8971266406527137, "grad_norm": 0.1383909285068512, "learning_rate": 7.488310507994736e-06, "loss": 0.3527, "num_tokens": 4805552511.0, "step": 7587 }, { "epoch": 0.8972448858933428, "grad_norm": 0.12375692278146744, "learning_rate": 7.484926652080554e-06, "loss": 0.3096, "num_tokens": 4806189209.0, "step": 7588 }, { "epoch": 0.8973631311339718, "grad_norm": 0.12916630506515503, "learning_rate": 7.481546538574414e-06, "loss": 0.3158, "num_tokens": 4806821959.0, "step": 7589 }, { "epoch": 0.8974813763746009, "grad_norm": 0.1214345395565033, "learning_rate": 7.4781701679721e-06, "loss": 0.3017, "num_tokens": 4807430582.0, "step": 7590 }, { "epoch": 0.89759962161523, "grad_norm": 0.12978698313236237, "learning_rate": 7.474797540768832e-06, "loss": 0.3395, "num_tokens": 4808064778.0, "step": 7591 }, { "epoch": 0.8977178668558591, "grad_norm": 0.130963996052742, "learning_rate": 7.471428657459287e-06, "loss": 0.3132, "num_tokens": 4808698958.0, "step": 7592 }, { "epoch": 0.8978361120964882, "grad_norm": 0.13265690207481384, "learning_rate": 7.468063518537604e-06, "loss": 0.3177, "num_tokens": 4809331544.0, "step": 7593 }, { "epoch": 0.8979543573371171, "grad_norm": 0.12786796689033508, "learning_rate": 7.46470212449735e-06, "loss": 0.3056, "num_tokens": 4809968369.0, "step": 7594 }, { "epoch": 0.8980726025777462, "grad_norm": 0.12547990679740906, "learning_rate": 7.461344475831573e-06, "loss": 0.3278, "num_tokens": 4810603008.0, "step": 7595 }, { "epoch": 0.8981908478183753, "grad_norm": 0.1285749226808548, "learning_rate": 7.457990573032732e-06, "loss": 0.3329, "num_tokens": 4811237826.0, "step": 7596 }, { "epoch": 0.8983090930590044, "grad_norm": 0.12951387465000153, "learning_rate": 7.4546404165927696e-06, "loss": 0.3141, "num_tokens": 4811873586.0, "step": 7597 }, { "epoch": 0.8984273382996334, "grad_norm": 0.13525241613388062, "learning_rate": 7.4512940070030714e-06, "loss": 0.3393, "num_tokens": 4812509237.0, "step": 7598 }, { "epoch": 0.8985455835402625, "grad_norm": 0.12392687797546387, "learning_rate": 7.4479513447544685e-06, "loss": 0.2981, "num_tokens": 4813134452.0, "step": 7599 }, { "epoch": 0.8986638287808916, "grad_norm": 0.12503258883953094, "learning_rate": 7.444612430337243e-06, "loss": 0.3176, "num_tokens": 4813766535.0, "step": 7600 }, { "epoch": 0.8987820740215207, "grad_norm": 0.13705022633075714, "learning_rate": 7.44127726424112e-06, "loss": 0.3362, "num_tokens": 4814401067.0, "step": 7601 }, { "epoch": 0.8989003192621497, "grad_norm": 0.13161621987819672, "learning_rate": 7.4379458469552965e-06, "loss": 0.3038, "num_tokens": 4815035479.0, "step": 7602 }, { "epoch": 0.8990185645027787, "grad_norm": 0.136680006980896, "learning_rate": 7.434618178968394e-06, "loss": 0.3224, "num_tokens": 4815654832.0, "step": 7603 }, { "epoch": 0.8991368097434078, "grad_norm": 0.12666869163513184, "learning_rate": 7.43129426076851e-06, "loss": 0.287, "num_tokens": 4816293687.0, "step": 7604 }, { "epoch": 0.8992550549840369, "grad_norm": 0.13210324943065643, "learning_rate": 7.427974092843165e-06, "loss": 0.3093, "num_tokens": 4816927049.0, "step": 7605 }, { "epoch": 0.899373300224666, "grad_norm": 0.12607966363430023, "learning_rate": 7.424657675679345e-06, "loss": 0.3142, "num_tokens": 4817562757.0, "step": 7606 }, { "epoch": 0.899491545465295, "grad_norm": 0.12682083249092102, "learning_rate": 7.4213450097634915e-06, "loss": 0.3023, "num_tokens": 4818165369.0, "step": 7607 }, { "epoch": 0.8996097907059241, "grad_norm": 0.13852836191654205, "learning_rate": 7.418036095581482e-06, "loss": 0.3113, "num_tokens": 4818800680.0, "step": 7608 }, { "epoch": 0.8997280359465532, "grad_norm": 0.14408448338508606, "learning_rate": 7.414730933618653e-06, "loss": 0.3422, "num_tokens": 4819439092.0, "step": 7609 }, { "epoch": 0.8998462811871822, "grad_norm": 0.1352120339870453, "learning_rate": 7.411429524359777e-06, "loss": 0.3322, "num_tokens": 4820077758.0, "step": 7610 }, { "epoch": 0.8999645264278113, "grad_norm": 0.1347416639328003, "learning_rate": 7.4081318682890985e-06, "loss": 0.3138, "num_tokens": 4820713031.0, "step": 7611 }, { "epoch": 0.9000827716684403, "grad_norm": 0.12895788252353668, "learning_rate": 7.404837965890292e-06, "loss": 0.3057, "num_tokens": 4821345729.0, "step": 7612 }, { "epoch": 0.9002010169090694, "grad_norm": 0.14497020840644836, "learning_rate": 7.40154781764649e-06, "loss": 0.3511, "num_tokens": 4821954405.0, "step": 7613 }, { "epoch": 0.9003192621496985, "grad_norm": 0.12429824471473694, "learning_rate": 7.398261424040277e-06, "loss": 0.2831, "num_tokens": 4822593411.0, "step": 7614 }, { "epoch": 0.9004375073903276, "grad_norm": 0.13450899720191956, "learning_rate": 7.394978785553673e-06, "loss": 0.3041, "num_tokens": 4823217858.0, "step": 7615 }, { "epoch": 0.9005557526309566, "grad_norm": 0.1246315985918045, "learning_rate": 7.3916999026681715e-06, "loss": 0.2905, "num_tokens": 4823850197.0, "step": 7616 }, { "epoch": 0.9006739978715856, "grad_norm": 0.1421375274658203, "learning_rate": 7.388424775864689e-06, "loss": 0.3782, "num_tokens": 4824488611.0, "step": 7617 }, { "epoch": 0.9007922431122147, "grad_norm": 0.14190272986888885, "learning_rate": 7.3851534056236105e-06, "loss": 0.3253, "num_tokens": 4825116681.0, "step": 7618 }, { "epoch": 0.9009104883528438, "grad_norm": 0.14263063669204712, "learning_rate": 7.381885792424751e-06, "loss": 0.3124, "num_tokens": 4825751535.0, "step": 7619 }, { "epoch": 0.9010287335934729, "grad_norm": 0.13225826621055603, "learning_rate": 7.3786219367474e-06, "loss": 0.315, "num_tokens": 4826385108.0, "step": 7620 }, { "epoch": 0.9011469788341019, "grad_norm": 0.1337123066186905, "learning_rate": 7.375361839070272e-06, "loss": 0.3469, "num_tokens": 4827019852.0, "step": 7621 }, { "epoch": 0.901265224074731, "grad_norm": 0.1355578452348709, "learning_rate": 7.37210549987154e-06, "loss": 0.3188, "num_tokens": 4827650896.0, "step": 7622 }, { "epoch": 0.9013834693153601, "grad_norm": 0.13500036299228668, "learning_rate": 7.368852919628832e-06, "loss": 0.3518, "num_tokens": 4828288649.0, "step": 7623 }, { "epoch": 0.9015017145559892, "grad_norm": 0.12722237408161163, "learning_rate": 7.365604098819211e-06, "loss": 0.3074, "num_tokens": 4828924178.0, "step": 7624 }, { "epoch": 0.9016199597966181, "grad_norm": 0.13344234228134155, "learning_rate": 7.362359037919206e-06, "loss": 0.3474, "num_tokens": 4829563362.0, "step": 7625 }, { "epoch": 0.9017382050372472, "grad_norm": 0.13551992177963257, "learning_rate": 7.35911773740477e-06, "loss": 0.3384, "num_tokens": 4830202279.0, "step": 7626 }, { "epoch": 0.9018564502778763, "grad_norm": 0.13153289258480072, "learning_rate": 7.355880197751324e-06, "loss": 0.3069, "num_tokens": 4830832083.0, "step": 7627 }, { "epoch": 0.9019746955185054, "grad_norm": 0.1382070630788803, "learning_rate": 7.352646419433742e-06, "loss": 0.3185, "num_tokens": 4831464354.0, "step": 7628 }, { "epoch": 0.9020929407591345, "grad_norm": 0.13112618029117584, "learning_rate": 7.349416402926328e-06, "loss": 0.2924, "num_tokens": 4832100729.0, "step": 7629 }, { "epoch": 0.9022111859997635, "grad_norm": 0.13651232421398163, "learning_rate": 7.346190148702843e-06, "loss": 0.2957, "num_tokens": 4832708902.0, "step": 7630 }, { "epoch": 0.9023294312403926, "grad_norm": 0.14705461263656616, "learning_rate": 7.342967657236492e-06, "loss": 0.3575, "num_tokens": 4833346162.0, "step": 7631 }, { "epoch": 0.9024476764810216, "grad_norm": 0.13444305956363678, "learning_rate": 7.339748928999942e-06, "loss": 0.3101, "num_tokens": 4833982864.0, "step": 7632 }, { "epoch": 0.9025659217216507, "grad_norm": 0.15558834373950958, "learning_rate": 7.3365339644652895e-06, "loss": 0.3538, "num_tokens": 4834617708.0, "step": 7633 }, { "epoch": 0.9026841669622798, "grad_norm": 0.13603968918323517, "learning_rate": 7.333322764104093e-06, "loss": 0.3332, "num_tokens": 4835241961.0, "step": 7634 }, { "epoch": 0.9028024122029088, "grad_norm": 0.12811771035194397, "learning_rate": 7.330115328387351e-06, "loss": 0.3268, "num_tokens": 4835878692.0, "step": 7635 }, { "epoch": 0.9029206574435379, "grad_norm": 0.13160811364650726, "learning_rate": 7.326911657785509e-06, "loss": 0.3099, "num_tokens": 4836508136.0, "step": 7636 }, { "epoch": 0.903038902684167, "grad_norm": 0.13548433780670166, "learning_rate": 7.323711752768469e-06, "loss": 0.3275, "num_tokens": 4837145444.0, "step": 7637 }, { "epoch": 0.9031571479247961, "grad_norm": 0.1271149218082428, "learning_rate": 7.320515613805576e-06, "loss": 0.3292, "num_tokens": 4837781603.0, "step": 7638 }, { "epoch": 0.903275393165425, "grad_norm": 0.1255885809659958, "learning_rate": 7.317323241365616e-06, "loss": 0.2766, "num_tokens": 4838414311.0, "step": 7639 }, { "epoch": 0.9033936384060541, "grad_norm": 0.1356625109910965, "learning_rate": 7.31413463591683e-06, "loss": 0.3594, "num_tokens": 4839048847.0, "step": 7640 }, { "epoch": 0.9035118836466832, "grad_norm": 0.12721049785614014, "learning_rate": 7.310949797926902e-06, "loss": 0.3178, "num_tokens": 4839687714.0, "step": 7641 }, { "epoch": 0.9036301288873123, "grad_norm": 0.11907783895730972, "learning_rate": 7.30776872786298e-06, "loss": 0.2927, "num_tokens": 4840322189.0, "step": 7642 }, { "epoch": 0.9037483741279414, "grad_norm": 0.11676138639450073, "learning_rate": 7.304591426191626e-06, "loss": 0.3008, "num_tokens": 4840959833.0, "step": 7643 }, { "epoch": 0.9038666193685704, "grad_norm": 0.13499297201633453, "learning_rate": 7.301417893378883e-06, "loss": 0.3177, "num_tokens": 4841593414.0, "step": 7644 }, { "epoch": 0.9039848646091995, "grad_norm": 0.13025952875614166, "learning_rate": 7.298248129890218e-06, "loss": 0.3012, "num_tokens": 4842231404.0, "step": 7645 }, { "epoch": 0.9041031098498286, "grad_norm": 0.1318182647228241, "learning_rate": 7.295082136190565e-06, "loss": 0.3355, "num_tokens": 4842866847.0, "step": 7646 }, { "epoch": 0.9042213550904576, "grad_norm": 0.12636998295783997, "learning_rate": 7.291919912744284e-06, "loss": 0.3224, "num_tokens": 4843499691.0, "step": 7647 }, { "epoch": 0.9043396003310866, "grad_norm": 0.13014435768127441, "learning_rate": 7.288761460015195e-06, "loss": 0.2948, "num_tokens": 4844097541.0, "step": 7648 }, { "epoch": 0.9044578455717157, "grad_norm": 0.1346408575773239, "learning_rate": 7.285606778466564e-06, "loss": 0.334, "num_tokens": 4844729573.0, "step": 7649 }, { "epoch": 0.9045760908123448, "grad_norm": 0.13509416580200195, "learning_rate": 7.282455868561102e-06, "loss": 0.3371, "num_tokens": 4845367196.0, "step": 7650 }, { "epoch": 0.9046943360529739, "grad_norm": 0.12391680479049683, "learning_rate": 7.2793087307609685e-06, "loss": 0.308, "num_tokens": 4845998491.0, "step": 7651 }, { "epoch": 0.904812581293603, "grad_norm": 0.12343543022871017, "learning_rate": 7.276165365527759e-06, "loss": 0.2844, "num_tokens": 4846632914.0, "step": 7652 }, { "epoch": 0.904930826534232, "grad_norm": 0.12441429495811462, "learning_rate": 7.273025773322535e-06, "loss": 0.3188, "num_tokens": 4847269946.0, "step": 7653 }, { "epoch": 0.905049071774861, "grad_norm": 0.13303667306900024, "learning_rate": 7.2698899546057855e-06, "loss": 0.3011, "num_tokens": 4847905469.0, "step": 7654 }, { "epoch": 0.9051673170154901, "grad_norm": 0.13628724217414856, "learning_rate": 7.266757909837467e-06, "loss": 0.2859, "num_tokens": 4848535262.0, "step": 7655 }, { "epoch": 0.9052855622561192, "grad_norm": 0.12846340239048004, "learning_rate": 7.26362963947696e-06, "loss": 0.3065, "num_tokens": 4849171540.0, "step": 7656 }, { "epoch": 0.9054038074967482, "grad_norm": 0.12560856342315674, "learning_rate": 7.260505143983103e-06, "loss": 0.2841, "num_tokens": 4849803021.0, "step": 7657 }, { "epoch": 0.9055220527373773, "grad_norm": 0.1409880667924881, "learning_rate": 7.2573844238141826e-06, "loss": 0.3347, "num_tokens": 4850436943.0, "step": 7658 }, { "epoch": 0.9056402979780064, "grad_norm": 0.126939058303833, "learning_rate": 7.254267479427931e-06, "loss": 0.2833, "num_tokens": 4851067449.0, "step": 7659 }, { "epoch": 0.9057585432186355, "grad_norm": 0.12664024531841278, "learning_rate": 7.251154311281516e-06, "loss": 0.2968, "num_tokens": 4851697391.0, "step": 7660 }, { "epoch": 0.9058767884592646, "grad_norm": 0.1363763064146042, "learning_rate": 7.2480449198315635e-06, "loss": 0.3125, "num_tokens": 4852326116.0, "step": 7661 }, { "epoch": 0.9059950336998935, "grad_norm": 0.12120404839515686, "learning_rate": 7.2449393055341394e-06, "loss": 0.3075, "num_tokens": 4852963377.0, "step": 7662 }, { "epoch": 0.9061132789405226, "grad_norm": 0.12953302264213562, "learning_rate": 7.24183746884477e-06, "loss": 0.3129, "num_tokens": 4853599223.0, "step": 7663 }, { "epoch": 0.9062315241811517, "grad_norm": 0.13272029161453247, "learning_rate": 7.238739410218397e-06, "loss": 0.3458, "num_tokens": 4854235248.0, "step": 7664 }, { "epoch": 0.9063497694217808, "grad_norm": 0.13671572506427765, "learning_rate": 7.235645130109441e-06, "loss": 0.3449, "num_tokens": 4854857758.0, "step": 7665 }, { "epoch": 0.9064680146624099, "grad_norm": 0.12900333106517792, "learning_rate": 7.232554628971741e-06, "loss": 0.299, "num_tokens": 4855493608.0, "step": 7666 }, { "epoch": 0.9065862599030389, "grad_norm": 0.11905461549758911, "learning_rate": 7.229467907258607e-06, "loss": 0.3044, "num_tokens": 4856127339.0, "step": 7667 }, { "epoch": 0.906704505143668, "grad_norm": 0.12211315333843231, "learning_rate": 7.226384965422774e-06, "loss": 0.3099, "num_tokens": 4856761822.0, "step": 7668 }, { "epoch": 0.906822750384297, "grad_norm": 0.13672004640102386, "learning_rate": 7.223305803916433e-06, "loss": 0.3676, "num_tokens": 4857401543.0, "step": 7669 }, { "epoch": 0.9069409956249261, "grad_norm": 0.14398953318595886, "learning_rate": 7.220230423191218e-06, "loss": 0.3553, "num_tokens": 4858030726.0, "step": 7670 }, { "epoch": 0.9070592408655551, "grad_norm": 0.1336560994386673, "learning_rate": 7.217158823698207e-06, "loss": 0.3319, "num_tokens": 4858667299.0, "step": 7671 }, { "epoch": 0.9071774861061842, "grad_norm": 0.13168326020240784, "learning_rate": 7.214091005887932e-06, "loss": 0.3178, "num_tokens": 4859306070.0, "step": 7672 }, { "epoch": 0.9072957313468133, "grad_norm": 0.12199695408344269, "learning_rate": 7.211026970210354e-06, "loss": 0.3095, "num_tokens": 4859932979.0, "step": 7673 }, { "epoch": 0.9074139765874424, "grad_norm": 0.11862476915121078, "learning_rate": 7.207966717114894e-06, "loss": 0.3018, "num_tokens": 4860569118.0, "step": 7674 }, { "epoch": 0.9075322218280715, "grad_norm": 0.13595734536647797, "learning_rate": 7.2049102470504085e-06, "loss": 0.3104, "num_tokens": 4861201306.0, "step": 7675 }, { "epoch": 0.9076504670687005, "grad_norm": 0.1262384057044983, "learning_rate": 7.201857560465208e-06, "loss": 0.2918, "num_tokens": 4861837203.0, "step": 7676 }, { "epoch": 0.9077687123093295, "grad_norm": 0.12233507633209229, "learning_rate": 7.198808657807045e-06, "loss": 0.2993, "num_tokens": 4862472470.0, "step": 7677 }, { "epoch": 0.9078869575499586, "grad_norm": 0.12419585138559341, "learning_rate": 7.195763539523106e-06, "loss": 0.314, "num_tokens": 4863109856.0, "step": 7678 }, { "epoch": 0.9080052027905877, "grad_norm": 0.13122080266475677, "learning_rate": 7.1927222060600445e-06, "loss": 0.32, "num_tokens": 4863746242.0, "step": 7679 }, { "epoch": 0.9081234480312167, "grad_norm": 0.13007786870002747, "learning_rate": 7.1896846578639386e-06, "loss": 0.3444, "num_tokens": 4864382982.0, "step": 7680 }, { "epoch": 0.9082416932718458, "grad_norm": 0.13562928140163422, "learning_rate": 7.186650895380319e-06, "loss": 0.3505, "num_tokens": 4865019939.0, "step": 7681 }, { "epoch": 0.9083599385124749, "grad_norm": 0.13849271833896637, "learning_rate": 7.183620919054159e-06, "loss": 0.3276, "num_tokens": 4865655108.0, "step": 7682 }, { "epoch": 0.908478183753104, "grad_norm": 0.13452871143817902, "learning_rate": 7.180594729329882e-06, "loss": 0.3121, "num_tokens": 4866285601.0, "step": 7683 }, { "epoch": 0.908596428993733, "grad_norm": 0.14253680408000946, "learning_rate": 7.177572326651354e-06, "loss": 0.3306, "num_tokens": 4866898039.0, "step": 7684 }, { "epoch": 0.908714674234362, "grad_norm": 0.13840056955814362, "learning_rate": 7.174553711461884e-06, "loss": 0.3396, "num_tokens": 4867535425.0, "step": 7685 }, { "epoch": 0.9088329194749911, "grad_norm": 0.1385040432214737, "learning_rate": 7.171538884204226e-06, "loss": 0.3465, "num_tokens": 4868169955.0, "step": 7686 }, { "epoch": 0.9089511647156202, "grad_norm": 0.12798000872135162, "learning_rate": 7.168527845320567e-06, "loss": 0.3305, "num_tokens": 4868806669.0, "step": 7687 }, { "epoch": 0.9090694099562493, "grad_norm": 0.13449805974960327, "learning_rate": 7.165520595252569e-06, "loss": 0.3298, "num_tokens": 4869438503.0, "step": 7688 }, { "epoch": 0.9091876551968783, "grad_norm": 0.1221543699502945, "learning_rate": 7.1625171344413035e-06, "loss": 0.2906, "num_tokens": 4870074470.0, "step": 7689 }, { "epoch": 0.9093059004375074, "grad_norm": 0.14236518740653992, "learning_rate": 7.159517463327303e-06, "loss": 0.3169, "num_tokens": 4870703331.0, "step": 7690 }, { "epoch": 0.9094241456781365, "grad_norm": 0.13728490471839905, "learning_rate": 7.156521582350546e-06, "loss": 0.3236, "num_tokens": 4871338493.0, "step": 7691 }, { "epoch": 0.9095423909187655, "grad_norm": 0.13317139446735382, "learning_rate": 7.153529491950447e-06, "loss": 0.3228, "num_tokens": 4871974133.0, "step": 7692 }, { "epoch": 0.9096606361593946, "grad_norm": 0.12097819894552231, "learning_rate": 7.150541192565883e-06, "loss": 0.303, "num_tokens": 4872606543.0, "step": 7693 }, { "epoch": 0.9097788814000236, "grad_norm": 0.13035382330417633, "learning_rate": 7.147556684635143e-06, "loss": 0.3245, "num_tokens": 4873243636.0, "step": 7694 }, { "epoch": 0.9098971266406527, "grad_norm": 0.13877668976783752, "learning_rate": 7.144575968595987e-06, "loss": 0.3532, "num_tokens": 4873878366.0, "step": 7695 }, { "epoch": 0.9100153718812818, "grad_norm": 0.13660405576229095, "learning_rate": 7.1415990448856075e-06, "loss": 0.3049, "num_tokens": 4874508767.0, "step": 7696 }, { "epoch": 0.9101336171219109, "grad_norm": 0.13470448553562164, "learning_rate": 7.138625913940647e-06, "loss": 0.34, "num_tokens": 4875139479.0, "step": 7697 }, { "epoch": 0.9102518623625399, "grad_norm": 0.13805383443832397, "learning_rate": 7.135656576197182e-06, "loss": 0.3226, "num_tokens": 4875775334.0, "step": 7698 }, { "epoch": 0.9103701076031689, "grad_norm": 0.13225281238555908, "learning_rate": 7.1326910320907375e-06, "loss": 0.2831, "num_tokens": 4876410670.0, "step": 7699 }, { "epoch": 0.910488352843798, "grad_norm": 0.12496241927146912, "learning_rate": 7.129729282056293e-06, "loss": 0.3358, "num_tokens": 4877014061.0, "step": 7700 }, { "epoch": 0.9106065980844271, "grad_norm": 0.12390656024217606, "learning_rate": 7.1267713265282475e-06, "loss": 0.3105, "num_tokens": 4877651384.0, "step": 7701 }, { "epoch": 0.9107248433250562, "grad_norm": 0.12660756707191467, "learning_rate": 7.123817165940473e-06, "loss": 0.3221, "num_tokens": 4878285578.0, "step": 7702 }, { "epoch": 0.9108430885656852, "grad_norm": 0.13662245869636536, "learning_rate": 7.120866800726254e-06, "loss": 0.3284, "num_tokens": 4878923369.0, "step": 7703 }, { "epoch": 0.9109613338063143, "grad_norm": 0.11982785910367966, "learning_rate": 7.117920231318345e-06, "loss": 0.3078, "num_tokens": 4879556087.0, "step": 7704 }, { "epoch": 0.9110795790469434, "grad_norm": 0.1338290274143219, "learning_rate": 7.1149774581489235e-06, "loss": 0.3151, "num_tokens": 4880190663.0, "step": 7705 }, { "epoch": 0.9111978242875725, "grad_norm": 0.12243515998125076, "learning_rate": 7.112038481649624e-06, "loss": 0.2926, "num_tokens": 4880823771.0, "step": 7706 }, { "epoch": 0.9113160695282015, "grad_norm": 0.12157813459634781, "learning_rate": 7.10910330225152e-06, "loss": 0.3071, "num_tokens": 4881459750.0, "step": 7707 }, { "epoch": 0.9114343147688305, "grad_norm": 0.13406985998153687, "learning_rate": 7.106171920385122e-06, "loss": 0.3207, "num_tokens": 4882096213.0, "step": 7708 }, { "epoch": 0.9115525600094596, "grad_norm": 0.13597171008586884, "learning_rate": 7.1032443364803955e-06, "loss": 0.335, "num_tokens": 4882728899.0, "step": 7709 }, { "epoch": 0.9116708052500887, "grad_norm": 0.12823422253131866, "learning_rate": 7.100320550966739e-06, "loss": 0.3088, "num_tokens": 4883364890.0, "step": 7710 }, { "epoch": 0.9117890504907178, "grad_norm": 0.12310314923524857, "learning_rate": 7.0974005642729944e-06, "loss": 0.3258, "num_tokens": 4884003132.0, "step": 7711 }, { "epoch": 0.9119072957313468, "grad_norm": 0.12380990386009216, "learning_rate": 7.094484376827449e-06, "loss": 0.2985, "num_tokens": 4884642134.0, "step": 7712 }, { "epoch": 0.9120255409719759, "grad_norm": 0.1255313605070114, "learning_rate": 7.091571989057838e-06, "loss": 0.3143, "num_tokens": 4885275549.0, "step": 7713 }, { "epoch": 0.912143786212605, "grad_norm": 0.12017205357551575, "learning_rate": 7.088663401391329e-06, "loss": 0.3121, "num_tokens": 4885913781.0, "step": 7714 }, { "epoch": 0.912262031453234, "grad_norm": 0.14157156646251678, "learning_rate": 7.0857586142545436e-06, "loss": 0.3724, "num_tokens": 4886541113.0, "step": 7715 }, { "epoch": 0.9123802766938631, "grad_norm": 0.12915055453777313, "learning_rate": 7.082857628073535e-06, "loss": 0.3423, "num_tokens": 4887177363.0, "step": 7716 }, { "epoch": 0.9124985219344921, "grad_norm": 0.12461072206497192, "learning_rate": 7.079960443273801e-06, "loss": 0.3303, "num_tokens": 4887813507.0, "step": 7717 }, { "epoch": 0.9126167671751212, "grad_norm": 0.12444888055324554, "learning_rate": 7.077067060280287e-06, "loss": 0.3087, "num_tokens": 4888448550.0, "step": 7718 }, { "epoch": 0.9127350124157503, "grad_norm": 0.12407427281141281, "learning_rate": 7.074177479517384e-06, "loss": 0.3151, "num_tokens": 4889082189.0, "step": 7719 }, { "epoch": 0.9128532576563794, "grad_norm": 0.1321941763162613, "learning_rate": 7.07129170140891e-06, "loss": 0.3522, "num_tokens": 4889720588.0, "step": 7720 }, { "epoch": 0.9129715028970083, "grad_norm": 0.13738113641738892, "learning_rate": 7.06840972637814e-06, "loss": 0.3285, "num_tokens": 4890348719.0, "step": 7721 }, { "epoch": 0.9130897481376374, "grad_norm": 0.13173311948776245, "learning_rate": 7.0655315548477845e-06, "loss": 0.3051, "num_tokens": 4890987570.0, "step": 7722 }, { "epoch": 0.9132079933782665, "grad_norm": 0.1362680196762085, "learning_rate": 7.0626571872400025e-06, "loss": 0.3226, "num_tokens": 4891625106.0, "step": 7723 }, { "epoch": 0.9133262386188956, "grad_norm": 0.11706435680389404, "learning_rate": 7.059786623976379e-06, "loss": 0.3092, "num_tokens": 4892258998.0, "step": 7724 }, { "epoch": 0.9134444838595247, "grad_norm": 0.13159960508346558, "learning_rate": 7.056919865477966e-06, "loss": 0.3002, "num_tokens": 4892891970.0, "step": 7725 }, { "epoch": 0.9135627291001537, "grad_norm": 0.1346406787633896, "learning_rate": 7.05405691216523e-06, "loss": 0.3079, "num_tokens": 4893521412.0, "step": 7726 }, { "epoch": 0.9136809743407828, "grad_norm": 0.13626651465892792, "learning_rate": 7.051197764458105e-06, "loss": 0.3525, "num_tokens": 4894152833.0, "step": 7727 }, { "epoch": 0.9137992195814119, "grad_norm": 0.12659278512001038, "learning_rate": 7.048342422775947e-06, "loss": 0.301, "num_tokens": 4894784660.0, "step": 7728 }, { "epoch": 0.913917464822041, "grad_norm": 0.12499964982271194, "learning_rate": 7.045490887537561e-06, "loss": 0.3219, "num_tokens": 4895416260.0, "step": 7729 }, { "epoch": 0.9140357100626699, "grad_norm": 0.14714843034744263, "learning_rate": 7.042643159161202e-06, "loss": 0.3613, "num_tokens": 4896049802.0, "step": 7730 }, { "epoch": 0.914153955303299, "grad_norm": 0.12850795686244965, "learning_rate": 7.039799238064552e-06, "loss": 0.3071, "num_tokens": 4896686644.0, "step": 7731 }, { "epoch": 0.9142722005439281, "grad_norm": 0.11813236027956009, "learning_rate": 7.036959124664743e-06, "loss": 0.3164, "num_tokens": 4897325350.0, "step": 7732 }, { "epoch": 0.9143904457845572, "grad_norm": 0.12554921209812164, "learning_rate": 7.034122819378346e-06, "loss": 0.2983, "num_tokens": 4897961788.0, "step": 7733 }, { "epoch": 0.9145086910251863, "grad_norm": 0.1320030838251114, "learning_rate": 7.0312903226213725e-06, "loss": 0.3035, "num_tokens": 4898595028.0, "step": 7734 }, { "epoch": 0.9146269362658153, "grad_norm": 0.12621420621871948, "learning_rate": 7.028461634809286e-06, "loss": 0.319, "num_tokens": 4899232085.0, "step": 7735 }, { "epoch": 0.9147451815064443, "grad_norm": 0.13027168810367584, "learning_rate": 7.025636756356976e-06, "loss": 0.3027, "num_tokens": 4899863079.0, "step": 7736 }, { "epoch": 0.9148634267470734, "grad_norm": 0.1252802610397339, "learning_rate": 7.0228156876787815e-06, "loss": 0.3144, "num_tokens": 4900498769.0, "step": 7737 }, { "epoch": 0.9149816719877025, "grad_norm": 0.13003598153591156, "learning_rate": 7.019998429188477e-06, "loss": 0.2974, "num_tokens": 4901125473.0, "step": 7738 }, { "epoch": 0.9150999172283315, "grad_norm": 0.12233404070138931, "learning_rate": 7.017184981299288e-06, "loss": 0.2926, "num_tokens": 4901758947.0, "step": 7739 }, { "epoch": 0.9152181624689606, "grad_norm": 0.1347981095314026, "learning_rate": 7.014375344423874e-06, "loss": 0.3218, "num_tokens": 4902387323.0, "step": 7740 }, { "epoch": 0.9153364077095897, "grad_norm": 0.13342373073101044, "learning_rate": 7.011569518974334e-06, "loss": 0.3152, "num_tokens": 4903021793.0, "step": 7741 }, { "epoch": 0.9154546529502188, "grad_norm": 0.13020728528499603, "learning_rate": 7.008767505362212e-06, "loss": 0.3314, "num_tokens": 4903659299.0, "step": 7742 }, { "epoch": 0.9155728981908479, "grad_norm": 0.1170753464102745, "learning_rate": 7.0059693039984905e-06, "loss": 0.2977, "num_tokens": 4904293344.0, "step": 7743 }, { "epoch": 0.9156911434314768, "grad_norm": 0.1261478215456009, "learning_rate": 7.003174915293598e-06, "loss": 0.2958, "num_tokens": 4904924880.0, "step": 7744 }, { "epoch": 0.9158093886721059, "grad_norm": 0.12334287911653519, "learning_rate": 7.000384339657396e-06, "loss": 0.3007, "num_tokens": 4905560042.0, "step": 7745 }, { "epoch": 0.915927633912735, "grad_norm": 0.12269110232591629, "learning_rate": 6.997597577499195e-06, "loss": 0.3071, "num_tokens": 4906194029.0, "step": 7746 }, { "epoch": 0.9160458791533641, "grad_norm": 0.12502506375312805, "learning_rate": 6.994814629227734e-06, "loss": 0.3052, "num_tokens": 4906829879.0, "step": 7747 }, { "epoch": 0.9161641243939932, "grad_norm": 0.12823623418807983, "learning_rate": 6.992035495251208e-06, "loss": 0.3371, "num_tokens": 4907462649.0, "step": 7748 }, { "epoch": 0.9162823696346222, "grad_norm": 0.1350601613521576, "learning_rate": 6.989260175977239e-06, "loss": 0.325, "num_tokens": 4908099635.0, "step": 7749 }, { "epoch": 0.9164006148752513, "grad_norm": 0.12180980294942856, "learning_rate": 6.986488671812897e-06, "loss": 0.2968, "num_tokens": 4908739248.0, "step": 7750 }, { "epoch": 0.9165188601158804, "grad_norm": 0.1183503046631813, "learning_rate": 6.983720983164695e-06, "loss": 0.2764, "num_tokens": 4909371340.0, "step": 7751 }, { "epoch": 0.9166371053565094, "grad_norm": 0.13435925543308258, "learning_rate": 6.980957110438574e-06, "loss": 0.3545, "num_tokens": 4910005931.0, "step": 7752 }, { "epoch": 0.9167553505971384, "grad_norm": 0.11797181516885757, "learning_rate": 6.978197054039937e-06, "loss": 0.2935, "num_tokens": 4910638535.0, "step": 7753 }, { "epoch": 0.9168735958377675, "grad_norm": 0.13103322684764862, "learning_rate": 6.975440814373596e-06, "loss": 0.3058, "num_tokens": 4911275752.0, "step": 7754 }, { "epoch": 0.9169918410783966, "grad_norm": 0.13084839284420013, "learning_rate": 6.97268839184383e-06, "loss": 0.3137, "num_tokens": 4911912018.0, "step": 7755 }, { "epoch": 0.9171100863190257, "grad_norm": 0.1301589012145996, "learning_rate": 6.969939786854352e-06, "loss": 0.3193, "num_tokens": 4912548350.0, "step": 7756 }, { "epoch": 0.9172283315596548, "grad_norm": 0.14464806020259857, "learning_rate": 6.967194999808306e-06, "loss": 0.3086, "num_tokens": 4913183674.0, "step": 7757 }, { "epoch": 0.9173465768002838, "grad_norm": 0.1344212144613266, "learning_rate": 6.964454031108286e-06, "loss": 0.3651, "num_tokens": 4913820208.0, "step": 7758 }, { "epoch": 0.9174648220409128, "grad_norm": 0.13078774511814117, "learning_rate": 6.9617168811563186e-06, "loss": 0.3376, "num_tokens": 4914459681.0, "step": 7759 }, { "epoch": 0.9175830672815419, "grad_norm": 0.12721140682697296, "learning_rate": 6.9589835503538764e-06, "loss": 0.3154, "num_tokens": 4915092566.0, "step": 7760 }, { "epoch": 0.917701312522171, "grad_norm": 0.12042620033025742, "learning_rate": 6.956254039101869e-06, "loss": 0.3171, "num_tokens": 4915727140.0, "step": 7761 }, { "epoch": 0.9178195577628, "grad_norm": 0.15112309157848358, "learning_rate": 6.95352834780064e-06, "loss": 0.333, "num_tokens": 4916353906.0, "step": 7762 }, { "epoch": 0.9179378030034291, "grad_norm": 0.1281762719154358, "learning_rate": 6.950806476849987e-06, "loss": 0.3107, "num_tokens": 4916984843.0, "step": 7763 }, { "epoch": 0.9180560482440582, "grad_norm": 0.14249937236309052, "learning_rate": 6.948088426649133e-06, "loss": 0.3315, "num_tokens": 4917621634.0, "step": 7764 }, { "epoch": 0.9181742934846873, "grad_norm": 0.15428632497787476, "learning_rate": 6.945374197596751e-06, "loss": 0.3916, "num_tokens": 4918258983.0, "step": 7765 }, { "epoch": 0.9182925387253164, "grad_norm": 0.12753809988498688, "learning_rate": 6.942663790090944e-06, "loss": 0.3239, "num_tokens": 4918895837.0, "step": 7766 }, { "epoch": 0.9184107839659453, "grad_norm": 0.12733297049999237, "learning_rate": 6.939957204529263e-06, "loss": 0.2993, "num_tokens": 4919526596.0, "step": 7767 }, { "epoch": 0.9185290292065744, "grad_norm": 0.12106706947088242, "learning_rate": 6.937254441308693e-06, "loss": 0.2819, "num_tokens": 4920159021.0, "step": 7768 }, { "epoch": 0.9186472744472035, "grad_norm": 0.12784521281719208, "learning_rate": 6.934555500825661e-06, "loss": 0.3271, "num_tokens": 4920794463.0, "step": 7769 }, { "epoch": 0.9187655196878326, "grad_norm": 0.1254706084728241, "learning_rate": 6.931860383476035e-06, "loss": 0.3231, "num_tokens": 4921434238.0, "step": 7770 }, { "epoch": 0.9188837649284616, "grad_norm": 0.12662459909915924, "learning_rate": 6.92916908965511e-06, "loss": 0.3169, "num_tokens": 4922068697.0, "step": 7771 }, { "epoch": 0.9190020101690907, "grad_norm": 0.12771731615066528, "learning_rate": 6.926481619757643e-06, "loss": 0.3162, "num_tokens": 4922698354.0, "step": 7772 }, { "epoch": 0.9191202554097198, "grad_norm": 0.1330360472202301, "learning_rate": 6.923797974177807e-06, "loss": 0.3361, "num_tokens": 4923338028.0, "step": 7773 }, { "epoch": 0.9192385006503488, "grad_norm": 0.13414111733436584, "learning_rate": 6.92111815330923e-06, "loss": 0.3556, "num_tokens": 4923944558.0, "step": 7774 }, { "epoch": 0.9193567458909779, "grad_norm": 0.13816195726394653, "learning_rate": 6.9184421575449726e-06, "loss": 0.3651, "num_tokens": 4924579862.0, "step": 7775 }, { "epoch": 0.9194749911316069, "grad_norm": 0.12251488119363785, "learning_rate": 6.915769987277531e-06, "loss": 0.2909, "num_tokens": 4925215555.0, "step": 7776 }, { "epoch": 0.919593236372236, "grad_norm": 0.12782122194766998, "learning_rate": 6.913101642898846e-06, "loss": 0.3222, "num_tokens": 4925848456.0, "step": 7777 }, { "epoch": 0.9197114816128651, "grad_norm": 0.12512719631195068, "learning_rate": 6.910437124800295e-06, "loss": 0.3052, "num_tokens": 4926475090.0, "step": 7778 }, { "epoch": 0.9198297268534942, "grad_norm": 0.1390993446111679, "learning_rate": 6.907776433372699e-06, "loss": 0.3206, "num_tokens": 4927105642.0, "step": 7779 }, { "epoch": 0.9199479720941232, "grad_norm": 0.12569034099578857, "learning_rate": 6.905119569006308e-06, "loss": 0.3217, "num_tokens": 4927740965.0, "step": 7780 }, { "epoch": 0.9200662173347522, "grad_norm": 0.12942753732204437, "learning_rate": 6.902466532090818e-06, "loss": 0.3207, "num_tokens": 4928376500.0, "step": 7781 }, { "epoch": 0.9201844625753813, "grad_norm": 0.1423528492450714, "learning_rate": 6.899817323015359e-06, "loss": 0.329, "num_tokens": 4929009940.0, "step": 7782 }, { "epoch": 0.9203027078160104, "grad_norm": 0.12756770849227905, "learning_rate": 6.897171942168511e-06, "loss": 0.3276, "num_tokens": 4929647775.0, "step": 7783 }, { "epoch": 0.9204209530566395, "grad_norm": 0.13887245953083038, "learning_rate": 6.89453038993827e-06, "loss": 0.3363, "num_tokens": 4930281348.0, "step": 7784 }, { "epoch": 0.9205391982972685, "grad_norm": 0.12782149016857147, "learning_rate": 6.8918926667120924e-06, "loss": 0.3029, "num_tokens": 4930908910.0, "step": 7785 }, { "epoch": 0.9206574435378976, "grad_norm": 0.13063262403011322, "learning_rate": 6.8892587728768665e-06, "loss": 0.3189, "num_tokens": 4931548194.0, "step": 7786 }, { "epoch": 0.9207756887785267, "grad_norm": 0.12908202409744263, "learning_rate": 6.886628708818912e-06, "loss": 0.315, "num_tokens": 4932182806.0, "step": 7787 }, { "epoch": 0.9208939340191558, "grad_norm": 0.12199325859546661, "learning_rate": 6.884002474923997e-06, "loss": 0.3157, "num_tokens": 4932821959.0, "step": 7788 }, { "epoch": 0.9210121792597848, "grad_norm": 0.1251426339149475, "learning_rate": 6.881380071577316e-06, "loss": 0.3262, "num_tokens": 4933456821.0, "step": 7789 }, { "epoch": 0.9211304245004138, "grad_norm": 0.13554102182388306, "learning_rate": 6.878761499163517e-06, "loss": 0.3448, "num_tokens": 4934096112.0, "step": 7790 }, { "epoch": 0.9212486697410429, "grad_norm": 0.12767145037651062, "learning_rate": 6.876146758066668e-06, "loss": 0.3272, "num_tokens": 4934730010.0, "step": 7791 }, { "epoch": 0.921366914981672, "grad_norm": 0.1307981163263321, "learning_rate": 6.87353584867029e-06, "loss": 0.319, "num_tokens": 4935360988.0, "step": 7792 }, { "epoch": 0.9214851602223011, "grad_norm": 0.12776660919189453, "learning_rate": 6.870928771357337e-06, "loss": 0.2941, "num_tokens": 4935997589.0, "step": 7793 }, { "epoch": 0.9216034054629301, "grad_norm": 0.12306507676839828, "learning_rate": 6.868325526510199e-06, "loss": 0.3129, "num_tokens": 4936629591.0, "step": 7794 }, { "epoch": 0.9217216507035592, "grad_norm": 0.12243074178695679, "learning_rate": 6.865726114510707e-06, "loss": 0.2786, "num_tokens": 4937256373.0, "step": 7795 }, { "epoch": 0.9218398959441882, "grad_norm": 0.12509837746620178, "learning_rate": 6.863130535740126e-06, "loss": 0.3029, "num_tokens": 4937895027.0, "step": 7796 }, { "epoch": 0.9219581411848173, "grad_norm": 0.12686152756214142, "learning_rate": 6.860538790579166e-06, "loss": 0.2943, "num_tokens": 4938520545.0, "step": 7797 }, { "epoch": 0.9220763864254464, "grad_norm": 0.138662651181221, "learning_rate": 6.85795087940796e-06, "loss": 0.3395, "num_tokens": 4939150829.0, "step": 7798 }, { "epoch": 0.9221946316660754, "grad_norm": 0.13335177302360535, "learning_rate": 6.855366802606099e-06, "loss": 0.3318, "num_tokens": 4939783237.0, "step": 7799 }, { "epoch": 0.9223128769067045, "grad_norm": 0.13200627267360687, "learning_rate": 6.852786560552593e-06, "loss": 0.3375, "num_tokens": 4940421102.0, "step": 7800 }, { "epoch": 0.9224311221473336, "grad_norm": 0.14048537611961365, "learning_rate": 6.850210153625898e-06, "loss": 0.3193, "num_tokens": 4941051195.0, "step": 7801 }, { "epoch": 0.9225493673879627, "grad_norm": 0.1236596405506134, "learning_rate": 6.847637582203912e-06, "loss": 0.3062, "num_tokens": 4941688945.0, "step": 7802 }, { "epoch": 0.9226676126285916, "grad_norm": 0.14546853303909302, "learning_rate": 6.84506884666396e-06, "loss": 0.3243, "num_tokens": 4942318692.0, "step": 7803 }, { "epoch": 0.9227858578692207, "grad_norm": 0.12562453746795654, "learning_rate": 6.842503947382819e-06, "loss": 0.345, "num_tokens": 4942958239.0, "step": 7804 }, { "epoch": 0.9229041031098498, "grad_norm": 0.14114268124103546, "learning_rate": 6.83994288473668e-06, "loss": 0.326, "num_tokens": 4943593664.0, "step": 7805 }, { "epoch": 0.9230223483504789, "grad_norm": 0.12515494227409363, "learning_rate": 6.837385659101194e-06, "loss": 0.31, "num_tokens": 4944230576.0, "step": 7806 }, { "epoch": 0.923140593591108, "grad_norm": 0.13064147531986237, "learning_rate": 6.834832270851443e-06, "loss": 0.3303, "num_tokens": 4944867336.0, "step": 7807 }, { "epoch": 0.923258838831737, "grad_norm": 0.12665048241615295, "learning_rate": 6.83228272036194e-06, "loss": 0.3271, "num_tokens": 4945499534.0, "step": 7808 }, { "epoch": 0.9233770840723661, "grad_norm": 0.11890138685703278, "learning_rate": 6.829737008006636e-06, "loss": 0.2999, "num_tokens": 4946132291.0, "step": 7809 }, { "epoch": 0.9234953293129952, "grad_norm": 0.11924120783805847, "learning_rate": 6.827195134158925e-06, "loss": 0.2812, "num_tokens": 4946769349.0, "step": 7810 }, { "epoch": 0.9236135745536243, "grad_norm": 0.1334647685289383, "learning_rate": 6.824657099191635e-06, "loss": 0.3401, "num_tokens": 4947386818.0, "step": 7811 }, { "epoch": 0.9237318197942532, "grad_norm": 0.14205054938793182, "learning_rate": 6.8221229034770295e-06, "loss": 0.331, "num_tokens": 4948015496.0, "step": 7812 }, { "epoch": 0.9238500650348823, "grad_norm": 0.1283503621816635, "learning_rate": 6.819592547386813e-06, "loss": 0.3399, "num_tokens": 4948650434.0, "step": 7813 }, { "epoch": 0.9239683102755114, "grad_norm": 0.12403405457735062, "learning_rate": 6.81706603129212e-06, "loss": 0.303, "num_tokens": 4949281853.0, "step": 7814 }, { "epoch": 0.9240865555161405, "grad_norm": 0.14202038943767548, "learning_rate": 6.814543355563525e-06, "loss": 0.3521, "num_tokens": 4949915728.0, "step": 7815 }, { "epoch": 0.9242048007567696, "grad_norm": 0.13617371022701263, "learning_rate": 6.812024520571048e-06, "loss": 0.3391, "num_tokens": 4950552705.0, "step": 7816 }, { "epoch": 0.9243230459973986, "grad_norm": 0.12828786671161652, "learning_rate": 6.809509526684129e-06, "loss": 0.337, "num_tokens": 4951191780.0, "step": 7817 }, { "epoch": 0.9244412912380277, "grad_norm": 0.12576867640018463, "learning_rate": 6.806998374271658e-06, "loss": 0.3351, "num_tokens": 4951827322.0, "step": 7818 }, { "epoch": 0.9245595364786567, "grad_norm": 0.13876523077487946, "learning_rate": 6.80449106370195e-06, "loss": 0.3423, "num_tokens": 4952461047.0, "step": 7819 }, { "epoch": 0.9246777817192858, "grad_norm": 0.12748512625694275, "learning_rate": 6.801987595342772e-06, "loss": 0.3145, "num_tokens": 4953087394.0, "step": 7820 }, { "epoch": 0.9247960269599149, "grad_norm": 0.12987138330936432, "learning_rate": 6.7994879695613175e-06, "loss": 0.3011, "num_tokens": 4953724181.0, "step": 7821 }, { "epoch": 0.9249142722005439, "grad_norm": 0.13329769670963287, "learning_rate": 6.7969921867242095e-06, "loss": 0.296, "num_tokens": 4954352322.0, "step": 7822 }, { "epoch": 0.925032517441173, "grad_norm": 0.13665804266929626, "learning_rate": 6.794500247197525e-06, "loss": 0.3532, "num_tokens": 4954990522.0, "step": 7823 }, { "epoch": 0.9251507626818021, "grad_norm": 0.13048113882541656, "learning_rate": 6.792012151346761e-06, "loss": 0.305, "num_tokens": 4955628497.0, "step": 7824 }, { "epoch": 0.9252690079224312, "grad_norm": 0.13573376834392548, "learning_rate": 6.7895278995368635e-06, "loss": 0.3209, "num_tokens": 4956264250.0, "step": 7825 }, { "epoch": 0.9253872531630601, "grad_norm": 0.14739425480365753, "learning_rate": 6.787047492132207e-06, "loss": 0.3593, "num_tokens": 4956898029.0, "step": 7826 }, { "epoch": 0.9255054984036892, "grad_norm": 0.13189226388931274, "learning_rate": 6.784570929496596e-06, "loss": 0.2983, "num_tokens": 4957533773.0, "step": 7827 }, { "epoch": 0.9256237436443183, "grad_norm": 0.1261245459318161, "learning_rate": 6.782098211993289e-06, "loss": 0.2868, "num_tokens": 4958167698.0, "step": 7828 }, { "epoch": 0.9257419888849474, "grad_norm": 0.12495657056570053, "learning_rate": 6.779629339984966e-06, "loss": 0.3347, "num_tokens": 4958802441.0, "step": 7829 }, { "epoch": 0.9258602341255765, "grad_norm": 0.13403964042663574, "learning_rate": 6.7771643138337495e-06, "loss": 0.3247, "num_tokens": 4959441719.0, "step": 7830 }, { "epoch": 0.9259784793662055, "grad_norm": 0.12847162783145905, "learning_rate": 6.774703133901189e-06, "loss": 0.3307, "num_tokens": 4960077311.0, "step": 7831 }, { "epoch": 0.9260967246068346, "grad_norm": 0.12948307394981384, "learning_rate": 6.7722458005482855e-06, "loss": 0.2933, "num_tokens": 4960709235.0, "step": 7832 }, { "epoch": 0.9262149698474637, "grad_norm": 0.14134907722473145, "learning_rate": 6.769792314135457e-06, "loss": 0.3235, "num_tokens": 4961338392.0, "step": 7833 }, { "epoch": 0.9263332150880927, "grad_norm": 0.1364922672510147, "learning_rate": 6.767342675022581e-06, "loss": 0.3303, "num_tokens": 4961974033.0, "step": 7834 }, { "epoch": 0.9264514603287217, "grad_norm": 0.1390475183725357, "learning_rate": 6.764896883568944e-06, "loss": 0.3394, "num_tokens": 4962603329.0, "step": 7835 }, { "epoch": 0.9265697055693508, "grad_norm": 0.12311691790819168, "learning_rate": 6.762454940133288e-06, "loss": 0.3318, "num_tokens": 4963238415.0, "step": 7836 }, { "epoch": 0.9266879508099799, "grad_norm": 0.1363048255443573, "learning_rate": 6.7600168450737805e-06, "loss": 0.3232, "num_tokens": 4963877111.0, "step": 7837 }, { "epoch": 0.926806196050609, "grad_norm": 0.14348295331001282, "learning_rate": 6.757582598748029e-06, "loss": 0.3372, "num_tokens": 4964509011.0, "step": 7838 }, { "epoch": 0.9269244412912381, "grad_norm": 0.13681846857070923, "learning_rate": 6.7551522015130765e-06, "loss": 0.3069, "num_tokens": 4965143445.0, "step": 7839 }, { "epoch": 0.927042686531867, "grad_norm": 0.12517307698726654, "learning_rate": 6.7527256537253934e-06, "loss": 0.3098, "num_tokens": 4965781956.0, "step": 7840 }, { "epoch": 0.9271609317724961, "grad_norm": 0.13675402104854584, "learning_rate": 6.750302955740897e-06, "loss": 0.3216, "num_tokens": 4966416478.0, "step": 7841 }, { "epoch": 0.9272791770131252, "grad_norm": 0.1500520259141922, "learning_rate": 6.747884107914942e-06, "loss": 0.3535, "num_tokens": 4967043821.0, "step": 7842 }, { "epoch": 0.9273974222537543, "grad_norm": 0.13796469569206238, "learning_rate": 6.745469110602299e-06, "loss": 0.3184, "num_tokens": 4967680457.0, "step": 7843 }, { "epoch": 0.9275156674943833, "grad_norm": 0.12778319418430328, "learning_rate": 6.743057964157194e-06, "loss": 0.3075, "num_tokens": 4968309431.0, "step": 7844 }, { "epoch": 0.9276339127350124, "grad_norm": 0.12691131234169006, "learning_rate": 6.740650668933277e-06, "loss": 0.3244, "num_tokens": 4968944120.0, "step": 7845 }, { "epoch": 0.9277521579756415, "grad_norm": 0.12682248651981354, "learning_rate": 6.738247225283639e-06, "loss": 0.2858, "num_tokens": 4969578746.0, "step": 7846 }, { "epoch": 0.9278704032162706, "grad_norm": 0.1277487426996231, "learning_rate": 6.735847633560804e-06, "loss": 0.3356, "num_tokens": 4970213415.0, "step": 7847 }, { "epoch": 0.9279886484568997, "grad_norm": 0.13380184769630432, "learning_rate": 6.733451894116725e-06, "loss": 0.3096, "num_tokens": 4970845572.0, "step": 7848 }, { "epoch": 0.9281068936975286, "grad_norm": 0.13858406245708466, "learning_rate": 6.7310600073028e-06, "loss": 0.3542, "num_tokens": 4971484814.0, "step": 7849 }, { "epoch": 0.9282251389381577, "grad_norm": 0.13550053536891937, "learning_rate": 6.72867197346986e-06, "loss": 0.3404, "num_tokens": 4972119832.0, "step": 7850 }, { "epoch": 0.9283433841787868, "grad_norm": 0.13671958446502686, "learning_rate": 6.726287792968167e-06, "loss": 0.3375, "num_tokens": 4972757648.0, "step": 7851 }, { "epoch": 0.9284616294194159, "grad_norm": 0.1354990303516388, "learning_rate": 6.723907466147414e-06, "loss": 0.3455, "num_tokens": 4973391413.0, "step": 7852 }, { "epoch": 0.9285798746600449, "grad_norm": 0.1329493522644043, "learning_rate": 6.721530993356741e-06, "loss": 0.3521, "num_tokens": 4974026001.0, "step": 7853 }, { "epoch": 0.928698119900674, "grad_norm": 0.12949714064598083, "learning_rate": 6.719158374944709e-06, "loss": 0.3069, "num_tokens": 4974664116.0, "step": 7854 }, { "epoch": 0.9288163651413031, "grad_norm": 0.12980176508426666, "learning_rate": 6.716789611259328e-06, "loss": 0.3221, "num_tokens": 4975298402.0, "step": 7855 }, { "epoch": 0.9289346103819321, "grad_norm": 0.14096492528915405, "learning_rate": 6.7144247026480315e-06, "loss": 0.3389, "num_tokens": 4975935180.0, "step": 7856 }, { "epoch": 0.9290528556225612, "grad_norm": 0.13023962080478668, "learning_rate": 6.7120636494576886e-06, "loss": 0.3071, "num_tokens": 4976527520.0, "step": 7857 }, { "epoch": 0.9291711008631902, "grad_norm": 0.12274400889873505, "learning_rate": 6.7097064520346085e-06, "loss": 0.2948, "num_tokens": 4977160176.0, "step": 7858 }, { "epoch": 0.9292893461038193, "grad_norm": 0.12039759755134583, "learning_rate": 6.70735311072453e-06, "loss": 0.317, "num_tokens": 4977788737.0, "step": 7859 }, { "epoch": 0.9294075913444484, "grad_norm": 0.1222093403339386, "learning_rate": 6.7050036258726295e-06, "loss": 0.3207, "num_tokens": 4978425424.0, "step": 7860 }, { "epoch": 0.9295258365850775, "grad_norm": 0.13323494791984558, "learning_rate": 6.702657997823515e-06, "loss": 0.3356, "num_tokens": 4979058031.0, "step": 7861 }, { "epoch": 0.9296440818257066, "grad_norm": 0.12982280552387238, "learning_rate": 6.700316226921231e-06, "loss": 0.3046, "num_tokens": 4979690046.0, "step": 7862 }, { "epoch": 0.9297623270663355, "grad_norm": 0.11675791442394257, "learning_rate": 6.69797831350925e-06, "loss": 0.2714, "num_tokens": 4980316794.0, "step": 7863 }, { "epoch": 0.9298805723069646, "grad_norm": 0.12083552032709122, "learning_rate": 6.695644257930495e-06, "loss": 0.3231, "num_tokens": 4980947629.0, "step": 7864 }, { "epoch": 0.9299988175475937, "grad_norm": 0.12451217323541641, "learning_rate": 6.693314060527304e-06, "loss": 0.312, "num_tokens": 4981585565.0, "step": 7865 }, { "epoch": 0.9301170627882228, "grad_norm": 0.1429755985736847, "learning_rate": 6.690987721641458e-06, "loss": 0.3447, "num_tokens": 4982218125.0, "step": 7866 }, { "epoch": 0.9302353080288518, "grad_norm": 0.1225399449467659, "learning_rate": 6.6886652416141756e-06, "loss": 0.3146, "num_tokens": 4982852397.0, "step": 7867 }, { "epoch": 0.9303535532694809, "grad_norm": 0.13689494132995605, "learning_rate": 6.6863466207861024e-06, "loss": 0.3253, "num_tokens": 4983486067.0, "step": 7868 }, { "epoch": 0.93047179851011, "grad_norm": 0.12943658232688904, "learning_rate": 6.684031859497321e-06, "loss": 0.3225, "num_tokens": 4984118397.0, "step": 7869 }, { "epoch": 0.9305900437507391, "grad_norm": 0.1376446634531021, "learning_rate": 6.6817209580873426e-06, "loss": 0.3684, "num_tokens": 4984755313.0, "step": 7870 }, { "epoch": 0.9307082889913681, "grad_norm": 0.11897671222686768, "learning_rate": 6.679413916895123e-06, "loss": 0.2715, "num_tokens": 4985392888.0, "step": 7871 }, { "epoch": 0.9308265342319971, "grad_norm": 0.1304844617843628, "learning_rate": 6.677110736259051e-06, "loss": 0.293, "num_tokens": 4986028059.0, "step": 7872 }, { "epoch": 0.9309447794726262, "grad_norm": 0.13744761049747467, "learning_rate": 6.674811416516931e-06, "loss": 0.3325, "num_tokens": 4986658123.0, "step": 7873 }, { "epoch": 0.9310630247132553, "grad_norm": 0.126140296459198, "learning_rate": 6.672515958006026e-06, "loss": 0.32, "num_tokens": 4987293559.0, "step": 7874 }, { "epoch": 0.9311812699538844, "grad_norm": 0.13467518985271454, "learning_rate": 6.670224361063013e-06, "loss": 0.3429, "num_tokens": 4987928389.0, "step": 7875 }, { "epoch": 0.9312995151945134, "grad_norm": 0.13180392980575562, "learning_rate": 6.667936626024019e-06, "loss": 0.3223, "num_tokens": 4988558812.0, "step": 7876 }, { "epoch": 0.9314177604351425, "grad_norm": 0.13555270433425903, "learning_rate": 6.665652753224589e-06, "loss": 0.325, "num_tokens": 4989198318.0, "step": 7877 }, { "epoch": 0.9315360056757716, "grad_norm": 0.12463904917240143, "learning_rate": 6.6633727429997115e-06, "loss": 0.2918, "num_tokens": 4989836409.0, "step": 7878 }, { "epoch": 0.9316542509164006, "grad_norm": 0.13091883063316345, "learning_rate": 6.661096595683805e-06, "loss": 0.312, "num_tokens": 4990467186.0, "step": 7879 }, { "epoch": 0.9317724961570297, "grad_norm": 0.14641191065311432, "learning_rate": 6.6588243116107195e-06, "loss": 0.3177, "num_tokens": 4991104349.0, "step": 7880 }, { "epoch": 0.9318907413976587, "grad_norm": 0.13150188326835632, "learning_rate": 6.656555891113749e-06, "loss": 0.3318, "num_tokens": 4991742506.0, "step": 7881 }, { "epoch": 0.9320089866382878, "grad_norm": 0.13515165448188782, "learning_rate": 6.654291334525603e-06, "loss": 0.3318, "num_tokens": 4992372838.0, "step": 7882 }, { "epoch": 0.9321272318789169, "grad_norm": 0.12877938151359558, "learning_rate": 6.652030642178441e-06, "loss": 0.3027, "num_tokens": 4993002476.0, "step": 7883 }, { "epoch": 0.932245477119546, "grad_norm": 0.1230025589466095, "learning_rate": 6.649773814403841e-06, "loss": 0.2809, "num_tokens": 4993633323.0, "step": 7884 }, { "epoch": 0.932363722360175, "grad_norm": 0.1273583620786667, "learning_rate": 6.647520851532832e-06, "loss": 0.2833, "num_tokens": 4994263120.0, "step": 7885 }, { "epoch": 0.932481967600804, "grad_norm": 0.13362717628479004, "learning_rate": 6.64527175389586e-06, "loss": 0.3458, "num_tokens": 4994898876.0, "step": 7886 }, { "epoch": 0.9326002128414331, "grad_norm": 0.12596678733825684, "learning_rate": 6.6430265218228075e-06, "loss": 0.3367, "num_tokens": 4995534661.0, "step": 7887 }, { "epoch": 0.9327184580820622, "grad_norm": 0.13628673553466797, "learning_rate": 6.640785155643e-06, "loss": 0.2989, "num_tokens": 4996165571.0, "step": 7888 }, { "epoch": 0.9328367033226913, "grad_norm": 0.13308709859848022, "learning_rate": 6.638547655685183e-06, "loss": 0.3251, "num_tokens": 4996801198.0, "step": 7889 }, { "epoch": 0.9329549485633203, "grad_norm": 0.14568030834197998, "learning_rate": 6.6363140222775445e-06, "loss": 0.3241, "num_tokens": 4997440198.0, "step": 7890 }, { "epoch": 0.9330731938039494, "grad_norm": 0.14132678508758545, "learning_rate": 6.634084255747693e-06, "loss": 0.3355, "num_tokens": 4998073100.0, "step": 7891 }, { "epoch": 0.9331914390445785, "grad_norm": 0.12305846065282822, "learning_rate": 6.631858356422686e-06, "loss": 0.2857, "num_tokens": 4998706562.0, "step": 7892 }, { "epoch": 0.9333096842852076, "grad_norm": 0.12399455159902573, "learning_rate": 6.629636324629004e-06, "loss": 0.2919, "num_tokens": 4999341783.0, "step": 7893 }, { "epoch": 0.9334279295258365, "grad_norm": 0.13983938097953796, "learning_rate": 6.627418160692565e-06, "loss": 0.3283, "num_tokens": 4999977921.0, "step": 7894 }, { "epoch": 0.9335461747664656, "grad_norm": 0.12885220348834991, "learning_rate": 6.625203864938712e-06, "loss": 0.2996, "num_tokens": 5000610475.0, "step": 7895 }, { "epoch": 0.9336644200070947, "grad_norm": 0.12376195192337036, "learning_rate": 6.622993437692226e-06, "loss": 0.3004, "num_tokens": 5001241892.0, "step": 7896 }, { "epoch": 0.9337826652477238, "grad_norm": 0.11203567683696747, "learning_rate": 6.6207868792773225e-06, "loss": 0.2697, "num_tokens": 5001876413.0, "step": 7897 }, { "epoch": 0.9339009104883529, "grad_norm": 0.1278931349515915, "learning_rate": 6.618584190017649e-06, "loss": 0.2917, "num_tokens": 5002507538.0, "step": 7898 }, { "epoch": 0.9340191557289819, "grad_norm": 0.12287084758281708, "learning_rate": 6.6163853702362756e-06, "loss": 0.3021, "num_tokens": 5003138932.0, "step": 7899 }, { "epoch": 0.934137400969611, "grad_norm": 0.1379907876253128, "learning_rate": 6.614190420255723e-06, "loss": 0.3154, "num_tokens": 5003746256.0, "step": 7900 }, { "epoch": 0.93425564621024, "grad_norm": 0.12560676038265228, "learning_rate": 6.611999340397924e-06, "loss": 0.2852, "num_tokens": 5004383446.0, "step": 7901 }, { "epoch": 0.9343738914508691, "grad_norm": 0.1280817687511444, "learning_rate": 6.6098121309842645e-06, "loss": 0.3065, "num_tokens": 5005017554.0, "step": 7902 }, { "epoch": 0.9344921366914982, "grad_norm": 0.12612475454807281, "learning_rate": 6.607628792335541e-06, "loss": 0.3093, "num_tokens": 5005653877.0, "step": 7903 }, { "epoch": 0.9346103819321272, "grad_norm": 0.13653971254825592, "learning_rate": 6.6054493247720046e-06, "loss": 0.3418, "num_tokens": 5006291278.0, "step": 7904 }, { "epoch": 0.9347286271727563, "grad_norm": 0.12580417096614838, "learning_rate": 6.603273728613317e-06, "loss": 0.3309, "num_tokens": 5006927632.0, "step": 7905 }, { "epoch": 0.9348468724133854, "grad_norm": 0.1322667896747589, "learning_rate": 6.601102004178591e-06, "loss": 0.2854, "num_tokens": 5007557262.0, "step": 7906 }, { "epoch": 0.9349651176540145, "grad_norm": 0.11427977681159973, "learning_rate": 6.59893415178636e-06, "loss": 0.3046, "num_tokens": 5008195320.0, "step": 7907 }, { "epoch": 0.9350833628946434, "grad_norm": 0.12578855454921722, "learning_rate": 6.5967701717545865e-06, "loss": 0.3333, "num_tokens": 5008827510.0, "step": 7908 }, { "epoch": 0.9352016081352725, "grad_norm": 0.1283116638660431, "learning_rate": 6.594610064400682e-06, "loss": 0.3197, "num_tokens": 5009462532.0, "step": 7909 }, { "epoch": 0.9353198533759016, "grad_norm": 0.13651898503303528, "learning_rate": 6.59245383004147e-06, "loss": 0.2888, "num_tokens": 5010090823.0, "step": 7910 }, { "epoch": 0.9354380986165307, "grad_norm": 0.14527790248394012, "learning_rate": 6.590301468993222e-06, "loss": 0.3329, "num_tokens": 5010724242.0, "step": 7911 }, { "epoch": 0.9355563438571598, "grad_norm": 0.13051067292690277, "learning_rate": 6.588152981571625e-06, "loss": 0.2888, "num_tokens": 5011358931.0, "step": 7912 }, { "epoch": 0.9356745890977888, "grad_norm": 0.1283981055021286, "learning_rate": 6.586008368091813e-06, "loss": 0.3304, "num_tokens": 5011996760.0, "step": 7913 }, { "epoch": 0.9357928343384179, "grad_norm": 0.13858874142169952, "learning_rate": 6.583867628868347e-06, "loss": 0.3024, "num_tokens": 5012623331.0, "step": 7914 }, { "epoch": 0.935911079579047, "grad_norm": 0.11631153523921967, "learning_rate": 6.581730764215218e-06, "loss": 0.2883, "num_tokens": 5013256260.0, "step": 7915 }, { "epoch": 0.936029324819676, "grad_norm": 0.11790558695793152, "learning_rate": 6.57959777444585e-06, "loss": 0.2841, "num_tokens": 5013887143.0, "step": 7916 }, { "epoch": 0.936147570060305, "grad_norm": 0.13260555267333984, "learning_rate": 6.577468659873091e-06, "loss": 0.3352, "num_tokens": 5014525541.0, "step": 7917 }, { "epoch": 0.9362658153009341, "grad_norm": 0.12989358603954315, "learning_rate": 6.575343420809236e-06, "loss": 0.3433, "num_tokens": 5015161061.0, "step": 7918 }, { "epoch": 0.9363840605415632, "grad_norm": 0.12931028008460999, "learning_rate": 6.573222057566001e-06, "loss": 0.3027, "num_tokens": 5015796073.0, "step": 7919 }, { "epoch": 0.9365023057821923, "grad_norm": 0.1456698775291443, "learning_rate": 6.5711045704545305e-06, "loss": 0.3525, "num_tokens": 5016432401.0, "step": 7920 }, { "epoch": 0.9366205510228214, "grad_norm": 0.12677200138568878, "learning_rate": 6.568990959785414e-06, "loss": 0.2909, "num_tokens": 5017061530.0, "step": 7921 }, { "epoch": 0.9367387962634504, "grad_norm": 0.12353679537773132, "learning_rate": 6.566881225868659e-06, "loss": 0.3144, "num_tokens": 5017665250.0, "step": 7922 }, { "epoch": 0.9368570415040794, "grad_norm": 0.13035282492637634, "learning_rate": 6.564775369013709e-06, "loss": 0.3174, "num_tokens": 5018292889.0, "step": 7923 }, { "epoch": 0.9369752867447085, "grad_norm": 0.12288947403430939, "learning_rate": 6.562673389529444e-06, "loss": 0.2934, "num_tokens": 5018926153.0, "step": 7924 }, { "epoch": 0.9370935319853376, "grad_norm": 0.12703578174114227, "learning_rate": 6.560575287724167e-06, "loss": 0.282, "num_tokens": 5019560697.0, "step": 7925 }, { "epoch": 0.9372117772259666, "grad_norm": 0.12624843418598175, "learning_rate": 6.558481063905612e-06, "loss": 0.3016, "num_tokens": 5020199674.0, "step": 7926 }, { "epoch": 0.9373300224665957, "grad_norm": 0.13343991339206696, "learning_rate": 6.556390718380956e-06, "loss": 0.3378, "num_tokens": 5020837960.0, "step": 7927 }, { "epoch": 0.9374482677072248, "grad_norm": 0.13075432181358337, "learning_rate": 6.554304251456796e-06, "loss": 0.3177, "num_tokens": 5021470873.0, "step": 7928 }, { "epoch": 0.9375665129478539, "grad_norm": 0.1279655545949936, "learning_rate": 6.552221663439158e-06, "loss": 0.3164, "num_tokens": 5022102064.0, "step": 7929 }, { "epoch": 0.937684758188483, "grad_norm": 0.11821383982896805, "learning_rate": 6.55014295463351e-06, "loss": 0.2933, "num_tokens": 5022735772.0, "step": 7930 }, { "epoch": 0.9378030034291119, "grad_norm": 0.12414173781871796, "learning_rate": 6.548068125344745e-06, "loss": 0.3389, "num_tokens": 5023370494.0, "step": 7931 }, { "epoch": 0.937921248669741, "grad_norm": 0.13859039545059204, "learning_rate": 6.545997175877193e-06, "loss": 0.3449, "num_tokens": 5024005092.0, "step": 7932 }, { "epoch": 0.9380394939103701, "grad_norm": 0.11680852621793747, "learning_rate": 6.543930106534595e-06, "loss": 0.2957, "num_tokens": 5024637089.0, "step": 7933 }, { "epoch": 0.9381577391509992, "grad_norm": 0.13135814666748047, "learning_rate": 6.541866917620148e-06, "loss": 0.3378, "num_tokens": 5025266384.0, "step": 7934 }, { "epoch": 0.9382759843916282, "grad_norm": 0.12428507953882217, "learning_rate": 6.539807609436462e-06, "loss": 0.3277, "num_tokens": 5025905671.0, "step": 7935 }, { "epoch": 0.9383942296322573, "grad_norm": 0.12633320689201355, "learning_rate": 6.537752182285595e-06, "loss": 0.3128, "num_tokens": 5026544502.0, "step": 7936 }, { "epoch": 0.9385124748728864, "grad_norm": 0.13510072231292725, "learning_rate": 6.535700636469015e-06, "loss": 0.3431, "num_tokens": 5027181136.0, "step": 7937 }, { "epoch": 0.9386307201135154, "grad_norm": 0.1342879831790924, "learning_rate": 6.533652972287636e-06, "loss": 0.3633, "num_tokens": 5027815561.0, "step": 7938 }, { "epoch": 0.9387489653541445, "grad_norm": 0.12930600345134735, "learning_rate": 6.5316091900418e-06, "loss": 0.3187, "num_tokens": 5028450990.0, "step": 7939 }, { "epoch": 0.9388672105947735, "grad_norm": 0.13400140404701233, "learning_rate": 6.529569290031276e-06, "loss": 0.3288, "num_tokens": 5029087960.0, "step": 7940 }, { "epoch": 0.9389854558354026, "grad_norm": 0.12314188480377197, "learning_rate": 6.527533272555262e-06, "loss": 0.3055, "num_tokens": 5029719697.0, "step": 7941 }, { "epoch": 0.9391037010760317, "grad_norm": 0.13767509162425995, "learning_rate": 6.5255011379123915e-06, "loss": 0.347, "num_tokens": 5030347905.0, "step": 7942 }, { "epoch": 0.9392219463166608, "grad_norm": 0.13386741280555725, "learning_rate": 6.523472886400726e-06, "loss": 0.3175, "num_tokens": 5030984766.0, "step": 7943 }, { "epoch": 0.9393401915572899, "grad_norm": 0.12113789469003677, "learning_rate": 6.521448518317761e-06, "loss": 0.289, "num_tokens": 5031618480.0, "step": 7944 }, { "epoch": 0.9394584367979188, "grad_norm": 0.13387753069400787, "learning_rate": 6.519428033960417e-06, "loss": 0.3255, "num_tokens": 5032255336.0, "step": 7945 }, { "epoch": 0.9395766820385479, "grad_norm": 0.1304858922958374, "learning_rate": 6.517411433625049e-06, "loss": 0.316, "num_tokens": 5032887409.0, "step": 7946 }, { "epoch": 0.939694927279177, "grad_norm": 0.12055995315313339, "learning_rate": 6.515398717607438e-06, "loss": 0.3034, "num_tokens": 5033522328.0, "step": 7947 }, { "epoch": 0.9398131725198061, "grad_norm": 0.1393008530139923, "learning_rate": 6.5133898862028e-06, "loss": 0.3499, "num_tokens": 5034148623.0, "step": 7948 }, { "epoch": 0.9399314177604351, "grad_norm": 0.1455509215593338, "learning_rate": 6.511384939705776e-06, "loss": 0.335, "num_tokens": 5034773505.0, "step": 7949 }, { "epoch": 0.9400496630010642, "grad_norm": 0.1330309510231018, "learning_rate": 6.509383878410445e-06, "loss": 0.3336, "num_tokens": 5035406380.0, "step": 7950 }, { "epoch": 0.9401679082416933, "grad_norm": 0.15695497393608093, "learning_rate": 6.507386702610312e-06, "loss": 0.3636, "num_tokens": 5036028447.0, "step": 7951 }, { "epoch": 0.9402861534823224, "grad_norm": 0.13090547919273376, "learning_rate": 6.505393412598303e-06, "loss": 0.2914, "num_tokens": 5036650093.0, "step": 7952 }, { "epoch": 0.9404043987229515, "grad_norm": 0.13987688720226288, "learning_rate": 6.503404008666793e-06, "loss": 0.3198, "num_tokens": 5037287690.0, "step": 7953 }, { "epoch": 0.9405226439635804, "grad_norm": 0.13045866787433624, "learning_rate": 6.501418491107569e-06, "loss": 0.279, "num_tokens": 5037924507.0, "step": 7954 }, { "epoch": 0.9406408892042095, "grad_norm": 0.14035862684249878, "learning_rate": 6.499436860211864e-06, "loss": 0.3482, "num_tokens": 5038562437.0, "step": 7955 }, { "epoch": 0.9407591344448386, "grad_norm": 0.1310453563928604, "learning_rate": 6.497459116270321e-06, "loss": 0.3074, "num_tokens": 5039196076.0, "step": 7956 }, { "epoch": 0.9408773796854677, "grad_norm": 0.1331660896539688, "learning_rate": 6.495485259573035e-06, "loss": 0.3307, "num_tokens": 5039829515.0, "step": 7957 }, { "epoch": 0.9409956249260967, "grad_norm": 0.12825679779052734, "learning_rate": 6.493515290409514e-06, "loss": 0.3176, "num_tokens": 5040460472.0, "step": 7958 }, { "epoch": 0.9411138701667258, "grad_norm": 0.1422814577817917, "learning_rate": 6.491549209068703e-06, "loss": 0.3679, "num_tokens": 5041092619.0, "step": 7959 }, { "epoch": 0.9412321154073549, "grad_norm": 0.1312824934720993, "learning_rate": 6.489587015838977e-06, "loss": 0.3298, "num_tokens": 5041732268.0, "step": 7960 }, { "epoch": 0.9413503606479839, "grad_norm": 0.13014937937259674, "learning_rate": 6.487628711008139e-06, "loss": 0.3216, "num_tokens": 5042362202.0, "step": 7961 }, { "epoch": 0.941468605888613, "grad_norm": 0.12798571586608887, "learning_rate": 6.485674294863426e-06, "loss": 0.3362, "num_tokens": 5042991212.0, "step": 7962 }, { "epoch": 0.941586851129242, "grad_norm": 0.13668450713157654, "learning_rate": 6.483723767691494e-06, "loss": 0.3223, "num_tokens": 5043624246.0, "step": 7963 }, { "epoch": 0.9417050963698711, "grad_norm": 0.13081251084804535, "learning_rate": 6.4817771297784375e-06, "loss": 0.3588, "num_tokens": 5044253342.0, "step": 7964 }, { "epoch": 0.9418233416105002, "grad_norm": 0.14275525510311127, "learning_rate": 6.479834381409784e-06, "loss": 0.347, "num_tokens": 5044889823.0, "step": 7965 }, { "epoch": 0.9419415868511293, "grad_norm": 0.13763408362865448, "learning_rate": 6.477895522870477e-06, "loss": 0.3314, "num_tokens": 5045529375.0, "step": 7966 }, { "epoch": 0.9420598320917583, "grad_norm": 0.1263134479522705, "learning_rate": 6.475960554444903e-06, "loss": 0.3476, "num_tokens": 5046165915.0, "step": 7967 }, { "epoch": 0.9421780773323873, "grad_norm": 0.11786458641290665, "learning_rate": 6.474029476416866e-06, "loss": 0.3046, "num_tokens": 5046804470.0, "step": 7968 }, { "epoch": 0.9422963225730164, "grad_norm": 0.1256287842988968, "learning_rate": 6.472102289069616e-06, "loss": 0.3077, "num_tokens": 5047438433.0, "step": 7969 }, { "epoch": 0.9424145678136455, "grad_norm": 0.136765256524086, "learning_rate": 6.470178992685812e-06, "loss": 0.3502, "num_tokens": 5048073621.0, "step": 7970 }, { "epoch": 0.9425328130542746, "grad_norm": 0.12625689804553986, "learning_rate": 6.468259587547557e-06, "loss": 0.3115, "num_tokens": 5048706687.0, "step": 7971 }, { "epoch": 0.9426510582949036, "grad_norm": 0.13418669998645782, "learning_rate": 6.466344073936378e-06, "loss": 0.3297, "num_tokens": 5049343445.0, "step": 7972 }, { "epoch": 0.9427693035355327, "grad_norm": 0.1245817318558693, "learning_rate": 6.464432452133231e-06, "loss": 0.3194, "num_tokens": 5049982086.0, "step": 7973 }, { "epoch": 0.9428875487761618, "grad_norm": 0.13007965683937073, "learning_rate": 6.4625247224185055e-06, "loss": 0.3201, "num_tokens": 5050621506.0, "step": 7974 }, { "epoch": 0.9430057940167909, "grad_norm": 0.12761163711547852, "learning_rate": 6.460620885072012e-06, "loss": 0.3112, "num_tokens": 5051255140.0, "step": 7975 }, { "epoch": 0.9431240392574199, "grad_norm": 0.12296957522630692, "learning_rate": 6.458720940372999e-06, "loss": 0.2874, "num_tokens": 5051885630.0, "step": 7976 }, { "epoch": 0.9432422844980489, "grad_norm": 0.12822197377681732, "learning_rate": 6.456824888600133e-06, "loss": 0.3148, "num_tokens": 5052524300.0, "step": 7977 }, { "epoch": 0.943360529738678, "grad_norm": 0.12191552668809891, "learning_rate": 6.454932730031522e-06, "loss": 0.2983, "num_tokens": 5053152527.0, "step": 7978 }, { "epoch": 0.9434787749793071, "grad_norm": 0.12673836946487427, "learning_rate": 6.453044464944699e-06, "loss": 0.3185, "num_tokens": 5053787202.0, "step": 7979 }, { "epoch": 0.9435970202199362, "grad_norm": 0.1296648532152176, "learning_rate": 6.451160093616615e-06, "loss": 0.3102, "num_tokens": 5054418538.0, "step": 7980 }, { "epoch": 0.9437152654605652, "grad_norm": 0.12429476529359818, "learning_rate": 6.44927961632367e-06, "loss": 0.3082, "num_tokens": 5055054470.0, "step": 7981 }, { "epoch": 0.9438335107011943, "grad_norm": 0.12412666529417038, "learning_rate": 6.447403033341675e-06, "loss": 0.3321, "num_tokens": 5055685325.0, "step": 7982 }, { "epoch": 0.9439517559418233, "grad_norm": 0.1287357062101364, "learning_rate": 6.4455303449458795e-06, "loss": 0.3162, "num_tokens": 5056319152.0, "step": 7983 }, { "epoch": 0.9440700011824524, "grad_norm": 0.12678509950637817, "learning_rate": 6.443661551410956e-06, "loss": 0.3236, "num_tokens": 5056943649.0, "step": 7984 }, { "epoch": 0.9441882464230815, "grad_norm": 0.12791453301906586, "learning_rate": 6.441796653011011e-06, "loss": 0.288, "num_tokens": 5057571976.0, "step": 7985 }, { "epoch": 0.9443064916637105, "grad_norm": 0.14430683851242065, "learning_rate": 6.439935650019576e-06, "loss": 0.35, "num_tokens": 5058207470.0, "step": 7986 }, { "epoch": 0.9444247369043396, "grad_norm": 0.12852174043655396, "learning_rate": 6.438078542709618e-06, "loss": 0.3149, "num_tokens": 5058840792.0, "step": 7987 }, { "epoch": 0.9445429821449687, "grad_norm": 0.12255551666021347, "learning_rate": 6.4362253313535185e-06, "loss": 0.3199, "num_tokens": 5059477196.0, "step": 7988 }, { "epoch": 0.9446612273855978, "grad_norm": 0.13329945504665375, "learning_rate": 6.434376016223101e-06, "loss": 0.3496, "num_tokens": 5060104364.0, "step": 7989 }, { "epoch": 0.9447794726262267, "grad_norm": 0.129975825548172, "learning_rate": 6.4325305975896135e-06, "loss": 0.3313, "num_tokens": 5060736161.0, "step": 7990 }, { "epoch": 0.9448977178668558, "grad_norm": 0.12774623930454254, "learning_rate": 6.430689075723726e-06, "loss": 0.3146, "num_tokens": 5061367573.0, "step": 7991 }, { "epoch": 0.9450159631074849, "grad_norm": 0.12314406782388687, "learning_rate": 6.42885145089555e-06, "loss": 0.3088, "num_tokens": 5062003989.0, "step": 7992 }, { "epoch": 0.945134208348114, "grad_norm": 0.12001129239797592, "learning_rate": 6.427017723374614e-06, "loss": 0.3049, "num_tokens": 5062641406.0, "step": 7993 }, { "epoch": 0.9452524535887431, "grad_norm": 0.13301093876361847, "learning_rate": 6.425187893429878e-06, "loss": 0.3351, "num_tokens": 5063277365.0, "step": 7994 }, { "epoch": 0.9453706988293721, "grad_norm": 0.13187932968139648, "learning_rate": 6.4233619613297344e-06, "loss": 0.3085, "num_tokens": 5063884056.0, "step": 7995 }, { "epoch": 0.9454889440700012, "grad_norm": 0.13687318563461304, "learning_rate": 6.4215399273420005e-06, "loss": 0.2958, "num_tokens": 5064522411.0, "step": 7996 }, { "epoch": 0.9456071893106303, "grad_norm": 0.12469948828220367, "learning_rate": 6.419721791733921e-06, "loss": 0.3193, "num_tokens": 5065161687.0, "step": 7997 }, { "epoch": 0.9457254345512593, "grad_norm": 0.13275712728500366, "learning_rate": 6.417907554772167e-06, "loss": 0.3258, "num_tokens": 5065795601.0, "step": 7998 }, { "epoch": 0.9458436797918883, "grad_norm": 0.1320505291223526, "learning_rate": 6.4160972167228405e-06, "loss": 0.3503, "num_tokens": 5066432718.0, "step": 7999 }, { "epoch": 0.9459619250325174, "grad_norm": 0.13748905062675476, "learning_rate": 6.414290777851481e-06, "loss": 0.3462, "num_tokens": 5067066106.0, "step": 8000 }, { "epoch": 0.9460801702731465, "grad_norm": 0.14252015948295593, "learning_rate": 6.412488238423037e-06, "loss": 0.3203, "num_tokens": 5067703767.0, "step": 8001 }, { "epoch": 0.9461984155137756, "grad_norm": 0.13175076246261597, "learning_rate": 6.4106895987018964e-06, "loss": 0.3035, "num_tokens": 5068342390.0, "step": 8002 }, { "epoch": 0.9463166607544047, "grad_norm": 0.13075652718544006, "learning_rate": 6.408894858951876e-06, "loss": 0.299, "num_tokens": 5068976312.0, "step": 8003 }, { "epoch": 0.9464349059950337, "grad_norm": 0.13562722504138947, "learning_rate": 6.407104019436219e-06, "loss": 0.3363, "num_tokens": 5069607545.0, "step": 8004 }, { "epoch": 0.9465531512356627, "grad_norm": 0.12931746244430542, "learning_rate": 6.405317080417594e-06, "loss": 0.3254, "num_tokens": 5070243564.0, "step": 8005 }, { "epoch": 0.9466713964762918, "grad_norm": 0.12982690334320068, "learning_rate": 6.403534042158098e-06, "loss": 0.3113, "num_tokens": 5070880710.0, "step": 8006 }, { "epoch": 0.9467896417169209, "grad_norm": 0.13068896532058716, "learning_rate": 6.401754904919259e-06, "loss": 0.3135, "num_tokens": 5071490371.0, "step": 8007 }, { "epoch": 0.9469078869575499, "grad_norm": 0.1288313865661621, "learning_rate": 6.399979668962033e-06, "loss": 0.33, "num_tokens": 5072124668.0, "step": 8008 }, { "epoch": 0.947026132198179, "grad_norm": 0.13774725794792175, "learning_rate": 6.398208334546797e-06, "loss": 0.316, "num_tokens": 5072760485.0, "step": 8009 }, { "epoch": 0.9471443774388081, "grad_norm": 0.13805268704891205, "learning_rate": 6.396440901933359e-06, "loss": 0.3184, "num_tokens": 5073395304.0, "step": 8010 }, { "epoch": 0.9472626226794372, "grad_norm": 0.126872256398201, "learning_rate": 6.394677371380963e-06, "loss": 0.3323, "num_tokens": 5074025158.0, "step": 8011 }, { "epoch": 0.9473808679200663, "grad_norm": 0.13623903691768646, "learning_rate": 6.392917743148269e-06, "loss": 0.3218, "num_tokens": 5074660787.0, "step": 8012 }, { "epoch": 0.9474991131606952, "grad_norm": 0.1386144459247589, "learning_rate": 6.391162017493371e-06, "loss": 0.3892, "num_tokens": 5075291775.0, "step": 8013 }, { "epoch": 0.9476173584013243, "grad_norm": 0.126929372549057, "learning_rate": 6.389410194673791e-06, "loss": 0.3153, "num_tokens": 5075929793.0, "step": 8014 }, { "epoch": 0.9477356036419534, "grad_norm": 0.12808369100093842, "learning_rate": 6.3876622749464695e-06, "loss": 0.3091, "num_tokens": 5076567774.0, "step": 8015 }, { "epoch": 0.9478538488825825, "grad_norm": 0.12983132898807526, "learning_rate": 6.385918258567789e-06, "loss": 0.3208, "num_tokens": 5077184310.0, "step": 8016 }, { "epoch": 0.9479720941232116, "grad_norm": 0.14852990210056305, "learning_rate": 6.384178145793551e-06, "loss": 0.3494, "num_tokens": 5077818142.0, "step": 8017 }, { "epoch": 0.9480903393638406, "grad_norm": 0.1363523155450821, "learning_rate": 6.382441936878982e-06, "loss": 0.3458, "num_tokens": 5078456144.0, "step": 8018 }, { "epoch": 0.9482085846044697, "grad_norm": 0.13058635592460632, "learning_rate": 6.380709632078741e-06, "loss": 0.3192, "num_tokens": 5079091008.0, "step": 8019 }, { "epoch": 0.9483268298450988, "grad_norm": 0.1271863728761673, "learning_rate": 6.378981231646913e-06, "loss": 0.3199, "num_tokens": 5079725819.0, "step": 8020 }, { "epoch": 0.9484450750857278, "grad_norm": 0.1224251240491867, "learning_rate": 6.37725673583701e-06, "loss": 0.2635, "num_tokens": 5080364982.0, "step": 8021 }, { "epoch": 0.9485633203263568, "grad_norm": 0.13081899285316467, "learning_rate": 6.375536144901971e-06, "loss": 0.3531, "num_tokens": 5080996884.0, "step": 8022 }, { "epoch": 0.9486815655669859, "grad_norm": 0.1223100870847702, "learning_rate": 6.3738194590941674e-06, "loss": 0.304, "num_tokens": 5081627871.0, "step": 8023 }, { "epoch": 0.948799810807615, "grad_norm": 0.13102951645851135, "learning_rate": 6.372106678665383e-06, "loss": 0.2806, "num_tokens": 5082267499.0, "step": 8024 }, { "epoch": 0.9489180560482441, "grad_norm": 0.13109256327152252, "learning_rate": 6.370397803866847e-06, "loss": 0.3063, "num_tokens": 5082899614.0, "step": 8025 }, { "epoch": 0.9490363012888732, "grad_norm": 0.13245661556720734, "learning_rate": 6.368692834949206e-06, "loss": 0.3249, "num_tokens": 5083537181.0, "step": 8026 }, { "epoch": 0.9491545465295022, "grad_norm": 0.13355505466461182, "learning_rate": 6.366991772162536e-06, "loss": 0.356, "num_tokens": 5084171894.0, "step": 8027 }, { "epoch": 0.9492727917701312, "grad_norm": 0.13401514291763306, "learning_rate": 6.365294615756334e-06, "loss": 0.3121, "num_tokens": 5084808562.0, "step": 8028 }, { "epoch": 0.9493910370107603, "grad_norm": 0.12398333102464676, "learning_rate": 6.363601365979534e-06, "loss": 0.333, "num_tokens": 5085444741.0, "step": 8029 }, { "epoch": 0.9495092822513894, "grad_norm": 0.12369203567504883, "learning_rate": 6.361912023080498e-06, "loss": 0.3098, "num_tokens": 5086081819.0, "step": 8030 }, { "epoch": 0.9496275274920184, "grad_norm": 0.12987764179706573, "learning_rate": 6.360226587306994e-06, "loss": 0.3086, "num_tokens": 5086712331.0, "step": 8031 }, { "epoch": 0.9497457727326475, "grad_norm": 0.13003014028072357, "learning_rate": 6.358545058906248e-06, "loss": 0.3256, "num_tokens": 5087345814.0, "step": 8032 }, { "epoch": 0.9498640179732766, "grad_norm": 0.12506069242954254, "learning_rate": 6.356867438124887e-06, "loss": 0.3055, "num_tokens": 5087984254.0, "step": 8033 }, { "epoch": 0.9499822632139057, "grad_norm": 0.13542471826076508, "learning_rate": 6.355193725208978e-06, "loss": 0.3201, "num_tokens": 5088607973.0, "step": 8034 }, { "epoch": 0.9501005084545348, "grad_norm": 0.11930713057518005, "learning_rate": 6.353523920404014e-06, "loss": 0.2684, "num_tokens": 5089232364.0, "step": 8035 }, { "epoch": 0.9502187536951637, "grad_norm": 0.12510192394256592, "learning_rate": 6.351858023954912e-06, "loss": 0.3282, "num_tokens": 5089863963.0, "step": 8036 }, { "epoch": 0.9503369989357928, "grad_norm": 0.1283147931098938, "learning_rate": 6.350196036106013e-06, "loss": 0.3103, "num_tokens": 5090497388.0, "step": 8037 }, { "epoch": 0.9504552441764219, "grad_norm": 0.1366681158542633, "learning_rate": 6.348537957101092e-06, "loss": 0.3495, "num_tokens": 5091129504.0, "step": 8038 }, { "epoch": 0.950573489417051, "grad_norm": 0.12656515836715698, "learning_rate": 6.346883787183345e-06, "loss": 0.311, "num_tokens": 5091762982.0, "step": 8039 }, { "epoch": 0.95069173465768, "grad_norm": 0.1401248574256897, "learning_rate": 6.345233526595395e-06, "loss": 0.3378, "num_tokens": 5092396058.0, "step": 8040 }, { "epoch": 0.9508099798983091, "grad_norm": 0.13151727616786957, "learning_rate": 6.343587175579296e-06, "loss": 0.3188, "num_tokens": 5093030263.0, "step": 8041 }, { "epoch": 0.9509282251389382, "grad_norm": 0.12044956535100937, "learning_rate": 6.341944734376523e-06, "loss": 0.3335, "num_tokens": 5093669582.0, "step": 8042 }, { "epoch": 0.9510464703795672, "grad_norm": 0.12004297971725464, "learning_rate": 6.34030620322798e-06, "loss": 0.2673, "num_tokens": 5094301831.0, "step": 8043 }, { "epoch": 0.9511647156201963, "grad_norm": 0.1266290843486786, "learning_rate": 6.338671582374002e-06, "loss": 0.2864, "num_tokens": 5094937623.0, "step": 8044 }, { "epoch": 0.9512829608608253, "grad_norm": 0.13799303770065308, "learning_rate": 6.33704087205434e-06, "loss": 0.3124, "num_tokens": 5095569088.0, "step": 8045 }, { "epoch": 0.9514012061014544, "grad_norm": 0.1423172503709793, "learning_rate": 6.335414072508182e-06, "loss": 0.351, "num_tokens": 5096201403.0, "step": 8046 }, { "epoch": 0.9515194513420835, "grad_norm": 0.13178914785385132, "learning_rate": 6.333791183974135e-06, "loss": 0.34, "num_tokens": 5096840194.0, "step": 8047 }, { "epoch": 0.9516376965827126, "grad_norm": 0.1292847990989685, "learning_rate": 6.3321722066902364e-06, "loss": 0.3012, "num_tokens": 5097469070.0, "step": 8048 }, { "epoch": 0.9517559418233416, "grad_norm": 0.1361699104309082, "learning_rate": 6.3305571408939475e-06, "loss": 0.3452, "num_tokens": 5098102753.0, "step": 8049 }, { "epoch": 0.9518741870639706, "grad_norm": 0.1299755871295929, "learning_rate": 6.328945986822159e-06, "loss": 0.3115, "num_tokens": 5098736798.0, "step": 8050 }, { "epoch": 0.9519924323045997, "grad_norm": 0.12544654309749603, "learning_rate": 6.327338744711187e-06, "loss": 0.3314, "num_tokens": 5099374476.0, "step": 8051 }, { "epoch": 0.9521106775452288, "grad_norm": 0.13814909756183624, "learning_rate": 6.325735414796767e-06, "loss": 0.3638, "num_tokens": 5100006584.0, "step": 8052 }, { "epoch": 0.9522289227858579, "grad_norm": 0.12414059787988663, "learning_rate": 6.324135997314074e-06, "loss": 0.3116, "num_tokens": 5100644459.0, "step": 8053 }, { "epoch": 0.9523471680264869, "grad_norm": 0.13402383029460907, "learning_rate": 6.322540492497694e-06, "loss": 0.3105, "num_tokens": 5101277523.0, "step": 8054 }, { "epoch": 0.952465413267116, "grad_norm": 0.12208166718482971, "learning_rate": 6.320948900581655e-06, "loss": 0.3046, "num_tokens": 5101904823.0, "step": 8055 }, { "epoch": 0.9525836585077451, "grad_norm": 0.13789114356040955, "learning_rate": 6.319361221799397e-06, "loss": 0.3289, "num_tokens": 5102538897.0, "step": 8056 }, { "epoch": 0.9527019037483742, "grad_norm": 0.12561962008476257, "learning_rate": 6.31777745638379e-06, "loss": 0.3037, "num_tokens": 5103170422.0, "step": 8057 }, { "epoch": 0.9528201489890032, "grad_norm": 0.13807716965675354, "learning_rate": 6.316197604567141e-06, "loss": 0.3527, "num_tokens": 5103806223.0, "step": 8058 }, { "epoch": 0.9529383942296322, "grad_norm": 0.13468046486377716, "learning_rate": 6.314621666581162e-06, "loss": 0.34, "num_tokens": 5104436849.0, "step": 8059 }, { "epoch": 0.9530566394702613, "grad_norm": 0.12520718574523926, "learning_rate": 6.313049642657013e-06, "loss": 0.3015, "num_tokens": 5105069300.0, "step": 8060 }, { "epoch": 0.9531748847108904, "grad_norm": 0.13286270201206207, "learning_rate": 6.311481533025261e-06, "loss": 0.3263, "num_tokens": 5105682480.0, "step": 8061 }, { "epoch": 0.9532931299515195, "grad_norm": 0.12630903720855713, "learning_rate": 6.309917337915917e-06, "loss": 0.3026, "num_tokens": 5106312643.0, "step": 8062 }, { "epoch": 0.9534113751921485, "grad_norm": 0.12276430428028107, "learning_rate": 6.308357057558399e-06, "loss": 0.3221, "num_tokens": 5106949276.0, "step": 8063 }, { "epoch": 0.9535296204327776, "grad_norm": 0.15235276520252228, "learning_rate": 6.306800692181567e-06, "loss": 0.3694, "num_tokens": 5107588898.0, "step": 8064 }, { "epoch": 0.9536478656734066, "grad_norm": 0.1347724348306656, "learning_rate": 6.305248242013695e-06, "loss": 0.3503, "num_tokens": 5108225291.0, "step": 8065 }, { "epoch": 0.9537661109140357, "grad_norm": 0.1322181075811386, "learning_rate": 6.303699707282491e-06, "loss": 0.3375, "num_tokens": 5108856894.0, "step": 8066 }, { "epoch": 0.9538843561546648, "grad_norm": 0.1398695856332779, "learning_rate": 6.302155088215086e-06, "loss": 0.3195, "num_tokens": 5109493923.0, "step": 8067 }, { "epoch": 0.9540026013952938, "grad_norm": 0.13297602534294128, "learning_rate": 6.300614385038034e-06, "loss": 0.3443, "num_tokens": 5110127408.0, "step": 8068 }, { "epoch": 0.9541208466359229, "grad_norm": 0.12643149495124817, "learning_rate": 6.299077597977315e-06, "loss": 0.3073, "num_tokens": 5110763853.0, "step": 8069 }, { "epoch": 0.954239091876552, "grad_norm": 0.1332225650548935, "learning_rate": 6.297544727258342e-06, "loss": 0.368, "num_tokens": 5111400075.0, "step": 8070 }, { "epoch": 0.9543573371171811, "grad_norm": 0.12662197649478912, "learning_rate": 6.296015773105939e-06, "loss": 0.3369, "num_tokens": 5112030985.0, "step": 8071 }, { "epoch": 0.95447558235781, "grad_norm": 0.1362462192773819, "learning_rate": 6.294490735744377e-06, "loss": 0.3187, "num_tokens": 5112663053.0, "step": 8072 }, { "epoch": 0.9545938275984391, "grad_norm": 0.13334424793720245, "learning_rate": 6.292969615397328e-06, "loss": 0.3091, "num_tokens": 5113298131.0, "step": 8073 }, { "epoch": 0.9547120728390682, "grad_norm": 0.12534628808498383, "learning_rate": 6.29145241228791e-06, "loss": 0.349, "num_tokens": 5113935269.0, "step": 8074 }, { "epoch": 0.9548303180796973, "grad_norm": 0.13886858522891998, "learning_rate": 6.2899391266386505e-06, "loss": 0.3337, "num_tokens": 5114574274.0, "step": 8075 }, { "epoch": 0.9549485633203264, "grad_norm": 0.12704677879810333, "learning_rate": 6.288429758671514e-06, "loss": 0.2963, "num_tokens": 5115211196.0, "step": 8076 }, { "epoch": 0.9550668085609554, "grad_norm": 0.13456355035305023, "learning_rate": 6.286924308607887e-06, "loss": 0.3222, "num_tokens": 5115840171.0, "step": 8077 }, { "epoch": 0.9551850538015845, "grad_norm": 0.13168327510356903, "learning_rate": 6.285422776668578e-06, "loss": 0.3014, "num_tokens": 5116473389.0, "step": 8078 }, { "epoch": 0.9553032990422136, "grad_norm": 0.12040334194898605, "learning_rate": 6.283925163073822e-06, "loss": 0.2971, "num_tokens": 5117102980.0, "step": 8079 }, { "epoch": 0.9554215442828426, "grad_norm": 0.13639457523822784, "learning_rate": 6.282431468043285e-06, "loss": 0.3024, "num_tokens": 5117734457.0, "step": 8080 }, { "epoch": 0.9555397895234716, "grad_norm": 0.12059475481510162, "learning_rate": 6.280941691796054e-06, "loss": 0.2952, "num_tokens": 5118368948.0, "step": 8081 }, { "epoch": 0.9556580347641007, "grad_norm": 0.13280169665813446, "learning_rate": 6.279455834550635e-06, "loss": 0.326, "num_tokens": 5119006596.0, "step": 8082 }, { "epoch": 0.9557762800047298, "grad_norm": 0.13485634326934814, "learning_rate": 6.277973896524973e-06, "loss": 0.3298, "num_tokens": 5119640270.0, "step": 8083 }, { "epoch": 0.9558945252453589, "grad_norm": 0.13439947366714478, "learning_rate": 6.276495877936423e-06, "loss": 0.3293, "num_tokens": 5120274502.0, "step": 8084 }, { "epoch": 0.956012770485988, "grad_norm": 0.14128918945789337, "learning_rate": 6.275021779001776e-06, "loss": 0.3737, "num_tokens": 5120911369.0, "step": 8085 }, { "epoch": 0.956131015726617, "grad_norm": 0.13226372003555298, "learning_rate": 6.273551599937246e-06, "loss": 0.3169, "num_tokens": 5121548058.0, "step": 8086 }, { "epoch": 0.956249260967246, "grad_norm": 0.1375778615474701, "learning_rate": 6.2720853409584655e-06, "loss": 0.3408, "num_tokens": 5122177881.0, "step": 8087 }, { "epoch": 0.9563675062078751, "grad_norm": 0.12630976736545563, "learning_rate": 6.270623002280505e-06, "loss": 0.2675, "num_tokens": 5122801905.0, "step": 8088 }, { "epoch": 0.9564857514485042, "grad_norm": 0.12061083316802979, "learning_rate": 6.269164584117845e-06, "loss": 0.3318, "num_tokens": 5123438161.0, "step": 8089 }, { "epoch": 0.9566039966891332, "grad_norm": 0.129425510764122, "learning_rate": 6.267710086684402e-06, "loss": 0.306, "num_tokens": 5124070642.0, "step": 8090 }, { "epoch": 0.9567222419297623, "grad_norm": 0.1381276696920395, "learning_rate": 6.266259510193512e-06, "loss": 0.3561, "num_tokens": 5124706517.0, "step": 8091 }, { "epoch": 0.9568404871703914, "grad_norm": 0.14153186976909637, "learning_rate": 6.264812854857937e-06, "loss": 0.3131, "num_tokens": 5125341946.0, "step": 8092 }, { "epoch": 0.9569587324110205, "grad_norm": 0.12651802599430084, "learning_rate": 6.263370120889868e-06, "loss": 0.3013, "num_tokens": 5125978132.0, "step": 8093 }, { "epoch": 0.9570769776516496, "grad_norm": 0.12922444939613342, "learning_rate": 6.261931308500914e-06, "loss": 0.3401, "num_tokens": 5126613119.0, "step": 8094 }, { "epoch": 0.9571952228922785, "grad_norm": 0.19478337466716766, "learning_rate": 6.260496417902109e-06, "loss": 0.3193, "num_tokens": 5127209917.0, "step": 8095 }, { "epoch": 0.9573134681329076, "grad_norm": 0.1310076117515564, "learning_rate": 6.25906544930392e-06, "loss": 0.3373, "num_tokens": 5127844910.0, "step": 8096 }, { "epoch": 0.9574317133735367, "grad_norm": 0.14186719059944153, "learning_rate": 6.25763840291623e-06, "loss": 0.3475, "num_tokens": 5128483454.0, "step": 8097 }, { "epoch": 0.9575499586141658, "grad_norm": 0.1272251456975937, "learning_rate": 6.256215278948353e-06, "loss": 0.3173, "num_tokens": 5129112612.0, "step": 8098 }, { "epoch": 0.9576682038547949, "grad_norm": 0.1259782612323761, "learning_rate": 6.254796077609025e-06, "loss": 0.3392, "num_tokens": 5129750729.0, "step": 8099 }, { "epoch": 0.9577864490954239, "grad_norm": 0.14556074142456055, "learning_rate": 6.253380799106403e-06, "loss": 0.3254, "num_tokens": 5130381872.0, "step": 8100 }, { "epoch": 0.957904694336053, "grad_norm": 0.13493098318576813, "learning_rate": 6.251969443648071e-06, "loss": 0.3439, "num_tokens": 5131011926.0, "step": 8101 }, { "epoch": 0.958022939576682, "grad_norm": 0.12409204244613647, "learning_rate": 6.250562011441047e-06, "loss": 0.293, "num_tokens": 5131644711.0, "step": 8102 }, { "epoch": 0.9581411848173111, "grad_norm": 0.14170928299427032, "learning_rate": 6.249158502691758e-06, "loss": 0.3102, "num_tokens": 5132249381.0, "step": 8103 }, { "epoch": 0.9582594300579401, "grad_norm": 0.1340659260749817, "learning_rate": 6.247758917606067e-06, "loss": 0.3129, "num_tokens": 5132881281.0, "step": 8104 }, { "epoch": 0.9583776752985692, "grad_norm": 0.14224185049533844, "learning_rate": 6.246363256389249e-06, "loss": 0.358, "num_tokens": 5133518630.0, "step": 8105 }, { "epoch": 0.9584959205391983, "grad_norm": 0.1333722174167633, "learning_rate": 6.244971519246023e-06, "loss": 0.312, "num_tokens": 5134154905.0, "step": 8106 }, { "epoch": 0.9586141657798274, "grad_norm": 0.12851783633232117, "learning_rate": 6.243583706380515e-06, "loss": 0.3338, "num_tokens": 5134791779.0, "step": 8107 }, { "epoch": 0.9587324110204565, "grad_norm": 0.1289902776479721, "learning_rate": 6.242199817996285e-06, "loss": 0.2982, "num_tokens": 5135421417.0, "step": 8108 }, { "epoch": 0.9588506562610855, "grad_norm": 0.13212299346923828, "learning_rate": 6.240819854296308e-06, "loss": 0.3372, "num_tokens": 5136055952.0, "step": 8109 }, { "epoch": 0.9589689015017145, "grad_norm": 0.12030923366546631, "learning_rate": 6.239443815482994e-06, "loss": 0.2925, "num_tokens": 5136695231.0, "step": 8110 }, { "epoch": 0.9590871467423436, "grad_norm": 0.13869477808475494, "learning_rate": 6.238071701758176e-06, "loss": 0.3228, "num_tokens": 5137328442.0, "step": 8111 }, { "epoch": 0.9592053919829727, "grad_norm": 0.14744272828102112, "learning_rate": 6.2367035133230985e-06, "loss": 0.346, "num_tokens": 5137963517.0, "step": 8112 }, { "epoch": 0.9593236372236017, "grad_norm": 0.14710669219493866, "learning_rate": 6.235339250378447e-06, "loss": 0.366, "num_tokens": 5138601657.0, "step": 8113 }, { "epoch": 0.9594418824642308, "grad_norm": 0.1333823949098587, "learning_rate": 6.233978913124321e-06, "loss": 0.3083, "num_tokens": 5139240797.0, "step": 8114 }, { "epoch": 0.9595601277048599, "grad_norm": 0.1317744106054306, "learning_rate": 6.232622501760247e-06, "loss": 0.3259, "num_tokens": 5139868280.0, "step": 8115 }, { "epoch": 0.959678372945489, "grad_norm": 0.1379823237657547, "learning_rate": 6.23127001648518e-06, "loss": 0.3483, "num_tokens": 5140501353.0, "step": 8116 }, { "epoch": 0.9597966181861181, "grad_norm": 0.12964381277561188, "learning_rate": 6.229921457497491e-06, "loss": 0.2861, "num_tokens": 5141113641.0, "step": 8117 }, { "epoch": 0.959914863426747, "grad_norm": 0.1339108794927597, "learning_rate": 6.228576824994979e-06, "loss": 0.346, "num_tokens": 5141747771.0, "step": 8118 }, { "epoch": 0.9600331086673761, "grad_norm": 0.12612400949001312, "learning_rate": 6.227236119174867e-06, "loss": 0.3021, "num_tokens": 5142385097.0, "step": 8119 }, { "epoch": 0.9601513539080052, "grad_norm": 0.133366197347641, "learning_rate": 6.225899340233805e-06, "loss": 0.3226, "num_tokens": 5143017186.0, "step": 8120 }, { "epoch": 0.9602695991486343, "grad_norm": 0.13420464098453522, "learning_rate": 6.224566488367863e-06, "loss": 0.3497, "num_tokens": 5143656288.0, "step": 8121 }, { "epoch": 0.9603878443892633, "grad_norm": 0.12935985624790192, "learning_rate": 6.223237563772532e-06, "loss": 0.3356, "num_tokens": 5144284078.0, "step": 8122 }, { "epoch": 0.9605060896298924, "grad_norm": 0.12562406063079834, "learning_rate": 6.22191256664274e-06, "loss": 0.3138, "num_tokens": 5144918653.0, "step": 8123 }, { "epoch": 0.9606243348705215, "grad_norm": 0.13598977029323578, "learning_rate": 6.220591497172823e-06, "loss": 0.3072, "num_tokens": 5145553496.0, "step": 8124 }, { "epoch": 0.9607425801111505, "grad_norm": 0.13337890803813934, "learning_rate": 6.219274355556552e-06, "loss": 0.3326, "num_tokens": 5146191180.0, "step": 8125 }, { "epoch": 0.9608608253517796, "grad_norm": 0.1328846663236618, "learning_rate": 6.217961141987111e-06, "loss": 0.292, "num_tokens": 5146822628.0, "step": 8126 }, { "epoch": 0.9609790705924086, "grad_norm": 0.1519065946340561, "learning_rate": 6.216651856657125e-06, "loss": 0.3262, "num_tokens": 5147459560.0, "step": 8127 }, { "epoch": 0.9610973158330377, "grad_norm": 0.1280958503484726, "learning_rate": 6.215346499758627e-06, "loss": 0.3188, "num_tokens": 5148086617.0, "step": 8128 }, { "epoch": 0.9612155610736668, "grad_norm": 0.12603697180747986, "learning_rate": 6.214045071483079e-06, "loss": 0.3253, "num_tokens": 5148724097.0, "step": 8129 }, { "epoch": 0.9613338063142959, "grad_norm": 0.1375526338815689, "learning_rate": 6.212747572021367e-06, "loss": 0.321, "num_tokens": 5149359108.0, "step": 8130 }, { "epoch": 0.961452051554925, "grad_norm": 0.13921910524368286, "learning_rate": 6.211454001563803e-06, "loss": 0.3651, "num_tokens": 5149997841.0, "step": 8131 }, { "epoch": 0.961570296795554, "grad_norm": 0.14466944336891174, "learning_rate": 6.210164360300119e-06, "loss": 0.2957, "num_tokens": 5150633361.0, "step": 8132 }, { "epoch": 0.961688542036183, "grad_norm": 0.11211797595024109, "learning_rate": 6.2088786484194766e-06, "loss": 0.2873, "num_tokens": 5151270695.0, "step": 8133 }, { "epoch": 0.9618067872768121, "grad_norm": 0.12801077961921692, "learning_rate": 6.207596866110454e-06, "loss": 0.284, "num_tokens": 5151903934.0, "step": 8134 }, { "epoch": 0.9619250325174412, "grad_norm": 0.13171634078025818, "learning_rate": 6.206319013561052e-06, "loss": 0.3439, "num_tokens": 5152539918.0, "step": 8135 }, { "epoch": 0.9620432777580702, "grad_norm": 0.13618981838226318, "learning_rate": 6.205045090958701e-06, "loss": 0.3456, "num_tokens": 5153177795.0, "step": 8136 }, { "epoch": 0.9621615229986993, "grad_norm": 0.13542699813842773, "learning_rate": 6.203775098490258e-06, "loss": 0.2828, "num_tokens": 5153816736.0, "step": 8137 }, { "epoch": 0.9622797682393284, "grad_norm": 0.14349032938480377, "learning_rate": 6.202509036341991e-06, "loss": 0.3159, "num_tokens": 5154422370.0, "step": 8138 }, { "epoch": 0.9623980134799575, "grad_norm": 0.1380891054868698, "learning_rate": 6.201246904699606e-06, "loss": 0.3553, "num_tokens": 5155056643.0, "step": 8139 }, { "epoch": 0.9625162587205865, "grad_norm": 0.1329144686460495, "learning_rate": 6.199988703748218e-06, "loss": 0.3358, "num_tokens": 5155695878.0, "step": 8140 }, { "epoch": 0.9626345039612155, "grad_norm": 0.12959074974060059, "learning_rate": 6.19873443367238e-06, "loss": 0.3297, "num_tokens": 5156328608.0, "step": 8141 }, { "epoch": 0.9627527492018446, "grad_norm": 0.1325128972530365, "learning_rate": 6.197484094656055e-06, "loss": 0.3213, "num_tokens": 5156967167.0, "step": 8142 }, { "epoch": 0.9628709944424737, "grad_norm": 0.13369429111480713, "learning_rate": 6.196237686882638e-06, "loss": 0.2878, "num_tokens": 5157598830.0, "step": 8143 }, { "epoch": 0.9629892396831028, "grad_norm": 0.1208302229642868, "learning_rate": 6.194995210534946e-06, "loss": 0.3045, "num_tokens": 5158227319.0, "step": 8144 }, { "epoch": 0.9631074849237318, "grad_norm": 0.13618184626102448, "learning_rate": 6.1937566657952205e-06, "loss": 0.2951, "num_tokens": 5158859779.0, "step": 8145 }, { "epoch": 0.9632257301643609, "grad_norm": 0.13382680714130402, "learning_rate": 6.1925220528451216e-06, "loss": 0.2965, "num_tokens": 5159497218.0, "step": 8146 }, { "epoch": 0.96334397540499, "grad_norm": 0.1227061077952385, "learning_rate": 6.191291371865734e-06, "loss": 0.296, "num_tokens": 5160133386.0, "step": 8147 }, { "epoch": 0.963462220645619, "grad_norm": 0.12797623872756958, "learning_rate": 6.190064623037572e-06, "loss": 0.3174, "num_tokens": 5160763898.0, "step": 8148 }, { "epoch": 0.9635804658862481, "grad_norm": 0.12501998245716095, "learning_rate": 6.188841806540563e-06, "loss": 0.3117, "num_tokens": 5161396279.0, "step": 8149 }, { "epoch": 0.9636987111268771, "grad_norm": 0.1339840292930603, "learning_rate": 6.187622922554064e-06, "loss": 0.3129, "num_tokens": 5162016700.0, "step": 8150 }, { "epoch": 0.9638169563675062, "grad_norm": 0.1292400062084198, "learning_rate": 6.186407971256857e-06, "loss": 0.2913, "num_tokens": 5162649124.0, "step": 8151 }, { "epoch": 0.9639352016081353, "grad_norm": 0.12197978049516678, "learning_rate": 6.18519695282714e-06, "loss": 0.327, "num_tokens": 5163282130.0, "step": 8152 }, { "epoch": 0.9640534468487644, "grad_norm": 0.13172084093093872, "learning_rate": 6.1839898674425436e-06, "loss": 0.3418, "num_tokens": 5163916173.0, "step": 8153 }, { "epoch": 0.9641716920893934, "grad_norm": 0.1421179473400116, "learning_rate": 6.182786715280111e-06, "loss": 0.2916, "num_tokens": 5164551102.0, "step": 8154 }, { "epoch": 0.9642899373300224, "grad_norm": 0.12092426419258118, "learning_rate": 6.181587496516319e-06, "loss": 0.322, "num_tokens": 5165170385.0, "step": 8155 }, { "epoch": 0.9644081825706515, "grad_norm": 0.12364111095666885, "learning_rate": 6.180392211327057e-06, "loss": 0.3159, "num_tokens": 5165803263.0, "step": 8156 }, { "epoch": 0.9645264278112806, "grad_norm": 0.1287110298871994, "learning_rate": 6.1792008598876455e-06, "loss": 0.3143, "num_tokens": 5166439112.0, "step": 8157 }, { "epoch": 0.9646446730519097, "grad_norm": 0.1295938789844513, "learning_rate": 6.178013442372828e-06, "loss": 0.321, "num_tokens": 5167077100.0, "step": 8158 }, { "epoch": 0.9647629182925387, "grad_norm": 0.1319471150636673, "learning_rate": 6.1768299589567605e-06, "loss": 0.3514, "num_tokens": 5167716819.0, "step": 8159 }, { "epoch": 0.9648811635331678, "grad_norm": 0.13641133904457092, "learning_rate": 6.175650409813041e-06, "loss": 0.3577, "num_tokens": 5168350368.0, "step": 8160 }, { "epoch": 0.9649994087737969, "grad_norm": 0.12321803718805313, "learning_rate": 6.174474795114666e-06, "loss": 0.3134, "num_tokens": 5168987755.0, "step": 8161 }, { "epoch": 0.965117654014426, "grad_norm": 0.1336054801940918, "learning_rate": 6.173303115034079e-06, "loss": 0.336, "num_tokens": 5169621164.0, "step": 8162 }, { "epoch": 0.9652358992550549, "grad_norm": 0.1270831823348999, "learning_rate": 6.172135369743131e-06, "loss": 0.327, "num_tokens": 5170257640.0, "step": 8163 }, { "epoch": 0.965354144495684, "grad_norm": 0.13087859749794006, "learning_rate": 6.170971559413096e-06, "loss": 0.3514, "num_tokens": 5170890939.0, "step": 8164 }, { "epoch": 0.9654723897363131, "grad_norm": 0.13244934380054474, "learning_rate": 6.169811684214685e-06, "loss": 0.311, "num_tokens": 5171525931.0, "step": 8165 }, { "epoch": 0.9655906349769422, "grad_norm": 0.13853906095027924, "learning_rate": 6.168655744318016e-06, "loss": 0.3391, "num_tokens": 5172155423.0, "step": 8166 }, { "epoch": 0.9657088802175713, "grad_norm": 0.13265585899353027, "learning_rate": 6.167503739892636e-06, "loss": 0.3149, "num_tokens": 5172794539.0, "step": 8167 }, { "epoch": 0.9658271254582003, "grad_norm": 0.12828420102596283, "learning_rate": 6.166355671107513e-06, "loss": 0.3153, "num_tokens": 5173429090.0, "step": 8168 }, { "epoch": 0.9659453706988294, "grad_norm": 0.1377156376838684, "learning_rate": 6.165211538131045e-06, "loss": 0.3436, "num_tokens": 5174062980.0, "step": 8169 }, { "epoch": 0.9660636159394584, "grad_norm": 0.1325216442346573, "learning_rate": 6.1640713411310395e-06, "loss": 0.3339, "num_tokens": 5174696454.0, "step": 8170 }, { "epoch": 0.9661818611800875, "grad_norm": 0.1310647577047348, "learning_rate": 6.16293508027474e-06, "loss": 0.3389, "num_tokens": 5175312503.0, "step": 8171 }, { "epoch": 0.9663001064207166, "grad_norm": 0.12105315178632736, "learning_rate": 6.161802755728807e-06, "loss": 0.3121, "num_tokens": 5175950722.0, "step": 8172 }, { "epoch": 0.9664183516613456, "grad_norm": 0.1250411868095398, "learning_rate": 6.1606743676593195e-06, "loss": 0.3176, "num_tokens": 5176588811.0, "step": 8173 }, { "epoch": 0.9665365969019747, "grad_norm": 0.12267660349607468, "learning_rate": 6.159549916231785e-06, "loss": 0.3143, "num_tokens": 5177208501.0, "step": 8174 }, { "epoch": 0.9666548421426038, "grad_norm": 0.12738636136054993, "learning_rate": 6.158429401611133e-06, "loss": 0.3011, "num_tokens": 5177839874.0, "step": 8175 }, { "epoch": 0.9667730873832329, "grad_norm": 0.12334895133972168, "learning_rate": 6.157312823961714e-06, "loss": 0.3156, "num_tokens": 5178473755.0, "step": 8176 }, { "epoch": 0.9668913326238618, "grad_norm": 0.12709619104862213, "learning_rate": 6.156200183447298e-06, "loss": 0.2892, "num_tokens": 5179108816.0, "step": 8177 }, { "epoch": 0.9670095778644909, "grad_norm": 0.12659960985183716, "learning_rate": 6.155091480231085e-06, "loss": 0.3149, "num_tokens": 5179746329.0, "step": 8178 }, { "epoch": 0.96712782310512, "grad_norm": 0.14053186774253845, "learning_rate": 6.153986714475693e-06, "loss": 0.3534, "num_tokens": 5180376790.0, "step": 8179 }, { "epoch": 0.9672460683457491, "grad_norm": 0.13160701096057892, "learning_rate": 6.152885886343159e-06, "loss": 0.3245, "num_tokens": 5181011381.0, "step": 8180 }, { "epoch": 0.9673643135863782, "grad_norm": 0.1250775307416916, "learning_rate": 6.15178899599495e-06, "loss": 0.2983, "num_tokens": 5181648782.0, "step": 8181 }, { "epoch": 0.9674825588270072, "grad_norm": 0.12127652019262314, "learning_rate": 6.150696043591949e-06, "loss": 0.2983, "num_tokens": 5182285574.0, "step": 8182 }, { "epoch": 0.9676008040676363, "grad_norm": 0.12341879308223724, "learning_rate": 6.149607029294467e-06, "loss": 0.2886, "num_tokens": 5182920291.0, "step": 8183 }, { "epoch": 0.9677190493082654, "grad_norm": 0.12556231021881104, "learning_rate": 6.1485219532622355e-06, "loss": 0.2924, "num_tokens": 5183551294.0, "step": 8184 }, { "epoch": 0.9678372945488944, "grad_norm": 0.13641570508480072, "learning_rate": 6.147440815654403e-06, "loss": 0.3407, "num_tokens": 5184186806.0, "step": 8185 }, { "epoch": 0.9679555397895234, "grad_norm": 0.13526597619056702, "learning_rate": 6.146363616629547e-06, "loss": 0.3083, "num_tokens": 5184820017.0, "step": 8186 }, { "epoch": 0.9680737850301525, "grad_norm": 0.12628044188022614, "learning_rate": 6.145290356345667e-06, "loss": 0.3026, "num_tokens": 5185453802.0, "step": 8187 }, { "epoch": 0.9681920302707816, "grad_norm": 0.145676389336586, "learning_rate": 6.14422103496018e-06, "loss": 0.3257, "num_tokens": 5186090407.0, "step": 8188 }, { "epoch": 0.9683102755114107, "grad_norm": 0.14072903990745544, "learning_rate": 6.143155652629928e-06, "loss": 0.3638, "num_tokens": 5186725797.0, "step": 8189 }, { "epoch": 0.9684285207520398, "grad_norm": 0.12744446098804474, "learning_rate": 6.142094209511178e-06, "loss": 0.3379, "num_tokens": 5187355529.0, "step": 8190 }, { "epoch": 0.9685467659926688, "grad_norm": 0.13668496906757355, "learning_rate": 6.141036705759611e-06, "loss": 0.3483, "num_tokens": 5187987331.0, "step": 8191 }, { "epoch": 0.9686650112332978, "grad_norm": 0.12407245486974716, "learning_rate": 6.139983141530344e-06, "loss": 0.3073, "num_tokens": 5188616154.0, "step": 8192 }, { "epoch": 0.9687832564739269, "grad_norm": 0.13782620429992676, "learning_rate": 6.138933516977901e-06, "loss": 0.3493, "num_tokens": 5189249632.0, "step": 8193 }, { "epoch": 0.968901501714556, "grad_norm": 0.13674263656139374, "learning_rate": 6.137887832256237e-06, "loss": 0.2875, "num_tokens": 5189888925.0, "step": 8194 }, { "epoch": 0.969019746955185, "grad_norm": 0.15233033895492554, "learning_rate": 6.1368460875187275e-06, "loss": 0.3287, "num_tokens": 5190528055.0, "step": 8195 }, { "epoch": 0.9691379921958141, "grad_norm": 0.15075848996639252, "learning_rate": 6.13580828291817e-06, "loss": 0.3248, "num_tokens": 5191165656.0, "step": 8196 }, { "epoch": 0.9692562374364432, "grad_norm": 0.13953663408756256, "learning_rate": 6.134774418606786e-06, "loss": 0.328, "num_tokens": 5191799718.0, "step": 8197 }, { "epoch": 0.9693744826770723, "grad_norm": 0.12433179467916489, "learning_rate": 6.133744494736209e-06, "loss": 0.3009, "num_tokens": 5192436975.0, "step": 8198 }, { "epoch": 0.9694927279177014, "grad_norm": 0.12677857279777527, "learning_rate": 6.132718511457513e-06, "loss": 0.3096, "num_tokens": 5193073883.0, "step": 8199 }, { "epoch": 0.9696109731583303, "grad_norm": 0.1402478814125061, "learning_rate": 6.131696468921177e-06, "loss": 0.2946, "num_tokens": 5193707576.0, "step": 8200 }, { "epoch": 0.9697292183989594, "grad_norm": 0.13447536528110504, "learning_rate": 6.130678367277112e-06, "loss": 0.3029, "num_tokens": 5194344589.0, "step": 8201 }, { "epoch": 0.9698474636395885, "grad_norm": 0.13504986464977264, "learning_rate": 6.129664206674642e-06, "loss": 0.3323, "num_tokens": 5194981226.0, "step": 8202 }, { "epoch": 0.9699657088802176, "grad_norm": 0.14518985152244568, "learning_rate": 6.128653987262525e-06, "loss": 0.3183, "num_tokens": 5195620903.0, "step": 8203 }, { "epoch": 0.9700839541208466, "grad_norm": 0.13518904149532318, "learning_rate": 6.12764770918893e-06, "loss": 0.339, "num_tokens": 5196254985.0, "step": 8204 }, { "epoch": 0.9702021993614757, "grad_norm": 0.14773309230804443, "learning_rate": 6.126645372601455e-06, "loss": 0.3521, "num_tokens": 5196893315.0, "step": 8205 }, { "epoch": 0.9703204446021048, "grad_norm": 0.14005114138126373, "learning_rate": 6.1256469776471144e-06, "loss": 0.353, "num_tokens": 5197527618.0, "step": 8206 }, { "epoch": 0.9704386898427338, "grad_norm": 0.12807810306549072, "learning_rate": 6.124652524472345e-06, "loss": 0.3191, "num_tokens": 5198156703.0, "step": 8207 }, { "epoch": 0.9705569350833629, "grad_norm": 0.11811191588640213, "learning_rate": 6.123662013223016e-06, "loss": 0.2646, "num_tokens": 5198788742.0, "step": 8208 }, { "epoch": 0.9706751803239919, "grad_norm": 0.13220082223415375, "learning_rate": 6.122675444044402e-06, "loss": 0.3213, "num_tokens": 5199422540.0, "step": 8209 }, { "epoch": 0.970793425564621, "grad_norm": 0.1441463828086853, "learning_rate": 6.12169281708121e-06, "loss": 0.3651, "num_tokens": 5200050176.0, "step": 8210 }, { "epoch": 0.9709116708052501, "grad_norm": 0.13506196439266205, "learning_rate": 6.12071413247757e-06, "loss": 0.2978, "num_tokens": 5200683333.0, "step": 8211 }, { "epoch": 0.9710299160458792, "grad_norm": 0.1274338662624359, "learning_rate": 6.119739390377024e-06, "loss": 0.2936, "num_tokens": 5201309874.0, "step": 8212 }, { "epoch": 0.9711481612865083, "grad_norm": 0.1294974684715271, "learning_rate": 6.118768590922545e-06, "loss": 0.3101, "num_tokens": 5201948487.0, "step": 8213 }, { "epoch": 0.9712664065271372, "grad_norm": 0.1326567828655243, "learning_rate": 6.117801734256525e-06, "loss": 0.3116, "num_tokens": 5202579811.0, "step": 8214 }, { "epoch": 0.9713846517677663, "grad_norm": 0.12890625, "learning_rate": 6.116838820520773e-06, "loss": 0.3305, "num_tokens": 5203211511.0, "step": 8215 }, { "epoch": 0.9715028970083954, "grad_norm": 0.1219540610909462, "learning_rate": 6.11587984985653e-06, "loss": 0.2897, "num_tokens": 5203849989.0, "step": 8216 }, { "epoch": 0.9716211422490245, "grad_norm": 0.14595083892345428, "learning_rate": 6.1149248224044465e-06, "loss": 0.3469, "num_tokens": 5204488933.0, "step": 8217 }, { "epoch": 0.9717393874896535, "grad_norm": 0.1366407722234726, "learning_rate": 6.113973738304605e-06, "loss": 0.3209, "num_tokens": 5205126188.0, "step": 8218 }, { "epoch": 0.9718576327302826, "grad_norm": 0.12387462705373764, "learning_rate": 6.113026597696502e-06, "loss": 0.3061, "num_tokens": 5205763639.0, "step": 8219 }, { "epoch": 0.9719758779709117, "grad_norm": 0.1294083446264267, "learning_rate": 6.112083400719062e-06, "loss": 0.3045, "num_tokens": 5206395190.0, "step": 8220 }, { "epoch": 0.9720941232115408, "grad_norm": 0.13315802812576294, "learning_rate": 6.111144147510625e-06, "loss": 0.3394, "num_tokens": 5207034508.0, "step": 8221 }, { "epoch": 0.9722123684521699, "grad_norm": 0.1434919834136963, "learning_rate": 6.110208838208957e-06, "loss": 0.3601, "num_tokens": 5207670303.0, "step": 8222 }, { "epoch": 0.9723306136927988, "grad_norm": 0.12872831523418427, "learning_rate": 6.109277472951245e-06, "loss": 0.3217, "num_tokens": 5208301503.0, "step": 8223 }, { "epoch": 0.9724488589334279, "grad_norm": 0.14000748097896576, "learning_rate": 6.10835005187409e-06, "loss": 0.3465, "num_tokens": 5208940501.0, "step": 8224 }, { "epoch": 0.972567104174057, "grad_norm": 0.12758778035640717, "learning_rate": 6.107426575113529e-06, "loss": 0.2918, "num_tokens": 5209573218.0, "step": 8225 }, { "epoch": 0.9726853494146861, "grad_norm": 0.12512005865573883, "learning_rate": 6.10650704280501e-06, "loss": 0.3119, "num_tokens": 5210204253.0, "step": 8226 }, { "epoch": 0.9728035946553151, "grad_norm": 0.12699688971042633, "learning_rate": 6.105591455083403e-06, "loss": 0.286, "num_tokens": 5210810652.0, "step": 8227 }, { "epoch": 0.9729218398959442, "grad_norm": 0.12660478055477142, "learning_rate": 6.104679812082999e-06, "loss": 0.3108, "num_tokens": 5211443356.0, "step": 8228 }, { "epoch": 0.9730400851365733, "grad_norm": 0.12291398644447327, "learning_rate": 6.103772113937519e-06, "loss": 0.2737, "num_tokens": 5212068060.0, "step": 8229 }, { "epoch": 0.9731583303772023, "grad_norm": 0.13405944406986237, "learning_rate": 6.102868360780095e-06, "loss": 0.3555, "num_tokens": 5212706846.0, "step": 8230 }, { "epoch": 0.9732765756178314, "grad_norm": 0.11853740364313126, "learning_rate": 6.101968552743289e-06, "loss": 0.2892, "num_tokens": 5213339176.0, "step": 8231 }, { "epoch": 0.9733948208584604, "grad_norm": 0.13049733638763428, "learning_rate": 6.101072689959071e-06, "loss": 0.3315, "num_tokens": 5213976412.0, "step": 8232 }, { "epoch": 0.9735130660990895, "grad_norm": 0.12468564510345459, "learning_rate": 6.10018077255885e-06, "loss": 0.2935, "num_tokens": 5214612923.0, "step": 8233 }, { "epoch": 0.9736313113397186, "grad_norm": 0.14113260805606842, "learning_rate": 6.099292800673442e-06, "loss": 0.3374, "num_tokens": 5215238246.0, "step": 8234 }, { "epoch": 0.9737495565803477, "grad_norm": 0.1377945840358734, "learning_rate": 6.0984087744330945e-06, "loss": 0.3492, "num_tokens": 5215877685.0, "step": 8235 }, { "epoch": 0.9738678018209767, "grad_norm": 0.1276702582836151, "learning_rate": 6.097528693967465e-06, "loss": 0.2853, "num_tokens": 5216514769.0, "step": 8236 }, { "epoch": 0.9739860470616057, "grad_norm": 0.12911364436149597, "learning_rate": 6.096652559405645e-06, "loss": 0.3462, "num_tokens": 5217143164.0, "step": 8237 }, { "epoch": 0.9741042923022348, "grad_norm": 0.12776261568069458, "learning_rate": 6.095780370876138e-06, "loss": 0.3199, "num_tokens": 5217774090.0, "step": 8238 }, { "epoch": 0.9742225375428639, "grad_norm": 0.12866678833961487, "learning_rate": 6.094912128506871e-06, "loss": 0.3141, "num_tokens": 5218410202.0, "step": 8239 }, { "epoch": 0.974340782783493, "grad_norm": 0.13215592503547668, "learning_rate": 6.094047832425194e-06, "loss": 0.3235, "num_tokens": 5219043294.0, "step": 8240 }, { "epoch": 0.974459028024122, "grad_norm": 0.12851832807064056, "learning_rate": 6.093187482757879e-06, "loss": 0.309, "num_tokens": 5219682263.0, "step": 8241 }, { "epoch": 0.9745772732647511, "grad_norm": 0.12405325472354889, "learning_rate": 6.092331079631114e-06, "loss": 0.3083, "num_tokens": 5220319580.0, "step": 8242 }, { "epoch": 0.9746955185053802, "grad_norm": 0.1368079036474228, "learning_rate": 6.091478623170513e-06, "loss": 0.3209, "num_tokens": 5220939911.0, "step": 8243 }, { "epoch": 0.9748137637460093, "grad_norm": 0.13476397097110748, "learning_rate": 6.090630113501111e-06, "loss": 0.3384, "num_tokens": 5221574757.0, "step": 8244 }, { "epoch": 0.9749320089866383, "grad_norm": 0.13084036111831665, "learning_rate": 6.089785550747358e-06, "loss": 0.302, "num_tokens": 5222210936.0, "step": 8245 }, { "epoch": 0.9750502542272673, "grad_norm": 0.13987120985984802, "learning_rate": 6.0889449350331346e-06, "loss": 0.347, "num_tokens": 5222847735.0, "step": 8246 }, { "epoch": 0.9751684994678964, "grad_norm": 0.11218731105327606, "learning_rate": 6.088108266481736e-06, "loss": 0.2665, "num_tokens": 5223479347.0, "step": 8247 }, { "epoch": 0.9752867447085255, "grad_norm": 0.12171830236911774, "learning_rate": 6.087275545215881e-06, "loss": 0.2946, "num_tokens": 5224113431.0, "step": 8248 }, { "epoch": 0.9754049899491546, "grad_norm": 0.14232802391052246, "learning_rate": 6.086446771357704e-06, "loss": 0.3251, "num_tokens": 5224750626.0, "step": 8249 }, { "epoch": 0.9755232351897836, "grad_norm": 0.12744568288326263, "learning_rate": 6.085621945028771e-06, "loss": 0.3209, "num_tokens": 5225386900.0, "step": 8250 }, { "epoch": 0.9756414804304127, "grad_norm": 0.12900236248970032, "learning_rate": 6.084801066350057e-06, "loss": 0.2874, "num_tokens": 5226020725.0, "step": 8251 }, { "epoch": 0.9757597256710417, "grad_norm": 0.14372433722019196, "learning_rate": 6.083984135441972e-06, "loss": 0.3273, "num_tokens": 5226657192.0, "step": 8252 }, { "epoch": 0.9758779709116708, "grad_norm": 0.1294369101524353, "learning_rate": 6.083171152424331e-06, "loss": 0.2927, "num_tokens": 5227291754.0, "step": 8253 }, { "epoch": 0.9759962161522999, "grad_norm": 0.12236184626817703, "learning_rate": 6.082362117416378e-06, "loss": 0.2875, "num_tokens": 5227922825.0, "step": 8254 }, { "epoch": 0.9761144613929289, "grad_norm": 0.1295822411775589, "learning_rate": 6.081557030536785e-06, "loss": 0.306, "num_tokens": 5228554474.0, "step": 8255 }, { "epoch": 0.976232706633558, "grad_norm": 0.12716969847679138, "learning_rate": 6.0807558919036305e-06, "loss": 0.287, "num_tokens": 5229187957.0, "step": 8256 }, { "epoch": 0.9763509518741871, "grad_norm": 0.13794802129268646, "learning_rate": 6.0799587016344245e-06, "loss": 0.3709, "num_tokens": 5229820165.0, "step": 8257 }, { "epoch": 0.9764691971148162, "grad_norm": 0.12548044323921204, "learning_rate": 6.079165459846094e-06, "loss": 0.3053, "num_tokens": 5230422407.0, "step": 8258 }, { "epoch": 0.9765874423554451, "grad_norm": 0.13591615855693817, "learning_rate": 6.078376166654984e-06, "loss": 0.3218, "num_tokens": 5231058459.0, "step": 8259 }, { "epoch": 0.9767056875960742, "grad_norm": 0.1226133331656456, "learning_rate": 6.077590822176872e-06, "loss": 0.3199, "num_tokens": 5231692884.0, "step": 8260 }, { "epoch": 0.9768239328367033, "grad_norm": 0.14057843387126923, "learning_rate": 6.076809426526938e-06, "loss": 0.3364, "num_tokens": 5232329995.0, "step": 8261 }, { "epoch": 0.9769421780773324, "grad_norm": 0.13288551568984985, "learning_rate": 6.076031979819796e-06, "loss": 0.33, "num_tokens": 5232965851.0, "step": 8262 }, { "epoch": 0.9770604233179615, "grad_norm": 0.12242656201124191, "learning_rate": 6.07525848216948e-06, "loss": 0.2983, "num_tokens": 5233599062.0, "step": 8263 }, { "epoch": 0.9771786685585905, "grad_norm": 0.1336710900068283, "learning_rate": 6.074488933689441e-06, "loss": 0.2975, "num_tokens": 5234234526.0, "step": 8264 }, { "epoch": 0.9772969137992196, "grad_norm": 0.13392069935798645, "learning_rate": 6.073723334492552e-06, "loss": 0.3664, "num_tokens": 5234867493.0, "step": 8265 }, { "epoch": 0.9774151590398487, "grad_norm": 0.14412294328212738, "learning_rate": 6.0729616846911065e-06, "loss": 0.3587, "num_tokens": 5235467716.0, "step": 8266 }, { "epoch": 0.9775334042804777, "grad_norm": 0.1341749131679535, "learning_rate": 6.07220398439682e-06, "loss": 0.3201, "num_tokens": 5236099253.0, "step": 8267 }, { "epoch": 0.9776516495211067, "grad_norm": 0.1299642026424408, "learning_rate": 6.071450233720823e-06, "loss": 0.3445, "num_tokens": 5236733869.0, "step": 8268 }, { "epoch": 0.9777698947617358, "grad_norm": 0.13662073016166687, "learning_rate": 6.0707004327736825e-06, "loss": 0.3395, "num_tokens": 5237369531.0, "step": 8269 }, { "epoch": 0.9778881400023649, "grad_norm": 0.14333143830299377, "learning_rate": 6.0699545816653645e-06, "loss": 0.3296, "num_tokens": 5238003531.0, "step": 8270 }, { "epoch": 0.978006385242994, "grad_norm": 0.128883495926857, "learning_rate": 6.069212680505268e-06, "loss": 0.302, "num_tokens": 5238636004.0, "step": 8271 }, { "epoch": 0.9781246304836231, "grad_norm": 0.12946166098117828, "learning_rate": 6.068474729402213e-06, "loss": 0.3286, "num_tokens": 5239273815.0, "step": 8272 }, { "epoch": 0.9782428757242521, "grad_norm": 0.13141897320747375, "learning_rate": 6.067740728464441e-06, "loss": 0.332, "num_tokens": 5239906905.0, "step": 8273 }, { "epoch": 0.9783611209648811, "grad_norm": 0.1239815279841423, "learning_rate": 6.067010677799605e-06, "loss": 0.3018, "num_tokens": 5240534892.0, "step": 8274 }, { "epoch": 0.9784793662055102, "grad_norm": 0.1377616822719574, "learning_rate": 6.066284577514788e-06, "loss": 0.3435, "num_tokens": 5241174477.0, "step": 8275 }, { "epoch": 0.9785976114461393, "grad_norm": 0.13642843067646027, "learning_rate": 6.0655624277164876e-06, "loss": 0.3254, "num_tokens": 5241812065.0, "step": 8276 }, { "epoch": 0.9787158566867683, "grad_norm": 0.1342545747756958, "learning_rate": 6.064844228510631e-06, "loss": 0.32, "num_tokens": 5242446586.0, "step": 8277 }, { "epoch": 0.9788341019273974, "grad_norm": 0.12853077054023743, "learning_rate": 6.064129980002553e-06, "loss": 0.2977, "num_tokens": 5243055537.0, "step": 8278 }, { "epoch": 0.9789523471680265, "grad_norm": 0.13522247970104218, "learning_rate": 6.06341968229702e-06, "loss": 0.3177, "num_tokens": 5243693214.0, "step": 8279 }, { "epoch": 0.9790705924086556, "grad_norm": 0.12929853796958923, "learning_rate": 6.062713335498211e-06, "loss": 0.3037, "num_tokens": 5244331299.0, "step": 8280 }, { "epoch": 0.9791888376492847, "grad_norm": 0.1242319718003273, "learning_rate": 6.062010939709732e-06, "loss": 0.3026, "num_tokens": 5244964539.0, "step": 8281 }, { "epoch": 0.9793070828899136, "grad_norm": 0.13000254333019257, "learning_rate": 6.061312495034605e-06, "loss": 0.3196, "num_tokens": 5245594742.0, "step": 8282 }, { "epoch": 0.9794253281305427, "grad_norm": 0.12702573835849762, "learning_rate": 6.060618001575274e-06, "loss": 0.3046, "num_tokens": 5246230921.0, "step": 8283 }, { "epoch": 0.9795435733711718, "grad_norm": 0.12647698819637299, "learning_rate": 6.059927459433604e-06, "loss": 0.3083, "num_tokens": 5246869042.0, "step": 8284 }, { "epoch": 0.9796618186118009, "grad_norm": 0.12717832624912262, "learning_rate": 6.05924086871088e-06, "loss": 0.3622, "num_tokens": 5247505144.0, "step": 8285 }, { "epoch": 0.97978006385243, "grad_norm": 0.1338759809732437, "learning_rate": 6.058558229507804e-06, "loss": 0.3298, "num_tokens": 5248139260.0, "step": 8286 }, { "epoch": 0.979898309093059, "grad_norm": 0.1294010579586029, "learning_rate": 6.057879541924505e-06, "loss": 0.3402, "num_tokens": 5248775746.0, "step": 8287 }, { "epoch": 0.9800165543336881, "grad_norm": 0.13267570734024048, "learning_rate": 6.05720480606053e-06, "loss": 0.2922, "num_tokens": 5249405323.0, "step": 8288 }, { "epoch": 0.9801347995743172, "grad_norm": 0.1277601271867752, "learning_rate": 6.056534022014843e-06, "loss": 0.3023, "num_tokens": 5250037017.0, "step": 8289 }, { "epoch": 0.9802530448149462, "grad_norm": 0.1261870414018631, "learning_rate": 6.055867189885832e-06, "loss": 0.3141, "num_tokens": 5250675484.0, "step": 8290 }, { "epoch": 0.9803712900555752, "grad_norm": 0.12810583412647247, "learning_rate": 6.055204309771306e-06, "loss": 0.3431, "num_tokens": 5251307632.0, "step": 8291 }, { "epoch": 0.9804895352962043, "grad_norm": 0.13402719795703888, "learning_rate": 6.054545381768489e-06, "loss": 0.3337, "num_tokens": 5251938961.0, "step": 8292 }, { "epoch": 0.9806077805368334, "grad_norm": 0.12217825651168823, "learning_rate": 6.053890405974029e-06, "loss": 0.3373, "num_tokens": 5252575531.0, "step": 8293 }, { "epoch": 0.9807260257774625, "grad_norm": 0.13465343415737152, "learning_rate": 6.0532393824839945e-06, "loss": 0.3453, "num_tokens": 5253215079.0, "step": 8294 }, { "epoch": 0.9808442710180916, "grad_norm": 0.14350569248199463, "learning_rate": 6.052592311393879e-06, "loss": 0.3102, "num_tokens": 5253851941.0, "step": 8295 }, { "epoch": 0.9809625162587206, "grad_norm": 0.1289779245853424, "learning_rate": 6.051949192798584e-06, "loss": 0.3022, "num_tokens": 5254490026.0, "step": 8296 }, { "epoch": 0.9810807614993496, "grad_norm": 0.13768883049488068, "learning_rate": 6.051310026792443e-06, "loss": 0.3367, "num_tokens": 5255127446.0, "step": 8297 }, { "epoch": 0.9811990067399787, "grad_norm": 0.13083931803703308, "learning_rate": 6.050674813469205e-06, "loss": 0.342, "num_tokens": 5255761679.0, "step": 8298 }, { "epoch": 0.9813172519806078, "grad_norm": 0.13485562801361084, "learning_rate": 6.050043552922037e-06, "loss": 0.345, "num_tokens": 5256396795.0, "step": 8299 }, { "epoch": 0.9814354972212368, "grad_norm": 0.11807753890752792, "learning_rate": 6.049416245243533e-06, "loss": 0.3137, "num_tokens": 5257034504.0, "step": 8300 }, { "epoch": 0.9815537424618659, "grad_norm": 0.12036912888288498, "learning_rate": 6.048792890525697e-06, "loss": 0.3005, "num_tokens": 5257664925.0, "step": 8301 }, { "epoch": 0.981671987702495, "grad_norm": 0.13659296929836273, "learning_rate": 6.048173488859963e-06, "loss": 0.3221, "num_tokens": 5258304437.0, "step": 8302 }, { "epoch": 0.9817902329431241, "grad_norm": 0.1448575109243393, "learning_rate": 6.047558040337185e-06, "loss": 0.352, "num_tokens": 5258938554.0, "step": 8303 }, { "epoch": 0.9819084781837532, "grad_norm": 0.13607923686504364, "learning_rate": 6.046946545047627e-06, "loss": 0.3165, "num_tokens": 5259570021.0, "step": 8304 }, { "epoch": 0.9820267234243821, "grad_norm": 0.12448085844516754, "learning_rate": 6.04633900308098e-06, "loss": 0.313, "num_tokens": 5260209440.0, "step": 8305 }, { "epoch": 0.9821449686650112, "grad_norm": 0.12490374594926834, "learning_rate": 6.04573541452636e-06, "loss": 0.2834, "num_tokens": 5260835881.0, "step": 8306 }, { "epoch": 0.9822632139056403, "grad_norm": 0.12780529260635376, "learning_rate": 6.045135779472295e-06, "loss": 0.2783, "num_tokens": 5261465543.0, "step": 8307 }, { "epoch": 0.9823814591462694, "grad_norm": 0.12387599050998688, "learning_rate": 6.044540098006733e-06, "loss": 0.3063, "num_tokens": 5262097664.0, "step": 8308 }, { "epoch": 0.9824997043868984, "grad_norm": 0.1282551884651184, "learning_rate": 6.043948370217051e-06, "loss": 0.3313, "num_tokens": 5262732431.0, "step": 8309 }, { "epoch": 0.9826179496275275, "grad_norm": 0.11978895217180252, "learning_rate": 6.043360596190037e-06, "loss": 0.3048, "num_tokens": 5263366114.0, "step": 8310 }, { "epoch": 0.9827361948681566, "grad_norm": 0.1409744918346405, "learning_rate": 6.042776776011903e-06, "loss": 0.3435, "num_tokens": 5264001935.0, "step": 8311 }, { "epoch": 0.9828544401087856, "grad_norm": 0.1332922726869583, "learning_rate": 6.042196909768283e-06, "loss": 0.3096, "num_tokens": 5264641451.0, "step": 8312 }, { "epoch": 0.9829726853494147, "grad_norm": 0.1358286291360855, "learning_rate": 6.041620997544223e-06, "loss": 0.3167, "num_tokens": 5265270265.0, "step": 8313 }, { "epoch": 0.9830909305900437, "grad_norm": 0.12362612783908844, "learning_rate": 6.0410490394241974e-06, "loss": 0.2806, "num_tokens": 5265906861.0, "step": 8314 }, { "epoch": 0.9832091758306728, "grad_norm": 0.12919586896896362, "learning_rate": 6.040481035492102e-06, "loss": 0.3276, "num_tokens": 5266546046.0, "step": 8315 }, { "epoch": 0.9833274210713019, "grad_norm": 0.12803572416305542, "learning_rate": 6.039916985831242e-06, "loss": 0.319, "num_tokens": 5267181737.0, "step": 8316 }, { "epoch": 0.983445666311931, "grad_norm": 0.12157775461673737, "learning_rate": 6.03935689052435e-06, "loss": 0.3221, "num_tokens": 5267817607.0, "step": 8317 }, { "epoch": 0.98356391155256, "grad_norm": 0.13102170825004578, "learning_rate": 6.0388007496535805e-06, "loss": 0.3305, "num_tokens": 5268452745.0, "step": 8318 }, { "epoch": 0.983682156793189, "grad_norm": 0.12993736565113068, "learning_rate": 6.038248563300504e-06, "loss": 0.3109, "num_tokens": 5269086651.0, "step": 8319 }, { "epoch": 0.9838004020338181, "grad_norm": 0.1409064680337906, "learning_rate": 6.037700331546111e-06, "loss": 0.3396, "num_tokens": 5269721375.0, "step": 8320 }, { "epoch": 0.9839186472744472, "grad_norm": 0.13445183634757996, "learning_rate": 6.037156054470817e-06, "loss": 0.3335, "num_tokens": 5270350205.0, "step": 8321 }, { "epoch": 0.9840368925150763, "grad_norm": 0.14085860550403595, "learning_rate": 6.036615732154446e-06, "loss": 0.3462, "num_tokens": 5270983794.0, "step": 8322 }, { "epoch": 0.9841551377557053, "grad_norm": 0.1372838318347931, "learning_rate": 6.036079364676257e-06, "loss": 0.3747, "num_tokens": 5271617387.0, "step": 8323 }, { "epoch": 0.9842733829963344, "grad_norm": 0.12568484246730804, "learning_rate": 6.035546952114919e-06, "loss": 0.3213, "num_tokens": 5272247496.0, "step": 8324 }, { "epoch": 0.9843916282369635, "grad_norm": 0.13607677817344666, "learning_rate": 6.035018494548519e-06, "loss": 0.3254, "num_tokens": 5272886843.0, "step": 8325 }, { "epoch": 0.9845098734775926, "grad_norm": 0.12915809452533722, "learning_rate": 6.034493992054577e-06, "loss": 0.2911, "num_tokens": 5273521895.0, "step": 8326 }, { "epoch": 0.9846281187182216, "grad_norm": 0.12494554370641708, "learning_rate": 6.0339734447100135e-06, "loss": 0.3045, "num_tokens": 5274159185.0, "step": 8327 }, { "epoch": 0.9847463639588506, "grad_norm": 0.12094021588563919, "learning_rate": 6.033456852591189e-06, "loss": 0.3149, "num_tokens": 5274798127.0, "step": 8328 }, { "epoch": 0.9848646091994797, "grad_norm": 0.14130283892154694, "learning_rate": 6.032944215773868e-06, "loss": 0.3442, "num_tokens": 5275433507.0, "step": 8329 }, { "epoch": 0.9849828544401088, "grad_norm": 0.12631502747535706, "learning_rate": 6.032435534333245e-06, "loss": 0.3171, "num_tokens": 5276069192.0, "step": 8330 }, { "epoch": 0.9851010996807379, "grad_norm": 0.1265324056148529, "learning_rate": 6.031930808343927e-06, "loss": 0.2834, "num_tokens": 5276677563.0, "step": 8331 }, { "epoch": 0.9852193449213669, "grad_norm": 0.14178752899169922, "learning_rate": 6.031430037879949e-06, "loss": 0.3338, "num_tokens": 5277309299.0, "step": 8332 }, { "epoch": 0.985337590161996, "grad_norm": 0.12706254422664642, "learning_rate": 6.030933223014756e-06, "loss": 0.3032, "num_tokens": 5277947332.0, "step": 8333 }, { "epoch": 0.985455835402625, "grad_norm": 0.12809669971466064, "learning_rate": 6.030440363821222e-06, "loss": 0.3278, "num_tokens": 5278580964.0, "step": 8334 }, { "epoch": 0.9855740806432541, "grad_norm": 0.12420350313186646, "learning_rate": 6.0299514603716336e-06, "loss": 0.3, "num_tokens": 5279216592.0, "step": 8335 }, { "epoch": 0.9856923258838832, "grad_norm": 0.13227316737174988, "learning_rate": 6.029466512737705e-06, "loss": 0.2797, "num_tokens": 5279846150.0, "step": 8336 }, { "epoch": 0.9858105711245122, "grad_norm": 0.13044704496860504, "learning_rate": 6.02898552099056e-06, "loss": 0.3341, "num_tokens": 5280483998.0, "step": 8337 }, { "epoch": 0.9859288163651413, "grad_norm": 0.12699879705905914, "learning_rate": 6.028508485200751e-06, "loss": 0.3206, "num_tokens": 5281113150.0, "step": 8338 }, { "epoch": 0.9860470616057704, "grad_norm": 0.1339361071586609, "learning_rate": 6.028035405438248e-06, "loss": 0.3055, "num_tokens": 5281746743.0, "step": 8339 }, { "epoch": 0.9861653068463995, "grad_norm": 0.1360834538936615, "learning_rate": 6.0275662817724395e-06, "loss": 0.3214, "num_tokens": 5282383764.0, "step": 8340 }, { "epoch": 0.9862835520870284, "grad_norm": 0.13688234984874725, "learning_rate": 6.027101114272132e-06, "loss": 0.3193, "num_tokens": 5283017608.0, "step": 8341 }, { "epoch": 0.9864017973276575, "grad_norm": 0.1348162740468979, "learning_rate": 6.02663990300555e-06, "loss": 0.316, "num_tokens": 5283655012.0, "step": 8342 }, { "epoch": 0.9865200425682866, "grad_norm": 0.13315971195697784, "learning_rate": 6.026182648040352e-06, "loss": 0.3135, "num_tokens": 5284287951.0, "step": 8343 }, { "epoch": 0.9866382878089157, "grad_norm": 0.1340332180261612, "learning_rate": 6.025729349443596e-06, "loss": 0.3078, "num_tokens": 5284884466.0, "step": 8344 }, { "epoch": 0.9867565330495448, "grad_norm": 0.13966993987560272, "learning_rate": 6.02528000728177e-06, "loss": 0.2667, "num_tokens": 5285514478.0, "step": 8345 }, { "epoch": 0.9868747782901738, "grad_norm": 0.13473132252693176, "learning_rate": 6.024834621620788e-06, "loss": 0.3063, "num_tokens": 5286144508.0, "step": 8346 }, { "epoch": 0.9869930235308029, "grad_norm": 0.11717087775468826, "learning_rate": 6.02439319252597e-06, "loss": 0.2762, "num_tokens": 5286778410.0, "step": 8347 }, { "epoch": 0.987111268771432, "grad_norm": 0.13211621344089508, "learning_rate": 6.023955720062067e-06, "loss": 0.3594, "num_tokens": 5287417665.0, "step": 8348 }, { "epoch": 0.987229514012061, "grad_norm": 0.12897589802742004, "learning_rate": 6.023522204293238e-06, "loss": 0.2947, "num_tokens": 5288045464.0, "step": 8349 }, { "epoch": 0.98734775925269, "grad_norm": 0.1325943022966385, "learning_rate": 6.023092645283077e-06, "loss": 0.3264, "num_tokens": 5288677057.0, "step": 8350 }, { "epoch": 0.9874660044933191, "grad_norm": 0.1240144670009613, "learning_rate": 6.022667043094586e-06, "loss": 0.3052, "num_tokens": 5289315374.0, "step": 8351 }, { "epoch": 0.9875842497339482, "grad_norm": 0.1315339058637619, "learning_rate": 6.0222453977901885e-06, "loss": 0.3192, "num_tokens": 5289950396.0, "step": 8352 }, { "epoch": 0.9877024949745773, "grad_norm": 0.1442670077085495, "learning_rate": 6.021827709431732e-06, "loss": 0.3093, "num_tokens": 5290580729.0, "step": 8353 }, { "epoch": 0.9878207402152064, "grad_norm": 0.13173291087150574, "learning_rate": 6.021413978080476e-06, "loss": 0.3242, "num_tokens": 5291215964.0, "step": 8354 }, { "epoch": 0.9879389854558354, "grad_norm": 0.13818521797657013, "learning_rate": 6.021004203797108e-06, "loss": 0.3074, "num_tokens": 5291846573.0, "step": 8355 }, { "epoch": 0.9880572306964645, "grad_norm": 0.14068926870822906, "learning_rate": 6.0205983866417285e-06, "loss": 0.3342, "num_tokens": 5292483754.0, "step": 8356 }, { "epoch": 0.9881754759370935, "grad_norm": 0.12342841923236847, "learning_rate": 6.020196526673865e-06, "loss": 0.2771, "num_tokens": 5293117303.0, "step": 8357 }, { "epoch": 0.9882937211777226, "grad_norm": 0.12965553998947144, "learning_rate": 6.019798623952456e-06, "loss": 0.335, "num_tokens": 5293746954.0, "step": 8358 }, { "epoch": 0.9884119664183516, "grad_norm": 0.12391670793294907, "learning_rate": 6.019404678535868e-06, "loss": 0.3207, "num_tokens": 5294381360.0, "step": 8359 }, { "epoch": 0.9885302116589807, "grad_norm": 0.12507295608520508, "learning_rate": 6.019014690481877e-06, "loss": 0.3157, "num_tokens": 5295016205.0, "step": 8360 }, { "epoch": 0.9886484568996098, "grad_norm": 0.12219098210334778, "learning_rate": 6.018628659847689e-06, "loss": 0.2935, "num_tokens": 5295644812.0, "step": 8361 }, { "epoch": 0.9887667021402389, "grad_norm": 0.14612101018428802, "learning_rate": 6.0182465866899215e-06, "loss": 0.35, "num_tokens": 5296280564.0, "step": 8362 }, { "epoch": 0.988884947380868, "grad_norm": 0.12390107661485672, "learning_rate": 6.017868471064616e-06, "loss": 0.2808, "num_tokens": 5296916339.0, "step": 8363 }, { "epoch": 0.9890031926214969, "grad_norm": 0.13337615132331848, "learning_rate": 6.017494313027234e-06, "loss": 0.3238, "num_tokens": 5297550917.0, "step": 8364 }, { "epoch": 0.989121437862126, "grad_norm": 0.12817448377609253, "learning_rate": 6.017124112632656e-06, "loss": 0.2924, "num_tokens": 5298184860.0, "step": 8365 }, { "epoch": 0.9892396831027551, "grad_norm": 0.13612917065620422, "learning_rate": 6.016757869935174e-06, "loss": 0.3767, "num_tokens": 5298819193.0, "step": 8366 }, { "epoch": 0.9893579283433842, "grad_norm": 0.1295391172170639, "learning_rate": 6.0163955849885135e-06, "loss": 0.3082, "num_tokens": 5299448178.0, "step": 8367 }, { "epoch": 0.9894761735840133, "grad_norm": 0.13923567533493042, "learning_rate": 6.01603725784581e-06, "loss": 0.3303, "num_tokens": 5300082637.0, "step": 8368 }, { "epoch": 0.9895944188246423, "grad_norm": 0.13374292850494385, "learning_rate": 6.01568288855962e-06, "loss": 0.3171, "num_tokens": 5300719841.0, "step": 8369 }, { "epoch": 0.9897126640652714, "grad_norm": 0.1305582970380783, "learning_rate": 6.015332477181921e-06, "loss": 0.2847, "num_tokens": 5301355657.0, "step": 8370 }, { "epoch": 0.9898309093059005, "grad_norm": 0.14369647204875946, "learning_rate": 6.01498602376411e-06, "loss": 0.3327, "num_tokens": 5301990969.0, "step": 8371 }, { "epoch": 0.9899491545465295, "grad_norm": 0.1326875239610672, "learning_rate": 6.014643528357001e-06, "loss": 0.3366, "num_tokens": 5302630373.0, "step": 8372 }, { "epoch": 0.9900673997871585, "grad_norm": 0.1271059513092041, "learning_rate": 6.014304991010834e-06, "loss": 0.3195, "num_tokens": 5303258268.0, "step": 8373 }, { "epoch": 0.9901856450277876, "grad_norm": 0.1405857503414154, "learning_rate": 6.013970411775258e-06, "loss": 0.3918, "num_tokens": 5303890300.0, "step": 8374 }, { "epoch": 0.9903038902684167, "grad_norm": 0.1322239637374878, "learning_rate": 6.01363979069935e-06, "loss": 0.3179, "num_tokens": 5304529434.0, "step": 8375 }, { "epoch": 0.9904221355090458, "grad_norm": 0.13154441118240356, "learning_rate": 6.0133131278316055e-06, "loss": 0.3179, "num_tokens": 5305163540.0, "step": 8376 }, { "epoch": 0.9905403807496749, "grad_norm": 0.1329275667667389, "learning_rate": 6.012990423219932e-06, "loss": 0.3179, "num_tokens": 5305765790.0, "step": 8377 }, { "epoch": 0.9906586259903039, "grad_norm": 0.12294628471136093, "learning_rate": 6.012671676911668e-06, "loss": 0.2989, "num_tokens": 5306399895.0, "step": 8378 }, { "epoch": 0.9907768712309329, "grad_norm": 0.13117071986198425, "learning_rate": 6.012356888953562e-06, "loss": 0.324, "num_tokens": 5307037554.0, "step": 8379 }, { "epoch": 0.990895116471562, "grad_norm": 0.13337624073028564, "learning_rate": 6.0120460593917855e-06, "loss": 0.3296, "num_tokens": 5307675468.0, "step": 8380 }, { "epoch": 0.9910133617121911, "grad_norm": 0.12937510013580322, "learning_rate": 6.011739188271931e-06, "loss": 0.3106, "num_tokens": 5308309383.0, "step": 8381 }, { "epoch": 0.9911316069528201, "grad_norm": 0.14128737151622772, "learning_rate": 6.011436275639006e-06, "loss": 0.3243, "num_tokens": 5308945079.0, "step": 8382 }, { "epoch": 0.9912498521934492, "grad_norm": 0.13013893365859985, "learning_rate": 6.011137321537444e-06, "loss": 0.3265, "num_tokens": 5309579615.0, "step": 8383 }, { "epoch": 0.9913680974340783, "grad_norm": 0.12617677450180054, "learning_rate": 6.010842326011092e-06, "loss": 0.2913, "num_tokens": 5310213338.0, "step": 8384 }, { "epoch": 0.9914863426747074, "grad_norm": 0.12442494183778763, "learning_rate": 6.0105512891032155e-06, "loss": 0.3111, "num_tokens": 5310846441.0, "step": 8385 }, { "epoch": 0.9916045879153365, "grad_norm": 0.1316176801919937, "learning_rate": 6.010264210856504e-06, "loss": 0.3268, "num_tokens": 5311481044.0, "step": 8386 }, { "epoch": 0.9917228331559654, "grad_norm": 0.1374335139989853, "learning_rate": 6.009981091313066e-06, "loss": 0.3126, "num_tokens": 5312114143.0, "step": 8387 }, { "epoch": 0.9918410783965945, "grad_norm": 0.14715424180030823, "learning_rate": 6.009701930514426e-06, "loss": 0.3335, "num_tokens": 5312727642.0, "step": 8388 }, { "epoch": 0.9919593236372236, "grad_norm": 0.13556255400180817, "learning_rate": 6.009426728501533e-06, "loss": 0.3418, "num_tokens": 5313357560.0, "step": 8389 }, { "epoch": 0.9920775688778527, "grad_norm": 0.12820345163345337, "learning_rate": 6.0091554853147475e-06, "loss": 0.3074, "num_tokens": 5313993038.0, "step": 8390 }, { "epoch": 0.9921958141184817, "grad_norm": 0.12908929586410522, "learning_rate": 6.008888200993857e-06, "loss": 0.3255, "num_tokens": 5314624688.0, "step": 8391 }, { "epoch": 0.9923140593591108, "grad_norm": 0.1197088211774826, "learning_rate": 6.008624875578065e-06, "loss": 0.3334, "num_tokens": 5315259001.0, "step": 8392 }, { "epoch": 0.9924323045997399, "grad_norm": 0.12787586450576782, "learning_rate": 6.0083655091059935e-06, "loss": 0.3297, "num_tokens": 5315895902.0, "step": 8393 }, { "epoch": 0.992550549840369, "grad_norm": 0.1258198767900467, "learning_rate": 6.008110101615686e-06, "loss": 0.3146, "num_tokens": 5316533764.0, "step": 8394 }, { "epoch": 0.992668795080998, "grad_norm": 0.14479690790176392, "learning_rate": 6.007858653144602e-06, "loss": 0.3546, "num_tokens": 5317165032.0, "step": 8395 }, { "epoch": 0.992787040321627, "grad_norm": 0.128653421998024, "learning_rate": 6.0076111637296255e-06, "loss": 0.3445, "num_tokens": 5317797366.0, "step": 8396 }, { "epoch": 0.9929052855622561, "grad_norm": 0.12900324165821075, "learning_rate": 6.007367633407056e-06, "loss": 0.3216, "num_tokens": 5318435179.0, "step": 8397 }, { "epoch": 0.9930235308028852, "grad_norm": 0.1502055525779724, "learning_rate": 6.007128062212611e-06, "loss": 0.3417, "num_tokens": 5319061326.0, "step": 8398 }, { "epoch": 0.9931417760435143, "grad_norm": 0.12129844725131989, "learning_rate": 6.006892450181436e-06, "loss": 0.2999, "num_tokens": 5319698268.0, "step": 8399 }, { "epoch": 0.9932600212841434, "grad_norm": 0.1248626634478569, "learning_rate": 6.0066607973480785e-06, "loss": 0.2807, "num_tokens": 5320331384.0, "step": 8400 }, { "epoch": 0.9933782665247723, "grad_norm": 0.12210075557231903, "learning_rate": 6.006433103746526e-06, "loss": 0.3217, "num_tokens": 5320968079.0, "step": 8401 }, { "epoch": 0.9934965117654014, "grad_norm": 0.13090315461158752, "learning_rate": 6.006209369410169e-06, "loss": 0.3311, "num_tokens": 5321604844.0, "step": 8402 }, { "epoch": 0.9936147570060305, "grad_norm": 0.13128890097141266, "learning_rate": 6.005989594371825e-06, "loss": 0.3056, "num_tokens": 5322235070.0, "step": 8403 }, { "epoch": 0.9937330022466596, "grad_norm": 0.13665130734443665, "learning_rate": 6.0057737786637356e-06, "loss": 0.3192, "num_tokens": 5322866971.0, "step": 8404 }, { "epoch": 0.9938512474872886, "grad_norm": 0.12663716077804565, "learning_rate": 6.005561922317546e-06, "loss": 0.2891, "num_tokens": 5323499297.0, "step": 8405 }, { "epoch": 0.9939694927279177, "grad_norm": 0.14594483375549316, "learning_rate": 6.005354025364334e-06, "loss": 0.3221, "num_tokens": 5324132340.0, "step": 8406 }, { "epoch": 0.9940877379685468, "grad_norm": 0.13400320708751678, "learning_rate": 6.005150087834595e-06, "loss": 0.3389, "num_tokens": 5324759567.0, "step": 8407 }, { "epoch": 0.9942059832091759, "grad_norm": 0.12572234869003296, "learning_rate": 6.004950109758237e-06, "loss": 0.2922, "num_tokens": 5325395174.0, "step": 8408 }, { "epoch": 0.994324228449805, "grad_norm": 0.14227521419525146, "learning_rate": 6.004754091164597e-06, "loss": 0.3224, "num_tokens": 5326028778.0, "step": 8409 }, { "epoch": 0.9944424736904339, "grad_norm": 0.1276744157075882, "learning_rate": 6.00456203208242e-06, "loss": 0.3341, "num_tokens": 5326668102.0, "step": 8410 }, { "epoch": 0.994560718931063, "grad_norm": 0.1259220540523529, "learning_rate": 6.0043739325398805e-06, "loss": 0.3077, "num_tokens": 5327307085.0, "step": 8411 }, { "epoch": 0.9946789641716921, "grad_norm": 0.1295536756515503, "learning_rate": 6.004189792564567e-06, "loss": 0.3232, "num_tokens": 5327931959.0, "step": 8412 }, { "epoch": 0.9947972094123212, "grad_norm": 0.13815905153751373, "learning_rate": 6.004009612183487e-06, "loss": 0.3304, "num_tokens": 5328557478.0, "step": 8413 }, { "epoch": 0.9949154546529502, "grad_norm": 0.1335347443819046, "learning_rate": 6.003833391423071e-06, "loss": 0.292, "num_tokens": 5329190755.0, "step": 8414 }, { "epoch": 0.9950336998935793, "grad_norm": 0.13637635111808777, "learning_rate": 6.003661130309162e-06, "loss": 0.3547, "num_tokens": 5329830405.0, "step": 8415 }, { "epoch": 0.9951519451342083, "grad_norm": 0.1401950567960739, "learning_rate": 6.00349282886703e-06, "loss": 0.3745, "num_tokens": 5330467536.0, "step": 8416 }, { "epoch": 0.9952701903748374, "grad_norm": 0.13383512198925018, "learning_rate": 6.003328487121354e-06, "loss": 0.3024, "num_tokens": 5331103527.0, "step": 8417 }, { "epoch": 0.9953884356154665, "grad_norm": 0.12540915608406067, "learning_rate": 6.003168105096249e-06, "loss": 0.3067, "num_tokens": 5331742578.0, "step": 8418 }, { "epoch": 0.9955066808560955, "grad_norm": 0.13343775272369385, "learning_rate": 6.003011682815231e-06, "loss": 0.3098, "num_tokens": 5332374691.0, "step": 8419 }, { "epoch": 0.9956249260967246, "grad_norm": 0.14648503065109253, "learning_rate": 6.002859220301247e-06, "loss": 0.3459, "num_tokens": 5333001818.0, "step": 8420 }, { "epoch": 0.9957431713373537, "grad_norm": 0.12428183108568192, "learning_rate": 6.0027107175766574e-06, "loss": 0.3528, "num_tokens": 5333637098.0, "step": 8421 }, { "epoch": 0.9958614165779828, "grad_norm": 0.13005532324314117, "learning_rate": 6.002566174663244e-06, "loss": 0.3226, "num_tokens": 5334272515.0, "step": 8422 }, { "epoch": 0.9959796618186118, "grad_norm": 0.138321191072464, "learning_rate": 6.002425591582209e-06, "loss": 0.335, "num_tokens": 5334904364.0, "step": 8423 }, { "epoch": 0.9960979070592408, "grad_norm": 0.1258166879415512, "learning_rate": 6.002288968354172e-06, "loss": 0.2939, "num_tokens": 5335542747.0, "step": 8424 }, { "epoch": 0.9962161522998699, "grad_norm": 0.12636058032512665, "learning_rate": 6.002156304999169e-06, "loss": 0.3113, "num_tokens": 5336175140.0, "step": 8425 }, { "epoch": 0.996334397540499, "grad_norm": 0.1311343014240265, "learning_rate": 6.002027601536661e-06, "loss": 0.2879, "num_tokens": 5336786410.0, "step": 8426 }, { "epoch": 0.9964526427811281, "grad_norm": 0.13422849774360657, "learning_rate": 6.001902857985528e-06, "loss": 0.2949, "num_tokens": 5337421140.0, "step": 8427 }, { "epoch": 0.9965708880217571, "grad_norm": 0.1292581707239151, "learning_rate": 6.001782074364062e-06, "loss": 0.3216, "num_tokens": 5338051489.0, "step": 8428 }, { "epoch": 0.9966891332623862, "grad_norm": 0.1230071485042572, "learning_rate": 6.001665250689982e-06, "loss": 0.3366, "num_tokens": 5338689938.0, "step": 8429 }, { "epoch": 0.9968073785030153, "grad_norm": 0.1381818950176239, "learning_rate": 6.001552386980418e-06, "loss": 0.3369, "num_tokens": 5339321000.0, "step": 8430 }, { "epoch": 0.9969256237436444, "grad_norm": 0.13387945294380188, "learning_rate": 6.001443483251932e-06, "loss": 0.3036, "num_tokens": 5339952107.0, "step": 8431 }, { "epoch": 0.9970438689842733, "grad_norm": 0.13549645245075226, "learning_rate": 6.0013385395204905e-06, "loss": 0.3444, "num_tokens": 5340585035.0, "step": 8432 }, { "epoch": 0.9971621142249024, "grad_norm": 0.12512056529521942, "learning_rate": 6.001237555801492e-06, "loss": 0.3263, "num_tokens": 5341221358.0, "step": 8433 }, { "epoch": 0.9972803594655315, "grad_norm": 0.14423313736915588, "learning_rate": 6.0011405321097425e-06, "loss": 0.339, "num_tokens": 5341858440.0, "step": 8434 }, { "epoch": 0.9973986047061606, "grad_norm": 0.12664687633514404, "learning_rate": 6.00104746845948e-06, "loss": 0.3083, "num_tokens": 5342491370.0, "step": 8435 }, { "epoch": 0.9975168499467897, "grad_norm": 0.13205693662166595, "learning_rate": 6.000958364864347e-06, "loss": 0.3486, "num_tokens": 5343112387.0, "step": 8436 }, { "epoch": 0.9976350951874187, "grad_norm": 0.13354982435703278, "learning_rate": 6.000873221337415e-06, "loss": 0.3029, "num_tokens": 5343739283.0, "step": 8437 }, { "epoch": 0.9977533404280478, "grad_norm": 0.1334676444530487, "learning_rate": 6.000792037891175e-06, "loss": 0.3496, "num_tokens": 5344372482.0, "step": 8438 }, { "epoch": 0.9978715856686768, "grad_norm": 0.14983855187892914, "learning_rate": 6.000714814537532e-06, "loss": 0.3406, "num_tokens": 5345004359.0, "step": 8439 }, { "epoch": 0.9979898309093059, "grad_norm": 0.1286334991455078, "learning_rate": 6.000641551287814e-06, "loss": 0.3262, "num_tokens": 5345644007.0, "step": 8440 }, { "epoch": 0.998108076149935, "grad_norm": 0.13067254424095154, "learning_rate": 6.000572248152765e-06, "loss": 0.3106, "num_tokens": 5346268576.0, "step": 8441 }, { "epoch": 0.998226321390564, "grad_norm": 0.12917651236057281, "learning_rate": 6.000506905142551e-06, "loss": 0.3316, "num_tokens": 5346907902.0, "step": 8442 }, { "epoch": 0.9983445666311931, "grad_norm": 0.1407715082168579, "learning_rate": 6.0004455222667586e-06, "loss": 0.3444, "num_tokens": 5347537062.0, "step": 8443 }, { "epoch": 0.9984628118718222, "grad_norm": 0.13665875792503357, "learning_rate": 6.000388099534387e-06, "loss": 0.3259, "num_tokens": 5348171941.0, "step": 8444 }, { "epoch": 0.9985810571124513, "grad_norm": 0.13985520601272583, "learning_rate": 6.000334636953863e-06, "loss": 0.3613, "num_tokens": 5348804384.0, "step": 8445 }, { "epoch": 0.9986993023530802, "grad_norm": 0.1279362440109253, "learning_rate": 6.000285134533023e-06, "loss": 0.3028, "num_tokens": 5349438728.0, "step": 8446 }, { "epoch": 0.9988175475937093, "grad_norm": 0.13649433851242065, "learning_rate": 6.000239592279132e-06, "loss": 0.3605, "num_tokens": 5350073382.0, "step": 8447 }, { "epoch": 0.9989357928343384, "grad_norm": 0.13089171051979065, "learning_rate": 6.000198010198869e-06, "loss": 0.3273, "num_tokens": 5350710879.0, "step": 8448 }, { "epoch": 0.9990540380749675, "grad_norm": 0.13444791734218597, "learning_rate": 6.000160388298331e-06, "loss": 0.3255, "num_tokens": 5351346071.0, "step": 8449 }, { "epoch": 0.9991722833155966, "grad_norm": 0.1348676234483719, "learning_rate": 6.000126726583039e-06, "loss": 0.3123, "num_tokens": 5351982722.0, "step": 8450 }, { "epoch": 0.9992905285562256, "grad_norm": 0.12566529214382172, "learning_rate": 6.0000970250579295e-06, "loss": 0.306, "num_tokens": 5352617247.0, "step": 8451 }, { "epoch": 0.9994087737968547, "grad_norm": 0.12419701367616653, "learning_rate": 6.000071283727355e-06, "loss": 0.3318, "num_tokens": 5353249431.0, "step": 8452 }, { "epoch": 0.9995270190374838, "grad_norm": 0.1352619081735611, "learning_rate": 6.000049502595098e-06, "loss": 0.3137, "num_tokens": 5353878239.0, "step": 8453 }, { "epoch": 0.9996452642781128, "grad_norm": 0.12178152054548264, "learning_rate": 6.0000316816643485e-06, "loss": 0.2944, "num_tokens": 5354514448.0, "step": 8454 }, { "epoch": 0.9997635095187418, "grad_norm": 0.1312275528907776, "learning_rate": 6.000017820937722e-06, "loss": 0.3158, "num_tokens": 5355147792.0, "step": 8455 }, { "epoch": 0.9998817547593709, "grad_norm": 0.11818688362836838, "learning_rate": 6.000007920417248e-06, "loss": 0.2886, "num_tokens": 5355780950.0, "step": 8456 }, { "epoch": 1.0, "grad_norm": 0.12681478261947632, "learning_rate": 6.000001980104385e-06, "loss": 0.3304, "num_tokens": 5356180414.0, "step": 8457 } ], "logging_steps": 1.0, "max_steps": 8457, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.529633531101446e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }