{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.33297859761144616, "eval_steps": 500, "global_step": 2816, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011824524062906468, "grad_norm": 1.7525548934936523, "learning_rate": 0.0, "loss": 0.9062, "num_tokens": 628048.0, "step": 1 }, { "epoch": 0.00023649048125812936, "grad_norm": 1.6681532859802246, "learning_rate": 7.058823529411765e-07, "loss": 0.8713, "num_tokens": 1266689.0, "step": 2 }, { "epoch": 0.000354735721887194, "grad_norm": 1.6785393953323364, "learning_rate": 1.411764705882353e-06, "loss": 0.8748, "num_tokens": 1900338.0, "step": 3 }, { "epoch": 0.0004729809625162587, "grad_norm": 1.6376346349716187, "learning_rate": 2.1176470588235296e-06, "loss": 0.8704, "num_tokens": 2539095.0, "step": 4 }, { "epoch": 0.0005912262031453234, "grad_norm": 1.6173648834228516, "learning_rate": 2.823529411764706e-06, "loss": 0.8767, "num_tokens": 3172740.0, "step": 5 }, { "epoch": 0.000709471443774388, "grad_norm": 1.3518872261047363, "learning_rate": 3.5294117647058825e-06, "loss": 0.8425, "num_tokens": 3810090.0, "step": 6 }, { "epoch": 0.0008277166844034528, "grad_norm": 1.2358812093734741, "learning_rate": 4.235294117647059e-06, "loss": 0.9167, "num_tokens": 4447913.0, "step": 7 }, { "epoch": 0.0009459619250325174, "grad_norm": 1.0764226913452148, "learning_rate": 4.941176470588235e-06, "loss": 0.8203, "num_tokens": 5085253.0, "step": 8 }, { "epoch": 0.001064207165661582, "grad_norm": 1.1310309171676636, "learning_rate": 5.647058823529412e-06, "loss": 0.8602, "num_tokens": 5721930.0, "step": 9 }, { "epoch": 0.0011824524062906468, "grad_norm": 1.1147594451904297, "learning_rate": 6.352941176470589e-06, "loss": 0.8557, "num_tokens": 6354428.0, "step": 10 }, { "epoch": 0.0013006976469197116, "grad_norm": 0.9504016637802124, "learning_rate": 7.058823529411765e-06, "loss": 0.8223, "num_tokens": 6988898.0, "step": 11 }, { "epoch": 0.001418942887548776, "grad_norm": 1.2047721147537231, "learning_rate": 7.764705882352943e-06, "loss": 0.8035, "num_tokens": 7627058.0, "step": 12 }, { "epoch": 0.0015371881281778408, "grad_norm": 1.0793852806091309, "learning_rate": 8.470588235294118e-06, "loss": 0.7834, "num_tokens": 8261730.0, "step": 13 }, { "epoch": 0.0016554333688069056, "grad_norm": 0.9650391340255737, "learning_rate": 9.176470588235295e-06, "loss": 0.7852, "num_tokens": 8889249.0, "step": 14 }, { "epoch": 0.0017736786094359701, "grad_norm": 0.6715760231018066, "learning_rate": 9.88235294117647e-06, "loss": 0.7063, "num_tokens": 9524710.0, "step": 15 }, { "epoch": 0.0018919238500650349, "grad_norm": 0.8713334202766418, "learning_rate": 1.0588235294117648e-05, "loss": 0.7234, "num_tokens": 10161723.0, "step": 16 }, { "epoch": 0.0020101690906940994, "grad_norm": 0.8030041456222534, "learning_rate": 1.1294117647058823e-05, "loss": 0.717, "num_tokens": 10793622.0, "step": 17 }, { "epoch": 0.002128414331323164, "grad_norm": 0.7192631363868713, "learning_rate": 1.2e-05, "loss": 0.7109, "num_tokens": 11420435.0, "step": 18 }, { "epoch": 0.002246659571952229, "grad_norm": 0.6215784549713135, "learning_rate": 1.2705882352941177e-05, "loss": 0.7001, "num_tokens": 12055174.0, "step": 19 }, { "epoch": 0.0023649048125812936, "grad_norm": 0.5151238441467285, "learning_rate": 1.3411764705882354e-05, "loss": 0.6322, "num_tokens": 12687424.0, "step": 20 }, { "epoch": 0.0024831500532103584, "grad_norm": 0.6060124635696411, "learning_rate": 1.411764705882353e-05, "loss": 0.683, "num_tokens": 13318953.0, "step": 21 }, { "epoch": 0.002601395293839423, "grad_norm": 0.5640995502471924, "learning_rate": 1.4823529411764707e-05, "loss": 0.6415, "num_tokens": 13937259.0, "step": 22 }, { "epoch": 0.0027196405344684874, "grad_norm": 0.5616605877876282, "learning_rate": 1.5529411764705886e-05, "loss": 0.6736, "num_tokens": 14569053.0, "step": 23 }, { "epoch": 0.002837885775097552, "grad_norm": 0.5228525400161743, "learning_rate": 1.623529411764706e-05, "loss": 0.6779, "num_tokens": 15199901.0, "step": 24 }, { "epoch": 0.002956131015726617, "grad_norm": 0.5150514841079712, "learning_rate": 1.6941176470588237e-05, "loss": 0.6548, "num_tokens": 15830276.0, "step": 25 }, { "epoch": 0.0030743762563556817, "grad_norm": 0.45277050137519836, "learning_rate": 1.7647058823529414e-05, "loss": 0.6527, "num_tokens": 16465223.0, "step": 26 }, { "epoch": 0.0031926214969847464, "grad_norm": 0.4658268094062805, "learning_rate": 1.835294117647059e-05, "loss": 0.6367, "num_tokens": 17102309.0, "step": 27 }, { "epoch": 0.003310866737613811, "grad_norm": 0.48463839292526245, "learning_rate": 1.9058823529411764e-05, "loss": 0.6367, "num_tokens": 17738787.0, "step": 28 }, { "epoch": 0.003429111978242876, "grad_norm": 0.4104596674442291, "learning_rate": 1.976470588235294e-05, "loss": 0.5729, "num_tokens": 18370503.0, "step": 29 }, { "epoch": 0.0035473572188719402, "grad_norm": 0.4189141094684601, "learning_rate": 2.047058823529412e-05, "loss": 0.6082, "num_tokens": 19008700.0, "step": 30 }, { "epoch": 0.003665602459501005, "grad_norm": 0.4681914448738098, "learning_rate": 2.1176470588235296e-05, "loss": 0.6145, "num_tokens": 19641076.0, "step": 31 }, { "epoch": 0.0037838477001300697, "grad_norm": 0.36613571643829346, "learning_rate": 2.188235294117647e-05, "loss": 0.5343, "num_tokens": 20268470.0, "step": 32 }, { "epoch": 0.0039020929407591345, "grad_norm": 0.37985900044441223, "learning_rate": 2.2588235294117646e-05, "loss": 0.6184, "num_tokens": 20903791.0, "step": 33 }, { "epoch": 0.004020338181388199, "grad_norm": 0.4060671031475067, "learning_rate": 2.3294117647058824e-05, "loss": 0.5974, "num_tokens": 21537889.0, "step": 34 }, { "epoch": 0.0041385834220172635, "grad_norm": 0.36260876059532166, "learning_rate": 2.4e-05, "loss": 0.5276, "num_tokens": 22169163.0, "step": 35 }, { "epoch": 0.004256828662646328, "grad_norm": 0.37660202383995056, "learning_rate": 2.4705882352941174e-05, "loss": 0.5824, "num_tokens": 22802993.0, "step": 36 }, { "epoch": 0.004375073903275393, "grad_norm": 0.3913825750350952, "learning_rate": 2.5411764705882355e-05, "loss": 0.5597, "num_tokens": 23436910.0, "step": 37 }, { "epoch": 0.004493319143904458, "grad_norm": 0.4073478579521179, "learning_rate": 2.6117647058823532e-05, "loss": 0.5951, "num_tokens": 24066237.0, "step": 38 }, { "epoch": 0.0046115643845335225, "grad_norm": 0.36139920353889465, "learning_rate": 2.682352941176471e-05, "loss": 0.5452, "num_tokens": 24702070.0, "step": 39 }, { "epoch": 0.004729809625162587, "grad_norm": 0.3636697232723236, "learning_rate": 2.7529411764705883e-05, "loss": 0.5321, "num_tokens": 25335554.0, "step": 40 }, { "epoch": 0.004848054865791652, "grad_norm": 0.3299705684185028, "learning_rate": 2.823529411764706e-05, "loss": 0.4893, "num_tokens": 25967347.0, "step": 41 }, { "epoch": 0.004966300106420717, "grad_norm": 0.36068227887153625, "learning_rate": 2.8941176470588237e-05, "loss": 0.5031, "num_tokens": 26599914.0, "step": 42 }, { "epoch": 0.0050845453470497815, "grad_norm": 0.429426908493042, "learning_rate": 2.9647058823529414e-05, "loss": 0.5608, "num_tokens": 27231412.0, "step": 43 }, { "epoch": 0.005202790587678846, "grad_norm": 0.40092170238494873, "learning_rate": 3.0352941176470588e-05, "loss": 0.5662, "num_tokens": 27868949.0, "step": 44 }, { "epoch": 0.005321035828307911, "grad_norm": 0.4001767635345459, "learning_rate": 3.105882352941177e-05, "loss": 0.5389, "num_tokens": 28505673.0, "step": 45 }, { "epoch": 0.005439281068936975, "grad_norm": 0.3943917155265808, "learning_rate": 3.1764705882352945e-05, "loss": 0.5483, "num_tokens": 29143956.0, "step": 46 }, { "epoch": 0.00555752630956604, "grad_norm": 0.3981671929359436, "learning_rate": 3.247058823529412e-05, "loss": 0.5405, "num_tokens": 29780377.0, "step": 47 }, { "epoch": 0.005675771550195104, "grad_norm": 0.4339058995246887, "learning_rate": 3.31764705882353e-05, "loss": 0.5444, "num_tokens": 30419988.0, "step": 48 }, { "epoch": 0.005794016790824169, "grad_norm": 0.46332406997680664, "learning_rate": 3.388235294117647e-05, "loss": 0.5356, "num_tokens": 31058641.0, "step": 49 }, { "epoch": 0.005912262031453234, "grad_norm": 0.3915022015571594, "learning_rate": 3.458823529411765e-05, "loss": 0.537, "num_tokens": 31696113.0, "step": 50 }, { "epoch": 0.006030507272082299, "grad_norm": 0.4514997601509094, "learning_rate": 3.529411764705883e-05, "loss": 0.5701, "num_tokens": 32335026.0, "step": 51 }, { "epoch": 0.006148752512711363, "grad_norm": 0.5017653107643127, "learning_rate": 3.6e-05, "loss": 0.4915, "num_tokens": 32968147.0, "step": 52 }, { "epoch": 0.006266997753340428, "grad_norm": 0.4031103253364563, "learning_rate": 3.670588235294118e-05, "loss": 0.5266, "num_tokens": 33599687.0, "step": 53 }, { "epoch": 0.006385242993969493, "grad_norm": 0.5270867943763733, "learning_rate": 3.7411764705882355e-05, "loss": 0.4989, "num_tokens": 34235952.0, "step": 54 }, { "epoch": 0.006503488234598558, "grad_norm": 0.3970652222633362, "learning_rate": 3.811764705882353e-05, "loss": 0.5222, "num_tokens": 34855614.0, "step": 55 }, { "epoch": 0.006621733475227622, "grad_norm": 0.5039134621620178, "learning_rate": 3.882352941176471e-05, "loss": 0.5615, "num_tokens": 35490773.0, "step": 56 }, { "epoch": 0.006739978715856687, "grad_norm": 0.42443519830703735, "learning_rate": 3.952941176470588e-05, "loss": 0.5441, "num_tokens": 36118336.0, "step": 57 }, { "epoch": 0.006858223956485752, "grad_norm": 0.4183923900127411, "learning_rate": 4.023529411764706e-05, "loss": 0.5106, "num_tokens": 36751911.0, "step": 58 }, { "epoch": 0.0069764691971148166, "grad_norm": 0.40575334429740906, "learning_rate": 4.094117647058824e-05, "loss": 0.504, "num_tokens": 37375413.0, "step": 59 }, { "epoch": 0.0070947144377438804, "grad_norm": 0.4154472351074219, "learning_rate": 4.164705882352941e-05, "loss": 0.4789, "num_tokens": 38006888.0, "step": 60 }, { "epoch": 0.007212959678372945, "grad_norm": 0.4931453764438629, "learning_rate": 4.235294117647059e-05, "loss": 0.4956, "num_tokens": 38640497.0, "step": 61 }, { "epoch": 0.00733120491900201, "grad_norm": 0.4350408911705017, "learning_rate": 4.3058823529411765e-05, "loss": 0.482, "num_tokens": 39273313.0, "step": 62 }, { "epoch": 0.007449450159631075, "grad_norm": 0.5501086115837097, "learning_rate": 4.376470588235294e-05, "loss": 0.6147, "num_tokens": 39910620.0, "step": 63 }, { "epoch": 0.007567695400260139, "grad_norm": 0.643365740776062, "learning_rate": 4.447058823529412e-05, "loss": 0.4939, "num_tokens": 40546868.0, "step": 64 }, { "epoch": 0.007685940640889204, "grad_norm": 0.4394073188304901, "learning_rate": 4.517647058823529e-05, "loss": 0.4614, "num_tokens": 41182518.0, "step": 65 }, { "epoch": 0.007804185881518269, "grad_norm": 0.5629273056983948, "learning_rate": 4.5882352941176467e-05, "loss": 0.5081, "num_tokens": 41817083.0, "step": 66 }, { "epoch": 0.007922431122147333, "grad_norm": 0.6570345163345337, "learning_rate": 4.658823529411765e-05, "loss": 0.5063, "num_tokens": 42454047.0, "step": 67 }, { "epoch": 0.008040676362776398, "grad_norm": 0.44039052724838257, "learning_rate": 4.729411764705882e-05, "loss": 0.4853, "num_tokens": 43066339.0, "step": 68 }, { "epoch": 0.008158921603405462, "grad_norm": 0.6671131253242493, "learning_rate": 4.8e-05, "loss": 0.532, "num_tokens": 43703254.0, "step": 69 }, { "epoch": 0.008277166844034527, "grad_norm": 0.4714736044406891, "learning_rate": 4.8705882352941175e-05, "loss": 0.5248, "num_tokens": 44339105.0, "step": 70 }, { "epoch": 0.008395412084663592, "grad_norm": 0.43229085206985474, "learning_rate": 4.941176470588235e-05, "loss": 0.4623, "num_tokens": 44969251.0, "step": 71 }, { "epoch": 0.008513657325292657, "grad_norm": 0.5161452889442444, "learning_rate": 5.0117647058823536e-05, "loss": 0.5103, "num_tokens": 45605957.0, "step": 72 }, { "epoch": 0.008631902565921721, "grad_norm": 0.39117467403411865, "learning_rate": 5.082352941176471e-05, "loss": 0.4618, "num_tokens": 46242216.0, "step": 73 }, { "epoch": 0.008750147806550786, "grad_norm": 0.5116973519325256, "learning_rate": 5.152941176470588e-05, "loss": 0.5111, "num_tokens": 46879736.0, "step": 74 }, { "epoch": 0.00886839304717985, "grad_norm": 0.42920881509780884, "learning_rate": 5.2235294117647064e-05, "loss": 0.4705, "num_tokens": 47508876.0, "step": 75 }, { "epoch": 0.008986638287808916, "grad_norm": 0.4628167748451233, "learning_rate": 5.294117647058824e-05, "loss": 0.4731, "num_tokens": 48148110.0, "step": 76 }, { "epoch": 0.00910488352843798, "grad_norm": 0.4489562511444092, "learning_rate": 5.364705882352942e-05, "loss": 0.4551, "num_tokens": 48779111.0, "step": 77 }, { "epoch": 0.009223128769067045, "grad_norm": 0.4363901615142822, "learning_rate": 5.435294117647059e-05, "loss": 0.4606, "num_tokens": 49414520.0, "step": 78 }, { "epoch": 0.00934137400969611, "grad_norm": 0.5536901354789734, "learning_rate": 5.5058823529411765e-05, "loss": 0.4838, "num_tokens": 50047364.0, "step": 79 }, { "epoch": 0.009459619250325174, "grad_norm": 0.39150169491767883, "learning_rate": 5.5764705882352946e-05, "loss": 0.4495, "num_tokens": 50679909.0, "step": 80 }, { "epoch": 0.00957786449095424, "grad_norm": 0.6313601136207581, "learning_rate": 5.647058823529412e-05, "loss": 0.4621, "num_tokens": 51313449.0, "step": 81 }, { "epoch": 0.009696109731583304, "grad_norm": 0.4643058776855469, "learning_rate": 5.717647058823529e-05, "loss": 0.4763, "num_tokens": 51949695.0, "step": 82 }, { "epoch": 0.009814354972212369, "grad_norm": 0.5276260375976562, "learning_rate": 5.7882352941176474e-05, "loss": 0.488, "num_tokens": 52582348.0, "step": 83 }, { "epoch": 0.009932600212841433, "grad_norm": 0.47664791345596313, "learning_rate": 5.858823529411765e-05, "loss": 0.4893, "num_tokens": 53211571.0, "step": 84 }, { "epoch": 0.010050845453470498, "grad_norm": 0.4026750922203064, "learning_rate": 5.929411764705883e-05, "loss": 0.4828, "num_tokens": 53846179.0, "step": 85 }, { "epoch": 0.010169090694099563, "grad_norm": 0.4486851096153259, "learning_rate": 6e-05, "loss": 0.4724, "num_tokens": 54478244.0, "step": 86 }, { "epoch": 0.010287335934728628, "grad_norm": 0.4775157570838928, "learning_rate": 5.99999821355329e-05, "loss": 0.4917, "num_tokens": 55113336.0, "step": 87 }, { "epoch": 0.010405581175357692, "grad_norm": 0.38246527314186096, "learning_rate": 5.9999928542155226e-05, "loss": 0.4554, "num_tokens": 55750901.0, "step": 88 }, { "epoch": 0.010523826415986757, "grad_norm": 0.516433596611023, "learning_rate": 5.999983921993793e-05, "loss": 0.5338, "num_tokens": 56387320.0, "step": 89 }, { "epoch": 0.010642071656615822, "grad_norm": 0.39624902606010437, "learning_rate": 5.999971416899916e-05, "loss": 0.4585, "num_tokens": 57023503.0, "step": 90 }, { "epoch": 0.010760316897244887, "grad_norm": 0.48605141043663025, "learning_rate": 5.999955338950444e-05, "loss": 0.5225, "num_tokens": 57651196.0, "step": 91 }, { "epoch": 0.01087856213787395, "grad_norm": 0.48312604427337646, "learning_rate": 5.9999356881666505e-05, "loss": 0.478, "num_tokens": 58290170.0, "step": 92 }, { "epoch": 0.010996807378503014, "grad_norm": 0.39719170331954956, "learning_rate": 5.9999124645745405e-05, "loss": 0.444, "num_tokens": 58929585.0, "step": 93 }, { "epoch": 0.01111505261913208, "grad_norm": 0.39644670486450195, "learning_rate": 5.999885668204845e-05, "loss": 0.4479, "num_tokens": 59569034.0, "step": 94 }, { "epoch": 0.011233297859761144, "grad_norm": 0.3955964148044586, "learning_rate": 5.999855299093024e-05, "loss": 0.4797, "num_tokens": 60197509.0, "step": 95 }, { "epoch": 0.011351543100390209, "grad_norm": 0.3650868535041809, "learning_rate": 5.9998213572792634e-05, "loss": 0.4547, "num_tokens": 60830564.0, "step": 96 }, { "epoch": 0.011469788341019273, "grad_norm": 0.47804784774780273, "learning_rate": 5.9997838428084794e-05, "loss": 0.4838, "num_tokens": 61466570.0, "step": 97 }, { "epoch": 0.011588033581648338, "grad_norm": 0.3899470269680023, "learning_rate": 5.999742755730315e-05, "loss": 0.4589, "num_tokens": 62100797.0, "step": 98 }, { "epoch": 0.011706278822277403, "grad_norm": 0.36873653531074524, "learning_rate": 5.99969809609914e-05, "loss": 0.4666, "num_tokens": 62734274.0, "step": 99 }, { "epoch": 0.011824524062906468, "grad_norm": 0.4157959222793579, "learning_rate": 5.9996498639740514e-05, "loss": 0.4896, "num_tokens": 63350637.0, "step": 100 }, { "epoch": 0.011942769303535532, "grad_norm": 0.3493950366973877, "learning_rate": 5.999598059418875e-05, "loss": 0.4667, "num_tokens": 63989248.0, "step": 101 }, { "epoch": 0.012061014544164597, "grad_norm": 0.38584789633750916, "learning_rate": 5.999542682502165e-05, "loss": 0.4768, "num_tokens": 64623402.0, "step": 102 }, { "epoch": 0.012179259784793662, "grad_norm": 0.36720627546310425, "learning_rate": 5.999483733297199e-05, "loss": 0.4301, "num_tokens": 65263084.0, "step": 103 }, { "epoch": 0.012297505025422727, "grad_norm": 0.3988032341003418, "learning_rate": 5.999421211881985e-05, "loss": 0.4946, "num_tokens": 65895544.0, "step": 104 }, { "epoch": 0.012415750266051791, "grad_norm": 0.4363256096839905, "learning_rate": 5.9993551183392574e-05, "loss": 0.4513, "num_tokens": 66526292.0, "step": 105 }, { "epoch": 0.012533995506680856, "grad_norm": 0.38990506529808044, "learning_rate": 5.9992854527564775e-05, "loss": 0.4651, "num_tokens": 67163008.0, "step": 106 }, { "epoch": 0.012652240747309921, "grad_norm": 0.37964877486228943, "learning_rate": 5.999212215225833e-05, "loss": 0.4597, "num_tokens": 67799173.0, "step": 107 }, { "epoch": 0.012770485987938986, "grad_norm": 0.368556410074234, "learning_rate": 5.999135405844239e-05, "loss": 0.4414, "num_tokens": 68433626.0, "step": 108 }, { "epoch": 0.01288873122856805, "grad_norm": 0.4006548225879669, "learning_rate": 5.9990550247133366e-05, "loss": 0.4724, "num_tokens": 69071597.0, "step": 109 }, { "epoch": 0.013006976469197115, "grad_norm": 0.4608422815799713, "learning_rate": 5.9989710719394935e-05, "loss": 0.4951, "num_tokens": 69708272.0, "step": 110 }, { "epoch": 0.01312522170982618, "grad_norm": 0.3924156427383423, "learning_rate": 5.9988835476338044e-05, "loss": 0.4799, "num_tokens": 70309426.0, "step": 111 }, { "epoch": 0.013243466950455245, "grad_norm": 0.5403560996055603, "learning_rate": 5.998792451912089e-05, "loss": 0.5011, "num_tokens": 70946347.0, "step": 112 }, { "epoch": 0.01336171219108431, "grad_norm": 0.3970711827278137, "learning_rate": 5.998697784894894e-05, "loss": 0.4399, "num_tokens": 71578069.0, "step": 113 }, { "epoch": 0.013479957431713374, "grad_norm": 0.37565523386001587, "learning_rate": 5.9985995467074924e-05, "loss": 0.4449, "num_tokens": 72216733.0, "step": 114 }, { "epoch": 0.013598202672342439, "grad_norm": 0.4285765290260315, "learning_rate": 5.998497737479881e-05, "loss": 0.4691, "num_tokens": 72844928.0, "step": 115 }, { "epoch": 0.013716447912971504, "grad_norm": 0.4407060444355011, "learning_rate": 5.998392357346784e-05, "loss": 0.4696, "num_tokens": 73482837.0, "step": 116 }, { "epoch": 0.013834693153600568, "grad_norm": 0.3529186546802521, "learning_rate": 5.998283406447651e-05, "loss": 0.4928, "num_tokens": 74118196.0, "step": 117 }, { "epoch": 0.013952938394229633, "grad_norm": 0.3553130328655243, "learning_rate": 5.998170884926654e-05, "loss": 0.436, "num_tokens": 74756174.0, "step": 118 }, { "epoch": 0.014071183634858696, "grad_norm": 0.34798574447631836, "learning_rate": 5.998054792932695e-05, "loss": 0.4969, "num_tokens": 75393367.0, "step": 119 }, { "epoch": 0.014189428875487761, "grad_norm": 0.3467991054058075, "learning_rate": 5.997935130619394e-05, "loss": 0.4438, "num_tokens": 76025875.0, "step": 120 }, { "epoch": 0.014307674116116826, "grad_norm": 0.3396257758140564, "learning_rate": 5.997811898145103e-05, "loss": 0.4239, "num_tokens": 76657710.0, "step": 121 }, { "epoch": 0.01442591935674589, "grad_norm": 0.3784518837928772, "learning_rate": 5.997685095672892e-05, "loss": 0.4644, "num_tokens": 77289318.0, "step": 122 }, { "epoch": 0.014544164597374955, "grad_norm": 0.4056454002857208, "learning_rate": 5.997554723370561e-05, "loss": 0.4493, "num_tokens": 77923482.0, "step": 123 }, { "epoch": 0.01466240983800402, "grad_norm": 0.39920276403427124, "learning_rate": 5.997420781410627e-05, "loss": 0.4695, "num_tokens": 78551959.0, "step": 124 }, { "epoch": 0.014780655078633085, "grad_norm": 0.3787338137626648, "learning_rate": 5.997283269970338e-05, "loss": 0.4583, "num_tokens": 79181459.0, "step": 125 }, { "epoch": 0.01489890031926215, "grad_norm": 0.37710845470428467, "learning_rate": 5.99714218923166e-05, "loss": 0.4281, "num_tokens": 79816398.0, "step": 126 }, { "epoch": 0.015017145559891214, "grad_norm": 0.3817121386528015, "learning_rate": 5.996997539381286e-05, "loss": 0.462, "num_tokens": 80451198.0, "step": 127 }, { "epoch": 0.015135390800520279, "grad_norm": 0.360310822725296, "learning_rate": 5.996849320610628e-05, "loss": 0.4697, "num_tokens": 81087693.0, "step": 128 }, { "epoch": 0.015253636041149344, "grad_norm": 0.3428126573562622, "learning_rate": 5.996697533115825e-05, "loss": 0.4215, "num_tokens": 81722590.0, "step": 129 }, { "epoch": 0.015371881281778408, "grad_norm": 0.4063381850719452, "learning_rate": 5.9965421770977355e-05, "loss": 0.5232, "num_tokens": 82358511.0, "step": 130 }, { "epoch": 0.015490126522407473, "grad_norm": 0.3583608567714691, "learning_rate": 5.996383252761942e-05, "loss": 0.4426, "num_tokens": 82997292.0, "step": 131 }, { "epoch": 0.015608371763036538, "grad_norm": 0.3704170882701874, "learning_rate": 5.996220760318747e-05, "loss": 0.4344, "num_tokens": 83636002.0, "step": 132 }, { "epoch": 0.0157266170036656, "grad_norm": 0.33110740780830383, "learning_rate": 5.9960546999831764e-05, "loss": 0.4562, "num_tokens": 84266256.0, "step": 133 }, { "epoch": 0.015844862244294666, "grad_norm": 0.374468594789505, "learning_rate": 5.9958850719749756e-05, "loss": 0.4743, "num_tokens": 84901903.0, "step": 134 }, { "epoch": 0.01596310748492373, "grad_norm": 0.33168625831604004, "learning_rate": 5.995711876518614e-05, "loss": 0.4431, "num_tokens": 85532297.0, "step": 135 }, { "epoch": 0.016081352725552795, "grad_norm": 0.3238625228404999, "learning_rate": 5.9955351138432795e-05, "loss": 0.397, "num_tokens": 86159004.0, "step": 136 }, { "epoch": 0.01619959796618186, "grad_norm": 0.3387395441532135, "learning_rate": 5.995354784182881e-05, "loss": 0.4474, "num_tokens": 86791668.0, "step": 137 }, { "epoch": 0.016317843206810925, "grad_norm": 0.36932268738746643, "learning_rate": 5.995170887776047e-05, "loss": 0.4581, "num_tokens": 87415500.0, "step": 138 }, { "epoch": 0.01643608844743999, "grad_norm": 0.347771555185318, "learning_rate": 5.994983424866128e-05, "loss": 0.4258, "num_tokens": 88054250.0, "step": 139 }, { "epoch": 0.016554333688069054, "grad_norm": 0.3125162720680237, "learning_rate": 5.994792395701191e-05, "loss": 0.4077, "num_tokens": 88683640.0, "step": 140 }, { "epoch": 0.01667257892869812, "grad_norm": 0.36889344453811646, "learning_rate": 5.994597800534024e-05, "loss": 0.4097, "num_tokens": 89322994.0, "step": 141 }, { "epoch": 0.016790824169327184, "grad_norm": 0.3739296495914459, "learning_rate": 5.9943996396221355e-05, "loss": 0.4557, "num_tokens": 89959870.0, "step": 142 }, { "epoch": 0.01690906940995625, "grad_norm": 0.4370843172073364, "learning_rate": 5.9941979132277475e-05, "loss": 0.4581, "num_tokens": 90595155.0, "step": 143 }, { "epoch": 0.017027314650585313, "grad_norm": 0.3677273690700531, "learning_rate": 5.993992621617805e-05, "loss": 0.4606, "num_tokens": 91229434.0, "step": 144 }, { "epoch": 0.017145559891214378, "grad_norm": 0.39628374576568604, "learning_rate": 5.993783765063969e-05, "loss": 0.4316, "num_tokens": 91864729.0, "step": 145 }, { "epoch": 0.017263805131843443, "grad_norm": 0.326407790184021, "learning_rate": 5.993571343842618e-05, "loss": 0.4114, "num_tokens": 92497696.0, "step": 146 }, { "epoch": 0.017382050372472507, "grad_norm": 0.4197523891925812, "learning_rate": 5.993355358234848e-05, "loss": 0.4856, "num_tokens": 93136961.0, "step": 147 }, { "epoch": 0.017500295613101572, "grad_norm": 0.36535730957984924, "learning_rate": 5.9931358085264714e-05, "loss": 0.4677, "num_tokens": 93763389.0, "step": 148 }, { "epoch": 0.017618540853730637, "grad_norm": 0.3775778114795685, "learning_rate": 5.992912695008017e-05, "loss": 0.4677, "num_tokens": 94396681.0, "step": 149 }, { "epoch": 0.0177367860943597, "grad_norm": 0.3371557295322418, "learning_rate": 5.992686017974728e-05, "loss": 0.4695, "num_tokens": 95024975.0, "step": 150 }, { "epoch": 0.017855031334988766, "grad_norm": 0.3853801190853119, "learning_rate": 5.992455777726568e-05, "loss": 0.4755, "num_tokens": 95647926.0, "step": 151 }, { "epoch": 0.01797327657561783, "grad_norm": 0.35996532440185547, "learning_rate": 5.9922219745682086e-05, "loss": 0.4435, "num_tokens": 96284873.0, "step": 152 }, { "epoch": 0.018091521816246896, "grad_norm": 0.3817720413208008, "learning_rate": 5.991984608809043e-05, "loss": 0.4561, "num_tokens": 96921347.0, "step": 153 }, { "epoch": 0.01820976705687596, "grad_norm": 0.35337361693382263, "learning_rate": 5.991743680763175e-05, "loss": 0.4072, "num_tokens": 97555490.0, "step": 154 }, { "epoch": 0.018328012297505025, "grad_norm": 0.32107430696487427, "learning_rate": 5.9914991907494234e-05, "loss": 0.422, "num_tokens": 98193055.0, "step": 155 }, { "epoch": 0.01844625753813409, "grad_norm": 0.40560588240623474, "learning_rate": 5.991251139091319e-05, "loss": 0.402, "num_tokens": 98826897.0, "step": 156 }, { "epoch": 0.018564502778763155, "grad_norm": 0.36023592948913574, "learning_rate": 5.990999526117108e-05, "loss": 0.4751, "num_tokens": 99462013.0, "step": 157 }, { "epoch": 0.01868274801939222, "grad_norm": 0.3359028100967407, "learning_rate": 5.9907443521597494e-05, "loss": 0.4851, "num_tokens": 100097628.0, "step": 158 }, { "epoch": 0.018800993260021284, "grad_norm": 0.34057164192199707, "learning_rate": 5.990485617556912e-05, "loss": 0.4222, "num_tokens": 100734556.0, "step": 159 }, { "epoch": 0.01891923850065035, "grad_norm": 0.3408139646053314, "learning_rate": 5.990223322650977e-05, "loss": 0.4679, "num_tokens": 101371427.0, "step": 160 }, { "epoch": 0.019037483741279414, "grad_norm": 0.3269839584827423, "learning_rate": 5.98995746778904e-05, "loss": 0.4716, "num_tokens": 102008014.0, "step": 161 }, { "epoch": 0.01915572898190848, "grad_norm": 0.3538208305835724, "learning_rate": 5.989688053322904e-05, "loss": 0.4351, "num_tokens": 102647195.0, "step": 162 }, { "epoch": 0.019273974222537543, "grad_norm": 0.30472224950790405, "learning_rate": 5.989415079609082e-05, "loss": 0.4485, "num_tokens": 103272117.0, "step": 163 }, { "epoch": 0.019392219463166608, "grad_norm": 0.30045244097709656, "learning_rate": 5.989138547008799e-05, "loss": 0.4532, "num_tokens": 103911617.0, "step": 164 }, { "epoch": 0.019510464703795673, "grad_norm": 0.33233657479286194, "learning_rate": 5.988858455887992e-05, "loss": 0.465, "num_tokens": 104546025.0, "step": 165 }, { "epoch": 0.019628709944424737, "grad_norm": 0.3113420307636261, "learning_rate": 5.9885748066172985e-05, "loss": 0.4252, "num_tokens": 105181492.0, "step": 166 }, { "epoch": 0.019746955185053802, "grad_norm": 0.31303688883781433, "learning_rate": 5.988287599572075e-05, "loss": 0.4236, "num_tokens": 105818707.0, "step": 167 }, { "epoch": 0.019865200425682867, "grad_norm": 0.27217867970466614, "learning_rate": 5.987996835132378e-05, "loss": 0.3983, "num_tokens": 106447672.0, "step": 168 }, { "epoch": 0.01998344566631193, "grad_norm": 0.3025364875793457, "learning_rate": 5.9877025136829754e-05, "loss": 0.4365, "num_tokens": 107079280.0, "step": 169 }, { "epoch": 0.020101690906940996, "grad_norm": 0.3207264840602875, "learning_rate": 5.987404635613339e-05, "loss": 0.4392, "num_tokens": 107716692.0, "step": 170 }, { "epoch": 0.02021993614757006, "grad_norm": 0.3323531746864319, "learning_rate": 5.9871032013176516e-05, "loss": 0.4369, "num_tokens": 108346190.0, "step": 171 }, { "epoch": 0.020338181388199126, "grad_norm": 0.35194694995880127, "learning_rate": 5.986798211194798e-05, "loss": 0.4255, "num_tokens": 108972655.0, "step": 172 }, { "epoch": 0.02045642662882819, "grad_norm": 0.375436395406723, "learning_rate": 5.98648966564837e-05, "loss": 0.4504, "num_tokens": 109606533.0, "step": 173 }, { "epoch": 0.020574671869457255, "grad_norm": 0.29956355690956116, "learning_rate": 5.986177565086665e-05, "loss": 0.4354, "num_tokens": 110242667.0, "step": 174 }, { "epoch": 0.02069291711008632, "grad_norm": 0.43081754446029663, "learning_rate": 5.985861909922682e-05, "loss": 0.4901, "num_tokens": 110879342.0, "step": 175 }, { "epoch": 0.020811162350715385, "grad_norm": 0.3560563921928406, "learning_rate": 5.985542700574125e-05, "loss": 0.4409, "num_tokens": 111515578.0, "step": 176 }, { "epoch": 0.02092940759134445, "grad_norm": 0.33471059799194336, "learning_rate": 5.985219937463405e-05, "loss": 0.4004, "num_tokens": 112148074.0, "step": 177 }, { "epoch": 0.021047652831973514, "grad_norm": 0.36091309785842896, "learning_rate": 5.98489362101763e-05, "loss": 0.4887, "num_tokens": 112784825.0, "step": 178 }, { "epoch": 0.02116589807260258, "grad_norm": 0.3456615209579468, "learning_rate": 5.984563751668612e-05, "loss": 0.4636, "num_tokens": 113415511.0, "step": 179 }, { "epoch": 0.021284143313231644, "grad_norm": 0.3597208559513092, "learning_rate": 5.984230329852868e-05, "loss": 0.4634, "num_tokens": 114014233.0, "step": 180 }, { "epoch": 0.02140238855386071, "grad_norm": 0.3545994162559509, "learning_rate": 5.98389335601161e-05, "loss": 0.4449, "num_tokens": 114646091.0, "step": 181 }, { "epoch": 0.021520633794489773, "grad_norm": 0.35723739862442017, "learning_rate": 5.9835528305907545e-05, "loss": 0.4196, "num_tokens": 115248849.0, "step": 182 }, { "epoch": 0.021638879035118838, "grad_norm": 0.3535923659801483, "learning_rate": 5.983208754040917e-05, "loss": 0.4149, "num_tokens": 115882858.0, "step": 183 }, { "epoch": 0.0217571242757479, "grad_norm": 0.3566373884677887, "learning_rate": 5.982861126817412e-05, "loss": 0.4492, "num_tokens": 116516321.0, "step": 184 }, { "epoch": 0.021875369516376964, "grad_norm": 0.32124945521354675, "learning_rate": 5.9825099493802524e-05, "loss": 0.425, "num_tokens": 117149980.0, "step": 185 }, { "epoch": 0.02199361475700603, "grad_norm": 0.34338271617889404, "learning_rate": 5.982155222194149e-05, "loss": 0.4097, "num_tokens": 117787055.0, "step": 186 }, { "epoch": 0.022111859997635094, "grad_norm": 0.37264284491539, "learning_rate": 5.98179694572851e-05, "loss": 0.4326, "num_tokens": 118414231.0, "step": 187 }, { "epoch": 0.02223010523826416, "grad_norm": 0.3190022110939026, "learning_rate": 5.981435120457439e-05, "loss": 0.4481, "num_tokens": 119042723.0, "step": 188 }, { "epoch": 0.022348350478893223, "grad_norm": 0.32043132185935974, "learning_rate": 5.98106974685974e-05, "loss": 0.476, "num_tokens": 119680244.0, "step": 189 }, { "epoch": 0.022466595719522288, "grad_norm": 0.42416223883628845, "learning_rate": 5.980700825418908e-05, "loss": 0.4332, "num_tokens": 120319137.0, "step": 190 }, { "epoch": 0.022584840960151353, "grad_norm": 0.33873337507247925, "learning_rate": 5.9803283566231345e-05, "loss": 0.4425, "num_tokens": 120954282.0, "step": 191 }, { "epoch": 0.022703086200780417, "grad_norm": 0.4517790675163269, "learning_rate": 5.979952340965306e-05, "loss": 0.4417, "num_tokens": 121589180.0, "step": 192 }, { "epoch": 0.022821331441409482, "grad_norm": 0.3599424958229065, "learning_rate": 5.9795727789430005e-05, "loss": 0.4758, "num_tokens": 122226045.0, "step": 193 }, { "epoch": 0.022939576682038547, "grad_norm": 0.4032820761203766, "learning_rate": 5.979189671058491e-05, "loss": 0.417, "num_tokens": 122863565.0, "step": 194 }, { "epoch": 0.02305782192266761, "grad_norm": 0.3690061867237091, "learning_rate": 5.9788030178187414e-05, "loss": 0.4114, "num_tokens": 123498631.0, "step": 195 }, { "epoch": 0.023176067163296676, "grad_norm": 0.3435264527797699, "learning_rate": 5.978412819735408e-05, "loss": 0.4407, "num_tokens": 124134670.0, "step": 196 }, { "epoch": 0.02329431240392574, "grad_norm": 0.33528485894203186, "learning_rate": 5.978019077324837e-05, "loss": 0.4188, "num_tokens": 124770499.0, "step": 197 }, { "epoch": 0.023412557644554806, "grad_norm": 0.3990873694419861, "learning_rate": 5.9776217911080664e-05, "loss": 0.4723, "num_tokens": 125409234.0, "step": 198 }, { "epoch": 0.02353080288518387, "grad_norm": 0.3296034038066864, "learning_rate": 5.9772209616108206e-05, "loss": 0.4415, "num_tokens": 126041885.0, "step": 199 }, { "epoch": 0.023649048125812935, "grad_norm": 0.36493048071861267, "learning_rate": 5.976816589363516e-05, "loss": 0.4607, "num_tokens": 126679673.0, "step": 200 }, { "epoch": 0.023767293366442, "grad_norm": 0.29360583424568176, "learning_rate": 5.976408674901256e-05, "loss": 0.4532, "num_tokens": 127318553.0, "step": 201 }, { "epoch": 0.023885538607071065, "grad_norm": 0.33180269598960876, "learning_rate": 5.9759972187638324e-05, "loss": 0.4232, "num_tokens": 127957565.0, "step": 202 }, { "epoch": 0.02400378384770013, "grad_norm": 0.3688262403011322, "learning_rate": 5.97558222149572e-05, "loss": 0.5016, "num_tokens": 128588826.0, "step": 203 }, { "epoch": 0.024122029088329194, "grad_norm": 0.31288033723831177, "learning_rate": 5.9751636836460835e-05, "loss": 0.4493, "num_tokens": 129222173.0, "step": 204 }, { "epoch": 0.02424027432895826, "grad_norm": 0.327677458524704, "learning_rate": 5.974741605768772e-05, "loss": 0.4646, "num_tokens": 129857385.0, "step": 205 }, { "epoch": 0.024358519569587324, "grad_norm": 0.2778308391571045, "learning_rate": 5.974315988422318e-05, "loss": 0.3875, "num_tokens": 130486516.0, "step": 206 }, { "epoch": 0.02447676481021639, "grad_norm": 0.2977498769760132, "learning_rate": 5.973886832169938e-05, "loss": 0.4597, "num_tokens": 131117306.0, "step": 207 }, { "epoch": 0.024595010050845453, "grad_norm": 0.2994617521762848, "learning_rate": 5.9734541375795316e-05, "loss": 0.4803, "num_tokens": 131749433.0, "step": 208 }, { "epoch": 0.024713255291474518, "grad_norm": 0.278374046087265, "learning_rate": 5.973017905223682e-05, "loss": 0.4307, "num_tokens": 132382579.0, "step": 209 }, { "epoch": 0.024831500532103583, "grad_norm": 0.31233716011047363, "learning_rate": 5.9725781356796527e-05, "loss": 0.4442, "num_tokens": 133017061.0, "step": 210 }, { "epoch": 0.024949745772732648, "grad_norm": 0.2855742871761322, "learning_rate": 5.972134829529387e-05, "loss": 0.435, "num_tokens": 133645920.0, "step": 211 }, { "epoch": 0.025067991013361712, "grad_norm": 0.278287410736084, "learning_rate": 5.971687987359511e-05, "loss": 0.4745, "num_tokens": 134277976.0, "step": 212 }, { "epoch": 0.025186236253990777, "grad_norm": 0.2984735667705536, "learning_rate": 5.971237609761326e-05, "loss": 0.4529, "num_tokens": 134913386.0, "step": 213 }, { "epoch": 0.025304481494619842, "grad_norm": 0.2837817370891571, "learning_rate": 5.970783697330815e-05, "loss": 0.438, "num_tokens": 135544399.0, "step": 214 }, { "epoch": 0.025422726735248907, "grad_norm": 0.30570748448371887, "learning_rate": 5.970326250668638e-05, "loss": 0.4475, "num_tokens": 136182707.0, "step": 215 }, { "epoch": 0.02554097197587797, "grad_norm": 0.2581757605075836, "learning_rate": 5.9698652703801296e-05, "loss": 0.4424, "num_tokens": 136819793.0, "step": 216 }, { "epoch": 0.025659217216507036, "grad_norm": 0.33199775218963623, "learning_rate": 5.969400757075305e-05, "loss": 0.4476, "num_tokens": 137459203.0, "step": 217 }, { "epoch": 0.0257774624571361, "grad_norm": 0.291856586933136, "learning_rate": 5.968932711368848e-05, "loss": 0.4408, "num_tokens": 138086108.0, "step": 218 }, { "epoch": 0.025895707697765166, "grad_norm": 0.26909223198890686, "learning_rate": 5.9684611338801244e-05, "loss": 0.4308, "num_tokens": 138717361.0, "step": 219 }, { "epoch": 0.02601395293839423, "grad_norm": 0.35612818598747253, "learning_rate": 5.967986025233169e-05, "loss": 0.4144, "num_tokens": 139353768.0, "step": 220 }, { "epoch": 0.026132198179023295, "grad_norm": 0.3378983736038208, "learning_rate": 5.967507386056688e-05, "loss": 0.4774, "num_tokens": 139986012.0, "step": 221 }, { "epoch": 0.02625044341965236, "grad_norm": 0.3200989365577698, "learning_rate": 5.9670252169840635e-05, "loss": 0.4127, "num_tokens": 140618557.0, "step": 222 }, { "epoch": 0.026368688660281425, "grad_norm": 0.31645864248275757, "learning_rate": 5.966539518653346e-05, "loss": 0.4037, "num_tokens": 141253328.0, "step": 223 }, { "epoch": 0.02648693390091049, "grad_norm": 0.30323508381843567, "learning_rate": 5.966050291707257e-05, "loss": 0.442, "num_tokens": 141885327.0, "step": 224 }, { "epoch": 0.026605179141539554, "grad_norm": 0.36218684911727905, "learning_rate": 5.965557536793189e-05, "loss": 0.4558, "num_tokens": 142518760.0, "step": 225 }, { "epoch": 0.02672342438216862, "grad_norm": 0.3109579384326935, "learning_rate": 5.965061254563201e-05, "loss": 0.462, "num_tokens": 143154500.0, "step": 226 }, { "epoch": 0.026841669622797684, "grad_norm": 0.29776236414909363, "learning_rate": 5.96456144567402e-05, "loss": 0.4467, "num_tokens": 143792062.0, "step": 227 }, { "epoch": 0.026959914863426748, "grad_norm": 0.298713743686676, "learning_rate": 5.964058110787039e-05, "loss": 0.3959, "num_tokens": 144424810.0, "step": 228 }, { "epoch": 0.027078160104055813, "grad_norm": 0.30076488852500916, "learning_rate": 5.963551250568319e-05, "loss": 0.4207, "num_tokens": 145021445.0, "step": 229 }, { "epoch": 0.027196405344684878, "grad_norm": 0.2877078652381897, "learning_rate": 5.963040865688585e-05, "loss": 0.4733, "num_tokens": 145656556.0, "step": 230 }, { "epoch": 0.027314650585313942, "grad_norm": 0.3390825092792511, "learning_rate": 5.962526956823225e-05, "loss": 0.4197, "num_tokens": 146293020.0, "step": 231 }, { "epoch": 0.027432895825943007, "grad_norm": 0.3341982364654541, "learning_rate": 5.962009524652293e-05, "loss": 0.4406, "num_tokens": 146922200.0, "step": 232 }, { "epoch": 0.027551141066572072, "grad_norm": 0.3136695921421051, "learning_rate": 5.961488569860504e-05, "loss": 0.4847, "num_tokens": 147560483.0, "step": 233 }, { "epoch": 0.027669386307201137, "grad_norm": 0.3606899380683899, "learning_rate": 5.9609640931372325e-05, "loss": 0.4268, "num_tokens": 148198475.0, "step": 234 }, { "epoch": 0.0277876315478302, "grad_norm": 0.28975990414619446, "learning_rate": 5.9604360951765175e-05, "loss": 0.4426, "num_tokens": 148811827.0, "step": 235 }, { "epoch": 0.027905876788459266, "grad_norm": 0.302625834941864, "learning_rate": 5.959904576677054e-05, "loss": 0.411, "num_tokens": 149446588.0, "step": 236 }, { "epoch": 0.028024122029088328, "grad_norm": 0.26446977257728577, "learning_rate": 5.959369538342198e-05, "loss": 0.3921, "num_tokens": 150080880.0, "step": 237 }, { "epoch": 0.028142367269717392, "grad_norm": 0.28426593542099, "learning_rate": 5.9588309808799606e-05, "loss": 0.462, "num_tokens": 150720517.0, "step": 238 }, { "epoch": 0.028260612510346457, "grad_norm": 0.31598517298698425, "learning_rate": 5.958288905003014e-05, "loss": 0.4303, "num_tokens": 151354865.0, "step": 239 }, { "epoch": 0.028378857750975522, "grad_norm": 0.2635610103607178, "learning_rate": 5.957743311428682e-05, "loss": 0.3943, "num_tokens": 151989302.0, "step": 240 }, { "epoch": 0.028497102991604586, "grad_norm": 0.3268015682697296, "learning_rate": 5.957194200878948e-05, "loss": 0.432, "num_tokens": 152622644.0, "step": 241 }, { "epoch": 0.02861534823223365, "grad_norm": 0.26707592606544495, "learning_rate": 5.9566415740804425e-05, "loss": 0.4668, "num_tokens": 153254978.0, "step": 242 }, { "epoch": 0.028733593472862716, "grad_norm": 0.2767883837223053, "learning_rate": 5.956085431764456e-05, "loss": 0.4062, "num_tokens": 153888439.0, "step": 243 }, { "epoch": 0.02885183871349178, "grad_norm": 0.252451628446579, "learning_rate": 5.955525774666927e-05, "loss": 0.3823, "num_tokens": 154521234.0, "step": 244 }, { "epoch": 0.028970083954120845, "grad_norm": 0.2643362283706665, "learning_rate": 5.954962603528448e-05, "loss": 0.4316, "num_tokens": 155156546.0, "step": 245 }, { "epoch": 0.02908832919474991, "grad_norm": 0.2746330499649048, "learning_rate": 5.954395919094257e-05, "loss": 0.4321, "num_tokens": 155791974.0, "step": 246 }, { "epoch": 0.029206574435378975, "grad_norm": 0.24371886253356934, "learning_rate": 5.953825722114247e-05, "loss": 0.3914, "num_tokens": 156417737.0, "step": 247 }, { "epoch": 0.02932481967600804, "grad_norm": 0.2923094928264618, "learning_rate": 5.953252013342955e-05, "loss": 0.4618, "num_tokens": 157054324.0, "step": 248 }, { "epoch": 0.029443064916637104, "grad_norm": 0.27547353506088257, "learning_rate": 5.9526747935395664e-05, "loss": 0.4488, "num_tokens": 157686875.0, "step": 249 }, { "epoch": 0.02956131015726617, "grad_norm": 0.31762856245040894, "learning_rate": 5.9520940634679115e-05, "loss": 0.4432, "num_tokens": 158318798.0, "step": 250 }, { "epoch": 0.029679555397895234, "grad_norm": 0.2715708911418915, "learning_rate": 5.951509823896469e-05, "loss": 0.457, "num_tokens": 158956587.0, "step": 251 }, { "epoch": 0.0297978006385243, "grad_norm": 0.2930499017238617, "learning_rate": 5.950922075598357e-05, "loss": 0.3783, "num_tokens": 159591859.0, "step": 252 }, { "epoch": 0.029916045879153363, "grad_norm": 0.29419347643852234, "learning_rate": 5.95033081935134e-05, "loss": 0.4169, "num_tokens": 160224798.0, "step": 253 }, { "epoch": 0.030034291119782428, "grad_norm": 0.2881893217563629, "learning_rate": 5.949736055937826e-05, "loss": 0.4212, "num_tokens": 160855622.0, "step": 254 }, { "epoch": 0.030152536360411493, "grad_norm": 0.29593420028686523, "learning_rate": 5.94913778614486e-05, "loss": 0.3898, "num_tokens": 161489536.0, "step": 255 }, { "epoch": 0.030270781601040558, "grad_norm": 0.31856146454811096, "learning_rate": 5.948536010764128e-05, "loss": 0.476, "num_tokens": 162123541.0, "step": 256 }, { "epoch": 0.030389026841669622, "grad_norm": 0.2588259279727936, "learning_rate": 5.947930730591957e-05, "loss": 0.4321, "num_tokens": 162754194.0, "step": 257 }, { "epoch": 0.030507272082298687, "grad_norm": 0.2927051782608032, "learning_rate": 5.94732194642931e-05, "loss": 0.4186, "num_tokens": 163389806.0, "step": 258 }, { "epoch": 0.030625517322927752, "grad_norm": 0.24726012349128723, "learning_rate": 5.946709659081787e-05, "loss": 0.4072, "num_tokens": 164022489.0, "step": 259 }, { "epoch": 0.030743762563556817, "grad_norm": 0.24236567318439484, "learning_rate": 5.946093869359624e-05, "loss": 0.4158, "num_tokens": 164655264.0, "step": 260 }, { "epoch": 0.03086200780418588, "grad_norm": 0.2743130922317505, "learning_rate": 5.945474578077693e-05, "loss": 0.408, "num_tokens": 165293663.0, "step": 261 }, { "epoch": 0.030980253044814946, "grad_norm": 0.26922914385795593, "learning_rate": 5.944851786055497e-05, "loss": 0.4308, "num_tokens": 165930344.0, "step": 262 }, { "epoch": 0.03109849828544401, "grad_norm": 0.27107909321784973, "learning_rate": 5.944225494117174e-05, "loss": 0.3898, "num_tokens": 166565332.0, "step": 263 }, { "epoch": 0.031216743526073076, "grad_norm": 0.2749185264110565, "learning_rate": 5.9435957030914914e-05, "loss": 0.4336, "num_tokens": 167200224.0, "step": 264 }, { "epoch": 0.03133498876670214, "grad_norm": 0.25396469235420227, "learning_rate": 5.942962413811848e-05, "loss": 0.4244, "num_tokens": 167832144.0, "step": 265 }, { "epoch": 0.0314532340073312, "grad_norm": 0.2632233202457428, "learning_rate": 5.942325627116272e-05, "loss": 0.4059, "num_tokens": 168469994.0, "step": 266 }, { "epoch": 0.03157147924796027, "grad_norm": 0.27231499552726746, "learning_rate": 5.941685343847419e-05, "loss": 0.4389, "num_tokens": 169106098.0, "step": 267 }, { "epoch": 0.03168972448858933, "grad_norm": 0.24510671198368073, "learning_rate": 5.941041564852572e-05, "loss": 0.3807, "num_tokens": 169737993.0, "step": 268 }, { "epoch": 0.0318079697292184, "grad_norm": 0.2536793351173401, "learning_rate": 5.940394290983639e-05, "loss": 0.3914, "num_tokens": 170368812.0, "step": 269 }, { "epoch": 0.03192621496984746, "grad_norm": 0.2706405818462372, "learning_rate": 5.939743523097154e-05, "loss": 0.4651, "num_tokens": 171006424.0, "step": 270 }, { "epoch": 0.03204446021047653, "grad_norm": 0.2482193559408188, "learning_rate": 5.939089262054276e-05, "loss": 0.4192, "num_tokens": 171645721.0, "step": 271 }, { "epoch": 0.03216270545110559, "grad_norm": 0.26313939690589905, "learning_rate": 5.9384315087207816e-05, "loss": 0.4055, "num_tokens": 172281075.0, "step": 272 }, { "epoch": 0.03228095069173466, "grad_norm": 0.23074069619178772, "learning_rate": 5.937770263967074e-05, "loss": 0.3695, "num_tokens": 172914063.0, "step": 273 }, { "epoch": 0.03239919593236372, "grad_norm": 0.2610953152179718, "learning_rate": 5.937105528668173e-05, "loss": 0.4333, "num_tokens": 173548325.0, "step": 274 }, { "epoch": 0.03251744117299279, "grad_norm": 0.2842814326286316, "learning_rate": 5.9364373037037196e-05, "loss": 0.431, "num_tokens": 174183975.0, "step": 275 }, { "epoch": 0.03263568641362185, "grad_norm": 0.25221580266952515, "learning_rate": 5.935765589957971e-05, "loss": 0.4047, "num_tokens": 174818549.0, "step": 276 }, { "epoch": 0.03275393165425092, "grad_norm": 0.2278755009174347, "learning_rate": 5.935090388319803e-05, "loss": 0.3917, "num_tokens": 175451113.0, "step": 277 }, { "epoch": 0.03287217689487998, "grad_norm": 0.32434311509132385, "learning_rate": 5.9344116996827055e-05, "loss": 0.4396, "num_tokens": 176087116.0, "step": 278 }, { "epoch": 0.03299042213550905, "grad_norm": 0.2737249433994293, "learning_rate": 5.933729524944781e-05, "loss": 0.4562, "num_tokens": 176724483.0, "step": 279 }, { "epoch": 0.03310866737613811, "grad_norm": 0.2810339033603668, "learning_rate": 5.9330438650087495e-05, "loss": 0.4315, "num_tokens": 177361164.0, "step": 280 }, { "epoch": 0.033226912616767176, "grad_norm": 0.2738715708255768, "learning_rate": 5.932354720781941e-05, "loss": 0.3845, "num_tokens": 177999098.0, "step": 281 }, { "epoch": 0.03334515785739624, "grad_norm": 0.28221046924591064, "learning_rate": 5.931662093176293e-05, "loss": 0.4291, "num_tokens": 178627820.0, "step": 282 }, { "epoch": 0.033463403098025306, "grad_norm": 0.25607389211654663, "learning_rate": 5.930965983108357e-05, "loss": 0.4149, "num_tokens": 179259176.0, "step": 283 }, { "epoch": 0.03358164833865437, "grad_norm": 0.28264743089675903, "learning_rate": 5.930266391499291e-05, "loss": 0.435, "num_tokens": 179890905.0, "step": 284 }, { "epoch": 0.033699893579283435, "grad_norm": 0.2758411467075348, "learning_rate": 5.929563319274862e-05, "loss": 0.4237, "num_tokens": 180518445.0, "step": 285 }, { "epoch": 0.0338181388199125, "grad_norm": 0.32677340507507324, "learning_rate": 5.9288567673654386e-05, "loss": 0.4559, "num_tokens": 181154667.0, "step": 286 }, { "epoch": 0.033936384060541565, "grad_norm": 0.2823239862918854, "learning_rate": 5.928146736705998e-05, "loss": 0.4384, "num_tokens": 181789184.0, "step": 287 }, { "epoch": 0.034054629301170626, "grad_norm": 0.2830294668674469, "learning_rate": 5.927433228236119e-05, "loss": 0.4218, "num_tokens": 182421462.0, "step": 288 }, { "epoch": 0.034172874541799694, "grad_norm": 0.30646511912345886, "learning_rate": 5.926716242899982e-05, "loss": 0.4081, "num_tokens": 183056216.0, "step": 289 }, { "epoch": 0.034291119782428756, "grad_norm": 0.27787357568740845, "learning_rate": 5.925995781646371e-05, "loss": 0.4386, "num_tokens": 183695586.0, "step": 290 }, { "epoch": 0.034409365023057824, "grad_norm": 0.3329799175262451, "learning_rate": 5.925271845428668e-05, "loss": 0.4078, "num_tokens": 184323170.0, "step": 291 }, { "epoch": 0.034527610263686885, "grad_norm": 0.32851293683052063, "learning_rate": 5.924544435204852e-05, "loss": 0.4838, "num_tokens": 184923402.0, "step": 292 }, { "epoch": 0.03464585550431595, "grad_norm": 0.2610059082508087, "learning_rate": 5.9238135519375016e-05, "loss": 0.4069, "num_tokens": 185562179.0, "step": 293 }, { "epoch": 0.034764100744945015, "grad_norm": 0.2528479993343353, "learning_rate": 5.9230791965937885e-05, "loss": 0.4184, "num_tokens": 186196488.0, "step": 294 }, { "epoch": 0.03488234598557408, "grad_norm": 0.27362382411956787, "learning_rate": 5.9223413701454826e-05, "loss": 0.4517, "num_tokens": 186795333.0, "step": 295 }, { "epoch": 0.035000591226203144, "grad_norm": 0.23410287499427795, "learning_rate": 5.921600073568944e-05, "loss": 0.3923, "num_tokens": 187431540.0, "step": 296 }, { "epoch": 0.03511883646683221, "grad_norm": 0.2733013927936554, "learning_rate": 5.920855307845126e-05, "loss": 0.4145, "num_tokens": 188067882.0, "step": 297 }, { "epoch": 0.035237081707461274, "grad_norm": 0.23181550204753876, "learning_rate": 5.920107073959573e-05, "loss": 0.452, "num_tokens": 188704706.0, "step": 298 }, { "epoch": 0.03535532694809034, "grad_norm": 0.26053255796432495, "learning_rate": 5.9193553729024175e-05, "loss": 0.3939, "num_tokens": 189339944.0, "step": 299 }, { "epoch": 0.0354735721887194, "grad_norm": 0.3104410767555237, "learning_rate": 5.918600205668382e-05, "loss": 0.4791, "num_tokens": 189977157.0, "step": 300 }, { "epoch": 0.03559181742934847, "grad_norm": 0.2637656629085541, "learning_rate": 5.917841573256773e-05, "loss": 0.4237, "num_tokens": 190586411.0, "step": 301 }, { "epoch": 0.03571006266997753, "grad_norm": 0.28651612997055054, "learning_rate": 5.917079476671486e-05, "loss": 0.4401, "num_tokens": 191219345.0, "step": 302 }, { "epoch": 0.0358283079106066, "grad_norm": 0.22322750091552734, "learning_rate": 5.916313916920998e-05, "loss": 0.4305, "num_tokens": 191855183.0, "step": 303 }, { "epoch": 0.03594655315123566, "grad_norm": 0.2441805899143219, "learning_rate": 5.915544895018368e-05, "loss": 0.4012, "num_tokens": 192485493.0, "step": 304 }, { "epoch": 0.03606479839186473, "grad_norm": 0.24022462964057922, "learning_rate": 5.914772411981241e-05, "loss": 0.3959, "num_tokens": 193122797.0, "step": 305 }, { "epoch": 0.03618304363249379, "grad_norm": 0.24951285123825073, "learning_rate": 5.9139964688318375e-05, "loss": 0.4222, "num_tokens": 193759752.0, "step": 306 }, { "epoch": 0.03630128887312286, "grad_norm": 0.24699069559574127, "learning_rate": 5.913217066596957e-05, "loss": 0.4144, "num_tokens": 194394288.0, "step": 307 }, { "epoch": 0.03641953411375192, "grad_norm": 0.2645781636238098, "learning_rate": 5.912434206307979e-05, "loss": 0.4124, "num_tokens": 195027778.0, "step": 308 }, { "epoch": 0.03653777935438099, "grad_norm": 0.24733687937259674, "learning_rate": 5.9116478890008596e-05, "loss": 0.3443, "num_tokens": 195660013.0, "step": 309 }, { "epoch": 0.03665602459501005, "grad_norm": 0.29403185844421387, "learning_rate": 5.910858115716124e-05, "loss": 0.4319, "num_tokens": 196290478.0, "step": 310 }, { "epoch": 0.03677426983563912, "grad_norm": 0.2918471395969391, "learning_rate": 5.910064887498876e-05, "loss": 0.4498, "num_tokens": 196927060.0, "step": 311 }, { "epoch": 0.03689251507626818, "grad_norm": 0.2860783040523529, "learning_rate": 5.9092682053987896e-05, "loss": 0.4462, "num_tokens": 197557085.0, "step": 312 }, { "epoch": 0.03701076031689725, "grad_norm": 0.30764201283454895, "learning_rate": 5.90846807047011e-05, "loss": 0.4329, "num_tokens": 198189535.0, "step": 313 }, { "epoch": 0.03712900555752631, "grad_norm": 0.2727659046649933, "learning_rate": 5.90766448377165e-05, "loss": 0.45, "num_tokens": 198820737.0, "step": 314 }, { "epoch": 0.03724725079815538, "grad_norm": 0.3000715374946594, "learning_rate": 5.906857446366791e-05, "loss": 0.407, "num_tokens": 199451792.0, "step": 315 }, { "epoch": 0.03736549603878444, "grad_norm": 0.2878008186817169, "learning_rate": 5.906046959323481e-05, "loss": 0.4122, "num_tokens": 200086012.0, "step": 316 }, { "epoch": 0.0374837412794135, "grad_norm": 0.26824718713760376, "learning_rate": 5.9052330237142316e-05, "loss": 0.4039, "num_tokens": 200723155.0, "step": 317 }, { "epoch": 0.03760198652004257, "grad_norm": 0.2783738076686859, "learning_rate": 5.90441564061612e-05, "loss": 0.4265, "num_tokens": 201362056.0, "step": 318 }, { "epoch": 0.03772023176067163, "grad_norm": 0.2819783091545105, "learning_rate": 5.9035948111107846e-05, "loss": 0.4094, "num_tokens": 201996006.0, "step": 319 }, { "epoch": 0.0378384770013007, "grad_norm": 0.24729867279529572, "learning_rate": 5.902770536284422e-05, "loss": 0.3785, "num_tokens": 202591057.0, "step": 320 }, { "epoch": 0.03795672224192976, "grad_norm": 0.26701050996780396, "learning_rate": 5.9019428172277925e-05, "loss": 0.3925, "num_tokens": 203228812.0, "step": 321 }, { "epoch": 0.03807496748255883, "grad_norm": 0.2603227198123932, "learning_rate": 5.901111655036209e-05, "loss": 0.423, "num_tokens": 203868294.0, "step": 322 }, { "epoch": 0.03819321272318789, "grad_norm": 0.23120039701461792, "learning_rate": 5.900277050809545e-05, "loss": 0.3883, "num_tokens": 204499481.0, "step": 323 }, { "epoch": 0.03831145796381696, "grad_norm": 0.25170889496803284, "learning_rate": 5.899439005652228e-05, "loss": 0.4032, "num_tokens": 205133494.0, "step": 324 }, { "epoch": 0.03842970320444602, "grad_norm": 0.2173241525888443, "learning_rate": 5.898597520673236e-05, "loss": 0.4166, "num_tokens": 205767024.0, "step": 325 }, { "epoch": 0.038547948445075086, "grad_norm": 0.2951737344264984, "learning_rate": 5.897752596986101e-05, "loss": 0.4365, "num_tokens": 206400825.0, "step": 326 }, { "epoch": 0.03866619368570415, "grad_norm": 0.2363547384738922, "learning_rate": 5.896904235708908e-05, "loss": 0.4203, "num_tokens": 207040239.0, "step": 327 }, { "epoch": 0.038784438926333216, "grad_norm": 0.26757577061653137, "learning_rate": 5.896052437964284e-05, "loss": 0.4025, "num_tokens": 207676732.0, "step": 328 }, { "epoch": 0.03890268416696228, "grad_norm": 0.2474527806043625, "learning_rate": 5.895197204879412e-05, "loss": 0.3947, "num_tokens": 208313021.0, "step": 329 }, { "epoch": 0.039020929407591345, "grad_norm": 0.27514564990997314, "learning_rate": 5.8943385375860145e-05, "loss": 0.455, "num_tokens": 208946344.0, "step": 330 }, { "epoch": 0.03913917464822041, "grad_norm": 0.2627350091934204, "learning_rate": 5.8934764372203596e-05, "loss": 0.4763, "num_tokens": 209577542.0, "step": 331 }, { "epoch": 0.039257419888849475, "grad_norm": 0.2800849676132202, "learning_rate": 5.892610904923262e-05, "loss": 0.4683, "num_tokens": 210213046.0, "step": 332 }, { "epoch": 0.039375665129478536, "grad_norm": 0.2469765841960907, "learning_rate": 5.891741941840074e-05, "loss": 0.4216, "num_tokens": 210848130.0, "step": 333 }, { "epoch": 0.039493910370107604, "grad_norm": 0.2550566494464874, "learning_rate": 5.890869549120689e-05, "loss": 0.4104, "num_tokens": 211482121.0, "step": 334 }, { "epoch": 0.039612155610736666, "grad_norm": 0.26573434472084045, "learning_rate": 5.8899937279195384e-05, "loss": 0.4571, "num_tokens": 212120668.0, "step": 335 }, { "epoch": 0.039730400851365734, "grad_norm": 0.23955504596233368, "learning_rate": 5.889114479395592e-05, "loss": 0.4236, "num_tokens": 212756102.0, "step": 336 }, { "epoch": 0.039848646091994795, "grad_norm": 0.27759063243865967, "learning_rate": 5.888231804712353e-05, "loss": 0.4502, "num_tokens": 213392130.0, "step": 337 }, { "epoch": 0.03996689133262386, "grad_norm": 0.24142037332057953, "learning_rate": 5.887345705037861e-05, "loss": 0.3996, "num_tokens": 214021365.0, "step": 338 }, { "epoch": 0.040085136573252925, "grad_norm": 0.2325737476348877, "learning_rate": 5.8864561815446864e-05, "loss": 0.4149, "num_tokens": 214651340.0, "step": 339 }, { "epoch": 0.04020338181388199, "grad_norm": 0.2732045352458954, "learning_rate": 5.885563235409927e-05, "loss": 0.4406, "num_tokens": 215284267.0, "step": 340 }, { "epoch": 0.040321627054511054, "grad_norm": 0.3444238305091858, "learning_rate": 5.884666867815216e-05, "loss": 0.4533, "num_tokens": 215920647.0, "step": 341 }, { "epoch": 0.04043987229514012, "grad_norm": 0.2464529573917389, "learning_rate": 5.88376707994671e-05, "loss": 0.3865, "num_tokens": 216555518.0, "step": 342 }, { "epoch": 0.040558117535769184, "grad_norm": 0.26775339245796204, "learning_rate": 5.882863872995093e-05, "loss": 0.3875, "num_tokens": 217191272.0, "step": 343 }, { "epoch": 0.04067636277639825, "grad_norm": 0.25909754633903503, "learning_rate": 5.8819572481555725e-05, "loss": 0.4182, "num_tokens": 217824700.0, "step": 344 }, { "epoch": 0.04079460801702731, "grad_norm": 0.25470298528671265, "learning_rate": 5.881047206627881e-05, "loss": 0.3802, "num_tokens": 218455560.0, "step": 345 }, { "epoch": 0.04091285325765638, "grad_norm": 0.26282477378845215, "learning_rate": 5.88013374961627e-05, "loss": 0.4288, "num_tokens": 219091298.0, "step": 346 }, { "epoch": 0.04103109849828544, "grad_norm": 0.2278517484664917, "learning_rate": 5.879216878329511e-05, "loss": 0.4011, "num_tokens": 219727733.0, "step": 347 }, { "epoch": 0.04114934373891451, "grad_norm": 0.2609797418117523, "learning_rate": 5.8782965939808954e-05, "loss": 0.4555, "num_tokens": 220360695.0, "step": 348 }, { "epoch": 0.04126758897954357, "grad_norm": 0.24882322549819946, "learning_rate": 5.877372897788229e-05, "loss": 0.4439, "num_tokens": 220993405.0, "step": 349 }, { "epoch": 0.04138583422017264, "grad_norm": 0.23803678154945374, "learning_rate": 5.8764457909738334e-05, "loss": 0.4379, "num_tokens": 221619384.0, "step": 350 }, { "epoch": 0.0415040794608017, "grad_norm": 0.2756311893463135, "learning_rate": 5.875515274764543e-05, "loss": 0.4223, "num_tokens": 222250253.0, "step": 351 }, { "epoch": 0.04162232470143077, "grad_norm": 0.25036007165908813, "learning_rate": 5.874581350391705e-05, "loss": 0.4087, "num_tokens": 222884935.0, "step": 352 }, { "epoch": 0.04174056994205983, "grad_norm": 0.26291555166244507, "learning_rate": 5.873644019091176e-05, "loss": 0.4097, "num_tokens": 223516629.0, "step": 353 }, { "epoch": 0.0418588151826889, "grad_norm": 0.263302743434906, "learning_rate": 5.872703282103319e-05, "loss": 0.4189, "num_tokens": 224150031.0, "step": 354 }, { "epoch": 0.04197706042331796, "grad_norm": 0.24322155117988586, "learning_rate": 5.871759140673007e-05, "loss": 0.4041, "num_tokens": 224786071.0, "step": 355 }, { "epoch": 0.04209530566394703, "grad_norm": 0.2757980525493622, "learning_rate": 5.870811596049618e-05, "loss": 0.4393, "num_tokens": 225421240.0, "step": 356 }, { "epoch": 0.04221355090457609, "grad_norm": 0.2762620151042938, "learning_rate": 5.8698606494870275e-05, "loss": 0.4039, "num_tokens": 226052174.0, "step": 357 }, { "epoch": 0.04233179614520516, "grad_norm": 0.2832268476486206, "learning_rate": 5.868906302243622e-05, "loss": 0.4447, "num_tokens": 226689398.0, "step": 358 }, { "epoch": 0.04245004138583422, "grad_norm": 0.23666320741176605, "learning_rate": 5.86794855558228e-05, "loss": 0.4085, "num_tokens": 227327616.0, "step": 359 }, { "epoch": 0.04256828662646329, "grad_norm": 0.27521422505378723, "learning_rate": 5.866987410770383e-05, "loss": 0.4136, "num_tokens": 227952686.0, "step": 360 }, { "epoch": 0.04268653186709235, "grad_norm": 0.26603540778160095, "learning_rate": 5.8660228690798094e-05, "loss": 0.4268, "num_tokens": 228587586.0, "step": 361 }, { "epoch": 0.04280477710772142, "grad_norm": 0.2510245740413666, "learning_rate": 5.86505493178693e-05, "loss": 0.4131, "num_tokens": 229185285.0, "step": 362 }, { "epoch": 0.04292302234835048, "grad_norm": 0.25701025128364563, "learning_rate": 5.8640836001726085e-05, "loss": 0.4067, "num_tokens": 229815886.0, "step": 363 }, { "epoch": 0.04304126758897955, "grad_norm": 0.2790713906288147, "learning_rate": 5.8631088755222046e-05, "loss": 0.4176, "num_tokens": 230449875.0, "step": 364 }, { "epoch": 0.04315951282960861, "grad_norm": 0.2744642198085785, "learning_rate": 5.8621307591255655e-05, "loss": 0.4202, "num_tokens": 231087769.0, "step": 365 }, { "epoch": 0.043277758070237676, "grad_norm": 0.26669076085090637, "learning_rate": 5.861149252277025e-05, "loss": 0.4053, "num_tokens": 231720719.0, "step": 366 }, { "epoch": 0.04339600331086674, "grad_norm": 0.24615958333015442, "learning_rate": 5.860164356275406e-05, "loss": 0.3992, "num_tokens": 232354694.0, "step": 367 }, { "epoch": 0.0435142485514958, "grad_norm": 0.26475226879119873, "learning_rate": 5.8591760724240154e-05, "loss": 0.4283, "num_tokens": 232986853.0, "step": 368 }, { "epoch": 0.04363249379212487, "grad_norm": 0.27385619282722473, "learning_rate": 5.858184402030643e-05, "loss": 0.4658, "num_tokens": 233626246.0, "step": 369 }, { "epoch": 0.04375073903275393, "grad_norm": 0.2431042194366455, "learning_rate": 5.85718934640756e-05, "loss": 0.3582, "num_tokens": 234256236.0, "step": 370 }, { "epoch": 0.043868984273383, "grad_norm": 0.28270891308784485, "learning_rate": 5.8561909068715165e-05, "loss": 0.4275, "num_tokens": 234894656.0, "step": 371 }, { "epoch": 0.04398722951401206, "grad_norm": 0.23586058616638184, "learning_rate": 5.855189084743743e-05, "loss": 0.4103, "num_tokens": 235530175.0, "step": 372 }, { "epoch": 0.044105474754641126, "grad_norm": 0.2272070050239563, "learning_rate": 5.854183881349944e-05, "loss": 0.3961, "num_tokens": 236165286.0, "step": 373 }, { "epoch": 0.04422371999527019, "grad_norm": 0.27485138177871704, "learning_rate": 5.8531752980203e-05, "loss": 0.3963, "num_tokens": 236799872.0, "step": 374 }, { "epoch": 0.044341965235899256, "grad_norm": 0.20811937749385834, "learning_rate": 5.8521633360894605e-05, "loss": 0.3703, "num_tokens": 237436361.0, "step": 375 }, { "epoch": 0.04446021047652832, "grad_norm": 0.2607218623161316, "learning_rate": 5.851147996896551e-05, "loss": 0.3905, "num_tokens": 238072053.0, "step": 376 }, { "epoch": 0.044578455717157385, "grad_norm": 0.2543987035751343, "learning_rate": 5.8501292817851625e-05, "loss": 0.4368, "num_tokens": 238707861.0, "step": 377 }, { "epoch": 0.044696700957786446, "grad_norm": 0.26138100028038025, "learning_rate": 5.849107192103355e-05, "loss": 0.4042, "num_tokens": 239340071.0, "step": 378 }, { "epoch": 0.044814946198415515, "grad_norm": 0.26055586338043213, "learning_rate": 5.848081729203653e-05, "loss": 0.4274, "num_tokens": 239974165.0, "step": 379 }, { "epoch": 0.044933191439044576, "grad_norm": 0.2314479798078537, "learning_rate": 5.847052894443045e-05, "loss": 0.3964, "num_tokens": 240606077.0, "step": 380 }, { "epoch": 0.045051436679673644, "grad_norm": 0.2874937951564789, "learning_rate": 5.8460206891829826e-05, "loss": 0.4107, "num_tokens": 241243195.0, "step": 381 }, { "epoch": 0.045169681920302705, "grad_norm": 0.2589111328125, "learning_rate": 5.8449851147893765e-05, "loss": 0.4553, "num_tokens": 241881692.0, "step": 382 }, { "epoch": 0.045287927160931774, "grad_norm": 0.28679126501083374, "learning_rate": 5.8439461726325956e-05, "loss": 0.4266, "num_tokens": 242520949.0, "step": 383 }, { "epoch": 0.045406172401560835, "grad_norm": 0.22613729536533356, "learning_rate": 5.8429038640874645e-05, "loss": 0.406, "num_tokens": 243160575.0, "step": 384 }, { "epoch": 0.0455244176421899, "grad_norm": 0.3079308271408081, "learning_rate": 5.841858190533266e-05, "loss": 0.4209, "num_tokens": 243790943.0, "step": 385 }, { "epoch": 0.045642662882818964, "grad_norm": 0.2861464023590088, "learning_rate": 5.840809153353732e-05, "loss": 0.468, "num_tokens": 244423844.0, "step": 386 }, { "epoch": 0.04576090812344803, "grad_norm": 0.24443581700325012, "learning_rate": 5.839756753937048e-05, "loss": 0.3987, "num_tokens": 245054470.0, "step": 387 }, { "epoch": 0.045879153364077094, "grad_norm": 0.2367839515209198, "learning_rate": 5.8387009936758456e-05, "loss": 0.3884, "num_tokens": 245688487.0, "step": 388 }, { "epoch": 0.04599739860470616, "grad_norm": 0.2450435906648636, "learning_rate": 5.837641873967209e-05, "loss": 0.4189, "num_tokens": 246325682.0, "step": 389 }, { "epoch": 0.04611564384533522, "grad_norm": 0.2244996875524521, "learning_rate": 5.836579396212662e-05, "loss": 0.439, "num_tokens": 246961506.0, "step": 390 }, { "epoch": 0.04623388908596429, "grad_norm": 0.2291857749223709, "learning_rate": 5.835513561818177e-05, "loss": 0.3873, "num_tokens": 247594032.0, "step": 391 }, { "epoch": 0.04635213432659335, "grad_norm": 0.25324153900146484, "learning_rate": 5.8344443721941645e-05, "loss": 0.4355, "num_tokens": 248227593.0, "step": 392 }, { "epoch": 0.04647037956722242, "grad_norm": 0.23453420400619507, "learning_rate": 5.833371828755477e-05, "loss": 0.4156, "num_tokens": 248859376.0, "step": 393 }, { "epoch": 0.04658862480785148, "grad_norm": 0.2575860619544983, "learning_rate": 5.832295932921405e-05, "loss": 0.4346, "num_tokens": 249493350.0, "step": 394 }, { "epoch": 0.04670687004848055, "grad_norm": 0.27037689089775085, "learning_rate": 5.8312166861156755e-05, "loss": 0.3934, "num_tokens": 250126507.0, "step": 395 }, { "epoch": 0.04682511528910961, "grad_norm": 0.23657937347888947, "learning_rate": 5.830134089766448e-05, "loss": 0.4322, "num_tokens": 250765825.0, "step": 396 }, { "epoch": 0.04694336052973868, "grad_norm": 0.2646414339542389, "learning_rate": 5.829048145306315e-05, "loss": 0.4261, "num_tokens": 251403372.0, "step": 397 }, { "epoch": 0.04706160577036774, "grad_norm": 0.26739707589149475, "learning_rate": 5.827958854172302e-05, "loss": 0.4417, "num_tokens": 252041762.0, "step": 398 }, { "epoch": 0.04717985101099681, "grad_norm": 0.25291574001312256, "learning_rate": 5.8268662178058614e-05, "loss": 0.412, "num_tokens": 252673716.0, "step": 399 }, { "epoch": 0.04729809625162587, "grad_norm": 0.24120593070983887, "learning_rate": 5.82577023765287e-05, "loss": 0.4209, "num_tokens": 253310805.0, "step": 400 }, { "epoch": 0.04741634149225494, "grad_norm": 0.23023198544979095, "learning_rate": 5.824670915163632e-05, "loss": 0.3477, "num_tokens": 253944650.0, "step": 401 }, { "epoch": 0.047534586732884, "grad_norm": 0.22422128915786743, "learning_rate": 5.823568251792876e-05, "loss": 0.3887, "num_tokens": 254579224.0, "step": 402 }, { "epoch": 0.04765283197351307, "grad_norm": 0.23010247945785522, "learning_rate": 5.8224622489997486e-05, "loss": 0.4055, "num_tokens": 255212393.0, "step": 403 }, { "epoch": 0.04777107721414213, "grad_norm": 0.22843380272388458, "learning_rate": 5.821352908247816e-05, "loss": 0.4294, "num_tokens": 255846216.0, "step": 404 }, { "epoch": 0.0478893224547712, "grad_norm": 0.24752621352672577, "learning_rate": 5.820240231005062e-05, "loss": 0.4379, "num_tokens": 256484541.0, "step": 405 }, { "epoch": 0.04800756769540026, "grad_norm": 0.26750537753105164, "learning_rate": 5.819124218743887e-05, "loss": 0.3972, "num_tokens": 257114691.0, "step": 406 }, { "epoch": 0.04812581293602933, "grad_norm": 0.22661860287189484, "learning_rate": 5.8180048729411025e-05, "loss": 0.4142, "num_tokens": 257748044.0, "step": 407 }, { "epoch": 0.04824405817665839, "grad_norm": 0.2039046287536621, "learning_rate": 5.816882195077929e-05, "loss": 0.369, "num_tokens": 258386984.0, "step": 408 }, { "epoch": 0.04836230341728746, "grad_norm": 0.2567331790924072, "learning_rate": 5.8157561866400044e-05, "loss": 0.4376, "num_tokens": 259023318.0, "step": 409 }, { "epoch": 0.04848054865791652, "grad_norm": 0.21170946955680847, "learning_rate": 5.814626849117365e-05, "loss": 0.3909, "num_tokens": 259657204.0, "step": 410 }, { "epoch": 0.048598793898545586, "grad_norm": 0.24077744781970978, "learning_rate": 5.813494184004459e-05, "loss": 0.3914, "num_tokens": 260290556.0, "step": 411 }, { "epoch": 0.04871703913917465, "grad_norm": 0.2552764415740967, "learning_rate": 5.812358192800132e-05, "loss": 0.4543, "num_tokens": 260928576.0, "step": 412 }, { "epoch": 0.048835284379803716, "grad_norm": 0.2142765074968338, "learning_rate": 5.811218877007637e-05, "loss": 0.4025, "num_tokens": 261563484.0, "step": 413 }, { "epoch": 0.04895352962043278, "grad_norm": 0.24795947968959808, "learning_rate": 5.8100762381346214e-05, "loss": 0.3724, "num_tokens": 262198058.0, "step": 414 }, { "epoch": 0.049071774861061845, "grad_norm": 0.2619639039039612, "learning_rate": 5.8089302776931333e-05, "loss": 0.4324, "num_tokens": 262825300.0, "step": 415 }, { "epoch": 0.04919002010169091, "grad_norm": 0.24405325949192047, "learning_rate": 5.8077809971996144e-05, "loss": 0.4465, "num_tokens": 263458362.0, "step": 416 }, { "epoch": 0.049308265342319975, "grad_norm": 0.2387343943119049, "learning_rate": 5.806628398174902e-05, "loss": 0.3886, "num_tokens": 264088367.0, "step": 417 }, { "epoch": 0.049426510582949036, "grad_norm": 0.254852831363678, "learning_rate": 5.805472482144222e-05, "loss": 0.4029, "num_tokens": 264724592.0, "step": 418 }, { "epoch": 0.049544755823578104, "grad_norm": 0.25719350576400757, "learning_rate": 5.8043132506371916e-05, "loss": 0.3904, "num_tokens": 265360262.0, "step": 419 }, { "epoch": 0.049663001064207166, "grad_norm": 0.2636720836162567, "learning_rate": 5.803150705187815e-05, "loss": 0.4441, "num_tokens": 265997689.0, "step": 420 }, { "epoch": 0.04978124630483623, "grad_norm": 0.29422527551651, "learning_rate": 5.80198484733448e-05, "loss": 0.4119, "num_tokens": 266631262.0, "step": 421 }, { "epoch": 0.049899491545465295, "grad_norm": 0.28710222244262695, "learning_rate": 5.800815678619961e-05, "loss": 0.4054, "num_tokens": 267268979.0, "step": 422 }, { "epoch": 0.050017736786094356, "grad_norm": 0.23449011147022247, "learning_rate": 5.799643200591411e-05, "loss": 0.41, "num_tokens": 267904627.0, "step": 423 }, { "epoch": 0.050135982026723425, "grad_norm": 0.2233870029449463, "learning_rate": 5.798467414800363e-05, "loss": 0.3647, "num_tokens": 268543181.0, "step": 424 }, { "epoch": 0.050254227267352486, "grad_norm": 0.2825268507003784, "learning_rate": 5.797288322802727e-05, "loss": 0.4117, "num_tokens": 269179996.0, "step": 425 }, { "epoch": 0.050372472507981554, "grad_norm": 0.21707065403461456, "learning_rate": 5.796105926158786e-05, "loss": 0.3921, "num_tokens": 269811253.0, "step": 426 }, { "epoch": 0.050490717748610615, "grad_norm": 0.24125707149505615, "learning_rate": 5.794920226433203e-05, "loss": 0.3987, "num_tokens": 270441984.0, "step": 427 }, { "epoch": 0.050608962989239684, "grad_norm": 0.2642814517021179, "learning_rate": 5.793731225195003e-05, "loss": 0.41, "num_tokens": 271074224.0, "step": 428 }, { "epoch": 0.050727208229868745, "grad_norm": 0.2821972370147705, "learning_rate": 5.792538924017587e-05, "loss": 0.4555, "num_tokens": 271705707.0, "step": 429 }, { "epoch": 0.05084545347049781, "grad_norm": 0.2493581920862198, "learning_rate": 5.791343324478718e-05, "loss": 0.3889, "num_tokens": 272318335.0, "step": 430 }, { "epoch": 0.050963698711126874, "grad_norm": 0.2921682596206665, "learning_rate": 5.790144428160525e-05, "loss": 0.4254, "num_tokens": 272955472.0, "step": 431 }, { "epoch": 0.05108194395175594, "grad_norm": 0.2746122181415558, "learning_rate": 5.7889422366495024e-05, "loss": 0.4138, "num_tokens": 273559901.0, "step": 432 }, { "epoch": 0.051200189192385004, "grad_norm": 0.24401576817035675, "learning_rate": 5.787736751536502e-05, "loss": 0.4117, "num_tokens": 274192648.0, "step": 433 }, { "epoch": 0.05131843443301407, "grad_norm": 0.2667708098888397, "learning_rate": 5.786527974416733e-05, "loss": 0.3815, "num_tokens": 274831825.0, "step": 434 }, { "epoch": 0.05143667967364313, "grad_norm": 0.23436440527439117, "learning_rate": 5.785315906889764e-05, "loss": 0.3677, "num_tokens": 275456510.0, "step": 435 }, { "epoch": 0.0515549249142722, "grad_norm": 0.27032753825187683, "learning_rate": 5.784100550559517e-05, "loss": 0.3713, "num_tokens": 276091876.0, "step": 436 }, { "epoch": 0.05167317015490126, "grad_norm": 0.2609861195087433, "learning_rate": 5.782881907034266e-05, "loss": 0.4188, "num_tokens": 276727335.0, "step": 437 }, { "epoch": 0.05179141539553033, "grad_norm": 0.2858883738517761, "learning_rate": 5.7816599779266336e-05, "loss": 0.4459, "num_tokens": 277359081.0, "step": 438 }, { "epoch": 0.05190966063615939, "grad_norm": 0.26549068093299866, "learning_rate": 5.78043476485359e-05, "loss": 0.3795, "num_tokens": 277990765.0, "step": 439 }, { "epoch": 0.05202790587678846, "grad_norm": 0.26636239886283875, "learning_rate": 5.779206269436455e-05, "loss": 0.4056, "num_tokens": 278612675.0, "step": 440 }, { "epoch": 0.05214615111741752, "grad_norm": 0.21832014620304108, "learning_rate": 5.7779744933008864e-05, "loss": 0.3829, "num_tokens": 279246447.0, "step": 441 }, { "epoch": 0.05226439635804659, "grad_norm": 0.2970152497291565, "learning_rate": 5.776739438076887e-05, "loss": 0.4533, "num_tokens": 279880451.0, "step": 442 }, { "epoch": 0.05238264159867565, "grad_norm": 0.27263355255126953, "learning_rate": 5.775501105398798e-05, "loss": 0.4431, "num_tokens": 280517149.0, "step": 443 }, { "epoch": 0.05250088683930472, "grad_norm": 0.23851481080055237, "learning_rate": 5.774259496905298e-05, "loss": 0.3809, "num_tokens": 281144009.0, "step": 444 }, { "epoch": 0.05261913207993378, "grad_norm": 0.26180770993232727, "learning_rate": 5.7730146142393974e-05, "loss": 0.3976, "num_tokens": 281781076.0, "step": 445 }, { "epoch": 0.05273737732056285, "grad_norm": 0.23845084011554718, "learning_rate": 5.771766459048445e-05, "loss": 0.4037, "num_tokens": 282418028.0, "step": 446 }, { "epoch": 0.05285562256119191, "grad_norm": 0.24678055942058563, "learning_rate": 5.770515032984114e-05, "loss": 0.3939, "num_tokens": 283052667.0, "step": 447 }, { "epoch": 0.05297386780182098, "grad_norm": 0.28906598687171936, "learning_rate": 5.769260337702411e-05, "loss": 0.4435, "num_tokens": 283689384.0, "step": 448 }, { "epoch": 0.05309211304245004, "grad_norm": 0.20284819602966309, "learning_rate": 5.768002374863666e-05, "loss": 0.369, "num_tokens": 284325980.0, "step": 449 }, { "epoch": 0.05321035828307911, "grad_norm": 0.2573983073234558, "learning_rate": 5.766741146132533e-05, "loss": 0.4255, "num_tokens": 284927410.0, "step": 450 }, { "epoch": 0.05332860352370817, "grad_norm": 0.26626330614089966, "learning_rate": 5.7654766531779884e-05, "loss": 0.4117, "num_tokens": 285561853.0, "step": 451 }, { "epoch": 0.05344684876433724, "grad_norm": 0.23338961601257324, "learning_rate": 5.7642088976733284e-05, "loss": 0.4227, "num_tokens": 286200667.0, "step": 452 }, { "epoch": 0.0535650940049663, "grad_norm": 0.2275472730398178, "learning_rate": 5.762937881296165e-05, "loss": 0.3856, "num_tokens": 286838297.0, "step": 453 }, { "epoch": 0.05368333924559537, "grad_norm": 0.2396043986082077, "learning_rate": 5.761663605728428e-05, "loss": 0.3895, "num_tokens": 287478013.0, "step": 454 }, { "epoch": 0.05380158448622443, "grad_norm": 0.27043527364730835, "learning_rate": 5.7603860726563574e-05, "loss": 0.4215, "num_tokens": 288113641.0, "step": 455 }, { "epoch": 0.053919829726853497, "grad_norm": 0.2147088646888733, "learning_rate": 5.759105283770505e-05, "loss": 0.4138, "num_tokens": 288748606.0, "step": 456 }, { "epoch": 0.05403807496748256, "grad_norm": 0.24205036461353302, "learning_rate": 5.757821240765731e-05, "loss": 0.3934, "num_tokens": 289387862.0, "step": 457 }, { "epoch": 0.054156320208111626, "grad_norm": 0.21859905123710632, "learning_rate": 5.756533945341201e-05, "loss": 0.3941, "num_tokens": 290025351.0, "step": 458 }, { "epoch": 0.05427456544874069, "grad_norm": 0.23405297100543976, "learning_rate": 5.755243399200386e-05, "loss": 0.3842, "num_tokens": 290660065.0, "step": 459 }, { "epoch": 0.054392810689369755, "grad_norm": 0.2372758984565735, "learning_rate": 5.7539496040510566e-05, "loss": 0.3901, "num_tokens": 291293769.0, "step": 460 }, { "epoch": 0.05451105592999882, "grad_norm": 0.22159790992736816, "learning_rate": 5.752652561605286e-05, "loss": 0.3958, "num_tokens": 291932970.0, "step": 461 }, { "epoch": 0.054629301170627885, "grad_norm": 0.22865094244480133, "learning_rate": 5.751352273579441e-05, "loss": 0.4059, "num_tokens": 292570196.0, "step": 462 }, { "epoch": 0.054747546411256946, "grad_norm": 0.2343677282333374, "learning_rate": 5.750048741694185e-05, "loss": 0.4382, "num_tokens": 293208953.0, "step": 463 }, { "epoch": 0.054865791651886014, "grad_norm": 0.22632277011871338, "learning_rate": 5.748741967674473e-05, "loss": 0.4056, "num_tokens": 293841194.0, "step": 464 }, { "epoch": 0.054984036892515076, "grad_norm": 0.24287892878055573, "learning_rate": 5.7474319532495534e-05, "loss": 0.4131, "num_tokens": 294478601.0, "step": 465 }, { "epoch": 0.055102282133144144, "grad_norm": 0.22494228184223175, "learning_rate": 5.7461187001529575e-05, "loss": 0.4069, "num_tokens": 295110727.0, "step": 466 }, { "epoch": 0.055220527373773205, "grad_norm": 0.22685138881206512, "learning_rate": 5.744802210122507e-05, "loss": 0.3812, "num_tokens": 295744614.0, "step": 467 }, { "epoch": 0.05533877261440227, "grad_norm": 0.22187015414237976, "learning_rate": 5.743482484900304e-05, "loss": 0.3966, "num_tokens": 296380861.0, "step": 468 }, { "epoch": 0.055457017855031335, "grad_norm": 0.22301408648490906, "learning_rate": 5.7421595262327336e-05, "loss": 0.405, "num_tokens": 297007279.0, "step": 469 }, { "epoch": 0.0555752630956604, "grad_norm": 0.22492894530296326, "learning_rate": 5.7408333358704576e-05, "loss": 0.3859, "num_tokens": 297623126.0, "step": 470 }, { "epoch": 0.055693508336289464, "grad_norm": 0.24347686767578125, "learning_rate": 5.739503915568416e-05, "loss": 0.4437, "num_tokens": 298255789.0, "step": 471 }, { "epoch": 0.05581175357691853, "grad_norm": 0.2275567501783371, "learning_rate": 5.738171267085824e-05, "loss": 0.4203, "num_tokens": 298885491.0, "step": 472 }, { "epoch": 0.055929998817547594, "grad_norm": 0.20554254949092865, "learning_rate": 5.736835392186165e-05, "loss": 0.3876, "num_tokens": 299521680.0, "step": 473 }, { "epoch": 0.056048244058176655, "grad_norm": 0.24628478288650513, "learning_rate": 5.735496292637196e-05, "loss": 0.3995, "num_tokens": 300161168.0, "step": 474 }, { "epoch": 0.05616648929880572, "grad_norm": 0.21095220744609833, "learning_rate": 5.734153970210938e-05, "loss": 0.3893, "num_tokens": 300793878.0, "step": 475 }, { "epoch": 0.056284734539434784, "grad_norm": 0.22608943283557892, "learning_rate": 5.732808426683678e-05, "loss": 0.4145, "num_tokens": 301427428.0, "step": 476 }, { "epoch": 0.05640297978006385, "grad_norm": 0.24192720651626587, "learning_rate": 5.731459663835966e-05, "loss": 0.4431, "num_tokens": 302063334.0, "step": 477 }, { "epoch": 0.056521225020692914, "grad_norm": 0.2017304003238678, "learning_rate": 5.730107683452611e-05, "loss": 0.3853, "num_tokens": 302696034.0, "step": 478 }, { "epoch": 0.05663947026132198, "grad_norm": 0.23585256934165955, "learning_rate": 5.7287524873226806e-05, "loss": 0.4106, "num_tokens": 303331655.0, "step": 479 }, { "epoch": 0.056757715501951043, "grad_norm": 0.26219794154167175, "learning_rate": 5.727394077239499e-05, "loss": 0.483, "num_tokens": 303962802.0, "step": 480 }, { "epoch": 0.05687596074258011, "grad_norm": 0.22113117575645447, "learning_rate": 5.7260324550006394e-05, "loss": 0.4007, "num_tokens": 304595703.0, "step": 481 }, { "epoch": 0.05699420598320917, "grad_norm": 0.23816938698291779, "learning_rate": 5.7246676224079296e-05, "loss": 0.4032, "num_tokens": 305227732.0, "step": 482 }, { "epoch": 0.05711245122383824, "grad_norm": 0.21741387248039246, "learning_rate": 5.723299581267444e-05, "loss": 0.4045, "num_tokens": 305861092.0, "step": 483 }, { "epoch": 0.0572306964644673, "grad_norm": 0.24627813696861267, "learning_rate": 5.721928333389502e-05, "loss": 0.4081, "num_tokens": 306497173.0, "step": 484 }, { "epoch": 0.05734894170509637, "grad_norm": 0.2061251550912857, "learning_rate": 5.72055388058867e-05, "loss": 0.3939, "num_tokens": 307133773.0, "step": 485 }, { "epoch": 0.05746718694572543, "grad_norm": 0.24562332034111023, "learning_rate": 5.719176224683751e-05, "loss": 0.3883, "num_tokens": 307760142.0, "step": 486 }, { "epoch": 0.0575854321863545, "grad_norm": 0.21945923566818237, "learning_rate": 5.717795367497789e-05, "loss": 0.4186, "num_tokens": 308392658.0, "step": 487 }, { "epoch": 0.05770367742698356, "grad_norm": 0.20955249667167664, "learning_rate": 5.7164113108580624e-05, "loss": 0.3745, "num_tokens": 309023200.0, "step": 488 }, { "epoch": 0.05782192266761263, "grad_norm": 0.2421126514673233, "learning_rate": 5.7150240565960876e-05, "loss": 0.4203, "num_tokens": 309657191.0, "step": 489 }, { "epoch": 0.05794016790824169, "grad_norm": 0.22103598713874817, "learning_rate": 5.713633606547608e-05, "loss": 0.4135, "num_tokens": 310290361.0, "step": 490 }, { "epoch": 0.05805841314887076, "grad_norm": 0.24057047069072723, "learning_rate": 5.712239962552599e-05, "loss": 0.4306, "num_tokens": 310922697.0, "step": 491 }, { "epoch": 0.05817665838949982, "grad_norm": 0.24529068171977997, "learning_rate": 5.710843126455258e-05, "loss": 0.422, "num_tokens": 311557453.0, "step": 492 }, { "epoch": 0.05829490363012889, "grad_norm": 0.22984932363033295, "learning_rate": 5.7094431001040134e-05, "loss": 0.423, "num_tokens": 312193193.0, "step": 493 }, { "epoch": 0.05841314887075795, "grad_norm": 0.21943072974681854, "learning_rate": 5.708039885351509e-05, "loss": 0.4211, "num_tokens": 312828473.0, "step": 494 }, { "epoch": 0.05853139411138702, "grad_norm": 0.21643073856830597, "learning_rate": 5.706633484054611e-05, "loss": 0.392, "num_tokens": 313462344.0, "step": 495 }, { "epoch": 0.05864963935201608, "grad_norm": 0.21632198989391327, "learning_rate": 5.705223898074402e-05, "loss": 0.3923, "num_tokens": 314080640.0, "step": 496 }, { "epoch": 0.05876788459264515, "grad_norm": 0.24517853558063507, "learning_rate": 5.703811129276178e-05, "loss": 0.4045, "num_tokens": 314717785.0, "step": 497 }, { "epoch": 0.05888612983327421, "grad_norm": 0.2291676551103592, "learning_rate": 5.7023951795294475e-05, "loss": 0.4608, "num_tokens": 315351322.0, "step": 498 }, { "epoch": 0.05900437507390328, "grad_norm": 0.23110775649547577, "learning_rate": 5.700976050707928e-05, "loss": 0.4141, "num_tokens": 315987690.0, "step": 499 }, { "epoch": 0.05912262031453234, "grad_norm": 197.8325958251953, "learning_rate": 5.699553744689544e-05, "loss": 5.1515, "num_tokens": 316588692.0, "step": 500 }, { "epoch": 0.05924086555516141, "grad_norm": 0.30840274691581726, "learning_rate": 5.6981282633564246e-05, "loss": 0.3961, "num_tokens": 317208507.0, "step": 501 }, { "epoch": 0.05935911079579047, "grad_norm": 0.24632979929447174, "learning_rate": 5.696699608594899e-05, "loss": 0.4435, "num_tokens": 317844277.0, "step": 502 }, { "epoch": 0.059477356036419536, "grad_norm": 0.24863989651203156, "learning_rate": 5.6952677822955e-05, "loss": 0.4299, "num_tokens": 318478032.0, "step": 503 }, { "epoch": 0.0595956012770486, "grad_norm": 0.25635260343551636, "learning_rate": 5.693832786352952e-05, "loss": 0.4575, "num_tokens": 319114663.0, "step": 504 }, { "epoch": 0.059713846517677666, "grad_norm": 0.2566048204898834, "learning_rate": 5.6923946226661766e-05, "loss": 0.3555, "num_tokens": 319744940.0, "step": 505 }, { "epoch": 0.05983209175830673, "grad_norm": 0.24760176241397858, "learning_rate": 5.690953293138289e-05, "loss": 0.4025, "num_tokens": 320377220.0, "step": 506 }, { "epoch": 0.059950336998935795, "grad_norm": 0.24116012454032898, "learning_rate": 5.68950879967659e-05, "loss": 0.4053, "num_tokens": 321010441.0, "step": 507 }, { "epoch": 0.060068582239564856, "grad_norm": 0.23760244250297546, "learning_rate": 5.688061144192569e-05, "loss": 0.387, "num_tokens": 321643644.0, "step": 508 }, { "epoch": 0.060186827480193925, "grad_norm": 0.22054804861545563, "learning_rate": 5.6866103286019014e-05, "loss": 0.3968, "num_tokens": 322275355.0, "step": 509 }, { "epoch": 0.060305072720822986, "grad_norm": 0.2905291020870209, "learning_rate": 5.6851563548244394e-05, "loss": 0.4227, "num_tokens": 322908591.0, "step": 510 }, { "epoch": 0.060423317961452054, "grad_norm": 0.2590107321739197, "learning_rate": 5.68369922478422e-05, "loss": 0.4215, "num_tokens": 323542459.0, "step": 511 }, { "epoch": 0.060541563202081115, "grad_norm": 0.25308477878570557, "learning_rate": 5.6822389404094526e-05, "loss": 0.4019, "num_tokens": 324176254.0, "step": 512 }, { "epoch": 0.060659808442710184, "grad_norm": 0.24590685963630676, "learning_rate": 5.680775503632525e-05, "loss": 0.3763, "num_tokens": 324803884.0, "step": 513 }, { "epoch": 0.060778053683339245, "grad_norm": 0.2458338737487793, "learning_rate": 5.679308916389993e-05, "loss": 0.4326, "num_tokens": 325442394.0, "step": 514 }, { "epoch": 0.06089629892396831, "grad_norm": 0.2458193451166153, "learning_rate": 5.677839180622581e-05, "loss": 0.4227, "num_tokens": 326081004.0, "step": 515 }, { "epoch": 0.061014544164597374, "grad_norm": 0.23497483134269714, "learning_rate": 5.676366298275183e-05, "loss": 0.4037, "num_tokens": 326711646.0, "step": 516 }, { "epoch": 0.06113278940522644, "grad_norm": 0.2159551978111267, "learning_rate": 5.674890271296856e-05, "loss": 0.3967, "num_tokens": 327346340.0, "step": 517 }, { "epoch": 0.061251034645855504, "grad_norm": 0.23301520943641663, "learning_rate": 5.673411101640814e-05, "loss": 0.3777, "num_tokens": 327982364.0, "step": 518 }, { "epoch": 0.06136927988648457, "grad_norm": 0.2253977358341217, "learning_rate": 5.6719287912644365e-05, "loss": 0.4402, "num_tokens": 328613513.0, "step": 519 }, { "epoch": 0.06148752512711363, "grad_norm": 0.23504820466041565, "learning_rate": 5.670443342129254e-05, "loss": 0.4156, "num_tokens": 329246797.0, "step": 520 }, { "epoch": 0.0616057703677427, "grad_norm": 0.23867231607437134, "learning_rate": 5.668954756200954e-05, "loss": 0.4268, "num_tokens": 329882051.0, "step": 521 }, { "epoch": 0.06172401560837176, "grad_norm": 0.23447009921073914, "learning_rate": 5.667463035449371e-05, "loss": 0.3978, "num_tokens": 330518781.0, "step": 522 }, { "epoch": 0.06184226084900083, "grad_norm": 0.2304748296737671, "learning_rate": 5.66596818184849e-05, "loss": 0.4029, "num_tokens": 331148822.0, "step": 523 }, { "epoch": 0.06196050608962989, "grad_norm": 0.22956591844558716, "learning_rate": 5.664470197376443e-05, "loss": 0.4224, "num_tokens": 331779530.0, "step": 524 }, { "epoch": 0.06207875133025896, "grad_norm": 0.21514792740345, "learning_rate": 5.6629690840155037e-05, "loss": 0.3633, "num_tokens": 332409781.0, "step": 525 }, { "epoch": 0.06219699657088802, "grad_norm": 0.2394063025712967, "learning_rate": 5.6614648437520845e-05, "loss": 0.4489, "num_tokens": 333047880.0, "step": 526 }, { "epoch": 0.06231524181151708, "grad_norm": 0.23102419078350067, "learning_rate": 5.6599574785767395e-05, "loss": 0.4125, "num_tokens": 333683586.0, "step": 527 }, { "epoch": 0.06243348705214615, "grad_norm": 0.22116991877555847, "learning_rate": 5.658446990484153e-05, "loss": 0.4137, "num_tokens": 334315506.0, "step": 528 }, { "epoch": 0.06255173229277522, "grad_norm": 0.2364656627178192, "learning_rate": 5.656933381473147e-05, "loss": 0.3834, "num_tokens": 334951291.0, "step": 529 }, { "epoch": 0.06266997753340428, "grad_norm": 0.23201730847358704, "learning_rate": 5.65541665354667e-05, "loss": 0.3932, "num_tokens": 335585867.0, "step": 530 }, { "epoch": 0.06278822277403334, "grad_norm": 0.2602282464504242, "learning_rate": 5.6538968087118e-05, "loss": 0.384, "num_tokens": 336220052.0, "step": 531 }, { "epoch": 0.0629064680146624, "grad_norm": 0.24242635071277618, "learning_rate": 5.652373848979735e-05, "loss": 0.4342, "num_tokens": 336856340.0, "step": 532 }, { "epoch": 0.06302471325529148, "grad_norm": 0.25628939270973206, "learning_rate": 5.6508477763658024e-05, "loss": 0.3873, "num_tokens": 337491980.0, "step": 533 }, { "epoch": 0.06314295849592054, "grad_norm": 0.2245175689458847, "learning_rate": 5.649318592889442e-05, "loss": 0.391, "num_tokens": 338130649.0, "step": 534 }, { "epoch": 0.0632612037365496, "grad_norm": 0.31959810853004456, "learning_rate": 5.6477863005742154e-05, "loss": 0.437, "num_tokens": 338765865.0, "step": 535 }, { "epoch": 0.06337944897717866, "grad_norm": 0.23283618688583374, "learning_rate": 5.6462509014477916e-05, "loss": 0.4077, "num_tokens": 339363366.0, "step": 536 }, { "epoch": 0.06349769421780774, "grad_norm": 0.23465241491794586, "learning_rate": 5.644712397541959e-05, "loss": 0.4525, "num_tokens": 339999650.0, "step": 537 }, { "epoch": 0.0636159394584368, "grad_norm": 0.26618894934654236, "learning_rate": 5.643170790892608e-05, "loss": 0.4114, "num_tokens": 340634297.0, "step": 538 }, { "epoch": 0.06373418469906586, "grad_norm": 0.23005127906799316, "learning_rate": 5.6416260835397386e-05, "loss": 0.4356, "num_tokens": 341269246.0, "step": 539 }, { "epoch": 0.06385242993969492, "grad_norm": 0.23913200199604034, "learning_rate": 5.6400782775274506e-05, "loss": 0.4225, "num_tokens": 341900107.0, "step": 540 }, { "epoch": 0.063970675180324, "grad_norm": 0.2653907835483551, "learning_rate": 5.638527374903947e-05, "loss": 0.4265, "num_tokens": 342539152.0, "step": 541 }, { "epoch": 0.06408892042095306, "grad_norm": 0.2343796193599701, "learning_rate": 5.6369733777215295e-05, "loss": 0.4254, "num_tokens": 343166271.0, "step": 542 }, { "epoch": 0.06420716566158212, "grad_norm": 0.21634334325790405, "learning_rate": 5.635416288036591e-05, "loss": 0.3838, "num_tokens": 343801657.0, "step": 543 }, { "epoch": 0.06432541090221118, "grad_norm": 0.22020314633846283, "learning_rate": 5.633856107909619e-05, "loss": 0.399, "num_tokens": 344436926.0, "step": 544 }, { "epoch": 0.06444365614284026, "grad_norm": 0.20335398614406586, "learning_rate": 5.632292839405191e-05, "loss": 0.3836, "num_tokens": 345071661.0, "step": 545 }, { "epoch": 0.06456190138346932, "grad_norm": 0.24160657823085785, "learning_rate": 5.63072648459197e-05, "loss": 0.4674, "num_tokens": 345710262.0, "step": 546 }, { "epoch": 0.06468014662409838, "grad_norm": 0.20083579421043396, "learning_rate": 5.629157045542704e-05, "loss": 0.4077, "num_tokens": 346345760.0, "step": 547 }, { "epoch": 0.06479839186472744, "grad_norm": 0.23045681416988373, "learning_rate": 5.627584524334222e-05, "loss": 0.3846, "num_tokens": 346978472.0, "step": 548 }, { "epoch": 0.06491663710535651, "grad_norm": 0.22518788278102875, "learning_rate": 5.626008923047432e-05, "loss": 0.3981, "num_tokens": 347615067.0, "step": 549 }, { "epoch": 0.06503488234598558, "grad_norm": 0.25393834710121155, "learning_rate": 5.624430243767317e-05, "loss": 0.421, "num_tokens": 348251545.0, "step": 550 }, { "epoch": 0.06515312758661464, "grad_norm": 0.20160511136054993, "learning_rate": 5.6228484885829335e-05, "loss": 0.4116, "num_tokens": 348888946.0, "step": 551 }, { "epoch": 0.0652713728272437, "grad_norm": 0.24297155439853668, "learning_rate": 5.621263659587408e-05, "loss": 0.4485, "num_tokens": 349519613.0, "step": 552 }, { "epoch": 0.06538961806787277, "grad_norm": 0.22340096533298492, "learning_rate": 5.619675758877937e-05, "loss": 0.4006, "num_tokens": 350152254.0, "step": 553 }, { "epoch": 0.06550786330850183, "grad_norm": 0.21662257611751556, "learning_rate": 5.618084788555778e-05, "loss": 0.3983, "num_tokens": 350786547.0, "step": 554 }, { "epoch": 0.0656261085491309, "grad_norm": 0.245235413312912, "learning_rate": 5.616490750726252e-05, "loss": 0.4061, "num_tokens": 351422383.0, "step": 555 }, { "epoch": 0.06574435378975996, "grad_norm": 0.23087024688720703, "learning_rate": 5.614893647498741e-05, "loss": 0.4178, "num_tokens": 352061087.0, "step": 556 }, { "epoch": 0.06586259903038903, "grad_norm": 0.2400379627943039, "learning_rate": 5.61329348098668e-05, "loss": 0.3789, "num_tokens": 352697788.0, "step": 557 }, { "epoch": 0.0659808442710181, "grad_norm": 0.21535944938659668, "learning_rate": 5.611690253307561e-05, "loss": 0.3992, "num_tokens": 353327836.0, "step": 558 }, { "epoch": 0.06609908951164715, "grad_norm": 0.21479098498821259, "learning_rate": 5.610083966582926e-05, "loss": 0.3911, "num_tokens": 353963351.0, "step": 559 }, { "epoch": 0.06621733475227622, "grad_norm": 0.23319794237613678, "learning_rate": 5.608474622938362e-05, "loss": 0.4251, "num_tokens": 354576403.0, "step": 560 }, { "epoch": 0.06633557999290529, "grad_norm": 0.22759908437728882, "learning_rate": 5.6068622245035045e-05, "loss": 0.4277, "num_tokens": 355211385.0, "step": 561 }, { "epoch": 0.06645382523353435, "grad_norm": 0.24206581711769104, "learning_rate": 5.605246773412032e-05, "loss": 0.4293, "num_tokens": 355842242.0, "step": 562 }, { "epoch": 0.06657207047416341, "grad_norm": 0.21902194619178772, "learning_rate": 5.6036282718016583e-05, "loss": 0.3806, "num_tokens": 356473678.0, "step": 563 }, { "epoch": 0.06669031571479248, "grad_norm": 0.21064871549606323, "learning_rate": 5.6020067218141395e-05, "loss": 0.3713, "num_tokens": 357112492.0, "step": 564 }, { "epoch": 0.06680856095542155, "grad_norm": 0.24632275104522705, "learning_rate": 5.6003821255952606e-05, "loss": 0.4005, "num_tokens": 357748442.0, "step": 565 }, { "epoch": 0.06692680619605061, "grad_norm": 0.2036793977022171, "learning_rate": 5.598754485294841e-05, "loss": 0.3638, "num_tokens": 358387012.0, "step": 566 }, { "epoch": 0.06704505143667967, "grad_norm": 0.1924084573984146, "learning_rate": 5.5971238030667256e-05, "loss": 0.3772, "num_tokens": 359025550.0, "step": 567 }, { "epoch": 0.06716329667730873, "grad_norm": 0.25849267840385437, "learning_rate": 5.5954900810687883e-05, "loss": 0.4201, "num_tokens": 359661915.0, "step": 568 }, { "epoch": 0.06728154191793781, "grad_norm": 0.20558328926563263, "learning_rate": 5.593853321462923e-05, "loss": 0.3806, "num_tokens": 360295739.0, "step": 569 }, { "epoch": 0.06739978715856687, "grad_norm": 0.2169775664806366, "learning_rate": 5.592213526415041e-05, "loss": 0.436, "num_tokens": 360929829.0, "step": 570 }, { "epoch": 0.06751803239919593, "grad_norm": 0.2151200920343399, "learning_rate": 5.5905706980950766e-05, "loss": 0.3729, "num_tokens": 361568418.0, "step": 571 }, { "epoch": 0.067636277639825, "grad_norm": 0.20888011157512665, "learning_rate": 5.588924838676973e-05, "loss": 0.3616, "num_tokens": 362197303.0, "step": 572 }, { "epoch": 0.06775452288045407, "grad_norm": 0.22360935807228088, "learning_rate": 5.587275950338685e-05, "loss": 0.4044, "num_tokens": 362825904.0, "step": 573 }, { "epoch": 0.06787276812108313, "grad_norm": 0.2185496687889099, "learning_rate": 5.5856240352621766e-05, "loss": 0.3965, "num_tokens": 363465140.0, "step": 574 }, { "epoch": 0.06799101336171219, "grad_norm": 0.19799013435840607, "learning_rate": 5.583969095633417e-05, "loss": 0.3984, "num_tokens": 364097168.0, "step": 575 }, { "epoch": 0.06810925860234125, "grad_norm": 0.20666642487049103, "learning_rate": 5.582311133642379e-05, "loss": 0.3763, "num_tokens": 364728470.0, "step": 576 }, { "epoch": 0.06822750384297033, "grad_norm": 0.20582424104213715, "learning_rate": 5.580650151483033e-05, "loss": 0.4387, "num_tokens": 365363497.0, "step": 577 }, { "epoch": 0.06834574908359939, "grad_norm": 0.2404540628194809, "learning_rate": 5.578986151353345e-05, "loss": 0.4566, "num_tokens": 366000699.0, "step": 578 }, { "epoch": 0.06846399432422845, "grad_norm": 0.19141684472560883, "learning_rate": 5.577319135455278e-05, "loss": 0.3639, "num_tokens": 366624851.0, "step": 579 }, { "epoch": 0.06858223956485751, "grad_norm": 0.22023002803325653, "learning_rate": 5.575649105994782e-05, "loss": 0.395, "num_tokens": 367261099.0, "step": 580 }, { "epoch": 0.06870048480548657, "grad_norm": 0.20336921513080597, "learning_rate": 5.5739760651817984e-05, "loss": 0.3665, "num_tokens": 367897458.0, "step": 581 }, { "epoch": 0.06881873004611565, "grad_norm": 0.23487062752246857, "learning_rate": 5.572300015230252e-05, "loss": 0.4354, "num_tokens": 368534927.0, "step": 582 }, { "epoch": 0.06893697528674471, "grad_norm": 0.233329176902771, "learning_rate": 5.570620958358048e-05, "loss": 0.426, "num_tokens": 369169358.0, "step": 583 }, { "epoch": 0.06905522052737377, "grad_norm": 0.21234264969825745, "learning_rate": 5.568938896787073e-05, "loss": 0.416, "num_tokens": 369806601.0, "step": 584 }, { "epoch": 0.06917346576800283, "grad_norm": 0.21879243850708008, "learning_rate": 5.567253832743187e-05, "loss": 0.4039, "num_tokens": 370442728.0, "step": 585 }, { "epoch": 0.0692917110086319, "grad_norm": 0.23026061058044434, "learning_rate": 5.565565768456228e-05, "loss": 0.415, "num_tokens": 371079275.0, "step": 586 }, { "epoch": 0.06940995624926097, "grad_norm": 0.2231062948703766, "learning_rate": 5.563874706159999e-05, "loss": 0.4094, "num_tokens": 371716282.0, "step": 587 }, { "epoch": 0.06952820148989003, "grad_norm": 0.21120983362197876, "learning_rate": 5.562180648092273e-05, "loss": 0.4116, "num_tokens": 372347661.0, "step": 588 }, { "epoch": 0.06964644673051909, "grad_norm": 0.21369966864585876, "learning_rate": 5.5604835964947864e-05, "loss": 0.4005, "num_tokens": 372977936.0, "step": 589 }, { "epoch": 0.06976469197114817, "grad_norm": 0.2117834985256195, "learning_rate": 5.558783553613237e-05, "loss": 0.42, "num_tokens": 373613192.0, "step": 590 }, { "epoch": 0.06988293721177723, "grad_norm": 0.2446117252111435, "learning_rate": 5.5570805216972816e-05, "loss": 0.4138, "num_tokens": 374251716.0, "step": 591 }, { "epoch": 0.07000118245240629, "grad_norm": 0.21218879520893097, "learning_rate": 5.555374503000531e-05, "loss": 0.3687, "num_tokens": 374880470.0, "step": 592 }, { "epoch": 0.07011942769303535, "grad_norm": 0.20297327637672424, "learning_rate": 5.5536654997805504e-05, "loss": 0.4106, "num_tokens": 375518014.0, "step": 593 }, { "epoch": 0.07023767293366442, "grad_norm": 0.23108522593975067, "learning_rate": 5.551953514298852e-05, "loss": 0.4121, "num_tokens": 376156899.0, "step": 594 }, { "epoch": 0.07035591817429349, "grad_norm": 0.20212985575199127, "learning_rate": 5.550238548820898e-05, "loss": 0.3945, "num_tokens": 376796416.0, "step": 595 }, { "epoch": 0.07047416341492255, "grad_norm": 0.22493824362754822, "learning_rate": 5.548520605616088e-05, "loss": 0.3948, "num_tokens": 377430299.0, "step": 596 }, { "epoch": 0.07059240865555161, "grad_norm": 0.1889137625694275, "learning_rate": 5.546799686957769e-05, "loss": 0.3441, "num_tokens": 378036010.0, "step": 597 }, { "epoch": 0.07071065389618068, "grad_norm": 0.24320292472839355, "learning_rate": 5.545075795123221e-05, "loss": 0.4358, "num_tokens": 378662576.0, "step": 598 }, { "epoch": 0.07082889913680974, "grad_norm": 0.20845460891723633, "learning_rate": 5.543348932393659e-05, "loss": 0.3632, "num_tokens": 379292706.0, "step": 599 }, { "epoch": 0.0709471443774388, "grad_norm": 0.2712063193321228, "learning_rate": 5.54161910105423e-05, "loss": 0.4241, "num_tokens": 379923752.0, "step": 600 }, { "epoch": 0.07106538961806787, "grad_norm": 0.2057293802499771, "learning_rate": 5.5398863033940096e-05, "loss": 0.3938, "num_tokens": 380556540.0, "step": 601 }, { "epoch": 0.07118363485869694, "grad_norm": 0.2481277585029602, "learning_rate": 5.538150541705998e-05, "loss": 0.3685, "num_tokens": 381191645.0, "step": 602 }, { "epoch": 0.071301880099326, "grad_norm": 0.2418372929096222, "learning_rate": 5.536411818287119e-05, "loss": 0.4251, "num_tokens": 381827763.0, "step": 603 }, { "epoch": 0.07142012533995507, "grad_norm": 0.19724474847316742, "learning_rate": 5.5346701354382125e-05, "loss": 0.3906, "num_tokens": 382466825.0, "step": 604 }, { "epoch": 0.07153837058058413, "grad_norm": 0.2242296040058136, "learning_rate": 5.532925495464039e-05, "loss": 0.3925, "num_tokens": 383103907.0, "step": 605 }, { "epoch": 0.0716566158212132, "grad_norm": 0.22040408849716187, "learning_rate": 5.5311779006732676e-05, "loss": 0.4127, "num_tokens": 383736705.0, "step": 606 }, { "epoch": 0.07177486106184226, "grad_norm": 0.23752287030220032, "learning_rate": 5.529427353378482e-05, "loss": 0.4197, "num_tokens": 384376310.0, "step": 607 }, { "epoch": 0.07189310630247132, "grad_norm": 0.21531735360622406, "learning_rate": 5.5276738558961686e-05, "loss": 0.3928, "num_tokens": 385007094.0, "step": 608 }, { "epoch": 0.07201135154310039, "grad_norm": 0.20882567763328552, "learning_rate": 5.525917410546721e-05, "loss": 0.3719, "num_tokens": 385646534.0, "step": 609 }, { "epoch": 0.07212959678372946, "grad_norm": 0.19994843006134033, "learning_rate": 5.5241580196544334e-05, "loss": 0.3651, "num_tokens": 386283106.0, "step": 610 }, { "epoch": 0.07224784202435852, "grad_norm": 0.22568222880363464, "learning_rate": 5.522395685547495e-05, "loss": 0.363, "num_tokens": 386916395.0, "step": 611 }, { "epoch": 0.07236608726498758, "grad_norm": 0.19781558215618134, "learning_rate": 5.520630410557994e-05, "loss": 0.4108, "num_tokens": 387553614.0, "step": 612 }, { "epoch": 0.07248433250561664, "grad_norm": 0.20043326914310455, "learning_rate": 5.5188621970219056e-05, "loss": 0.3767, "num_tokens": 388189908.0, "step": 613 }, { "epoch": 0.07260257774624572, "grad_norm": 0.22510674595832825, "learning_rate": 5.517091047279096e-05, "loss": 0.4186, "num_tokens": 388827274.0, "step": 614 }, { "epoch": 0.07272082298687478, "grad_norm": 0.19270777702331543, "learning_rate": 5.5153169636733194e-05, "loss": 0.3854, "num_tokens": 389460786.0, "step": 615 }, { "epoch": 0.07283906822750384, "grad_norm": 0.2161119431257248, "learning_rate": 5.5135399485522075e-05, "loss": 0.4093, "num_tokens": 390066154.0, "step": 616 }, { "epoch": 0.0729573134681329, "grad_norm": 0.20279474556446075, "learning_rate": 5.511760004267274e-05, "loss": 0.3766, "num_tokens": 390700370.0, "step": 617 }, { "epoch": 0.07307555870876198, "grad_norm": 0.23120693862438202, "learning_rate": 5.509977133173908e-05, "loss": 0.3815, "num_tokens": 391333815.0, "step": 618 }, { "epoch": 0.07319380394939104, "grad_norm": 0.21448132395744324, "learning_rate": 5.508191337631373e-05, "loss": 0.399, "num_tokens": 391972641.0, "step": 619 }, { "epoch": 0.0733120491900201, "grad_norm": 0.23450714349746704, "learning_rate": 5.5064026200028e-05, "loss": 0.4233, "num_tokens": 392607122.0, "step": 620 }, { "epoch": 0.07343029443064916, "grad_norm": 0.18135346472263336, "learning_rate": 5.5046109826551886e-05, "loss": 0.3721, "num_tokens": 393234821.0, "step": 621 }, { "epoch": 0.07354853967127824, "grad_norm": 0.26815736293792725, "learning_rate": 5.502816427959401e-05, "loss": 0.375, "num_tokens": 393867338.0, "step": 622 }, { "epoch": 0.0736667849119073, "grad_norm": 0.20579469203948975, "learning_rate": 5.5010189582901614e-05, "loss": 0.387, "num_tokens": 394505968.0, "step": 623 }, { "epoch": 0.07378503015253636, "grad_norm": 0.2314739227294922, "learning_rate": 5.499218576026049e-05, "loss": 0.4327, "num_tokens": 395143798.0, "step": 624 }, { "epoch": 0.07390327539316542, "grad_norm": 0.2270359843969345, "learning_rate": 5.497415283549501e-05, "loss": 0.3791, "num_tokens": 395781916.0, "step": 625 }, { "epoch": 0.0740215206337945, "grad_norm": 0.22188624739646912, "learning_rate": 5.4956090832468006e-05, "loss": 0.4233, "num_tokens": 396419440.0, "step": 626 }, { "epoch": 0.07413976587442356, "grad_norm": 0.18612177670001984, "learning_rate": 5.4937999775080824e-05, "loss": 0.3638, "num_tokens": 397053137.0, "step": 627 }, { "epoch": 0.07425801111505262, "grad_norm": 0.21651092171669006, "learning_rate": 5.4919879687273255e-05, "loss": 0.4044, "num_tokens": 397685003.0, "step": 628 }, { "epoch": 0.07437625635568168, "grad_norm": 0.19978873431682587, "learning_rate": 5.490173059302349e-05, "loss": 0.4171, "num_tokens": 398323686.0, "step": 629 }, { "epoch": 0.07449450159631076, "grad_norm": 0.21592815220355988, "learning_rate": 5.488355251634813e-05, "loss": 0.4154, "num_tokens": 398958685.0, "step": 630 }, { "epoch": 0.07461274683693982, "grad_norm": 0.2020513266324997, "learning_rate": 5.48653454813021e-05, "loss": 0.4077, "num_tokens": 399595823.0, "step": 631 }, { "epoch": 0.07473099207756888, "grad_norm": 0.2267449051141739, "learning_rate": 5.484710951197866e-05, "loss": 0.3978, "num_tokens": 400234363.0, "step": 632 }, { "epoch": 0.07484923731819794, "grad_norm": 0.22091686725616455, "learning_rate": 5.482884463250935e-05, "loss": 0.4122, "num_tokens": 400860312.0, "step": 633 }, { "epoch": 0.074967482558827, "grad_norm": 0.18706369400024414, "learning_rate": 5.481055086706399e-05, "loss": 0.405, "num_tokens": 401493314.0, "step": 634 }, { "epoch": 0.07508572779945608, "grad_norm": 0.22939816117286682, "learning_rate": 5.479222823985058e-05, "loss": 0.4171, "num_tokens": 402131410.0, "step": 635 }, { "epoch": 0.07520397304008514, "grad_norm": 0.20424328744411469, "learning_rate": 5.477387677511537e-05, "loss": 0.4301, "num_tokens": 402767694.0, "step": 636 }, { "epoch": 0.0753222182807142, "grad_norm": 0.19207920134067535, "learning_rate": 5.475549649714272e-05, "loss": 0.4156, "num_tokens": 403401170.0, "step": 637 }, { "epoch": 0.07544046352134326, "grad_norm": 0.2168489545583725, "learning_rate": 5.473708743025516e-05, "loss": 0.4025, "num_tokens": 404033223.0, "step": 638 }, { "epoch": 0.07555870876197233, "grad_norm": 0.20321166515350342, "learning_rate": 5.471864959881328e-05, "loss": 0.3945, "num_tokens": 404669142.0, "step": 639 }, { "epoch": 0.0756769540026014, "grad_norm": 0.20609459280967712, "learning_rate": 5.470018302721574e-05, "loss": 0.4091, "num_tokens": 405305698.0, "step": 640 }, { "epoch": 0.07579519924323046, "grad_norm": 0.2112593948841095, "learning_rate": 5.468168773989925e-05, "loss": 0.4354, "num_tokens": 405935366.0, "step": 641 }, { "epoch": 0.07591344448385952, "grad_norm": 0.21053996682167053, "learning_rate": 5.466316376133852e-05, "loss": 0.407, "num_tokens": 406563939.0, "step": 642 }, { "epoch": 0.0760316897244886, "grad_norm": 0.19004890322685242, "learning_rate": 5.46446111160462e-05, "loss": 0.3909, "num_tokens": 407201876.0, "step": 643 }, { "epoch": 0.07614993496511765, "grad_norm": 0.20329055190086365, "learning_rate": 5.4626029828572916e-05, "loss": 0.3846, "num_tokens": 407834876.0, "step": 644 }, { "epoch": 0.07626818020574672, "grad_norm": 0.19921205937862396, "learning_rate": 5.460741992350715e-05, "loss": 0.3864, "num_tokens": 408474126.0, "step": 645 }, { "epoch": 0.07638642544637578, "grad_norm": 0.19097398221492767, "learning_rate": 5.458878142547527e-05, "loss": 0.3468, "num_tokens": 409110752.0, "step": 646 }, { "epoch": 0.07650467068700485, "grad_norm": 0.22949428856372833, "learning_rate": 5.457011435914151e-05, "loss": 0.4106, "num_tokens": 409744230.0, "step": 647 }, { "epoch": 0.07662291592763391, "grad_norm": 0.2329481840133667, "learning_rate": 5.455141874920786e-05, "loss": 0.4005, "num_tokens": 410377730.0, "step": 648 }, { "epoch": 0.07674116116826298, "grad_norm": 0.21861277520656586, "learning_rate": 5.453269462041413e-05, "loss": 0.3794, "num_tokens": 411014041.0, "step": 649 }, { "epoch": 0.07685940640889204, "grad_norm": 0.25171056389808655, "learning_rate": 5.4513941997537826e-05, "loss": 0.389, "num_tokens": 411653738.0, "step": 650 }, { "epoch": 0.07697765164952111, "grad_norm": 0.20175261795520782, "learning_rate": 5.4495160905394185e-05, "loss": 0.3584, "num_tokens": 412288459.0, "step": 651 }, { "epoch": 0.07709589689015017, "grad_norm": 0.20550478994846344, "learning_rate": 5.447635136883611e-05, "loss": 0.3979, "num_tokens": 412924573.0, "step": 652 }, { "epoch": 0.07721414213077923, "grad_norm": 0.21349135041236877, "learning_rate": 5.445751341275414e-05, "loss": 0.353, "num_tokens": 413557882.0, "step": 653 }, { "epoch": 0.0773323873714083, "grad_norm": 0.20919151604175568, "learning_rate": 5.443864706207644e-05, "loss": 0.3545, "num_tokens": 414190765.0, "step": 654 }, { "epoch": 0.07745063261203737, "grad_norm": 0.17326898872852325, "learning_rate": 5.441975234176872e-05, "loss": 0.3625, "num_tokens": 414824481.0, "step": 655 }, { "epoch": 0.07756887785266643, "grad_norm": 0.22109591960906982, "learning_rate": 5.440082927683427e-05, "loss": 0.3867, "num_tokens": 415462470.0, "step": 656 }, { "epoch": 0.0776871230932955, "grad_norm": 0.21504008769989014, "learning_rate": 5.438187789231384e-05, "loss": 0.4169, "num_tokens": 416098823.0, "step": 657 }, { "epoch": 0.07780536833392455, "grad_norm": 0.21788012981414795, "learning_rate": 5.436289821328568e-05, "loss": 0.3969, "num_tokens": 416727909.0, "step": 658 }, { "epoch": 0.07792361357455363, "grad_norm": 0.2375272661447525, "learning_rate": 5.434389026486552e-05, "loss": 0.4216, "num_tokens": 417362676.0, "step": 659 }, { "epoch": 0.07804185881518269, "grad_norm": 0.20120349526405334, "learning_rate": 5.432485407220642e-05, "loss": 0.3754, "num_tokens": 417996258.0, "step": 660 }, { "epoch": 0.07816010405581175, "grad_norm": 0.20345494151115417, "learning_rate": 5.430578966049888e-05, "loss": 0.4174, "num_tokens": 418626034.0, "step": 661 }, { "epoch": 0.07827834929644081, "grad_norm": 0.22592638432979584, "learning_rate": 5.428669705497071e-05, "loss": 0.4421, "num_tokens": 419263225.0, "step": 662 }, { "epoch": 0.07839659453706989, "grad_norm": 0.18610456585884094, "learning_rate": 5.426757628088704e-05, "loss": 0.4022, "num_tokens": 419892898.0, "step": 663 }, { "epoch": 0.07851483977769895, "grad_norm": 0.17779679596424103, "learning_rate": 5.424842736355027e-05, "loss": 0.3886, "num_tokens": 420527528.0, "step": 664 }, { "epoch": 0.07863308501832801, "grad_norm": 0.198362335562706, "learning_rate": 5.422925032830005e-05, "loss": 0.3933, "num_tokens": 421156121.0, "step": 665 }, { "epoch": 0.07875133025895707, "grad_norm": 0.18584303557872772, "learning_rate": 5.421004520051323e-05, "loss": 0.3421, "num_tokens": 421790061.0, "step": 666 }, { "epoch": 0.07886957549958615, "grad_norm": 0.2071962207555771, "learning_rate": 5.419081200560384e-05, "loss": 0.4012, "num_tokens": 422426991.0, "step": 667 }, { "epoch": 0.07898782074021521, "grad_norm": 0.1821301132440567, "learning_rate": 5.417155076902304e-05, "loss": 0.3583, "num_tokens": 423063834.0, "step": 668 }, { "epoch": 0.07910606598084427, "grad_norm": 0.20297402143478394, "learning_rate": 5.415226151625912e-05, "loss": 0.3926, "num_tokens": 423703537.0, "step": 669 }, { "epoch": 0.07922431122147333, "grad_norm": 0.23203183710575104, "learning_rate": 5.413294427283741e-05, "loss": 0.4103, "num_tokens": 424330180.0, "step": 670 }, { "epoch": 0.0793425564621024, "grad_norm": 0.20411129295825958, "learning_rate": 5.411359906432033e-05, "loss": 0.4238, "num_tokens": 424966976.0, "step": 671 }, { "epoch": 0.07946080170273147, "grad_norm": 0.21121400594711304, "learning_rate": 5.409422591630725e-05, "loss": 0.3959, "num_tokens": 425604191.0, "step": 672 }, { "epoch": 0.07957904694336053, "grad_norm": 0.1865694373846054, "learning_rate": 5.4074824854434536e-05, "loss": 0.3455, "num_tokens": 426240218.0, "step": 673 }, { "epoch": 0.07969729218398959, "grad_norm": 0.2658561170101166, "learning_rate": 5.4055395904375516e-05, "loss": 0.4083, "num_tokens": 426878525.0, "step": 674 }, { "epoch": 0.07981553742461867, "grad_norm": 0.18216894567012787, "learning_rate": 5.403593909184038e-05, "loss": 0.3777, "num_tokens": 427512790.0, "step": 675 }, { "epoch": 0.07993378266524773, "grad_norm": 0.22485923767089844, "learning_rate": 5.401645444257622e-05, "loss": 0.4213, "num_tokens": 428150456.0, "step": 676 }, { "epoch": 0.08005202790587679, "grad_norm": 0.21150392293930054, "learning_rate": 5.399694198236695e-05, "loss": 0.3562, "num_tokens": 428783253.0, "step": 677 }, { "epoch": 0.08017027314650585, "grad_norm": 0.23367063701152802, "learning_rate": 5.397740173703328e-05, "loss": 0.3976, "num_tokens": 429419007.0, "step": 678 }, { "epoch": 0.08028851838713492, "grad_norm": 0.18307745456695557, "learning_rate": 5.395783373243272e-05, "loss": 0.3882, "num_tokens": 430051255.0, "step": 679 }, { "epoch": 0.08040676362776399, "grad_norm": 0.22259311378002167, "learning_rate": 5.3938237994459465e-05, "loss": 0.4348, "num_tokens": 430683994.0, "step": 680 }, { "epoch": 0.08052500886839305, "grad_norm": 0.18747752904891968, "learning_rate": 5.3918614549044446e-05, "loss": 0.3657, "num_tokens": 431314161.0, "step": 681 }, { "epoch": 0.08064325410902211, "grad_norm": 0.22098389267921448, "learning_rate": 5.389896342215524e-05, "loss": 0.389, "num_tokens": 431943550.0, "step": 682 }, { "epoch": 0.08076149934965117, "grad_norm": 0.18855495750904083, "learning_rate": 5.387928463979608e-05, "loss": 0.379, "num_tokens": 432580022.0, "step": 683 }, { "epoch": 0.08087974459028024, "grad_norm": 0.20843663811683655, "learning_rate": 5.385957822800776e-05, "loss": 0.4279, "num_tokens": 433215860.0, "step": 684 }, { "epoch": 0.0809979898309093, "grad_norm": 0.2124507576227188, "learning_rate": 5.3839844212867666e-05, "loss": 0.3809, "num_tokens": 433849766.0, "step": 685 }, { "epoch": 0.08111623507153837, "grad_norm": 0.17536836862564087, "learning_rate": 5.382008262048968e-05, "loss": 0.36, "num_tokens": 434484743.0, "step": 686 }, { "epoch": 0.08123448031216743, "grad_norm": 0.21414297819137573, "learning_rate": 5.3800293477024224e-05, "loss": 0.3713, "num_tokens": 435100372.0, "step": 687 }, { "epoch": 0.0813527255527965, "grad_norm": 0.21753741800785065, "learning_rate": 5.378047680865814e-05, "loss": 0.4386, "num_tokens": 435733891.0, "step": 688 }, { "epoch": 0.08147097079342557, "grad_norm": 0.21724680066108704, "learning_rate": 5.376063264161469e-05, "loss": 0.3765, "num_tokens": 436365690.0, "step": 689 }, { "epoch": 0.08158921603405463, "grad_norm": 0.21394267678260803, "learning_rate": 5.3740761002153536e-05, "loss": 0.3638, "num_tokens": 437003639.0, "step": 690 }, { "epoch": 0.08170746127468369, "grad_norm": 0.2070666402578354, "learning_rate": 5.372086191657073e-05, "loss": 0.3935, "num_tokens": 437642920.0, "step": 691 }, { "epoch": 0.08182570651531276, "grad_norm": 0.23084615170955658, "learning_rate": 5.370093541119857e-05, "loss": 0.3949, "num_tokens": 438277063.0, "step": 692 }, { "epoch": 0.08194395175594182, "grad_norm": 0.21036827564239502, "learning_rate": 5.368098151240571e-05, "loss": 0.4055, "num_tokens": 438915083.0, "step": 693 }, { "epoch": 0.08206219699657089, "grad_norm": 0.21798987686634064, "learning_rate": 5.366100024659699e-05, "loss": 0.4172, "num_tokens": 439554233.0, "step": 694 }, { "epoch": 0.08218044223719995, "grad_norm": 0.18871086835861206, "learning_rate": 5.3640991640213535e-05, "loss": 0.3707, "num_tokens": 440187716.0, "step": 695 }, { "epoch": 0.08229868747782902, "grad_norm": 0.20316803455352783, "learning_rate": 5.362095571973258e-05, "loss": 0.3978, "num_tokens": 440823462.0, "step": 696 }, { "epoch": 0.08241693271845808, "grad_norm": 0.19437344372272491, "learning_rate": 5.360089251166755e-05, "loss": 0.386, "num_tokens": 441461261.0, "step": 697 }, { "epoch": 0.08253517795908714, "grad_norm": 0.2129731923341751, "learning_rate": 5.358080204256795e-05, "loss": 0.4105, "num_tokens": 442092406.0, "step": 698 }, { "epoch": 0.0826534231997162, "grad_norm": 0.19358403980731964, "learning_rate": 5.356068433901938e-05, "loss": 0.415, "num_tokens": 442729228.0, "step": 699 }, { "epoch": 0.08277166844034528, "grad_norm": 0.20055876672267914, "learning_rate": 5.35405394276435e-05, "loss": 0.4277, "num_tokens": 443368453.0, "step": 700 }, { "epoch": 0.08288991368097434, "grad_norm": 0.19685010612010956, "learning_rate": 5.3520367335097916e-05, "loss": 0.3727, "num_tokens": 444004993.0, "step": 701 }, { "epoch": 0.0830081589216034, "grad_norm": 0.2335917055606842, "learning_rate": 5.350016808807624e-05, "loss": 0.4419, "num_tokens": 444641198.0, "step": 702 }, { "epoch": 0.08312640416223246, "grad_norm": 0.18580734729766846, "learning_rate": 5.3479941713308015e-05, "loss": 0.3951, "num_tokens": 445272456.0, "step": 703 }, { "epoch": 0.08324464940286154, "grad_norm": 0.23013712465763092, "learning_rate": 5.3459688237558684e-05, "loss": 0.4393, "num_tokens": 445909612.0, "step": 704 }, { "epoch": 0.0833628946434906, "grad_norm": 0.23703046143054962, "learning_rate": 5.343940768762954e-05, "loss": 0.4589, "num_tokens": 446544323.0, "step": 705 }, { "epoch": 0.08348113988411966, "grad_norm": 0.2044428586959839, "learning_rate": 5.341910009035771e-05, "loss": 0.4069, "num_tokens": 447177697.0, "step": 706 }, { "epoch": 0.08359938512474872, "grad_norm": 0.1987406462430954, "learning_rate": 5.3398765472616126e-05, "loss": 0.4164, "num_tokens": 447813578.0, "step": 707 }, { "epoch": 0.0837176303653778, "grad_norm": 0.19243036210536957, "learning_rate": 5.337840386131344e-05, "loss": 0.381, "num_tokens": 448448247.0, "step": 708 }, { "epoch": 0.08383587560600686, "grad_norm": 0.21122971177101135, "learning_rate": 5.335801528339407e-05, "loss": 0.4016, "num_tokens": 449081260.0, "step": 709 }, { "epoch": 0.08395412084663592, "grad_norm": 0.20589253306388855, "learning_rate": 5.3337599765838076e-05, "loss": 0.3636, "num_tokens": 449718960.0, "step": 710 }, { "epoch": 0.08407236608726498, "grad_norm": 0.22047452628612518, "learning_rate": 5.331715733566121e-05, "loss": 0.3761, "num_tokens": 450354221.0, "step": 711 }, { "epoch": 0.08419061132789406, "grad_norm": 0.22329314053058624, "learning_rate": 5.329668801991478e-05, "loss": 0.4277, "num_tokens": 450990864.0, "step": 712 }, { "epoch": 0.08430885656852312, "grad_norm": 0.21842564642429352, "learning_rate": 5.327619184568574e-05, "loss": 0.3584, "num_tokens": 451629963.0, "step": 713 }, { "epoch": 0.08442710180915218, "grad_norm": 0.201019287109375, "learning_rate": 5.325566884009654e-05, "loss": 0.3619, "num_tokens": 452263375.0, "step": 714 }, { "epoch": 0.08454534704978124, "grad_norm": 0.2327526956796646, "learning_rate": 5.323511903030513e-05, "loss": 0.4393, "num_tokens": 452899788.0, "step": 715 }, { "epoch": 0.08466359229041032, "grad_norm": 0.20655345916748047, "learning_rate": 5.3214542443504986e-05, "loss": 0.364, "num_tokens": 453532800.0, "step": 716 }, { "epoch": 0.08478183753103938, "grad_norm": 0.1914721131324768, "learning_rate": 5.319393910692494e-05, "loss": 0.4131, "num_tokens": 454167051.0, "step": 717 }, { "epoch": 0.08490008277166844, "grad_norm": 0.20321692526340485, "learning_rate": 5.317330904782927e-05, "loss": 0.4078, "num_tokens": 454801891.0, "step": 718 }, { "epoch": 0.0850183280122975, "grad_norm": 0.21024049818515778, "learning_rate": 5.315265229351762e-05, "loss": 0.3918, "num_tokens": 455434940.0, "step": 719 }, { "epoch": 0.08513657325292658, "grad_norm": 0.18730874359607697, "learning_rate": 5.313196887132494e-05, "loss": 0.4271, "num_tokens": 456070522.0, "step": 720 }, { "epoch": 0.08525481849355564, "grad_norm": 0.19962529838085175, "learning_rate": 5.311125880862147e-05, "loss": 0.4242, "num_tokens": 456700607.0, "step": 721 }, { "epoch": 0.0853730637341847, "grad_norm": 0.20009727776050568, "learning_rate": 5.309052213281273e-05, "loss": 0.3856, "num_tokens": 457333591.0, "step": 722 }, { "epoch": 0.08549130897481376, "grad_norm": 0.19860906898975372, "learning_rate": 5.30697588713394e-05, "loss": 0.4049, "num_tokens": 457972538.0, "step": 723 }, { "epoch": 0.08560955421544283, "grad_norm": 0.1968255639076233, "learning_rate": 5.304896905167741e-05, "loss": 0.3929, "num_tokens": 458608333.0, "step": 724 }, { "epoch": 0.0857277994560719, "grad_norm": 0.2130722552537918, "learning_rate": 5.302815270133777e-05, "loss": 0.3968, "num_tokens": 459238593.0, "step": 725 }, { "epoch": 0.08584604469670096, "grad_norm": 0.19868621230125427, "learning_rate": 5.300730984786665e-05, "loss": 0.3773, "num_tokens": 459848240.0, "step": 726 }, { "epoch": 0.08596428993733002, "grad_norm": 0.22184595465660095, "learning_rate": 5.298644051884526e-05, "loss": 0.4262, "num_tokens": 460483298.0, "step": 727 }, { "epoch": 0.0860825351779591, "grad_norm": 0.2032521814107895, "learning_rate": 5.2965544741889873e-05, "loss": 0.3885, "num_tokens": 461109500.0, "step": 728 }, { "epoch": 0.08620078041858815, "grad_norm": 0.22066350281238556, "learning_rate": 5.294462254465172e-05, "loss": 0.3901, "num_tokens": 461746594.0, "step": 729 }, { "epoch": 0.08631902565921722, "grad_norm": 0.19459012150764465, "learning_rate": 5.292367395481703e-05, "loss": 0.3939, "num_tokens": 462380799.0, "step": 730 }, { "epoch": 0.08643727089984628, "grad_norm": 0.19497635960578918, "learning_rate": 5.290269900010693e-05, "loss": 0.3987, "num_tokens": 463016862.0, "step": 731 }, { "epoch": 0.08655551614047535, "grad_norm": 0.2163160741329193, "learning_rate": 5.288169770827747e-05, "loss": 0.3683, "num_tokens": 463650306.0, "step": 732 }, { "epoch": 0.08667376138110441, "grad_norm": 0.2008008509874344, "learning_rate": 5.2860670107119505e-05, "loss": 0.389, "num_tokens": 464282371.0, "step": 733 }, { "epoch": 0.08679200662173348, "grad_norm": 0.20270873606204987, "learning_rate": 5.283961622445875e-05, "loss": 0.3805, "num_tokens": 464918493.0, "step": 734 }, { "epoch": 0.08691025186236254, "grad_norm": 0.21384552121162415, "learning_rate": 5.2818536088155667e-05, "loss": 0.3871, "num_tokens": 465551682.0, "step": 735 }, { "epoch": 0.0870284971029916, "grad_norm": 0.20108221471309662, "learning_rate": 5.279742972610548e-05, "loss": 0.3855, "num_tokens": 466189560.0, "step": 736 }, { "epoch": 0.08714674234362067, "grad_norm": 0.21781690418720245, "learning_rate": 5.27762971662381e-05, "loss": 0.3879, "num_tokens": 466826010.0, "step": 737 }, { "epoch": 0.08726498758424973, "grad_norm": 0.1880456656217575, "learning_rate": 5.2755138436518114e-05, "loss": 0.3571, "num_tokens": 467461700.0, "step": 738 }, { "epoch": 0.0873832328248788, "grad_norm": 0.1964695155620575, "learning_rate": 5.273395356494476e-05, "loss": 0.345, "num_tokens": 468097256.0, "step": 739 }, { "epoch": 0.08750147806550786, "grad_norm": 0.22812491655349731, "learning_rate": 5.271274257955181e-05, "loss": 0.4191, "num_tokens": 468731535.0, "step": 740 }, { "epoch": 0.08761972330613693, "grad_norm": 0.2017606943845749, "learning_rate": 5.269150550840766e-05, "loss": 0.3663, "num_tokens": 469363357.0, "step": 741 }, { "epoch": 0.087737968546766, "grad_norm": 0.19828036427497864, "learning_rate": 5.267024237961519e-05, "loss": 0.3901, "num_tokens": 469996228.0, "step": 742 }, { "epoch": 0.08785621378739505, "grad_norm": 0.19734492897987366, "learning_rate": 5.264895322131176e-05, "loss": 0.4177, "num_tokens": 470628575.0, "step": 743 }, { "epoch": 0.08797445902802412, "grad_norm": 0.19054393470287323, "learning_rate": 5.2627638061669204e-05, "loss": 0.3992, "num_tokens": 471265485.0, "step": 744 }, { "epoch": 0.08809270426865319, "grad_norm": 0.18877829611301422, "learning_rate": 5.260629692889371e-05, "loss": 0.3885, "num_tokens": 471898145.0, "step": 745 }, { "epoch": 0.08821094950928225, "grad_norm": 0.19310103356838226, "learning_rate": 5.258492985122588e-05, "loss": 0.3863, "num_tokens": 472533209.0, "step": 746 }, { "epoch": 0.08832919474991131, "grad_norm": 0.19495335221290588, "learning_rate": 5.2563536856940655e-05, "loss": 0.3962, "num_tokens": 473171790.0, "step": 747 }, { "epoch": 0.08844743999054037, "grad_norm": 0.1987488865852356, "learning_rate": 5.254211797434724e-05, "loss": 0.3608, "num_tokens": 473806600.0, "step": 748 }, { "epoch": 0.08856568523116945, "grad_norm": 0.20719455182552338, "learning_rate": 5.2520673231789095e-05, "loss": 0.3959, "num_tokens": 474446221.0, "step": 749 }, { "epoch": 0.08868393047179851, "grad_norm": 0.19631238281726837, "learning_rate": 5.2499202657643935e-05, "loss": 0.3641, "num_tokens": 475080946.0, "step": 750 }, { "epoch": 0.08880217571242757, "grad_norm": 0.17641642689704895, "learning_rate": 5.247770628032363e-05, "loss": 0.3358, "num_tokens": 475715589.0, "step": 751 }, { "epoch": 0.08892042095305663, "grad_norm": 0.21652136743068695, "learning_rate": 5.245618412827422e-05, "loss": 0.3982, "num_tokens": 476350099.0, "step": 752 }, { "epoch": 0.08903866619368571, "grad_norm": 0.20369255542755127, "learning_rate": 5.243463622997583e-05, "loss": 0.4119, "num_tokens": 476986608.0, "step": 753 }, { "epoch": 0.08915691143431477, "grad_norm": 0.18806983530521393, "learning_rate": 5.241306261394264e-05, "loss": 0.3947, "num_tokens": 477626286.0, "step": 754 }, { "epoch": 0.08927515667494383, "grad_norm": 0.20750251412391663, "learning_rate": 5.23914633087229e-05, "loss": 0.3962, "num_tokens": 478262327.0, "step": 755 }, { "epoch": 0.08939340191557289, "grad_norm": 0.20222771167755127, "learning_rate": 5.2369838342898844e-05, "loss": 0.3714, "num_tokens": 478899878.0, "step": 756 }, { "epoch": 0.08951164715620197, "grad_norm": 0.20789285004138947, "learning_rate": 5.2348187745086656e-05, "loss": 0.3862, "num_tokens": 479538362.0, "step": 757 }, { "epoch": 0.08962989239683103, "grad_norm": 0.19300925731658936, "learning_rate": 5.232651154393643e-05, "loss": 0.374, "num_tokens": 480177217.0, "step": 758 }, { "epoch": 0.08974813763746009, "grad_norm": 0.22329019010066986, "learning_rate": 5.2304809768132146e-05, "loss": 0.3966, "num_tokens": 480809891.0, "step": 759 }, { "epoch": 0.08986638287808915, "grad_norm": 0.20343434810638428, "learning_rate": 5.2283082446391654e-05, "loss": 0.3878, "num_tokens": 481446104.0, "step": 760 }, { "epoch": 0.08998462811871823, "grad_norm": 0.1930297464132309, "learning_rate": 5.226132960746656e-05, "loss": 0.3688, "num_tokens": 482084578.0, "step": 761 }, { "epoch": 0.09010287335934729, "grad_norm": 0.203902468085289, "learning_rate": 5.223955128014227e-05, "loss": 0.3799, "num_tokens": 482713767.0, "step": 762 }, { "epoch": 0.09022111859997635, "grad_norm": 0.23576878011226654, "learning_rate": 5.221774749323792e-05, "loss": 0.448, "num_tokens": 483353280.0, "step": 763 }, { "epoch": 0.09033936384060541, "grad_norm": 0.17504672706127167, "learning_rate": 5.219591827560631e-05, "loss": 0.3756, "num_tokens": 483985107.0, "step": 764 }, { "epoch": 0.09045760908123449, "grad_norm": 0.2097293883562088, "learning_rate": 5.2174063656133925e-05, "loss": 0.398, "num_tokens": 484615672.0, "step": 765 }, { "epoch": 0.09057585432186355, "grad_norm": 0.18698710203170776, "learning_rate": 5.215218366374085e-05, "loss": 0.3941, "num_tokens": 485253394.0, "step": 766 }, { "epoch": 0.09069409956249261, "grad_norm": 0.19252699613571167, "learning_rate": 5.213027832738073e-05, "loss": 0.3997, "num_tokens": 485886884.0, "step": 767 }, { "epoch": 0.09081234480312167, "grad_norm": 0.19506065547466278, "learning_rate": 5.2108347676040765e-05, "loss": 0.3886, "num_tokens": 486516495.0, "step": 768 }, { "epoch": 0.09093059004375074, "grad_norm": 0.2292434573173523, "learning_rate": 5.2086391738741674e-05, "loss": 0.4368, "num_tokens": 487151670.0, "step": 769 }, { "epoch": 0.0910488352843798, "grad_norm": 0.23250269889831543, "learning_rate": 5.206441054453758e-05, "loss": 0.3943, "num_tokens": 487786678.0, "step": 770 }, { "epoch": 0.09116708052500887, "grad_norm": 0.2288888543844223, "learning_rate": 5.204240412251609e-05, "loss": 0.4106, "num_tokens": 488423430.0, "step": 771 }, { "epoch": 0.09128532576563793, "grad_norm": 0.20469781756401062, "learning_rate": 5.2020372501798145e-05, "loss": 0.3816, "num_tokens": 489055057.0, "step": 772 }, { "epoch": 0.091403571006267, "grad_norm": 0.18626539409160614, "learning_rate": 5.199831571153806e-05, "loss": 0.3727, "num_tokens": 489690318.0, "step": 773 }, { "epoch": 0.09152181624689606, "grad_norm": 0.2176947295665741, "learning_rate": 5.197623378092347e-05, "loss": 0.4235, "num_tokens": 490315503.0, "step": 774 }, { "epoch": 0.09164006148752513, "grad_norm": 0.19569160044193268, "learning_rate": 5.1954126739175244e-05, "loss": 0.345, "num_tokens": 490951658.0, "step": 775 }, { "epoch": 0.09175830672815419, "grad_norm": 0.1919584423303604, "learning_rate": 5.193199461554748e-05, "loss": 0.3496, "num_tokens": 491582936.0, "step": 776 }, { "epoch": 0.09187655196878326, "grad_norm": 0.21266311407089233, "learning_rate": 5.1909837439327506e-05, "loss": 0.4171, "num_tokens": 492221808.0, "step": 777 }, { "epoch": 0.09199479720941232, "grad_norm": 0.21234160661697388, "learning_rate": 5.188765523983575e-05, "loss": 0.3959, "num_tokens": 492851928.0, "step": 778 }, { "epoch": 0.09211304245004139, "grad_norm": 0.19227167963981628, "learning_rate": 5.186544804642582e-05, "loss": 0.3497, "num_tokens": 493484521.0, "step": 779 }, { "epoch": 0.09223128769067045, "grad_norm": 0.22047032415866852, "learning_rate": 5.1843215888484313e-05, "loss": 0.4269, "num_tokens": 494120278.0, "step": 780 }, { "epoch": 0.09234953293129952, "grad_norm": 0.21378175914287567, "learning_rate": 5.182095879543093e-05, "loss": 0.421, "num_tokens": 494748375.0, "step": 781 }, { "epoch": 0.09246777817192858, "grad_norm": 0.24084855616092682, "learning_rate": 5.179867679671836e-05, "loss": 0.4098, "num_tokens": 495383913.0, "step": 782 }, { "epoch": 0.09258602341255764, "grad_norm": 0.20419980585575104, "learning_rate": 5.1776369921832214e-05, "loss": 0.3771, "num_tokens": 496008813.0, "step": 783 }, { "epoch": 0.0927042686531867, "grad_norm": 0.20603390038013458, "learning_rate": 5.1754038200291034e-05, "loss": 0.4053, "num_tokens": 496639648.0, "step": 784 }, { "epoch": 0.09282251389381578, "grad_norm": 0.19610314071178436, "learning_rate": 5.173168166164628e-05, "loss": 0.3518, "num_tokens": 497274171.0, "step": 785 }, { "epoch": 0.09294075913444484, "grad_norm": 0.19744454324245453, "learning_rate": 5.17093003354822e-05, "loss": 0.3817, "num_tokens": 497913138.0, "step": 786 }, { "epoch": 0.0930590043750739, "grad_norm": 0.2016357183456421, "learning_rate": 5.168689425141588e-05, "loss": 0.418, "num_tokens": 498551639.0, "step": 787 }, { "epoch": 0.09317724961570296, "grad_norm": 0.21235699951648712, "learning_rate": 5.166446343909713e-05, "loss": 0.4086, "num_tokens": 499182979.0, "step": 788 }, { "epoch": 0.09329549485633203, "grad_norm": 0.19116926193237305, "learning_rate": 5.164200792820852e-05, "loss": 0.3849, "num_tokens": 499817574.0, "step": 789 }, { "epoch": 0.0934137400969611, "grad_norm": 0.21546709537506104, "learning_rate": 5.1619527748465305e-05, "loss": 0.4332, "num_tokens": 500450987.0, "step": 790 }, { "epoch": 0.09353198533759016, "grad_norm": 0.2056332528591156, "learning_rate": 5.1597022929615354e-05, "loss": 0.4207, "num_tokens": 501086255.0, "step": 791 }, { "epoch": 0.09365023057821922, "grad_norm": 0.20669469237327576, "learning_rate": 5.1574493501439154e-05, "loss": 0.386, "num_tokens": 501724060.0, "step": 792 }, { "epoch": 0.09376847581884828, "grad_norm": 0.2072022706270218, "learning_rate": 5.155193949374976e-05, "loss": 0.3802, "num_tokens": 502336426.0, "step": 793 }, { "epoch": 0.09388672105947736, "grad_norm": 0.227503702044487, "learning_rate": 5.152936093639275e-05, "loss": 0.4131, "num_tokens": 502970977.0, "step": 794 }, { "epoch": 0.09400496630010642, "grad_norm": 0.18835189938545227, "learning_rate": 5.1506757859246195e-05, "loss": 0.3425, "num_tokens": 503604914.0, "step": 795 }, { "epoch": 0.09412321154073548, "grad_norm": 0.21025459468364716, "learning_rate": 5.148413029222061e-05, "loss": 0.3768, "num_tokens": 504241729.0, "step": 796 }, { "epoch": 0.09424145678136454, "grad_norm": 0.19752518832683563, "learning_rate": 5.146147826525892e-05, "loss": 0.3779, "num_tokens": 504874311.0, "step": 797 }, { "epoch": 0.09435970202199362, "grad_norm": 0.2358514666557312, "learning_rate": 5.14388018083364e-05, "loss": 0.3992, "num_tokens": 505507864.0, "step": 798 }, { "epoch": 0.09447794726262268, "grad_norm": 0.21056601405143738, "learning_rate": 5.141610095146066e-05, "loss": 0.4028, "num_tokens": 506141369.0, "step": 799 }, { "epoch": 0.09459619250325174, "grad_norm": 0.19571331143379211, "learning_rate": 5.139337572467163e-05, "loss": 0.3791, "num_tokens": 506777001.0, "step": 800 }, { "epoch": 0.0947144377438808, "grad_norm": 0.1915920525789261, "learning_rate": 5.137062615804145e-05, "loss": 0.3397, "num_tokens": 507406711.0, "step": 801 }, { "epoch": 0.09483268298450988, "grad_norm": 0.195199653506279, "learning_rate": 5.134785228167447e-05, "loss": 0.3947, "num_tokens": 508042267.0, "step": 802 }, { "epoch": 0.09495092822513894, "grad_norm": 0.20021317899227142, "learning_rate": 5.132505412570724e-05, "loss": 0.3644, "num_tokens": 508675609.0, "step": 803 }, { "epoch": 0.095069173465768, "grad_norm": 0.20839209854602814, "learning_rate": 5.1302231720308405e-05, "loss": 0.3671, "num_tokens": 509312065.0, "step": 804 }, { "epoch": 0.09518741870639706, "grad_norm": 0.20513001084327698, "learning_rate": 5.127938509567872e-05, "loss": 0.3783, "num_tokens": 509946717.0, "step": 805 }, { "epoch": 0.09530566394702614, "grad_norm": 0.19213712215423584, "learning_rate": 5.1256514282051e-05, "loss": 0.3806, "num_tokens": 510583518.0, "step": 806 }, { "epoch": 0.0954239091876552, "grad_norm": 0.2166701853275299, "learning_rate": 5.123361930969002e-05, "loss": 0.4097, "num_tokens": 511221107.0, "step": 807 }, { "epoch": 0.09554215442828426, "grad_norm": 0.19391781091690063, "learning_rate": 5.121070020889258e-05, "loss": 0.396, "num_tokens": 511860697.0, "step": 808 }, { "epoch": 0.09566039966891332, "grad_norm": 0.20301750302314758, "learning_rate": 5.1187757009987366e-05, "loss": 0.384, "num_tokens": 512498128.0, "step": 809 }, { "epoch": 0.0957786449095424, "grad_norm": 0.21539835631847382, "learning_rate": 5.1164789743335005e-05, "loss": 0.4199, "num_tokens": 513128610.0, "step": 810 }, { "epoch": 0.09589689015017146, "grad_norm": 0.18517307937145233, "learning_rate": 5.1141798439327895e-05, "loss": 0.3475, "num_tokens": 513765105.0, "step": 811 }, { "epoch": 0.09601513539080052, "grad_norm": 0.21439459919929504, "learning_rate": 5.111878312839033e-05, "loss": 0.4081, "num_tokens": 514394034.0, "step": 812 }, { "epoch": 0.09613338063142958, "grad_norm": 0.2012118101119995, "learning_rate": 5.10957438409783e-05, "loss": 0.3858, "num_tokens": 515033384.0, "step": 813 }, { "epoch": 0.09625162587205865, "grad_norm": 0.19617058336734772, "learning_rate": 5.107268060757957e-05, "loss": 0.3692, "num_tokens": 515667789.0, "step": 814 }, { "epoch": 0.09636987111268772, "grad_norm": 0.19214196503162384, "learning_rate": 5.1049593458713567e-05, "loss": 0.373, "num_tokens": 516301878.0, "step": 815 }, { "epoch": 0.09648811635331678, "grad_norm": 0.19815993309020996, "learning_rate": 5.1026482424931365e-05, "loss": 0.3633, "num_tokens": 516936923.0, "step": 816 }, { "epoch": 0.09660636159394584, "grad_norm": 0.2114209234714508, "learning_rate": 5.100334753681567e-05, "loss": 0.4106, "num_tokens": 517574893.0, "step": 817 }, { "epoch": 0.09672460683457491, "grad_norm": 0.18188177049160004, "learning_rate": 5.098018882498073e-05, "loss": 0.3795, "num_tokens": 518213624.0, "step": 818 }, { "epoch": 0.09684285207520398, "grad_norm": 0.20022772252559662, "learning_rate": 5.0957006320072334e-05, "loss": 0.3639, "num_tokens": 518852531.0, "step": 819 }, { "epoch": 0.09696109731583304, "grad_norm": 0.21430368721485138, "learning_rate": 5.093380005276774e-05, "loss": 0.4, "num_tokens": 519492204.0, "step": 820 }, { "epoch": 0.0970793425564621, "grad_norm": 0.19938379526138306, "learning_rate": 5.091057005377565e-05, "loss": 0.407, "num_tokens": 520130296.0, "step": 821 }, { "epoch": 0.09719758779709117, "grad_norm": 0.1988488733768463, "learning_rate": 5.088731635383619e-05, "loss": 0.3725, "num_tokens": 520765986.0, "step": 822 }, { "epoch": 0.09731583303772023, "grad_norm": 0.21430915594100952, "learning_rate": 5.0864038983720846e-05, "loss": 0.433, "num_tokens": 521400644.0, "step": 823 }, { "epoch": 0.0974340782783493, "grad_norm": 0.21381375193595886, "learning_rate": 5.0840737974232404e-05, "loss": 0.3746, "num_tokens": 522039463.0, "step": 824 }, { "epoch": 0.09755232351897836, "grad_norm": 0.2035173922777176, "learning_rate": 5.081741335620495e-05, "loss": 0.3772, "num_tokens": 522671508.0, "step": 825 }, { "epoch": 0.09767056875960743, "grad_norm": 0.1806216984987259, "learning_rate": 5.079406516050381e-05, "loss": 0.3692, "num_tokens": 523311225.0, "step": 826 }, { "epoch": 0.0977888140002365, "grad_norm": 0.1778479665517807, "learning_rate": 5.077069341802552e-05, "loss": 0.3509, "num_tokens": 523949133.0, "step": 827 }, { "epoch": 0.09790705924086555, "grad_norm": 0.18165086209774017, "learning_rate": 5.0747298159697744e-05, "loss": 0.3893, "num_tokens": 524577955.0, "step": 828 }, { "epoch": 0.09802530448149462, "grad_norm": 0.186898335814476, "learning_rate": 5.072387941647929e-05, "loss": 0.3885, "num_tokens": 525211514.0, "step": 829 }, { "epoch": 0.09814354972212369, "grad_norm": 0.1848706752061844, "learning_rate": 5.070043721936005e-05, "loss": 0.4114, "num_tokens": 525848086.0, "step": 830 }, { "epoch": 0.09826179496275275, "grad_norm": 0.1990211457014084, "learning_rate": 5.067697159936092e-05, "loss": 0.3728, "num_tokens": 526481722.0, "step": 831 }, { "epoch": 0.09838004020338181, "grad_norm": 0.20220965147018433, "learning_rate": 5.065348258753382e-05, "loss": 0.3912, "num_tokens": 527117956.0, "step": 832 }, { "epoch": 0.09849828544401087, "grad_norm": 0.20298202335834503, "learning_rate": 5.0629970214961606e-05, "loss": 0.4137, "num_tokens": 527750234.0, "step": 833 }, { "epoch": 0.09861653068463995, "grad_norm": 0.20618374645709991, "learning_rate": 5.060643451275808e-05, "loss": 0.3854, "num_tokens": 528386561.0, "step": 834 }, { "epoch": 0.09873477592526901, "grad_norm": 0.20952282845973969, "learning_rate": 5.0582875512067864e-05, "loss": 0.4212, "num_tokens": 529021750.0, "step": 835 }, { "epoch": 0.09885302116589807, "grad_norm": 0.19578179717063904, "learning_rate": 5.0559293244066455e-05, "loss": 0.3625, "num_tokens": 529661276.0, "step": 836 }, { "epoch": 0.09897126640652713, "grad_norm": 0.20233289897441864, "learning_rate": 5.0535687739960125e-05, "loss": 0.4032, "num_tokens": 530297609.0, "step": 837 }, { "epoch": 0.09908951164715621, "grad_norm": 0.18912136554718018, "learning_rate": 5.051205903098589e-05, "loss": 0.3768, "num_tokens": 530933315.0, "step": 838 }, { "epoch": 0.09920775688778527, "grad_norm": 0.19747322797775269, "learning_rate": 5.048840714841147e-05, "loss": 0.3904, "num_tokens": 531564788.0, "step": 839 }, { "epoch": 0.09932600212841433, "grad_norm": 0.2101273536682129, "learning_rate": 5.046473212353525e-05, "loss": 0.3947, "num_tokens": 532193430.0, "step": 840 }, { "epoch": 0.09944424736904339, "grad_norm": 0.21572358906269073, "learning_rate": 5.044103398768626e-05, "loss": 0.4025, "num_tokens": 532828601.0, "step": 841 }, { "epoch": 0.09956249260967245, "grad_norm": 0.2013438493013382, "learning_rate": 5.04173127722241e-05, "loss": 0.359, "num_tokens": 533466099.0, "step": 842 }, { "epoch": 0.09968073785030153, "grad_norm": 0.21333152055740356, "learning_rate": 5.03935685085389e-05, "loss": 0.3972, "num_tokens": 534102611.0, "step": 843 }, { "epoch": 0.09979898309093059, "grad_norm": 0.1875704973936081, "learning_rate": 5.036980122805129e-05, "loss": 0.3831, "num_tokens": 534731488.0, "step": 844 }, { "epoch": 0.09991722833155965, "grad_norm": 0.2032729685306549, "learning_rate": 5.0346010962212375e-05, "loss": 0.3874, "num_tokens": 535370116.0, "step": 845 }, { "epoch": 0.10003547357218871, "grad_norm": 0.18617123365402222, "learning_rate": 5.032219774250367e-05, "loss": 0.3922, "num_tokens": 536006561.0, "step": 846 }, { "epoch": 0.10015371881281779, "grad_norm": 0.2104693055152893, "learning_rate": 5.029836160043707e-05, "loss": 0.406, "num_tokens": 536639992.0, "step": 847 }, { "epoch": 0.10027196405344685, "grad_norm": 0.18108710646629333, "learning_rate": 5.027450256755477e-05, "loss": 0.3731, "num_tokens": 537253609.0, "step": 848 }, { "epoch": 0.10039020929407591, "grad_norm": 0.1897713840007782, "learning_rate": 5.025062067542931e-05, "loss": 0.3827, "num_tokens": 537890122.0, "step": 849 }, { "epoch": 0.10050845453470497, "grad_norm": 0.20141065120697021, "learning_rate": 5.022671595566343e-05, "loss": 0.4289, "num_tokens": 538526166.0, "step": 850 }, { "epoch": 0.10062669977533405, "grad_norm": 0.19830313324928284, "learning_rate": 5.0202788439890116e-05, "loss": 0.4037, "num_tokens": 539164864.0, "step": 851 }, { "epoch": 0.10074494501596311, "grad_norm": 0.18737861514091492, "learning_rate": 5.017883815977249e-05, "loss": 0.3712, "num_tokens": 539802408.0, "step": 852 }, { "epoch": 0.10086319025659217, "grad_norm": 0.18904809653759003, "learning_rate": 5.0154865147003816e-05, "loss": 0.3951, "num_tokens": 540432320.0, "step": 853 }, { "epoch": 0.10098143549722123, "grad_norm": 0.20891410112380981, "learning_rate": 5.013086943330743e-05, "loss": 0.3993, "num_tokens": 541067861.0, "step": 854 }, { "epoch": 0.1010996807378503, "grad_norm": 0.21142107248306274, "learning_rate": 5.010685105043673e-05, "loss": 0.4105, "num_tokens": 541704638.0, "step": 855 }, { "epoch": 0.10121792597847937, "grad_norm": 0.18683163821697235, "learning_rate": 5.008281003017507e-05, "loss": 0.3868, "num_tokens": 542339733.0, "step": 856 }, { "epoch": 0.10133617121910843, "grad_norm": 0.19870206713676453, "learning_rate": 5.0058746404335794e-05, "loss": 0.3736, "num_tokens": 542975991.0, "step": 857 }, { "epoch": 0.10145441645973749, "grad_norm": 0.17086005210876465, "learning_rate": 5.003466020476215e-05, "loss": 0.3705, "num_tokens": 543610157.0, "step": 858 }, { "epoch": 0.10157266170036656, "grad_norm": 0.20395129919052124, "learning_rate": 5.001055146332727e-05, "loss": 0.3879, "num_tokens": 544242202.0, "step": 859 }, { "epoch": 0.10169090694099563, "grad_norm": 0.19586624205112457, "learning_rate": 4.998642021193408e-05, "loss": 0.4023, "num_tokens": 544876956.0, "step": 860 }, { "epoch": 0.10180915218162469, "grad_norm": 0.20469500124454498, "learning_rate": 4.996226648251534e-05, "loss": 0.3793, "num_tokens": 545512522.0, "step": 861 }, { "epoch": 0.10192739742225375, "grad_norm": 0.21595309674739838, "learning_rate": 4.9938090307033514e-05, "loss": 0.4136, "num_tokens": 546149492.0, "step": 862 }, { "epoch": 0.10204564266288282, "grad_norm": 0.18492047488689423, "learning_rate": 4.9913891717480795e-05, "loss": 0.3663, "num_tokens": 546785991.0, "step": 863 }, { "epoch": 0.10216388790351189, "grad_norm": 0.18823403120040894, "learning_rate": 4.9889670745879035e-05, "loss": 0.4047, "num_tokens": 547421213.0, "step": 864 }, { "epoch": 0.10228213314414095, "grad_norm": 0.2192760556936264, "learning_rate": 4.986542742427968e-05, "loss": 0.4077, "num_tokens": 548057540.0, "step": 865 }, { "epoch": 0.10240037838477001, "grad_norm": 0.21480301022529602, "learning_rate": 4.984116178476379e-05, "loss": 0.4214, "num_tokens": 548694755.0, "step": 866 }, { "epoch": 0.10251862362539908, "grad_norm": 0.22339558601379395, "learning_rate": 4.9816873859441906e-05, "loss": 0.4529, "num_tokens": 549330967.0, "step": 867 }, { "epoch": 0.10263686886602814, "grad_norm": 0.1828283965587616, "learning_rate": 4.979256368045413e-05, "loss": 0.3812, "num_tokens": 549965342.0, "step": 868 }, { "epoch": 0.1027551141066572, "grad_norm": 0.1752055287361145, "learning_rate": 4.9768231279969936e-05, "loss": 0.3692, "num_tokens": 550595153.0, "step": 869 }, { "epoch": 0.10287335934728627, "grad_norm": 0.20586678385734558, "learning_rate": 4.9743876690188254e-05, "loss": 0.3934, "num_tokens": 551228741.0, "step": 870 }, { "epoch": 0.10299160458791534, "grad_norm": 0.18745943903923035, "learning_rate": 4.9719499943337376e-05, "loss": 0.4051, "num_tokens": 551838042.0, "step": 871 }, { "epoch": 0.1031098498285444, "grad_norm": 0.19367623329162598, "learning_rate": 4.969510107167488e-05, "loss": 0.3829, "num_tokens": 552467738.0, "step": 872 }, { "epoch": 0.10322809506917346, "grad_norm": 0.19626417756080627, "learning_rate": 4.9670680107487665e-05, "loss": 0.3905, "num_tokens": 553103599.0, "step": 873 }, { "epoch": 0.10334634030980253, "grad_norm": 0.1922215074300766, "learning_rate": 4.964623708309182e-05, "loss": 0.383, "num_tokens": 553736538.0, "step": 874 }, { "epoch": 0.1034645855504316, "grad_norm": 0.1745498776435852, "learning_rate": 4.962177203083267e-05, "loss": 0.3716, "num_tokens": 554369713.0, "step": 875 }, { "epoch": 0.10358283079106066, "grad_norm": 0.20037321746349335, "learning_rate": 4.959728498308465e-05, "loss": 0.3799, "num_tokens": 555000107.0, "step": 876 }, { "epoch": 0.10370107603168972, "grad_norm": 0.19089975953102112, "learning_rate": 4.957277597225133e-05, "loss": 0.368, "num_tokens": 555632229.0, "step": 877 }, { "epoch": 0.10381932127231878, "grad_norm": 0.18980231881141663, "learning_rate": 4.954824503076534e-05, "loss": 0.3878, "num_tokens": 556267726.0, "step": 878 }, { "epoch": 0.10393756651294786, "grad_norm": 0.19482596218585968, "learning_rate": 4.952369219108831e-05, "loss": 0.3518, "num_tokens": 556878328.0, "step": 879 }, { "epoch": 0.10405581175357692, "grad_norm": 0.1749802827835083, "learning_rate": 4.9499117485710866e-05, "loss": 0.3745, "num_tokens": 557513764.0, "step": 880 }, { "epoch": 0.10417405699420598, "grad_norm": 0.19543933868408203, "learning_rate": 4.947452094715258e-05, "loss": 0.3719, "num_tokens": 558143004.0, "step": 881 }, { "epoch": 0.10429230223483504, "grad_norm": 0.18101941049098969, "learning_rate": 4.944990260796188e-05, "loss": 0.3707, "num_tokens": 558778105.0, "step": 882 }, { "epoch": 0.10441054747546412, "grad_norm": 0.2059684544801712, "learning_rate": 4.9425262500716065e-05, "loss": 0.3884, "num_tokens": 559416233.0, "step": 883 }, { "epoch": 0.10452879271609318, "grad_norm": 0.17464634776115417, "learning_rate": 4.940060065802125e-05, "loss": 0.3874, "num_tokens": 560051126.0, "step": 884 }, { "epoch": 0.10464703795672224, "grad_norm": 0.19525666534900665, "learning_rate": 4.9375917112512283e-05, "loss": 0.3986, "num_tokens": 560687449.0, "step": 885 }, { "epoch": 0.1047652831973513, "grad_norm": 0.1954362988471985, "learning_rate": 4.9351211896852775e-05, "loss": 0.3728, "num_tokens": 561317929.0, "step": 886 }, { "epoch": 0.10488352843798038, "grad_norm": 0.19660866260528564, "learning_rate": 4.9326485043734956e-05, "loss": 0.3984, "num_tokens": 561956472.0, "step": 887 }, { "epoch": 0.10500177367860944, "grad_norm": 0.17625193297863007, "learning_rate": 4.9301736585879735e-05, "loss": 0.3933, "num_tokens": 562593057.0, "step": 888 }, { "epoch": 0.1051200189192385, "grad_norm": 0.20794154703617096, "learning_rate": 4.92769665560366e-05, "loss": 0.4091, "num_tokens": 563227832.0, "step": 889 }, { "epoch": 0.10523826415986756, "grad_norm": 0.166320338845253, "learning_rate": 4.9252174986983564e-05, "loss": 0.3735, "num_tokens": 563864035.0, "step": 890 }, { "epoch": 0.10535650940049664, "grad_norm": 0.20669224858283997, "learning_rate": 4.922736191152716e-05, "loss": 0.4333, "num_tokens": 564478244.0, "step": 891 }, { "epoch": 0.1054747546411257, "grad_norm": 0.1873048096895218, "learning_rate": 4.920252736250239e-05, "loss": 0.3448, "num_tokens": 565113319.0, "step": 892 }, { "epoch": 0.10559299988175476, "grad_norm": 0.20048575103282928, "learning_rate": 4.917767137277266e-05, "loss": 0.3979, "num_tokens": 565746893.0, "step": 893 }, { "epoch": 0.10571124512238382, "grad_norm": 0.19163161516189575, "learning_rate": 4.915279397522973e-05, "loss": 0.3874, "num_tokens": 566356947.0, "step": 894 }, { "epoch": 0.10582949036301288, "grad_norm": 0.211003378033638, "learning_rate": 4.912789520279373e-05, "loss": 0.4321, "num_tokens": 566994122.0, "step": 895 }, { "epoch": 0.10594773560364196, "grad_norm": 0.23276257514953613, "learning_rate": 4.9102975088413034e-05, "loss": 0.4238, "num_tokens": 567631151.0, "step": 896 }, { "epoch": 0.10606598084427102, "grad_norm": 0.2002890557050705, "learning_rate": 4.9078033665064294e-05, "loss": 0.3892, "num_tokens": 568268077.0, "step": 897 }, { "epoch": 0.10618422608490008, "grad_norm": 0.19507084786891937, "learning_rate": 4.905307096575234e-05, "loss": 0.3987, "num_tokens": 568906566.0, "step": 898 }, { "epoch": 0.10630247132552914, "grad_norm": 0.1791028529405594, "learning_rate": 4.902808702351013e-05, "loss": 0.3891, "num_tokens": 569542062.0, "step": 899 }, { "epoch": 0.10642071656615822, "grad_norm": 0.18552984297275543, "learning_rate": 4.900308187139879e-05, "loss": 0.386, "num_tokens": 570177721.0, "step": 900 }, { "epoch": 0.10653896180678728, "grad_norm": 0.21034976840019226, "learning_rate": 4.897805554250748e-05, "loss": 0.4465, "num_tokens": 570816714.0, "step": 901 }, { "epoch": 0.10665720704741634, "grad_norm": 0.178841233253479, "learning_rate": 4.8953008069953394e-05, "loss": 0.3741, "num_tokens": 571449734.0, "step": 902 }, { "epoch": 0.1067754522880454, "grad_norm": 0.2186090648174286, "learning_rate": 4.8927939486881676e-05, "loss": 0.4057, "num_tokens": 572088174.0, "step": 903 }, { "epoch": 0.10689369752867448, "grad_norm": 0.16908499598503113, "learning_rate": 4.890284982646545e-05, "loss": 0.3723, "num_tokens": 572726629.0, "step": 904 }, { "epoch": 0.10701194276930354, "grad_norm": 0.18440201878547668, "learning_rate": 4.887773912190571e-05, "loss": 0.3705, "num_tokens": 573356851.0, "step": 905 }, { "epoch": 0.1071301880099326, "grad_norm": 0.18719598650932312, "learning_rate": 4.885260740643127e-05, "loss": 0.3722, "num_tokens": 573993246.0, "step": 906 }, { "epoch": 0.10724843325056166, "grad_norm": 0.2074010819196701, "learning_rate": 4.8827454713298805e-05, "loss": 0.4022, "num_tokens": 574632107.0, "step": 907 }, { "epoch": 0.10736667849119073, "grad_norm": 0.17451098561286926, "learning_rate": 4.8802281075792705e-05, "loss": 0.3985, "num_tokens": 575267606.0, "step": 908 }, { "epoch": 0.1074849237318198, "grad_norm": 0.17612576484680176, "learning_rate": 4.877708652722509e-05, "loss": 0.3827, "num_tokens": 575898213.0, "step": 909 }, { "epoch": 0.10760316897244886, "grad_norm": 0.23687198758125305, "learning_rate": 4.875187110093576e-05, "loss": 0.4355, "num_tokens": 576535415.0, "step": 910 }, { "epoch": 0.10772141421307792, "grad_norm": 0.18335507810115814, "learning_rate": 4.8726634830292115e-05, "loss": 0.394, "num_tokens": 577170916.0, "step": 911 }, { "epoch": 0.10783965945370699, "grad_norm": 0.2192007303237915, "learning_rate": 4.8701377748689174e-05, "loss": 0.3917, "num_tokens": 577801174.0, "step": 912 }, { "epoch": 0.10795790469433605, "grad_norm": 0.18701738119125366, "learning_rate": 4.867609988954947e-05, "loss": 0.4236, "num_tokens": 578437720.0, "step": 913 }, { "epoch": 0.10807614993496512, "grad_norm": 0.19873884320259094, "learning_rate": 4.8650801286323057e-05, "loss": 0.3843, "num_tokens": 579073461.0, "step": 914 }, { "epoch": 0.10819439517559418, "grad_norm": 0.17789912223815918, "learning_rate": 4.8625481972487395e-05, "loss": 0.399, "num_tokens": 579712385.0, "step": 915 }, { "epoch": 0.10831264041622325, "grad_norm": 0.20318271219730377, "learning_rate": 4.86001419815474e-05, "loss": 0.3812, "num_tokens": 580346374.0, "step": 916 }, { "epoch": 0.10843088565685231, "grad_norm": 0.20375102758407593, "learning_rate": 4.8574781347035296e-05, "loss": 0.3876, "num_tokens": 580976923.0, "step": 917 }, { "epoch": 0.10854913089748137, "grad_norm": 0.20212046802043915, "learning_rate": 4.854940010251069e-05, "loss": 0.368, "num_tokens": 581611122.0, "step": 918 }, { "epoch": 0.10866737613811044, "grad_norm": 0.19325628876686096, "learning_rate": 4.8523998281560405e-05, "loss": 0.394, "num_tokens": 582241753.0, "step": 919 }, { "epoch": 0.10878562137873951, "grad_norm": 0.2155366986989975, "learning_rate": 4.849857591779851e-05, "loss": 0.4028, "num_tokens": 582880022.0, "step": 920 }, { "epoch": 0.10890386661936857, "grad_norm": 0.19220107793807983, "learning_rate": 4.847313304486629e-05, "loss": 0.3633, "num_tokens": 583516737.0, "step": 921 }, { "epoch": 0.10902211185999763, "grad_norm": 0.2031642198562622, "learning_rate": 4.844766969643212e-05, "loss": 0.3561, "num_tokens": 584155270.0, "step": 922 }, { "epoch": 0.1091403571006267, "grad_norm": 0.20130768418312073, "learning_rate": 4.8422185906191494e-05, "loss": 0.3893, "num_tokens": 584788413.0, "step": 923 }, { "epoch": 0.10925860234125577, "grad_norm": 0.19773003458976746, "learning_rate": 4.839668170786696e-05, "loss": 0.4072, "num_tokens": 585424357.0, "step": 924 }, { "epoch": 0.10937684758188483, "grad_norm": 0.21327199041843414, "learning_rate": 4.8371157135208066e-05, "loss": 0.3916, "num_tokens": 586058909.0, "step": 925 }, { "epoch": 0.10949509282251389, "grad_norm": 0.1890989989042282, "learning_rate": 4.834561222199132e-05, "loss": 0.3901, "num_tokens": 586688711.0, "step": 926 }, { "epoch": 0.10961333806314295, "grad_norm": 0.1838907152414322, "learning_rate": 4.832004700202017e-05, "loss": 0.3264, "num_tokens": 587318129.0, "step": 927 }, { "epoch": 0.10973158330377203, "grad_norm": 0.18171823024749756, "learning_rate": 4.829446150912488e-05, "loss": 0.3649, "num_tokens": 587954063.0, "step": 928 }, { "epoch": 0.10984982854440109, "grad_norm": 0.1974538266658783, "learning_rate": 4.826885577716258e-05, "loss": 0.3995, "num_tokens": 588586978.0, "step": 929 }, { "epoch": 0.10996807378503015, "grad_norm": 0.17991968989372253, "learning_rate": 4.824322984001721e-05, "loss": 0.3809, "num_tokens": 589223333.0, "step": 930 }, { "epoch": 0.11008631902565921, "grad_norm": 0.18628670275211334, "learning_rate": 4.821758373159938e-05, "loss": 0.3556, "num_tokens": 589857753.0, "step": 931 }, { "epoch": 0.11020456426628829, "grad_norm": 0.19802094995975494, "learning_rate": 4.819191748584646e-05, "loss": 0.3719, "num_tokens": 590492668.0, "step": 932 }, { "epoch": 0.11032280950691735, "grad_norm": 0.18508954346179962, "learning_rate": 4.81662311367224e-05, "loss": 0.3815, "num_tokens": 591130240.0, "step": 933 }, { "epoch": 0.11044105474754641, "grad_norm": 0.18970444798469543, "learning_rate": 4.814052471821782e-05, "loss": 0.4276, "num_tokens": 591744210.0, "step": 934 }, { "epoch": 0.11055929998817547, "grad_norm": 0.1893254518508911, "learning_rate": 4.8114798264349845e-05, "loss": 0.3506, "num_tokens": 592379832.0, "step": 935 }, { "epoch": 0.11067754522880455, "grad_norm": 0.19586031138896942, "learning_rate": 4.8089051809162144e-05, "loss": 0.3826, "num_tokens": 593014264.0, "step": 936 }, { "epoch": 0.11079579046943361, "grad_norm": 0.22042131423950195, "learning_rate": 4.8063285386724835e-05, "loss": 0.4103, "num_tokens": 593647737.0, "step": 937 }, { "epoch": 0.11091403571006267, "grad_norm": 0.1967095583677292, "learning_rate": 4.803749903113447e-05, "loss": 0.4202, "num_tokens": 594284342.0, "step": 938 }, { "epoch": 0.11103228095069173, "grad_norm": 0.1965053677558899, "learning_rate": 4.801169277651398e-05, "loss": 0.3929, "num_tokens": 594917472.0, "step": 939 }, { "epoch": 0.1111505261913208, "grad_norm": 0.19952984154224396, "learning_rate": 4.798586665701262e-05, "loss": 0.3875, "num_tokens": 595555577.0, "step": 940 }, { "epoch": 0.11126877143194987, "grad_norm": 0.20156317949295044, "learning_rate": 4.7960020706805934e-05, "loss": 0.4086, "num_tokens": 596194919.0, "step": 941 }, { "epoch": 0.11138701667257893, "grad_norm": 0.20338894426822662, "learning_rate": 4.7934154960095705e-05, "loss": 0.3963, "num_tokens": 596828877.0, "step": 942 }, { "epoch": 0.11150526191320799, "grad_norm": 0.21661074459552765, "learning_rate": 4.7908269451109926e-05, "loss": 0.4371, "num_tokens": 597463751.0, "step": 943 }, { "epoch": 0.11162350715383706, "grad_norm": 0.1983601450920105, "learning_rate": 4.7882364214102725e-05, "loss": 0.3729, "num_tokens": 598101711.0, "step": 944 }, { "epoch": 0.11174175239446613, "grad_norm": 0.19059373438358307, "learning_rate": 4.7856439283354345e-05, "loss": 0.3628, "num_tokens": 598735765.0, "step": 945 }, { "epoch": 0.11185999763509519, "grad_norm": 0.19377626478672028, "learning_rate": 4.7830494693171085e-05, "loss": 0.4069, "num_tokens": 599372011.0, "step": 946 }, { "epoch": 0.11197824287572425, "grad_norm": 0.2112198919057846, "learning_rate": 4.780453047788528e-05, "loss": 0.368, "num_tokens": 600001896.0, "step": 947 }, { "epoch": 0.11209648811635331, "grad_norm": 0.18281449377536774, "learning_rate": 4.7778546671855186e-05, "loss": 0.3621, "num_tokens": 600632801.0, "step": 948 }, { "epoch": 0.11221473335698239, "grad_norm": 0.17474791407585144, "learning_rate": 4.7752543309465044e-05, "loss": 0.3537, "num_tokens": 601266569.0, "step": 949 }, { "epoch": 0.11233297859761145, "grad_norm": 0.20017535984516144, "learning_rate": 4.772652042512491e-05, "loss": 0.3876, "num_tokens": 601904732.0, "step": 950 }, { "epoch": 0.11245122383824051, "grad_norm": 0.16637170314788818, "learning_rate": 4.770047805327074e-05, "loss": 0.3626, "num_tokens": 602535124.0, "step": 951 }, { "epoch": 0.11256946907886957, "grad_norm": 0.19100861251354218, "learning_rate": 4.7674416228364225e-05, "loss": 0.3927, "num_tokens": 603172120.0, "step": 952 }, { "epoch": 0.11268771431949864, "grad_norm": 0.20126068592071533, "learning_rate": 4.764833498489283e-05, "loss": 0.4082, "num_tokens": 603810653.0, "step": 953 }, { "epoch": 0.1128059595601277, "grad_norm": 0.1704559475183487, "learning_rate": 4.76222343573697e-05, "loss": 0.3481, "num_tokens": 604445259.0, "step": 954 }, { "epoch": 0.11292420480075677, "grad_norm": 0.1779528260231018, "learning_rate": 4.759611438033363e-05, "loss": 0.3596, "num_tokens": 605079841.0, "step": 955 }, { "epoch": 0.11304245004138583, "grad_norm": 0.20043320953845978, "learning_rate": 4.756997508834903e-05, "loss": 0.4095, "num_tokens": 605716847.0, "step": 956 }, { "epoch": 0.1131606952820149, "grad_norm": 0.19246995449066162, "learning_rate": 4.7543816516005865e-05, "loss": 0.3956, "num_tokens": 606349039.0, "step": 957 }, { "epoch": 0.11327894052264396, "grad_norm": 0.17408093810081482, "learning_rate": 4.7517638697919605e-05, "loss": 0.3872, "num_tokens": 606979557.0, "step": 958 }, { "epoch": 0.11339718576327303, "grad_norm": 0.18687652051448822, "learning_rate": 4.7491441668731214e-05, "loss": 0.3314, "num_tokens": 607606199.0, "step": 959 }, { "epoch": 0.11351543100390209, "grad_norm": 0.19232109189033508, "learning_rate": 4.746522546310704e-05, "loss": 0.3693, "num_tokens": 608215251.0, "step": 960 }, { "epoch": 0.11363367624453116, "grad_norm": 0.19829553365707397, "learning_rate": 4.7438990115738836e-05, "loss": 0.4049, "num_tokens": 608851164.0, "step": 961 }, { "epoch": 0.11375192148516022, "grad_norm": 0.19942665100097656, "learning_rate": 4.741273566134368e-05, "loss": 0.3855, "num_tokens": 609484264.0, "step": 962 }, { "epoch": 0.11387016672578928, "grad_norm": 0.17975790798664093, "learning_rate": 4.738646213466392e-05, "loss": 0.3708, "num_tokens": 610095969.0, "step": 963 }, { "epoch": 0.11398841196641835, "grad_norm": 0.18488220870494843, "learning_rate": 4.7360169570467165e-05, "loss": 0.3615, "num_tokens": 610726014.0, "step": 964 }, { "epoch": 0.11410665720704742, "grad_norm": 0.16256028413772583, "learning_rate": 4.733385800354619e-05, "loss": 0.3282, "num_tokens": 611354133.0, "step": 965 }, { "epoch": 0.11422490244767648, "grad_norm": 0.18399277329444885, "learning_rate": 4.7307527468718944e-05, "loss": 0.3705, "num_tokens": 611983546.0, "step": 966 }, { "epoch": 0.11434314768830554, "grad_norm": 0.1812833547592163, "learning_rate": 4.728117800082844e-05, "loss": 0.3815, "num_tokens": 612614615.0, "step": 967 }, { "epoch": 0.1144613929289346, "grad_norm": 0.1888638585805893, "learning_rate": 4.72548096347428e-05, "loss": 0.401, "num_tokens": 613251367.0, "step": 968 }, { "epoch": 0.11457963816956368, "grad_norm": 0.18793244659900665, "learning_rate": 4.7228422405355095e-05, "loss": 0.4288, "num_tokens": 613882406.0, "step": 969 }, { "epoch": 0.11469788341019274, "grad_norm": 0.1834799200296402, "learning_rate": 4.7202016347583395e-05, "loss": 0.3824, "num_tokens": 614516798.0, "step": 970 }, { "epoch": 0.1148161286508218, "grad_norm": 0.18810245394706726, "learning_rate": 4.717559149637066e-05, "loss": 0.4009, "num_tokens": 615155542.0, "step": 971 }, { "epoch": 0.11493437389145086, "grad_norm": 0.1651647984981537, "learning_rate": 4.7149147886684756e-05, "loss": 0.3113, "num_tokens": 615790325.0, "step": 972 }, { "epoch": 0.11505261913207994, "grad_norm": 0.17223705351352692, "learning_rate": 4.712268555351834e-05, "loss": 0.3466, "num_tokens": 616420924.0, "step": 973 }, { "epoch": 0.115170864372709, "grad_norm": 0.19105683267116547, "learning_rate": 4.709620453188883e-05, "loss": 0.3543, "num_tokens": 617058249.0, "step": 974 }, { "epoch": 0.11528910961333806, "grad_norm": 0.17421837151050568, "learning_rate": 4.706970485683845e-05, "loss": 0.3644, "num_tokens": 617692601.0, "step": 975 }, { "epoch": 0.11540735485396712, "grad_norm": 0.18170055747032166, "learning_rate": 4.7043186563434016e-05, "loss": 0.3664, "num_tokens": 618329266.0, "step": 976 }, { "epoch": 0.1155256000945962, "grad_norm": 0.19499434530735016, "learning_rate": 4.7016649686767036e-05, "loss": 0.3672, "num_tokens": 618961318.0, "step": 977 }, { "epoch": 0.11564384533522526, "grad_norm": 0.2040436565876007, "learning_rate": 4.69900942619536e-05, "loss": 0.4315, "num_tokens": 619595493.0, "step": 978 }, { "epoch": 0.11576209057585432, "grad_norm": 0.20933543145656586, "learning_rate": 4.696352032413432e-05, "loss": 0.3817, "num_tokens": 620231952.0, "step": 979 }, { "epoch": 0.11588033581648338, "grad_norm": 0.1690838783979416, "learning_rate": 4.693692790847436e-05, "loss": 0.3621, "num_tokens": 620865927.0, "step": 980 }, { "epoch": 0.11599858105711246, "grad_norm": 0.21045120060443878, "learning_rate": 4.691031705016327e-05, "loss": 0.4237, "num_tokens": 621502818.0, "step": 981 }, { "epoch": 0.11611682629774152, "grad_norm": 0.19847846031188965, "learning_rate": 4.688368778441504e-05, "loss": 0.3925, "num_tokens": 622139430.0, "step": 982 }, { "epoch": 0.11623507153837058, "grad_norm": 0.1916196495294571, "learning_rate": 4.685704014646803e-05, "loss": 0.3488, "num_tokens": 622775467.0, "step": 983 }, { "epoch": 0.11635331677899964, "grad_norm": 0.18403828144073486, "learning_rate": 4.683037417158486e-05, "loss": 0.3859, "num_tokens": 623412555.0, "step": 984 }, { "epoch": 0.11647156201962872, "grad_norm": 0.20068930089473724, "learning_rate": 4.6803689895052496e-05, "loss": 0.4022, "num_tokens": 624051117.0, "step": 985 }, { "epoch": 0.11658980726025778, "grad_norm": 0.19401557743549347, "learning_rate": 4.677698735218205e-05, "loss": 0.363, "num_tokens": 624689341.0, "step": 986 }, { "epoch": 0.11670805250088684, "grad_norm": 0.21485280990600586, "learning_rate": 4.6750266578308843e-05, "loss": 0.368, "num_tokens": 625321056.0, "step": 987 }, { "epoch": 0.1168262977415159, "grad_norm": 0.1814233511686325, "learning_rate": 4.6723527608792315e-05, "loss": 0.3721, "num_tokens": 625951744.0, "step": 988 }, { "epoch": 0.11694454298214498, "grad_norm": 0.19866612553596497, "learning_rate": 4.669677047901596e-05, "loss": 0.3925, "num_tokens": 626587647.0, "step": 989 }, { "epoch": 0.11706278822277404, "grad_norm": 0.19822439551353455, "learning_rate": 4.666999522438734e-05, "loss": 0.3769, "num_tokens": 627226191.0, "step": 990 }, { "epoch": 0.1171810334634031, "grad_norm": 0.19480082392692566, "learning_rate": 4.664320188033798e-05, "loss": 0.4012, "num_tokens": 627854887.0, "step": 991 }, { "epoch": 0.11729927870403216, "grad_norm": 0.17783285677433014, "learning_rate": 4.6616390482323344e-05, "loss": 0.3813, "num_tokens": 628470814.0, "step": 992 }, { "epoch": 0.11741752394466123, "grad_norm": 0.20395652949810028, "learning_rate": 4.65895610658228e-05, "loss": 0.3888, "num_tokens": 629095432.0, "step": 993 }, { "epoch": 0.1175357691852903, "grad_norm": 0.18363986909389496, "learning_rate": 4.6562713666339545e-05, "loss": 0.4066, "num_tokens": 629735034.0, "step": 994 }, { "epoch": 0.11765401442591936, "grad_norm": 0.18329794704914093, "learning_rate": 4.653584831940058e-05, "loss": 0.3867, "num_tokens": 630370989.0, "step": 995 }, { "epoch": 0.11777225966654842, "grad_norm": 0.17211481928825378, "learning_rate": 4.650896506055665e-05, "loss": 0.3991, "num_tokens": 631007330.0, "step": 996 }, { "epoch": 0.11789050490717749, "grad_norm": 0.19009293615818024, "learning_rate": 4.648206392538221e-05, "loss": 0.387, "num_tokens": 631644616.0, "step": 997 }, { "epoch": 0.11800875014780655, "grad_norm": 0.18162861466407776, "learning_rate": 4.6455144949475364e-05, "loss": 0.3846, "num_tokens": 632278554.0, "step": 998 }, { "epoch": 0.11812699538843562, "grad_norm": 0.16204237937927246, "learning_rate": 4.642820816845783e-05, "loss": 0.3872, "num_tokens": 632912104.0, "step": 999 }, { "epoch": 0.11824524062906468, "grad_norm": 0.17694039642810822, "learning_rate": 4.640125361797488e-05, "loss": 0.4142, "num_tokens": 633548167.0, "step": 1000 }, { "epoch": 0.11836348586969374, "grad_norm": 0.18365511298179626, "learning_rate": 4.6374281333695306e-05, "loss": 0.3618, "num_tokens": 634181746.0, "step": 1001 }, { "epoch": 0.11848173111032281, "grad_norm": 0.1922062337398529, "learning_rate": 4.634729135131137e-05, "loss": 0.4147, "num_tokens": 634816694.0, "step": 1002 }, { "epoch": 0.11859997635095187, "grad_norm": 0.1679506003856659, "learning_rate": 4.6320283706538755e-05, "loss": 0.3828, "num_tokens": 635455070.0, "step": 1003 }, { "epoch": 0.11871822159158094, "grad_norm": 0.18912957608699799, "learning_rate": 4.6293258435116506e-05, "loss": 0.4158, "num_tokens": 636090133.0, "step": 1004 }, { "epoch": 0.11883646683221, "grad_norm": 0.1797725409269333, "learning_rate": 4.6266215572806995e-05, "loss": 0.4062, "num_tokens": 636718996.0, "step": 1005 }, { "epoch": 0.11895471207283907, "grad_norm": 0.18465858697891235, "learning_rate": 4.623915515539587e-05, "loss": 0.4067, "num_tokens": 637355713.0, "step": 1006 }, { "epoch": 0.11907295731346813, "grad_norm": 0.1974141150712967, "learning_rate": 4.6212077218692036e-05, "loss": 0.3873, "num_tokens": 637990698.0, "step": 1007 }, { "epoch": 0.1191912025540972, "grad_norm": 0.18730655312538147, "learning_rate": 4.618498179852754e-05, "loss": 0.3847, "num_tokens": 638622647.0, "step": 1008 }, { "epoch": 0.11930944779472626, "grad_norm": 0.2009619027376175, "learning_rate": 4.61578689307576e-05, "loss": 0.4005, "num_tokens": 639251231.0, "step": 1009 }, { "epoch": 0.11942769303535533, "grad_norm": 0.18502581119537354, "learning_rate": 4.61307386512605e-05, "loss": 0.386, "num_tokens": 639886902.0, "step": 1010 }, { "epoch": 0.11954593827598439, "grad_norm": 0.18856503069400787, "learning_rate": 4.610359099593757e-05, "loss": 0.3709, "num_tokens": 640524463.0, "step": 1011 }, { "epoch": 0.11966418351661345, "grad_norm": 0.1779439002275467, "learning_rate": 4.607642600071314e-05, "loss": 0.4024, "num_tokens": 641163335.0, "step": 1012 }, { "epoch": 0.11978242875724252, "grad_norm": 0.19025777280330658, "learning_rate": 4.6049243701534476e-05, "loss": 0.3973, "num_tokens": 641802244.0, "step": 1013 }, { "epoch": 0.11990067399787159, "grad_norm": 0.19890962541103363, "learning_rate": 4.6022044134371756e-05, "loss": 0.4081, "num_tokens": 642437930.0, "step": 1014 }, { "epoch": 0.12001891923850065, "grad_norm": 0.18591611087322235, "learning_rate": 4.5994827335217994e-05, "loss": 0.3868, "num_tokens": 643076089.0, "step": 1015 }, { "epoch": 0.12013716447912971, "grad_norm": 0.17976440489292145, "learning_rate": 4.596759334008902e-05, "loss": 0.4177, "num_tokens": 643711689.0, "step": 1016 }, { "epoch": 0.12025540971975877, "grad_norm": 0.20330271124839783, "learning_rate": 4.59403421850234e-05, "loss": 0.3857, "num_tokens": 644349772.0, "step": 1017 }, { "epoch": 0.12037365496038785, "grad_norm": 0.18609577417373657, "learning_rate": 4.5913073906082425e-05, "loss": 0.3813, "num_tokens": 644988741.0, "step": 1018 }, { "epoch": 0.12049190020101691, "grad_norm": 0.18191839754581451, "learning_rate": 4.588578853935005e-05, "loss": 0.3577, "num_tokens": 645614498.0, "step": 1019 }, { "epoch": 0.12061014544164597, "grad_norm": 0.1779017597436905, "learning_rate": 4.585848612093283e-05, "loss": 0.3244, "num_tokens": 646252937.0, "step": 1020 }, { "epoch": 0.12072839068227503, "grad_norm": 0.19630023837089539, "learning_rate": 4.583116668695988e-05, "loss": 0.4196, "num_tokens": 646886273.0, "step": 1021 }, { "epoch": 0.12084663592290411, "grad_norm": 0.1875283569097519, "learning_rate": 4.5803830273582845e-05, "loss": 0.4122, "num_tokens": 647519729.0, "step": 1022 }, { "epoch": 0.12096488116353317, "grad_norm": 0.17070959508419037, "learning_rate": 4.5776476916975824e-05, "loss": 0.3611, "num_tokens": 648154792.0, "step": 1023 }, { "epoch": 0.12108312640416223, "grad_norm": 0.18787986040115356, "learning_rate": 4.5749106653335366e-05, "loss": 0.3684, "num_tokens": 648787406.0, "step": 1024 }, { "epoch": 0.12120137164479129, "grad_norm": 0.18751688301563263, "learning_rate": 4.5721719518880354e-05, "loss": 0.3911, "num_tokens": 649419550.0, "step": 1025 }, { "epoch": 0.12131961688542037, "grad_norm": 0.16276107728481293, "learning_rate": 4.569431554985202e-05, "loss": 0.3601, "num_tokens": 650055332.0, "step": 1026 }, { "epoch": 0.12143786212604943, "grad_norm": 0.19446961581707, "learning_rate": 4.566689478251388e-05, "loss": 0.4196, "num_tokens": 650692693.0, "step": 1027 }, { "epoch": 0.12155610736667849, "grad_norm": 0.18988844752311707, "learning_rate": 4.563945725315164e-05, "loss": 0.3915, "num_tokens": 651324330.0, "step": 1028 }, { "epoch": 0.12167435260730755, "grad_norm": 0.17802836000919342, "learning_rate": 4.561200299807324e-05, "loss": 0.3373, "num_tokens": 651953179.0, "step": 1029 }, { "epoch": 0.12179259784793663, "grad_norm": 0.19093678891658783, "learning_rate": 4.558453205360872e-05, "loss": 0.3633, "num_tokens": 652577781.0, "step": 1030 }, { "epoch": 0.12191084308856569, "grad_norm": 0.18175625801086426, "learning_rate": 4.55570444561102e-05, "loss": 0.3651, "num_tokens": 653215791.0, "step": 1031 }, { "epoch": 0.12202908832919475, "grad_norm": 0.1993628293275833, "learning_rate": 4.5529540241951866e-05, "loss": 0.398, "num_tokens": 653817011.0, "step": 1032 }, { "epoch": 0.12214733356982381, "grad_norm": 0.16179391741752625, "learning_rate": 4.550201944752987e-05, "loss": 0.3216, "num_tokens": 654453498.0, "step": 1033 }, { "epoch": 0.12226557881045289, "grad_norm": 0.19868984818458557, "learning_rate": 4.547448210926231e-05, "loss": 0.4069, "num_tokens": 655090159.0, "step": 1034 }, { "epoch": 0.12238382405108195, "grad_norm": 0.20796185731887817, "learning_rate": 4.544692826358916e-05, "loss": 0.4258, "num_tokens": 655725879.0, "step": 1035 }, { "epoch": 0.12250206929171101, "grad_norm": 0.19127629697322845, "learning_rate": 4.541935794697228e-05, "loss": 0.3639, "num_tokens": 656361031.0, "step": 1036 }, { "epoch": 0.12262031453234007, "grad_norm": 0.18504977226257324, "learning_rate": 4.53917711958953e-05, "loss": 0.4332, "num_tokens": 656995890.0, "step": 1037 }, { "epoch": 0.12273855977296914, "grad_norm": 0.1742541640996933, "learning_rate": 4.536416804686357e-05, "loss": 0.3658, "num_tokens": 657629671.0, "step": 1038 }, { "epoch": 0.1228568050135982, "grad_norm": 0.18453232944011688, "learning_rate": 4.53365485364042e-05, "loss": 0.3864, "num_tokens": 658262765.0, "step": 1039 }, { "epoch": 0.12297505025422727, "grad_norm": 0.18397222459316254, "learning_rate": 4.5308912701065896e-05, "loss": 0.3739, "num_tokens": 658892913.0, "step": 1040 }, { "epoch": 0.12309329549485633, "grad_norm": 0.19018225371837616, "learning_rate": 4.5281260577419006e-05, "loss": 0.3694, "num_tokens": 659528177.0, "step": 1041 }, { "epoch": 0.1232115407354854, "grad_norm": 0.1874883472919464, "learning_rate": 4.525359220205541e-05, "loss": 0.3817, "num_tokens": 660159039.0, "step": 1042 }, { "epoch": 0.12332978597611446, "grad_norm": 0.18878668546676636, "learning_rate": 4.522590761158849e-05, "loss": 0.3723, "num_tokens": 660793735.0, "step": 1043 }, { "epoch": 0.12344803121674353, "grad_norm": 0.18397924304008484, "learning_rate": 4.51982068426531e-05, "loss": 0.3895, "num_tokens": 661430126.0, "step": 1044 }, { "epoch": 0.12356627645737259, "grad_norm": 0.19383513927459717, "learning_rate": 4.517048993190551e-05, "loss": 0.3989, "num_tokens": 662069040.0, "step": 1045 }, { "epoch": 0.12368452169800166, "grad_norm": 0.1832335889339447, "learning_rate": 4.514275691602332e-05, "loss": 0.3551, "num_tokens": 662703135.0, "step": 1046 }, { "epoch": 0.12380276693863072, "grad_norm": 0.2143872082233429, "learning_rate": 4.511500783170546e-05, "loss": 0.4042, "num_tokens": 663337022.0, "step": 1047 }, { "epoch": 0.12392101217925978, "grad_norm": 0.20561742782592773, "learning_rate": 4.508724271567214e-05, "loss": 0.3758, "num_tokens": 663969631.0, "step": 1048 }, { "epoch": 0.12403925741988885, "grad_norm": 0.18738333880901337, "learning_rate": 4.505946160466476e-05, "loss": 0.4281, "num_tokens": 664607928.0, "step": 1049 }, { "epoch": 0.12415750266051792, "grad_norm": 0.1881737858057022, "learning_rate": 4.503166453544587e-05, "loss": 0.4124, "num_tokens": 665246971.0, "step": 1050 }, { "epoch": 0.12427574790114698, "grad_norm": 0.2155761420726776, "learning_rate": 4.500385154479921e-05, "loss": 0.3704, "num_tokens": 665880124.0, "step": 1051 }, { "epoch": 0.12439399314177604, "grad_norm": 0.17276139557361603, "learning_rate": 4.497602266952949e-05, "loss": 0.3857, "num_tokens": 666516590.0, "step": 1052 }, { "epoch": 0.1245122383824051, "grad_norm": 0.18994863331317902, "learning_rate": 4.494817794646252e-05, "loss": 0.3534, "num_tokens": 667141423.0, "step": 1053 }, { "epoch": 0.12463048362303417, "grad_norm": 0.2097732126712799, "learning_rate": 4.492031741244504e-05, "loss": 0.3947, "num_tokens": 667775999.0, "step": 1054 }, { "epoch": 0.12474872886366324, "grad_norm": 0.20371286571025848, "learning_rate": 4.4892441104344724e-05, "loss": 0.384, "num_tokens": 668413601.0, "step": 1055 }, { "epoch": 0.1248669741042923, "grad_norm": 0.17550261318683624, "learning_rate": 4.486454905905012e-05, "loss": 0.3732, "num_tokens": 669045590.0, "step": 1056 }, { "epoch": 0.12498521934492136, "grad_norm": 0.21981221437454224, "learning_rate": 4.48366413134706e-05, "loss": 0.399, "num_tokens": 669680260.0, "step": 1057 }, { "epoch": 0.12510346458555044, "grad_norm": 0.20199020206928253, "learning_rate": 4.480871790453632e-05, "loss": 0.3682, "num_tokens": 670314552.0, "step": 1058 }, { "epoch": 0.1252217098261795, "grad_norm": 0.20767715573310852, "learning_rate": 4.478077886919814e-05, "loss": 0.3715, "num_tokens": 670948491.0, "step": 1059 }, { "epoch": 0.12533995506680856, "grad_norm": 0.1847984343767166, "learning_rate": 4.4752824244427635e-05, "loss": 0.354, "num_tokens": 671580682.0, "step": 1060 }, { "epoch": 0.12545820030743762, "grad_norm": 0.1866157501935959, "learning_rate": 4.472485406721697e-05, "loss": 0.3585, "num_tokens": 672210589.0, "step": 1061 }, { "epoch": 0.12557644554806668, "grad_norm": 0.20393408834934235, "learning_rate": 4.4696868374578916e-05, "loss": 0.3824, "num_tokens": 672840526.0, "step": 1062 }, { "epoch": 0.12569469078869575, "grad_norm": 0.19358152151107788, "learning_rate": 4.4668867203546766e-05, "loss": 0.3702, "num_tokens": 673466186.0, "step": 1063 }, { "epoch": 0.1258129360293248, "grad_norm": 0.16034334897994995, "learning_rate": 4.46408505911743e-05, "loss": 0.3063, "num_tokens": 674089397.0, "step": 1064 }, { "epoch": 0.1259311812699539, "grad_norm": 0.2177608758211136, "learning_rate": 4.461281857453572e-05, "loss": 0.3839, "num_tokens": 674726487.0, "step": 1065 }, { "epoch": 0.12604942651058296, "grad_norm": 0.16433891654014587, "learning_rate": 4.4584771190725644e-05, "loss": 0.3526, "num_tokens": 675360889.0, "step": 1066 }, { "epoch": 0.12616767175121202, "grad_norm": 0.17344233393669128, "learning_rate": 4.4556708476858985e-05, "loss": 0.3615, "num_tokens": 675999411.0, "step": 1067 }, { "epoch": 0.12628591699184108, "grad_norm": 0.1769314557313919, "learning_rate": 4.4528630470070964e-05, "loss": 0.3477, "num_tokens": 676631267.0, "step": 1068 }, { "epoch": 0.12640416223247014, "grad_norm": 0.18346329033374786, "learning_rate": 4.450053720751703e-05, "loss": 0.3735, "num_tokens": 677241383.0, "step": 1069 }, { "epoch": 0.1265224074730992, "grad_norm": 0.1862960308790207, "learning_rate": 4.447242872637282e-05, "loss": 0.3876, "num_tokens": 677877358.0, "step": 1070 }, { "epoch": 0.12664065271372826, "grad_norm": 0.17572778463363647, "learning_rate": 4.444430506383414e-05, "loss": 0.3651, "num_tokens": 678505498.0, "step": 1071 }, { "epoch": 0.12675889795435732, "grad_norm": 0.19853392243385315, "learning_rate": 4.441616625711684e-05, "loss": 0.359, "num_tokens": 679124916.0, "step": 1072 }, { "epoch": 0.1268771431949864, "grad_norm": 0.1989092230796814, "learning_rate": 4.4388012343456826e-05, "loss": 0.403, "num_tokens": 679749193.0, "step": 1073 }, { "epoch": 0.12699538843561547, "grad_norm": 0.16579356789588928, "learning_rate": 4.435984336011001e-05, "loss": 0.3409, "num_tokens": 680380293.0, "step": 1074 }, { "epoch": 0.12711363367624454, "grad_norm": 0.17490504682064056, "learning_rate": 4.433165934435223e-05, "loss": 0.3768, "num_tokens": 681017380.0, "step": 1075 }, { "epoch": 0.1272318789168736, "grad_norm": 0.1843899041414261, "learning_rate": 4.430346033347923e-05, "loss": 0.3796, "num_tokens": 681647130.0, "step": 1076 }, { "epoch": 0.12735012415750266, "grad_norm": 0.17463913559913635, "learning_rate": 4.427524636480658e-05, "loss": 0.3586, "num_tokens": 682285081.0, "step": 1077 }, { "epoch": 0.12746836939813172, "grad_norm": 0.1665404886007309, "learning_rate": 4.4247017475669646e-05, "loss": 0.376, "num_tokens": 682919537.0, "step": 1078 }, { "epoch": 0.12758661463876078, "grad_norm": 0.18626192212104797, "learning_rate": 4.421877370342355e-05, "loss": 0.3868, "num_tokens": 683554203.0, "step": 1079 }, { "epoch": 0.12770485987938984, "grad_norm": 0.18904843926429749, "learning_rate": 4.4190515085443104e-05, "loss": 0.3929, "num_tokens": 684192561.0, "step": 1080 }, { "epoch": 0.12782310512001893, "grad_norm": 0.16643720865249634, "learning_rate": 4.416224165912276e-05, "loss": 0.3875, "num_tokens": 684826318.0, "step": 1081 }, { "epoch": 0.127941350360648, "grad_norm": 0.17822794616222382, "learning_rate": 4.4133953461876576e-05, "loss": 0.3997, "num_tokens": 685457932.0, "step": 1082 }, { "epoch": 0.12805959560127705, "grad_norm": 0.1729457676410675, "learning_rate": 4.410565053113814e-05, "loss": 0.3648, "num_tokens": 686094682.0, "step": 1083 }, { "epoch": 0.12817784084190612, "grad_norm": 0.17672215402126312, "learning_rate": 4.407733290436055e-05, "loss": 0.3787, "num_tokens": 686724923.0, "step": 1084 }, { "epoch": 0.12829608608253518, "grad_norm": 0.16756568849086761, "learning_rate": 4.4049000619016345e-05, "loss": 0.3775, "num_tokens": 687358762.0, "step": 1085 }, { "epoch": 0.12841433132316424, "grad_norm": 0.17241579294204712, "learning_rate": 4.402065371259747e-05, "loss": 0.3743, "num_tokens": 687997038.0, "step": 1086 }, { "epoch": 0.1285325765637933, "grad_norm": 0.16503721475601196, "learning_rate": 4.399229222261522e-05, "loss": 0.3895, "num_tokens": 688632728.0, "step": 1087 }, { "epoch": 0.12865082180442236, "grad_norm": 0.18014270067214966, "learning_rate": 4.3963916186600164e-05, "loss": 0.3913, "num_tokens": 689237615.0, "step": 1088 }, { "epoch": 0.12876906704505145, "grad_norm": 0.18278226256370544, "learning_rate": 4.393552564210214e-05, "loss": 0.3951, "num_tokens": 689872830.0, "step": 1089 }, { "epoch": 0.1288873122856805, "grad_norm": 0.17956025898456573, "learning_rate": 4.3907120626690194e-05, "loss": 0.3713, "num_tokens": 690505835.0, "step": 1090 }, { "epoch": 0.12900555752630957, "grad_norm": 0.18163177371025085, "learning_rate": 4.387870117795249e-05, "loss": 0.4, "num_tokens": 691138889.0, "step": 1091 }, { "epoch": 0.12912380276693863, "grad_norm": 0.17895618081092834, "learning_rate": 4.385026733349632e-05, "loss": 0.3824, "num_tokens": 691778212.0, "step": 1092 }, { "epoch": 0.1292420480075677, "grad_norm": 0.17014320194721222, "learning_rate": 4.382181913094801e-05, "loss": 0.356, "num_tokens": 692400014.0, "step": 1093 }, { "epoch": 0.12936029324819676, "grad_norm": 0.17010079324245453, "learning_rate": 4.37933566079529e-05, "loss": 0.3583, "num_tokens": 693035177.0, "step": 1094 }, { "epoch": 0.12947853848882582, "grad_norm": 0.1737738847732544, "learning_rate": 4.376487980217527e-05, "loss": 0.3793, "num_tokens": 693672098.0, "step": 1095 }, { "epoch": 0.12959678372945488, "grad_norm": 0.157965287566185, "learning_rate": 4.3736388751298306e-05, "loss": 0.3352, "num_tokens": 694306278.0, "step": 1096 }, { "epoch": 0.12971502897008397, "grad_norm": 0.19977279007434845, "learning_rate": 4.370788349302403e-05, "loss": 0.4292, "num_tokens": 694943202.0, "step": 1097 }, { "epoch": 0.12983327421071303, "grad_norm": 0.1841733455657959, "learning_rate": 4.3679364065073274e-05, "loss": 0.398, "num_tokens": 695581748.0, "step": 1098 }, { "epoch": 0.1299515194513421, "grad_norm": 0.18417219817638397, "learning_rate": 4.365083050518563e-05, "loss": 0.4095, "num_tokens": 696217907.0, "step": 1099 }, { "epoch": 0.13006976469197115, "grad_norm": 0.17778581380844116, "learning_rate": 4.362228285111939e-05, "loss": 0.3461, "num_tokens": 696832400.0, "step": 1100 }, { "epoch": 0.1301880099326002, "grad_norm": 0.20819556713104248, "learning_rate": 4.359372114065146e-05, "loss": 0.4048, "num_tokens": 697462714.0, "step": 1101 }, { "epoch": 0.13030625517322927, "grad_norm": 0.1679416298866272, "learning_rate": 4.356514541157742e-05, "loss": 0.3306, "num_tokens": 698096072.0, "step": 1102 }, { "epoch": 0.13042450041385834, "grad_norm": 0.17605753242969513, "learning_rate": 4.3536555701711316e-05, "loss": 0.3654, "num_tokens": 698732104.0, "step": 1103 }, { "epoch": 0.1305427456544874, "grad_norm": 0.18534035980701447, "learning_rate": 4.350795204888576e-05, "loss": 0.3637, "num_tokens": 699360778.0, "step": 1104 }, { "epoch": 0.13066099089511646, "grad_norm": 0.1880866438150406, "learning_rate": 4.3479334490951784e-05, "loss": 0.3639, "num_tokens": 699998919.0, "step": 1105 }, { "epoch": 0.13077923613574555, "grad_norm": 0.19816787540912628, "learning_rate": 4.345070306577882e-05, "loss": 0.3717, "num_tokens": 700636435.0, "step": 1106 }, { "epoch": 0.1308974813763746, "grad_norm": 0.1735011786222458, "learning_rate": 4.342205781125466e-05, "loss": 0.3669, "num_tokens": 701266798.0, "step": 1107 }, { "epoch": 0.13101572661700367, "grad_norm": 0.2126895934343338, "learning_rate": 4.33933987652854e-05, "loss": 0.398, "num_tokens": 701896284.0, "step": 1108 }, { "epoch": 0.13113397185763273, "grad_norm": 0.19128330051898956, "learning_rate": 4.336472596579538e-05, "loss": 0.4147, "num_tokens": 702532392.0, "step": 1109 }, { "epoch": 0.1312522170982618, "grad_norm": 0.17880013585090637, "learning_rate": 4.333603945072713e-05, "loss": 0.3671, "num_tokens": 703167803.0, "step": 1110 }, { "epoch": 0.13137046233889085, "grad_norm": 0.206880122423172, "learning_rate": 4.330733925804134e-05, "loss": 0.4211, "num_tokens": 703804312.0, "step": 1111 }, { "epoch": 0.13148870757951991, "grad_norm": 0.178485706448555, "learning_rate": 4.3278625425716805e-05, "loss": 0.3527, "num_tokens": 704434487.0, "step": 1112 }, { "epoch": 0.13160695282014898, "grad_norm": 0.2033427357673645, "learning_rate": 4.324989799175037e-05, "loss": 0.3989, "num_tokens": 705069074.0, "step": 1113 }, { "epoch": 0.13172519806077806, "grad_norm": 0.19211621582508087, "learning_rate": 4.322115699415685e-05, "loss": 0.3784, "num_tokens": 705694863.0, "step": 1114 }, { "epoch": 0.13184344330140713, "grad_norm": 0.19245471060276031, "learning_rate": 4.319240247096905e-05, "loss": 0.3802, "num_tokens": 706332062.0, "step": 1115 }, { "epoch": 0.1319616885420362, "grad_norm": 0.18468594551086426, "learning_rate": 4.316363446023764e-05, "loss": 0.3732, "num_tokens": 706961719.0, "step": 1116 }, { "epoch": 0.13207993378266525, "grad_norm": 0.18160372972488403, "learning_rate": 4.313485300003117e-05, "loss": 0.3549, "num_tokens": 707593241.0, "step": 1117 }, { "epoch": 0.1321981790232943, "grad_norm": 0.17797401547431946, "learning_rate": 4.310605812843595e-05, "loss": 0.4154, "num_tokens": 708223670.0, "step": 1118 }, { "epoch": 0.13231642426392337, "grad_norm": 0.19047312438488007, "learning_rate": 4.3077249883556075e-05, "loss": 0.3714, "num_tokens": 708853803.0, "step": 1119 }, { "epoch": 0.13243466950455243, "grad_norm": 0.19819514453411102, "learning_rate": 4.304842830351331e-05, "loss": 0.3865, "num_tokens": 709488191.0, "step": 1120 }, { "epoch": 0.1325529147451815, "grad_norm": 0.17723028361797333, "learning_rate": 4.301959342644706e-05, "loss": 0.3588, "num_tokens": 710121271.0, "step": 1121 }, { "epoch": 0.13267115998581058, "grad_norm": 0.1742127537727356, "learning_rate": 4.299074529051437e-05, "loss": 0.3779, "num_tokens": 710755348.0, "step": 1122 }, { "epoch": 0.13278940522643964, "grad_norm": 0.19025954604148865, "learning_rate": 4.296188393388978e-05, "loss": 0.4193, "num_tokens": 711388900.0, "step": 1123 }, { "epoch": 0.1329076504670687, "grad_norm": 0.18485389649868011, "learning_rate": 4.293300939476534e-05, "loss": 0.391, "num_tokens": 712027710.0, "step": 1124 }, { "epoch": 0.13302589570769777, "grad_norm": 0.20137916505336761, "learning_rate": 4.290412171135057e-05, "loss": 0.386, "num_tokens": 712662431.0, "step": 1125 }, { "epoch": 0.13314414094832683, "grad_norm": 0.18418936431407928, "learning_rate": 4.287522092187237e-05, "loss": 0.3778, "num_tokens": 713298321.0, "step": 1126 }, { "epoch": 0.1332623861889559, "grad_norm": 0.1847471296787262, "learning_rate": 4.284630706457495e-05, "loss": 0.3939, "num_tokens": 713929502.0, "step": 1127 }, { "epoch": 0.13338063142958495, "grad_norm": 0.18358609080314636, "learning_rate": 4.281738017771985e-05, "loss": 0.392, "num_tokens": 714563684.0, "step": 1128 }, { "epoch": 0.133498876670214, "grad_norm": 0.19019533693790436, "learning_rate": 4.278844029958586e-05, "loss": 0.4139, "num_tokens": 715194139.0, "step": 1129 }, { "epoch": 0.1336171219108431, "grad_norm": 0.17144839465618134, "learning_rate": 4.275948746846891e-05, "loss": 0.3872, "num_tokens": 715829633.0, "step": 1130 }, { "epoch": 0.13373536715147216, "grad_norm": 0.20015180110931396, "learning_rate": 4.273052172268213e-05, "loss": 0.3855, "num_tokens": 716437097.0, "step": 1131 }, { "epoch": 0.13385361239210122, "grad_norm": 0.18676939606666565, "learning_rate": 4.2701543100555715e-05, "loss": 0.3251, "num_tokens": 717069185.0, "step": 1132 }, { "epoch": 0.13397185763273028, "grad_norm": 0.16965261101722717, "learning_rate": 4.2672551640436876e-05, "loss": 0.3968, "num_tokens": 717707723.0, "step": 1133 }, { "epoch": 0.13409010287335935, "grad_norm": 0.1700984537601471, "learning_rate": 4.264354738068985e-05, "loss": 0.3727, "num_tokens": 718339982.0, "step": 1134 }, { "epoch": 0.1342083481139884, "grad_norm": 0.2030845433473587, "learning_rate": 4.2614530359695786e-05, "loss": 0.3893, "num_tokens": 718970674.0, "step": 1135 }, { "epoch": 0.13432659335461747, "grad_norm": 0.1986686736345291, "learning_rate": 4.258550061585275e-05, "loss": 0.4172, "num_tokens": 719606724.0, "step": 1136 }, { "epoch": 0.13444483859524653, "grad_norm": 0.17563633620738983, "learning_rate": 4.25564581875756e-05, "loss": 0.3858, "num_tokens": 720209640.0, "step": 1137 }, { "epoch": 0.13456308383587562, "grad_norm": 0.17847663164138794, "learning_rate": 4.2527403113296014e-05, "loss": 0.3804, "num_tokens": 720845332.0, "step": 1138 }, { "epoch": 0.13468132907650468, "grad_norm": 0.17714640498161316, "learning_rate": 4.249833543146239e-05, "loss": 0.3708, "num_tokens": 721471001.0, "step": 1139 }, { "epoch": 0.13479957431713374, "grad_norm": 0.17541660368442535, "learning_rate": 4.246925518053981e-05, "loss": 0.3568, "num_tokens": 722105712.0, "step": 1140 }, { "epoch": 0.1349178195577628, "grad_norm": 0.18325071036815643, "learning_rate": 4.2440162399010005e-05, "loss": 0.4037, "num_tokens": 722735260.0, "step": 1141 }, { "epoch": 0.13503606479839186, "grad_norm": 0.16364189982414246, "learning_rate": 4.241105712537126e-05, "loss": 0.3644, "num_tokens": 723369238.0, "step": 1142 }, { "epoch": 0.13515431003902093, "grad_norm": 0.18739944696426392, "learning_rate": 4.238193939813841e-05, "loss": 0.3838, "num_tokens": 724005091.0, "step": 1143 }, { "epoch": 0.13527255527965, "grad_norm": 0.1882290542125702, "learning_rate": 4.235280925584277e-05, "loss": 0.3571, "num_tokens": 724644703.0, "step": 1144 }, { "epoch": 0.13539080052027905, "grad_norm": 0.17208614945411682, "learning_rate": 4.2323666737032075e-05, "loss": 0.3854, "num_tokens": 725277008.0, "step": 1145 }, { "epoch": 0.13550904576090814, "grad_norm": 0.1727161407470703, "learning_rate": 4.2294511880270435e-05, "loss": 0.3558, "num_tokens": 725912854.0, "step": 1146 }, { "epoch": 0.1356272910015372, "grad_norm": 0.19190965592861176, "learning_rate": 4.226534472413829e-05, "loss": 0.4023, "num_tokens": 726552374.0, "step": 1147 }, { "epoch": 0.13574553624216626, "grad_norm": 0.18595655262470245, "learning_rate": 4.223616530723238e-05, "loss": 0.3585, "num_tokens": 727190596.0, "step": 1148 }, { "epoch": 0.13586378148279532, "grad_norm": 0.17069599032402039, "learning_rate": 4.220697366816563e-05, "loss": 0.3421, "num_tokens": 727820231.0, "step": 1149 }, { "epoch": 0.13598202672342438, "grad_norm": 0.1840461939573288, "learning_rate": 4.2177769845567157e-05, "loss": 0.3332, "num_tokens": 728440540.0, "step": 1150 }, { "epoch": 0.13610027196405344, "grad_norm": 0.2065514773130417, "learning_rate": 4.2148553878082203e-05, "loss": 0.4219, "num_tokens": 729072563.0, "step": 1151 }, { "epoch": 0.1362185172046825, "grad_norm": 0.19268664717674255, "learning_rate": 4.211932580437208e-05, "loss": 0.4187, "num_tokens": 729704300.0, "step": 1152 }, { "epoch": 0.13633676244531157, "grad_norm": 0.1874132603406906, "learning_rate": 4.2090085663114126e-05, "loss": 0.3949, "num_tokens": 730308427.0, "step": 1153 }, { "epoch": 0.13645500768594065, "grad_norm": 0.2015058696269989, "learning_rate": 4.206083349300163e-05, "loss": 0.4352, "num_tokens": 730946156.0, "step": 1154 }, { "epoch": 0.13657325292656972, "grad_norm": 0.1677466779947281, "learning_rate": 4.20315693327438e-05, "loss": 0.3534, "num_tokens": 731577262.0, "step": 1155 }, { "epoch": 0.13669149816719878, "grad_norm": 0.21406646072864532, "learning_rate": 4.2002293221065737e-05, "loss": 0.403, "num_tokens": 732204517.0, "step": 1156 }, { "epoch": 0.13680974340782784, "grad_norm": 0.17139025032520294, "learning_rate": 4.197300519670834e-05, "loss": 0.3519, "num_tokens": 732841654.0, "step": 1157 }, { "epoch": 0.1369279886484569, "grad_norm": 0.19764773547649384, "learning_rate": 4.1943705298428264e-05, "loss": 0.3785, "num_tokens": 733481344.0, "step": 1158 }, { "epoch": 0.13704623388908596, "grad_norm": 0.1817982941865921, "learning_rate": 4.191439356499788e-05, "loss": 0.3851, "num_tokens": 734116157.0, "step": 1159 }, { "epoch": 0.13716447912971502, "grad_norm": 0.1699836552143097, "learning_rate": 4.188507003520524e-05, "loss": 0.3549, "num_tokens": 734752960.0, "step": 1160 }, { "epoch": 0.13728272437034408, "grad_norm": 0.18850567936897278, "learning_rate": 4.1855734747853974e-05, "loss": 0.3689, "num_tokens": 735390605.0, "step": 1161 }, { "epoch": 0.13740096961097314, "grad_norm": 0.1694057732820511, "learning_rate": 4.1826387741763294e-05, "loss": 0.3493, "num_tokens": 736025653.0, "step": 1162 }, { "epoch": 0.13751921485160223, "grad_norm": 0.19740477204322815, "learning_rate": 4.1797029055767917e-05, "loss": 0.412, "num_tokens": 736662381.0, "step": 1163 }, { "epoch": 0.1376374600922313, "grad_norm": 0.1692422777414322, "learning_rate": 4.1767658728718005e-05, "loss": 0.3781, "num_tokens": 737298285.0, "step": 1164 }, { "epoch": 0.13775570533286036, "grad_norm": 0.18291229009628296, "learning_rate": 4.173827679947914e-05, "loss": 0.3646, "num_tokens": 737932027.0, "step": 1165 }, { "epoch": 0.13787395057348942, "grad_norm": 0.17919090390205383, "learning_rate": 4.170888330693226e-05, "loss": 0.402, "num_tokens": 738566204.0, "step": 1166 }, { "epoch": 0.13799219581411848, "grad_norm": 0.16608217358589172, "learning_rate": 4.167947828997356e-05, "loss": 0.3582, "num_tokens": 739201202.0, "step": 1167 }, { "epoch": 0.13811044105474754, "grad_norm": 0.1820799559354782, "learning_rate": 4.1650061787514534e-05, "loss": 0.3757, "num_tokens": 739833572.0, "step": 1168 }, { "epoch": 0.1382286862953766, "grad_norm": 0.1864081174135208, "learning_rate": 4.162063383848186e-05, "loss": 0.3813, "num_tokens": 740467809.0, "step": 1169 }, { "epoch": 0.13834693153600566, "grad_norm": 0.19109085202217102, "learning_rate": 4.159119448181737e-05, "loss": 0.3954, "num_tokens": 741101167.0, "step": 1170 }, { "epoch": 0.13846517677663475, "grad_norm": 0.18018218874931335, "learning_rate": 4.156174375647798e-05, "loss": 0.3666, "num_tokens": 741738147.0, "step": 1171 }, { "epoch": 0.1385834220172638, "grad_norm": 0.1801140159368515, "learning_rate": 4.1532281701435644e-05, "loss": 0.3747, "num_tokens": 742371844.0, "step": 1172 }, { "epoch": 0.13870166725789287, "grad_norm": 0.18970145285129547, "learning_rate": 4.150280835567733e-05, "loss": 0.3969, "num_tokens": 743011533.0, "step": 1173 }, { "epoch": 0.13881991249852194, "grad_norm": 0.20032352209091187, "learning_rate": 4.1473323758204925e-05, "loss": 0.4045, "num_tokens": 743646159.0, "step": 1174 }, { "epoch": 0.138938157739151, "grad_norm": 0.16128578782081604, "learning_rate": 4.144382794803523e-05, "loss": 0.3336, "num_tokens": 744282963.0, "step": 1175 }, { "epoch": 0.13905640297978006, "grad_norm": 0.2133799046278, "learning_rate": 4.1414320964199846e-05, "loss": 0.4102, "num_tokens": 744920861.0, "step": 1176 }, { "epoch": 0.13917464822040912, "grad_norm": 0.17610150575637817, "learning_rate": 4.1384802845745216e-05, "loss": 0.4052, "num_tokens": 745552170.0, "step": 1177 }, { "epoch": 0.13929289346103818, "grad_norm": 0.17871306836605072, "learning_rate": 4.1355273631732466e-05, "loss": 0.3641, "num_tokens": 746184040.0, "step": 1178 }, { "epoch": 0.13941113870166727, "grad_norm": 0.1855345219373703, "learning_rate": 4.132573336123741e-05, "loss": 0.3781, "num_tokens": 746821128.0, "step": 1179 }, { "epoch": 0.13952938394229633, "grad_norm": 0.1980353444814682, "learning_rate": 4.1296182073350536e-05, "loss": 0.393, "num_tokens": 747451607.0, "step": 1180 }, { "epoch": 0.1396476291829254, "grad_norm": 0.19999416172504425, "learning_rate": 4.126661980717686e-05, "loss": 0.378, "num_tokens": 748083163.0, "step": 1181 }, { "epoch": 0.13976587442355445, "grad_norm": 0.17589710652828217, "learning_rate": 4.123704660183597e-05, "loss": 0.3488, "num_tokens": 748721979.0, "step": 1182 }, { "epoch": 0.13988411966418351, "grad_norm": 0.20237183570861816, "learning_rate": 4.120746249646188e-05, "loss": 0.3976, "num_tokens": 749355743.0, "step": 1183 }, { "epoch": 0.14000236490481258, "grad_norm": 0.18042594194412231, "learning_rate": 4.1177867530203066e-05, "loss": 0.3601, "num_tokens": 749990459.0, "step": 1184 }, { "epoch": 0.14012061014544164, "grad_norm": 0.2047455608844757, "learning_rate": 4.114826174222237e-05, "loss": 0.3776, "num_tokens": 750625529.0, "step": 1185 }, { "epoch": 0.1402388553860707, "grad_norm": 0.17314770817756653, "learning_rate": 4.111864517169694e-05, "loss": 0.3613, "num_tokens": 751257308.0, "step": 1186 }, { "epoch": 0.1403571006266998, "grad_norm": 0.16046233475208282, "learning_rate": 4.108901785781821e-05, "loss": 0.3525, "num_tokens": 751890563.0, "step": 1187 }, { "epoch": 0.14047534586732885, "grad_norm": 0.18692271411418915, "learning_rate": 4.105937983979183e-05, "loss": 0.3497, "num_tokens": 752523163.0, "step": 1188 }, { "epoch": 0.1405935911079579, "grad_norm": 0.16772404313087463, "learning_rate": 4.102973115683758e-05, "loss": 0.3636, "num_tokens": 753157270.0, "step": 1189 }, { "epoch": 0.14071183634858697, "grad_norm": 0.1749587059020996, "learning_rate": 4.1000071848189406e-05, "loss": 0.3721, "num_tokens": 753790631.0, "step": 1190 }, { "epoch": 0.14083008158921603, "grad_norm": 0.17635898292064667, "learning_rate": 4.0970401953095275e-05, "loss": 0.4219, "num_tokens": 754422344.0, "step": 1191 }, { "epoch": 0.1409483268298451, "grad_norm": 0.1795620322227478, "learning_rate": 4.094072151081719e-05, "loss": 0.4066, "num_tokens": 755054602.0, "step": 1192 }, { "epoch": 0.14106657207047416, "grad_norm": 0.19329644739627838, "learning_rate": 4.091103056063108e-05, "loss": 0.3965, "num_tokens": 755686194.0, "step": 1193 }, { "epoch": 0.14118481731110322, "grad_norm": 0.18483629822731018, "learning_rate": 4.088132914182682e-05, "loss": 0.3922, "num_tokens": 756324764.0, "step": 1194 }, { "epoch": 0.1413030625517323, "grad_norm": 0.193592831492424, "learning_rate": 4.085161729370809e-05, "loss": 0.412, "num_tokens": 756963038.0, "step": 1195 }, { "epoch": 0.14142130779236137, "grad_norm": 0.1793985515832901, "learning_rate": 4.0821895055592415e-05, "loss": 0.3256, "num_tokens": 757590896.0, "step": 1196 }, { "epoch": 0.14153955303299043, "grad_norm": 0.1903679072856903, "learning_rate": 4.0792162466811046e-05, "loss": 0.3892, "num_tokens": 758226989.0, "step": 1197 }, { "epoch": 0.1416577982736195, "grad_norm": 0.18117378652095795, "learning_rate": 4.076241956670894e-05, "loss": 0.3966, "num_tokens": 758863618.0, "step": 1198 }, { "epoch": 0.14177604351424855, "grad_norm": 0.17214766144752502, "learning_rate": 4.0732666394644675e-05, "loss": 0.3483, "num_tokens": 759499176.0, "step": 1199 }, { "epoch": 0.1418942887548776, "grad_norm": 0.2005872279405594, "learning_rate": 4.0702902989990464e-05, "loss": 0.403, "num_tokens": 760138725.0, "step": 1200 }, { "epoch": 0.14201253399550667, "grad_norm": 0.16486693918704987, "learning_rate": 4.0673129392132014e-05, "loss": 0.3784, "num_tokens": 760778173.0, "step": 1201 }, { "epoch": 0.14213077923613573, "grad_norm": 0.20838482677936554, "learning_rate": 4.064334564046856e-05, "loss": 0.427, "num_tokens": 761411949.0, "step": 1202 }, { "epoch": 0.14224902447676482, "grad_norm": 0.1762368530035019, "learning_rate": 4.061355177441275e-05, "loss": 0.37, "num_tokens": 762049507.0, "step": 1203 }, { "epoch": 0.14236726971739389, "grad_norm": 0.19547240436077118, "learning_rate": 4.058374783339063e-05, "loss": 0.41, "num_tokens": 762685373.0, "step": 1204 }, { "epoch": 0.14248551495802295, "grad_norm": 0.18869923055171967, "learning_rate": 4.0553933856841584e-05, "loss": 0.3857, "num_tokens": 763323824.0, "step": 1205 }, { "epoch": 0.142603760198652, "grad_norm": 0.18279902637004852, "learning_rate": 4.052410988421824e-05, "loss": 0.421, "num_tokens": 763962195.0, "step": 1206 }, { "epoch": 0.14272200543928107, "grad_norm": 0.17345914244651794, "learning_rate": 4.049427595498649e-05, "loss": 0.3871, "num_tokens": 764599228.0, "step": 1207 }, { "epoch": 0.14284025067991013, "grad_norm": 0.1748848259449005, "learning_rate": 4.046443210862538e-05, "loss": 0.342, "num_tokens": 765233848.0, "step": 1208 }, { "epoch": 0.1429584959205392, "grad_norm": 0.17195607721805573, "learning_rate": 4.043457838462712e-05, "loss": 0.39, "num_tokens": 765873589.0, "step": 1209 }, { "epoch": 0.14307674116116825, "grad_norm": 0.16931648552417755, "learning_rate": 4.040471482249691e-05, "loss": 0.3322, "num_tokens": 766504261.0, "step": 1210 }, { "epoch": 0.14319498640179731, "grad_norm": 0.19360356032848358, "learning_rate": 4.037484146175306e-05, "loss": 0.4047, "num_tokens": 767140922.0, "step": 1211 }, { "epoch": 0.1433132316424264, "grad_norm": 0.2044503539800644, "learning_rate": 4.034495834192678e-05, "loss": 0.3987, "num_tokens": 767775931.0, "step": 1212 }, { "epoch": 0.14343147688305546, "grad_norm": 0.18666554987430573, "learning_rate": 4.031506550256223e-05, "loss": 0.4094, "num_tokens": 768411905.0, "step": 1213 }, { "epoch": 0.14354972212368453, "grad_norm": 0.1968887448310852, "learning_rate": 4.028516298321643e-05, "loss": 0.3707, "num_tokens": 769046890.0, "step": 1214 }, { "epoch": 0.1436679673643136, "grad_norm": 0.20507104694843292, "learning_rate": 4.025525082345917e-05, "loss": 0.3778, "num_tokens": 769671163.0, "step": 1215 }, { "epoch": 0.14378621260494265, "grad_norm": 0.18357984721660614, "learning_rate": 4.022532906287306e-05, "loss": 0.3759, "num_tokens": 770306885.0, "step": 1216 }, { "epoch": 0.1439044578455717, "grad_norm": 0.17793887853622437, "learning_rate": 4.0195397741053374e-05, "loss": 0.3404, "num_tokens": 770935952.0, "step": 1217 }, { "epoch": 0.14402270308620077, "grad_norm": 0.1842331439256668, "learning_rate": 4.016545689760804e-05, "loss": 0.3636, "num_tokens": 771566252.0, "step": 1218 }, { "epoch": 0.14414094832682983, "grad_norm": 0.17892083525657654, "learning_rate": 4.013550657215759e-05, "loss": 0.3593, "num_tokens": 772196019.0, "step": 1219 }, { "epoch": 0.14425919356745892, "grad_norm": 0.1608760952949524, "learning_rate": 4.01055468043351e-05, "loss": 0.3764, "num_tokens": 772829557.0, "step": 1220 }, { "epoch": 0.14437743880808798, "grad_norm": 0.17508582770824432, "learning_rate": 4.007557763378616e-05, "loss": 0.3865, "num_tokens": 773462306.0, "step": 1221 }, { "epoch": 0.14449568404871704, "grad_norm": 0.17223335802555084, "learning_rate": 4.0045599100168794e-05, "loss": 0.418, "num_tokens": 774094333.0, "step": 1222 }, { "epoch": 0.1446139292893461, "grad_norm": 0.18835633993148804, "learning_rate": 4.001561124315339e-05, "loss": 0.3892, "num_tokens": 774727506.0, "step": 1223 }, { "epoch": 0.14473217452997517, "grad_norm": 0.19669027626514435, "learning_rate": 3.998561410242271e-05, "loss": 0.4244, "num_tokens": 775365455.0, "step": 1224 }, { "epoch": 0.14485041977060423, "grad_norm": 0.18262512981891632, "learning_rate": 3.99556077176718e-05, "loss": 0.4127, "num_tokens": 775989745.0, "step": 1225 }, { "epoch": 0.1449686650112333, "grad_norm": 0.18306951224803925, "learning_rate": 3.992559212860789e-05, "loss": 0.3604, "num_tokens": 776627802.0, "step": 1226 }, { "epoch": 0.14508691025186235, "grad_norm": 0.17740747332572937, "learning_rate": 3.989556737495045e-05, "loss": 0.3525, "num_tokens": 777261077.0, "step": 1227 }, { "epoch": 0.14520515549249144, "grad_norm": 0.21090878546237946, "learning_rate": 3.9865533496431045e-05, "loss": 0.3955, "num_tokens": 777900544.0, "step": 1228 }, { "epoch": 0.1453234007331205, "grad_norm": 0.17277488112449646, "learning_rate": 3.983549053279333e-05, "loss": 0.3326, "num_tokens": 778526351.0, "step": 1229 }, { "epoch": 0.14544164597374956, "grad_norm": 0.18703323602676392, "learning_rate": 3.980543852379295e-05, "loss": 0.3794, "num_tokens": 779161010.0, "step": 1230 }, { "epoch": 0.14555989121437862, "grad_norm": 0.1652577966451645, "learning_rate": 3.977537750919757e-05, "loss": 0.3797, "num_tokens": 779795294.0, "step": 1231 }, { "epoch": 0.14567813645500768, "grad_norm": 0.17143456637859344, "learning_rate": 3.974530752878674e-05, "loss": 0.3601, "num_tokens": 780434525.0, "step": 1232 }, { "epoch": 0.14579638169563675, "grad_norm": 0.16284136474132538, "learning_rate": 3.971522862235187e-05, "loss": 0.3492, "num_tokens": 781047733.0, "step": 1233 }, { "epoch": 0.1459146269362658, "grad_norm": 0.1673222929239273, "learning_rate": 3.968514082969621e-05, "loss": 0.3657, "num_tokens": 781680882.0, "step": 1234 }, { "epoch": 0.14603287217689487, "grad_norm": 0.17836733162403107, "learning_rate": 3.9655044190634725e-05, "loss": 0.3779, "num_tokens": 782316078.0, "step": 1235 }, { "epoch": 0.14615111741752396, "grad_norm": 0.19530552625656128, "learning_rate": 3.9624938744994145e-05, "loss": 0.4156, "num_tokens": 782945690.0, "step": 1236 }, { "epoch": 0.14626936265815302, "grad_norm": 0.1764390617609024, "learning_rate": 3.9594824532612794e-05, "loss": 0.4112, "num_tokens": 783578548.0, "step": 1237 }, { "epoch": 0.14638760789878208, "grad_norm": 0.17299224436283112, "learning_rate": 3.956470159334064e-05, "loss": 0.3551, "num_tokens": 784211272.0, "step": 1238 }, { "epoch": 0.14650585313941114, "grad_norm": 0.17603452503681183, "learning_rate": 3.953456996703918e-05, "loss": 0.3959, "num_tokens": 784844206.0, "step": 1239 }, { "epoch": 0.1466240983800402, "grad_norm": 0.17413505911827087, "learning_rate": 3.95044296935814e-05, "loss": 0.3834, "num_tokens": 785479541.0, "step": 1240 }, { "epoch": 0.14674234362066926, "grad_norm": 0.17274941504001617, "learning_rate": 3.9474280812851754e-05, "loss": 0.3539, "num_tokens": 786105562.0, "step": 1241 }, { "epoch": 0.14686058886129832, "grad_norm": 0.18288247287273407, "learning_rate": 3.944412336474607e-05, "loss": 0.3926, "num_tokens": 786739257.0, "step": 1242 }, { "epoch": 0.14697883410192739, "grad_norm": 0.17496144771575928, "learning_rate": 3.941395738917149e-05, "loss": 0.3441, "num_tokens": 787367527.0, "step": 1243 }, { "epoch": 0.14709707934255647, "grad_norm": 0.1842147409915924, "learning_rate": 3.9383782926046496e-05, "loss": 0.3555, "num_tokens": 787999760.0, "step": 1244 }, { "epoch": 0.14721532458318554, "grad_norm": 0.1850278675556183, "learning_rate": 3.935360001530075e-05, "loss": 0.356, "num_tokens": 788637623.0, "step": 1245 }, { "epoch": 0.1473335698238146, "grad_norm": 0.16519823670387268, "learning_rate": 3.932340869687512e-05, "loss": 0.3481, "num_tokens": 789276023.0, "step": 1246 }, { "epoch": 0.14745181506444366, "grad_norm": 0.17701783776283264, "learning_rate": 3.929320901072159e-05, "loss": 0.3793, "num_tokens": 789907579.0, "step": 1247 }, { "epoch": 0.14757006030507272, "grad_norm": 0.18543802201747894, "learning_rate": 3.9263000996803226e-05, "loss": 0.4052, "num_tokens": 790542282.0, "step": 1248 }, { "epoch": 0.14768830554570178, "grad_norm": 0.1734195053577423, "learning_rate": 3.92327846950941e-05, "loss": 0.385, "num_tokens": 791177851.0, "step": 1249 }, { "epoch": 0.14780655078633084, "grad_norm": 0.16981084644794464, "learning_rate": 3.9202560145579255e-05, "loss": 0.3653, "num_tokens": 791809529.0, "step": 1250 }, { "epoch": 0.1479247960269599, "grad_norm": 0.180636465549469, "learning_rate": 3.917232738825466e-05, "loss": 0.4003, "num_tokens": 792448822.0, "step": 1251 }, { "epoch": 0.148043041267589, "grad_norm": 0.16907177865505219, "learning_rate": 3.914208646312713e-05, "loss": 0.3903, "num_tokens": 793083686.0, "step": 1252 }, { "epoch": 0.14816128650821805, "grad_norm": 0.1896291971206665, "learning_rate": 3.911183741021431e-05, "loss": 0.4406, "num_tokens": 793720893.0, "step": 1253 }, { "epoch": 0.14827953174884712, "grad_norm": 0.1729743331670761, "learning_rate": 3.908158026954457e-05, "loss": 0.3581, "num_tokens": 794350771.0, "step": 1254 }, { "epoch": 0.14839777698947618, "grad_norm": 0.17992742359638214, "learning_rate": 3.9051315081156996e-05, "loss": 0.3826, "num_tokens": 794988418.0, "step": 1255 }, { "epoch": 0.14851602223010524, "grad_norm": 0.18747080862522125, "learning_rate": 3.902104188510134e-05, "loss": 0.4175, "num_tokens": 795619860.0, "step": 1256 }, { "epoch": 0.1486342674707343, "grad_norm": 0.18046019971370697, "learning_rate": 3.899076072143793e-05, "loss": 0.3723, "num_tokens": 796258236.0, "step": 1257 }, { "epoch": 0.14875251271136336, "grad_norm": 0.17821986973285675, "learning_rate": 3.896047163023764e-05, "loss": 0.3618, "num_tokens": 796889178.0, "step": 1258 }, { "epoch": 0.14887075795199242, "grad_norm": 0.18142741918563843, "learning_rate": 3.893017465158184e-05, "loss": 0.3865, "num_tokens": 797526666.0, "step": 1259 }, { "epoch": 0.1489890031926215, "grad_norm": 0.16956116259098053, "learning_rate": 3.889986982556235e-05, "loss": 0.3713, "num_tokens": 798162731.0, "step": 1260 }, { "epoch": 0.14910724843325057, "grad_norm": 0.1800818145275116, "learning_rate": 3.886955719228133e-05, "loss": 0.3861, "num_tokens": 798797285.0, "step": 1261 }, { "epoch": 0.14922549367387963, "grad_norm": 0.1661878526210785, "learning_rate": 3.8839236791851344e-05, "loss": 0.3797, "num_tokens": 799430881.0, "step": 1262 }, { "epoch": 0.1493437389145087, "grad_norm": 0.15878503024578094, "learning_rate": 3.880890866439515e-05, "loss": 0.3368, "num_tokens": 800065809.0, "step": 1263 }, { "epoch": 0.14946198415513776, "grad_norm": 0.1672976016998291, "learning_rate": 3.8778572850045794e-05, "loss": 0.3263, "num_tokens": 800695505.0, "step": 1264 }, { "epoch": 0.14958022939576682, "grad_norm": 0.1818859577178955, "learning_rate": 3.874822938894647e-05, "loss": 0.3847, "num_tokens": 801324547.0, "step": 1265 }, { "epoch": 0.14969847463639588, "grad_norm": 0.17939628660678864, "learning_rate": 3.871787832125049e-05, "loss": 0.3743, "num_tokens": 801954766.0, "step": 1266 }, { "epoch": 0.14981671987702494, "grad_norm": 0.1639385223388672, "learning_rate": 3.868751968712125e-05, "loss": 0.3375, "num_tokens": 802585735.0, "step": 1267 }, { "epoch": 0.149934965117654, "grad_norm": 0.19370990991592407, "learning_rate": 3.8657153526732125e-05, "loss": 0.4178, "num_tokens": 803222978.0, "step": 1268 }, { "epoch": 0.1500532103582831, "grad_norm": 0.15046223998069763, "learning_rate": 3.862677988026647e-05, "loss": 0.2976, "num_tokens": 803861621.0, "step": 1269 }, { "epoch": 0.15017145559891215, "grad_norm": 0.15617400407791138, "learning_rate": 3.859639878791756e-05, "loss": 0.2841, "num_tokens": 804493467.0, "step": 1270 }, { "epoch": 0.1502897008395412, "grad_norm": 0.19728296995162964, "learning_rate": 3.856601028988849e-05, "loss": 0.366, "num_tokens": 805126588.0, "step": 1271 }, { "epoch": 0.15040794608017027, "grad_norm": 0.16635596752166748, "learning_rate": 3.85356144263922e-05, "loss": 0.3633, "num_tokens": 805765213.0, "step": 1272 }, { "epoch": 0.15052619132079934, "grad_norm": 0.1875937432050705, "learning_rate": 3.850521123765132e-05, "loss": 0.3738, "num_tokens": 806404016.0, "step": 1273 }, { "epoch": 0.1506444365614284, "grad_norm": 0.15865705907344818, "learning_rate": 3.847480076389821e-05, "loss": 0.3508, "num_tokens": 807036599.0, "step": 1274 }, { "epoch": 0.15076268180205746, "grad_norm": 0.16535243391990662, "learning_rate": 3.844438304537487e-05, "loss": 0.3677, "num_tokens": 807672182.0, "step": 1275 }, { "epoch": 0.15088092704268652, "grad_norm": 0.18247942626476288, "learning_rate": 3.8413958122332876e-05, "loss": 0.3947, "num_tokens": 808281458.0, "step": 1276 }, { "epoch": 0.1509991722833156, "grad_norm": 0.1740826964378357, "learning_rate": 3.838352603503335e-05, "loss": 0.3841, "num_tokens": 808881586.0, "step": 1277 }, { "epoch": 0.15111741752394467, "grad_norm": 0.18411533534526825, "learning_rate": 3.835308682374687e-05, "loss": 0.3988, "num_tokens": 809481267.0, "step": 1278 }, { "epoch": 0.15123566276457373, "grad_norm": 0.17475385963916779, "learning_rate": 3.8322640528753474e-05, "loss": 0.3473, "num_tokens": 810113774.0, "step": 1279 }, { "epoch": 0.1513539080052028, "grad_norm": 0.1791556179523468, "learning_rate": 3.829218719034255e-05, "loss": 0.3772, "num_tokens": 810745803.0, "step": 1280 }, { "epoch": 0.15147215324583185, "grad_norm": 0.17454758286476135, "learning_rate": 3.826172684881281e-05, "loss": 0.4019, "num_tokens": 811385263.0, "step": 1281 }, { "epoch": 0.15159039848646091, "grad_norm": 0.1617545187473297, "learning_rate": 3.8231259544472245e-05, "loss": 0.3424, "num_tokens": 812016051.0, "step": 1282 }, { "epoch": 0.15170864372708998, "grad_norm": 0.16357214748859406, "learning_rate": 3.820078531763805e-05, "loss": 0.3235, "num_tokens": 812649108.0, "step": 1283 }, { "epoch": 0.15182688896771904, "grad_norm": 0.1793568879365921, "learning_rate": 3.817030420863658e-05, "loss": 0.3588, "num_tokens": 813283409.0, "step": 1284 }, { "epoch": 0.15194513420834813, "grad_norm": 0.17174500226974487, "learning_rate": 3.81398162578033e-05, "loss": 0.3685, "num_tokens": 813917493.0, "step": 1285 }, { "epoch": 0.1520633794489772, "grad_norm": 0.185197114944458, "learning_rate": 3.810932150548272e-05, "loss": 0.3773, "num_tokens": 814556038.0, "step": 1286 }, { "epoch": 0.15218162468960625, "grad_norm": 0.16052372753620148, "learning_rate": 3.807881999202837e-05, "loss": 0.366, "num_tokens": 815190976.0, "step": 1287 }, { "epoch": 0.1522998699302353, "grad_norm": 0.1711684763431549, "learning_rate": 3.8048311757802705e-05, "loss": 0.3666, "num_tokens": 815830227.0, "step": 1288 }, { "epoch": 0.15241811517086437, "grad_norm": 0.17269568145275116, "learning_rate": 3.80177968431771e-05, "loss": 0.3716, "num_tokens": 816464763.0, "step": 1289 }, { "epoch": 0.15253636041149343, "grad_norm": 0.17598651349544525, "learning_rate": 3.7987275288531744e-05, "loss": 0.3783, "num_tokens": 817072689.0, "step": 1290 }, { "epoch": 0.1526546056521225, "grad_norm": 0.17497462034225464, "learning_rate": 3.795674713425562e-05, "loss": 0.3813, "num_tokens": 817703995.0, "step": 1291 }, { "epoch": 0.15277285089275155, "grad_norm": 0.15532535314559937, "learning_rate": 3.7926212420746455e-05, "loss": 0.3322, "num_tokens": 818306204.0, "step": 1292 }, { "epoch": 0.15289109613338064, "grad_norm": 0.18098512291908264, "learning_rate": 3.789567118841063e-05, "loss": 0.3948, "num_tokens": 818944794.0, "step": 1293 }, { "epoch": 0.1530093413740097, "grad_norm": 0.18815064430236816, "learning_rate": 3.78651234776632e-05, "loss": 0.4214, "num_tokens": 819580090.0, "step": 1294 }, { "epoch": 0.15312758661463877, "grad_norm": 0.1842847764492035, "learning_rate": 3.783456932892774e-05, "loss": 0.3878, "num_tokens": 820195511.0, "step": 1295 }, { "epoch": 0.15324583185526783, "grad_norm": 0.16440126299858093, "learning_rate": 3.7804008782636374e-05, "loss": 0.3707, "num_tokens": 820832912.0, "step": 1296 }, { "epoch": 0.1533640770958969, "grad_norm": 0.17478272318840027, "learning_rate": 3.7773441879229685e-05, "loss": 0.3795, "num_tokens": 821463804.0, "step": 1297 }, { "epoch": 0.15348232233652595, "grad_norm": 0.18590036034584045, "learning_rate": 3.774286865915667e-05, "loss": 0.376, "num_tokens": 822096771.0, "step": 1298 }, { "epoch": 0.153600567577155, "grad_norm": 0.2140692174434662, "learning_rate": 3.771228916287468e-05, "loss": 0.3896, "num_tokens": 822724976.0, "step": 1299 }, { "epoch": 0.15371881281778407, "grad_norm": 0.16716571152210236, "learning_rate": 3.768170343084937e-05, "loss": 0.3616, "num_tokens": 823360078.0, "step": 1300 }, { "epoch": 0.15383705805841316, "grad_norm": 0.16711951792240143, "learning_rate": 3.7651111503554676e-05, "loss": 0.3574, "num_tokens": 823995903.0, "step": 1301 }, { "epoch": 0.15395530329904222, "grad_norm": 0.2010648250579834, "learning_rate": 3.762051342147268e-05, "loss": 0.4097, "num_tokens": 824634564.0, "step": 1302 }, { "epoch": 0.15407354853967128, "grad_norm": 0.20303165912628174, "learning_rate": 3.758990922509364e-05, "loss": 0.4327, "num_tokens": 825267057.0, "step": 1303 }, { "epoch": 0.15419179378030035, "grad_norm": 0.17012476921081543, "learning_rate": 3.7559298954915914e-05, "loss": 0.3591, "num_tokens": 825899088.0, "step": 1304 }, { "epoch": 0.1543100390209294, "grad_norm": 0.18373215198516846, "learning_rate": 3.752868265144587e-05, "loss": 0.3915, "num_tokens": 826535290.0, "step": 1305 }, { "epoch": 0.15442828426155847, "grad_norm": 0.17862318456172943, "learning_rate": 3.7498060355197896e-05, "loss": 0.373, "num_tokens": 827173746.0, "step": 1306 }, { "epoch": 0.15454652950218753, "grad_norm": 0.1615932434797287, "learning_rate": 3.7467432106694276e-05, "loss": 0.3663, "num_tokens": 827804731.0, "step": 1307 }, { "epoch": 0.1546647747428166, "grad_norm": 0.16810201108455658, "learning_rate": 3.743679794646517e-05, "loss": 0.369, "num_tokens": 828436832.0, "step": 1308 }, { "epoch": 0.15478301998344568, "grad_norm": 0.16492970287799835, "learning_rate": 3.740615791504859e-05, "loss": 0.3439, "num_tokens": 829065711.0, "step": 1309 }, { "epoch": 0.15490126522407474, "grad_norm": 0.17141704261302948, "learning_rate": 3.737551205299029e-05, "loss": 0.3655, "num_tokens": 829699491.0, "step": 1310 }, { "epoch": 0.1550195104647038, "grad_norm": 0.1736629605293274, "learning_rate": 3.734486040084377e-05, "loss": 0.3821, "num_tokens": 830301332.0, "step": 1311 }, { "epoch": 0.15513775570533286, "grad_norm": 0.17586372792720795, "learning_rate": 3.731420299917015e-05, "loss": 0.3464, "num_tokens": 830932269.0, "step": 1312 }, { "epoch": 0.15525600094596193, "grad_norm": 0.18535244464874268, "learning_rate": 3.72835398885382e-05, "loss": 0.3894, "num_tokens": 831571426.0, "step": 1313 }, { "epoch": 0.155374246186591, "grad_norm": 0.1786143183708191, "learning_rate": 3.7252871109524224e-05, "loss": 0.3856, "num_tokens": 832211161.0, "step": 1314 }, { "epoch": 0.15549249142722005, "grad_norm": 0.19510836899280548, "learning_rate": 3.722219670271201e-05, "loss": 0.4069, "num_tokens": 832846569.0, "step": 1315 }, { "epoch": 0.1556107366678491, "grad_norm": 0.16250097751617432, "learning_rate": 3.719151670869284e-05, "loss": 0.3705, "num_tokens": 833477855.0, "step": 1316 }, { "epoch": 0.15572898190847817, "grad_norm": 0.18421269953250885, "learning_rate": 3.716083116806533e-05, "loss": 0.3765, "num_tokens": 834112533.0, "step": 1317 }, { "epoch": 0.15584722714910726, "grad_norm": 0.15677453577518463, "learning_rate": 3.713014012143551e-05, "loss": 0.3367, "num_tokens": 834750383.0, "step": 1318 }, { "epoch": 0.15596547238973632, "grad_norm": 0.1714835911989212, "learning_rate": 3.709944360941662e-05, "loss": 0.3865, "num_tokens": 835382487.0, "step": 1319 }, { "epoch": 0.15608371763036538, "grad_norm": 0.16007648408412933, "learning_rate": 3.706874167262916e-05, "loss": 0.3555, "num_tokens": 836011005.0, "step": 1320 }, { "epoch": 0.15620196287099444, "grad_norm": 0.16921104490756989, "learning_rate": 3.703803435170084e-05, "loss": 0.3547, "num_tokens": 836645534.0, "step": 1321 }, { "epoch": 0.1563202081116235, "grad_norm": 0.1705523282289505, "learning_rate": 3.700732168726645e-05, "loss": 0.3333, "num_tokens": 837281263.0, "step": 1322 }, { "epoch": 0.15643845335225257, "grad_norm": 0.1723070591688156, "learning_rate": 3.69766037199679e-05, "loss": 0.3559, "num_tokens": 837911899.0, "step": 1323 }, { "epoch": 0.15655669859288163, "grad_norm": 0.20030608773231506, "learning_rate": 3.6945880490454045e-05, "loss": 0.4059, "num_tokens": 838549137.0, "step": 1324 }, { "epoch": 0.1566749438335107, "grad_norm": 0.19786883890628815, "learning_rate": 3.691515203938077e-05, "loss": 0.3872, "num_tokens": 839185355.0, "step": 1325 }, { "epoch": 0.15679318907413978, "grad_norm": 0.18107512593269348, "learning_rate": 3.6884418407410845e-05, "loss": 0.3701, "num_tokens": 839817125.0, "step": 1326 }, { "epoch": 0.15691143431476884, "grad_norm": 0.1745939999818802, "learning_rate": 3.68536796352139e-05, "loss": 0.3598, "num_tokens": 840448393.0, "step": 1327 }, { "epoch": 0.1570296795553979, "grad_norm": 0.1749391406774521, "learning_rate": 3.6822935763466356e-05, "loss": 0.3511, "num_tokens": 841086731.0, "step": 1328 }, { "epoch": 0.15714792479602696, "grad_norm": 0.19824792444705963, "learning_rate": 3.6792186832851385e-05, "loss": 0.3976, "num_tokens": 841723258.0, "step": 1329 }, { "epoch": 0.15726617003665602, "grad_norm": 0.16669194400310516, "learning_rate": 3.676143288405887e-05, "loss": 0.3501, "num_tokens": 842354306.0, "step": 1330 }, { "epoch": 0.15738441527728508, "grad_norm": 0.16550509631633759, "learning_rate": 3.6730673957785314e-05, "loss": 0.3631, "num_tokens": 842990332.0, "step": 1331 }, { "epoch": 0.15750266051791414, "grad_norm": 0.17728877067565918, "learning_rate": 3.6699910094733816e-05, "loss": 0.3809, "num_tokens": 843624261.0, "step": 1332 }, { "epoch": 0.1576209057585432, "grad_norm": 0.1832035630941391, "learning_rate": 3.666914133561401e-05, "loss": 0.3751, "num_tokens": 844262144.0, "step": 1333 }, { "epoch": 0.1577391509991723, "grad_norm": 0.17735359072685242, "learning_rate": 3.6638367721142006e-05, "loss": 0.3715, "num_tokens": 844897584.0, "step": 1334 }, { "epoch": 0.15785739623980136, "grad_norm": 0.17729835212230682, "learning_rate": 3.660758929204034e-05, "loss": 0.3829, "num_tokens": 845532833.0, "step": 1335 }, { "epoch": 0.15797564148043042, "grad_norm": 0.18032696843147278, "learning_rate": 3.6576806089037916e-05, "loss": 0.3598, "num_tokens": 846132555.0, "step": 1336 }, { "epoch": 0.15809388672105948, "grad_norm": 0.19697603583335876, "learning_rate": 3.654601815286995e-05, "loss": 0.4304, "num_tokens": 846771073.0, "step": 1337 }, { "epoch": 0.15821213196168854, "grad_norm": 0.16080917418003082, "learning_rate": 3.6515225524277955e-05, "loss": 0.3532, "num_tokens": 847407047.0, "step": 1338 }, { "epoch": 0.1583303772023176, "grad_norm": 0.17054249346256256, "learning_rate": 3.648442824400959e-05, "loss": 0.3906, "num_tokens": 848038762.0, "step": 1339 }, { "epoch": 0.15844862244294666, "grad_norm": 0.17448587715625763, "learning_rate": 3.6453626352818744e-05, "loss": 0.3581, "num_tokens": 848671922.0, "step": 1340 }, { "epoch": 0.15856686768357572, "grad_norm": 0.18276441097259521, "learning_rate": 3.642281989146534e-05, "loss": 0.3741, "num_tokens": 849309141.0, "step": 1341 }, { "epoch": 0.1586851129242048, "grad_norm": 0.16970501840114594, "learning_rate": 3.63920089007154e-05, "loss": 0.3815, "num_tokens": 849947357.0, "step": 1342 }, { "epoch": 0.15880335816483387, "grad_norm": 0.18194201588630676, "learning_rate": 3.63611934213409e-05, "loss": 0.3632, "num_tokens": 850586919.0, "step": 1343 }, { "epoch": 0.15892160340546294, "grad_norm": 0.20223063230514526, "learning_rate": 3.633037349411978e-05, "loss": 0.4044, "num_tokens": 851164524.0, "step": 1344 }, { "epoch": 0.159039848646092, "grad_norm": 0.1658789962530136, "learning_rate": 3.629954915983588e-05, "loss": 0.3732, "num_tokens": 851795535.0, "step": 1345 }, { "epoch": 0.15915809388672106, "grad_norm": 0.16001170873641968, "learning_rate": 3.626872045927883e-05, "loss": 0.3657, "num_tokens": 852429267.0, "step": 1346 }, { "epoch": 0.15927633912735012, "grad_norm": 0.1767129898071289, "learning_rate": 3.6237887433244076e-05, "loss": 0.4073, "num_tokens": 853066581.0, "step": 1347 }, { "epoch": 0.15939458436797918, "grad_norm": 0.16027915477752686, "learning_rate": 3.6207050122532766e-05, "loss": 0.3671, "num_tokens": 853705006.0, "step": 1348 }, { "epoch": 0.15951282960860824, "grad_norm": 0.16998380422592163, "learning_rate": 3.617620856795171e-05, "loss": 0.3815, "num_tokens": 854335569.0, "step": 1349 }, { "epoch": 0.15963107484923733, "grad_norm": 0.1529499590396881, "learning_rate": 3.614536281031337e-05, "loss": 0.3546, "num_tokens": 854974579.0, "step": 1350 }, { "epoch": 0.1597493200898664, "grad_norm": 0.16457729041576385, "learning_rate": 3.611451289043573e-05, "loss": 0.3878, "num_tokens": 855612244.0, "step": 1351 }, { "epoch": 0.15986756533049545, "grad_norm": 0.17140960693359375, "learning_rate": 3.608365884914232e-05, "loss": 0.3636, "num_tokens": 856244164.0, "step": 1352 }, { "epoch": 0.15998581057112451, "grad_norm": 0.17074401676654816, "learning_rate": 3.605280072726208e-05, "loss": 0.376, "num_tokens": 856879791.0, "step": 1353 }, { "epoch": 0.16010405581175358, "grad_norm": 0.17245988547801971, "learning_rate": 3.602193856562937e-05, "loss": 0.3687, "num_tokens": 857513354.0, "step": 1354 }, { "epoch": 0.16022230105238264, "grad_norm": 0.15877239406108856, "learning_rate": 3.599107240508394e-05, "loss": 0.3487, "num_tokens": 858148669.0, "step": 1355 }, { "epoch": 0.1603405462930117, "grad_norm": 0.1687905192375183, "learning_rate": 3.596020228647074e-05, "loss": 0.3516, "num_tokens": 858784311.0, "step": 1356 }, { "epoch": 0.16045879153364076, "grad_norm": 0.15725862979888916, "learning_rate": 3.592932825064004e-05, "loss": 0.3544, "num_tokens": 859417782.0, "step": 1357 }, { "epoch": 0.16057703677426985, "grad_norm": 0.16414190828800201, "learning_rate": 3.5898450338447266e-05, "loss": 0.3568, "num_tokens": 860048747.0, "step": 1358 }, { "epoch": 0.1606952820148989, "grad_norm": 0.17501533031463623, "learning_rate": 3.586756859075294e-05, "loss": 0.3958, "num_tokens": 860684168.0, "step": 1359 }, { "epoch": 0.16081352725552797, "grad_norm": 0.1815037876367569, "learning_rate": 3.5836683048422714e-05, "loss": 0.3914, "num_tokens": 861317334.0, "step": 1360 }, { "epoch": 0.16093177249615703, "grad_norm": 0.1688801646232605, "learning_rate": 3.5805793752327226e-05, "loss": 0.3954, "num_tokens": 861948731.0, "step": 1361 }, { "epoch": 0.1610500177367861, "grad_norm": 0.17557959258556366, "learning_rate": 3.5774900743342096e-05, "loss": 0.4118, "num_tokens": 862586570.0, "step": 1362 }, { "epoch": 0.16116826297741516, "grad_norm": 0.16620570421218872, "learning_rate": 3.5744004062347845e-05, "loss": 0.3704, "num_tokens": 863222204.0, "step": 1363 }, { "epoch": 0.16128650821804422, "grad_norm": 0.17702950537204742, "learning_rate": 3.571310375022988e-05, "loss": 0.3715, "num_tokens": 863857243.0, "step": 1364 }, { "epoch": 0.16140475345867328, "grad_norm": 0.14711260795593262, "learning_rate": 3.568219984787838e-05, "loss": 0.3481, "num_tokens": 864495982.0, "step": 1365 }, { "epoch": 0.16152299869930234, "grad_norm": 0.1549108326435089, "learning_rate": 3.5651292396188284e-05, "loss": 0.3172, "num_tokens": 865131852.0, "step": 1366 }, { "epoch": 0.16164124393993143, "grad_norm": 0.15956881642341614, "learning_rate": 3.5620381436059246e-05, "loss": 0.3455, "num_tokens": 865767401.0, "step": 1367 }, { "epoch": 0.1617594891805605, "grad_norm": 0.17599697411060333, "learning_rate": 3.5589467008395545e-05, "loss": 0.3688, "num_tokens": 866405290.0, "step": 1368 }, { "epoch": 0.16187773442118955, "grad_norm": 0.17518922686576843, "learning_rate": 3.555854915410605e-05, "loss": 0.3865, "num_tokens": 867039158.0, "step": 1369 }, { "epoch": 0.1619959796618186, "grad_norm": 0.1700814664363861, "learning_rate": 3.5527627914104176e-05, "loss": 0.367, "num_tokens": 867673395.0, "step": 1370 }, { "epoch": 0.16211422490244767, "grad_norm": 0.17121228575706482, "learning_rate": 3.54967033293078e-05, "loss": 0.3901, "num_tokens": 868310685.0, "step": 1371 }, { "epoch": 0.16223247014307673, "grad_norm": 0.17767947912216187, "learning_rate": 3.546577544063924e-05, "loss": 0.3855, "num_tokens": 868923042.0, "step": 1372 }, { "epoch": 0.1623507153837058, "grad_norm": 0.1763809621334076, "learning_rate": 3.5434844289025174e-05, "loss": 0.4122, "num_tokens": 869556905.0, "step": 1373 }, { "epoch": 0.16246896062433486, "grad_norm": 0.15335842967033386, "learning_rate": 3.5403909915396624e-05, "loss": 0.321, "num_tokens": 870190559.0, "step": 1374 }, { "epoch": 0.16258720586496395, "grad_norm": 0.16744902729988098, "learning_rate": 3.537297236068884e-05, "loss": 0.353, "num_tokens": 870829637.0, "step": 1375 }, { "epoch": 0.162705451105593, "grad_norm": 0.1898547112941742, "learning_rate": 3.53420316658413e-05, "loss": 0.3477, "num_tokens": 871466630.0, "step": 1376 }, { "epoch": 0.16282369634622207, "grad_norm": 0.18806128203868866, "learning_rate": 3.531108787179764e-05, "loss": 0.3784, "num_tokens": 872077498.0, "step": 1377 }, { "epoch": 0.16294194158685113, "grad_norm": 0.18817409873008728, "learning_rate": 3.5280141019505576e-05, "loss": 0.3864, "num_tokens": 872706532.0, "step": 1378 }, { "epoch": 0.1630601868274802, "grad_norm": 0.17816320061683655, "learning_rate": 3.524919114991693e-05, "loss": 0.3553, "num_tokens": 873345301.0, "step": 1379 }, { "epoch": 0.16317843206810925, "grad_norm": 0.15472441911697388, "learning_rate": 3.521823830398743e-05, "loss": 0.3527, "num_tokens": 873977689.0, "step": 1380 }, { "epoch": 0.16329667730873831, "grad_norm": 0.17606398463249207, "learning_rate": 3.518728252267681e-05, "loss": 0.3672, "num_tokens": 874615260.0, "step": 1381 }, { "epoch": 0.16341492254936738, "grad_norm": 0.15786398947238922, "learning_rate": 3.515632384694865e-05, "loss": 0.3754, "num_tokens": 875248450.0, "step": 1382 }, { "epoch": 0.16353316778999646, "grad_norm": 0.1698189079761505, "learning_rate": 3.512536231777039e-05, "loss": 0.4117, "num_tokens": 875858896.0, "step": 1383 }, { "epoch": 0.16365141303062553, "grad_norm": 0.17061932384967804, "learning_rate": 3.5094397976113226e-05, "loss": 0.349, "num_tokens": 876495972.0, "step": 1384 }, { "epoch": 0.1637696582712546, "grad_norm": 0.1739518940448761, "learning_rate": 3.5063430862952073e-05, "loss": 0.3964, "num_tokens": 877126376.0, "step": 1385 }, { "epoch": 0.16388790351188365, "grad_norm": 0.16720589995384216, "learning_rate": 3.503246101926554e-05, "loss": 0.3339, "num_tokens": 877761789.0, "step": 1386 }, { "epoch": 0.1640061487525127, "grad_norm": 0.18069598078727722, "learning_rate": 3.500148848603582e-05, "loss": 0.3926, "num_tokens": 878356527.0, "step": 1387 }, { "epoch": 0.16412439399314177, "grad_norm": 0.1658611297607422, "learning_rate": 3.4970513304248674e-05, "loss": 0.3745, "num_tokens": 878990451.0, "step": 1388 }, { "epoch": 0.16424263923377083, "grad_norm": 0.1776137799024582, "learning_rate": 3.493953551489337e-05, "loss": 0.3313, "num_tokens": 879620939.0, "step": 1389 }, { "epoch": 0.1643608844743999, "grad_norm": 0.1723213642835617, "learning_rate": 3.490855515896263e-05, "loss": 0.3715, "num_tokens": 880257254.0, "step": 1390 }, { "epoch": 0.16447912971502898, "grad_norm": 0.19617415964603424, "learning_rate": 3.487757227745257e-05, "loss": 0.4065, "num_tokens": 880896268.0, "step": 1391 }, { "epoch": 0.16459737495565804, "grad_norm": 0.19237631559371948, "learning_rate": 3.484658691136265e-05, "loss": 0.3585, "num_tokens": 881531804.0, "step": 1392 }, { "epoch": 0.1647156201962871, "grad_norm": 0.208879753947258, "learning_rate": 3.48155991016956e-05, "loss": 0.3575, "num_tokens": 882135234.0, "step": 1393 }, { "epoch": 0.16483386543691617, "grad_norm": 0.17721176147460938, "learning_rate": 3.4784608889457406e-05, "loss": 0.3502, "num_tokens": 882730906.0, "step": 1394 }, { "epoch": 0.16495211067754523, "grad_norm": 0.18262936174869537, "learning_rate": 3.475361631565723e-05, "loss": 0.3943, "num_tokens": 883362306.0, "step": 1395 }, { "epoch": 0.1650703559181743, "grad_norm": 0.18020063638687134, "learning_rate": 3.472262142130736e-05, "loss": 0.3545, "num_tokens": 884001035.0, "step": 1396 }, { "epoch": 0.16518860115880335, "grad_norm": 0.18471196293830872, "learning_rate": 3.4691624247423126e-05, "loss": 0.3795, "num_tokens": 884635015.0, "step": 1397 }, { "epoch": 0.1653068463994324, "grad_norm": 0.17687906324863434, "learning_rate": 3.466062483502293e-05, "loss": 0.3879, "num_tokens": 885272613.0, "step": 1398 }, { "epoch": 0.1654250916400615, "grad_norm": 0.17874866724014282, "learning_rate": 3.4629623225128084e-05, "loss": 0.3235, "num_tokens": 885905643.0, "step": 1399 }, { "epoch": 0.16554333688069056, "grad_norm": 0.1700168401002884, "learning_rate": 3.459861945876282e-05, "loss": 0.3607, "num_tokens": 886536895.0, "step": 1400 }, { "epoch": 0.16566158212131962, "grad_norm": 0.16640955209732056, "learning_rate": 3.456761357695425e-05, "loss": 0.3412, "num_tokens": 887173320.0, "step": 1401 }, { "epoch": 0.16577982736194868, "grad_norm": 0.17980210483074188, "learning_rate": 3.453660562073225e-05, "loss": 0.3777, "num_tokens": 887807056.0, "step": 1402 }, { "epoch": 0.16589807260257775, "grad_norm": 0.19972193241119385, "learning_rate": 3.4505595631129484e-05, "loss": 0.386, "num_tokens": 888446558.0, "step": 1403 }, { "epoch": 0.1660163178432068, "grad_norm": 0.17348936200141907, "learning_rate": 3.4474583649181254e-05, "loss": 0.3606, "num_tokens": 889079741.0, "step": 1404 }, { "epoch": 0.16613456308383587, "grad_norm": 0.17330406606197357, "learning_rate": 3.4443569715925534e-05, "loss": 0.4002, "num_tokens": 889708375.0, "step": 1405 }, { "epoch": 0.16625280832446493, "grad_norm": 0.16449137032032013, "learning_rate": 3.441255387240288e-05, "loss": 0.3451, "num_tokens": 890343205.0, "step": 1406 }, { "epoch": 0.16637105356509402, "grad_norm": 0.1590339094400406, "learning_rate": 3.4381536159656354e-05, "loss": 0.3473, "num_tokens": 890980377.0, "step": 1407 }, { "epoch": 0.16648929880572308, "grad_norm": 0.16445298492908478, "learning_rate": 3.435051661873152e-05, "loss": 0.346, "num_tokens": 891610939.0, "step": 1408 }, { "epoch": 0.16660754404635214, "grad_norm": 0.18500487506389618, "learning_rate": 3.431949529067633e-05, "loss": 0.3941, "num_tokens": 892248590.0, "step": 1409 }, { "epoch": 0.1667257892869812, "grad_norm": 0.19658434391021729, "learning_rate": 3.4288472216541126e-05, "loss": 0.4552, "num_tokens": 892884534.0, "step": 1410 }, { "epoch": 0.16684403452761026, "grad_norm": 0.1724197417497635, "learning_rate": 3.4257447437378544e-05, "loss": 0.3398, "num_tokens": 893522439.0, "step": 1411 }, { "epoch": 0.16696227976823932, "grad_norm": 0.17510159313678741, "learning_rate": 3.42264209942435e-05, "loss": 0.3507, "num_tokens": 894159327.0, "step": 1412 }, { "epoch": 0.16708052500886839, "grad_norm": 0.20202413201332092, "learning_rate": 3.419539292819308e-05, "loss": 0.3959, "num_tokens": 894796406.0, "step": 1413 }, { "epoch": 0.16719877024949745, "grad_norm": 0.16532990336418152, "learning_rate": 3.416436328028654e-05, "loss": 0.3679, "num_tokens": 895433169.0, "step": 1414 }, { "epoch": 0.16731701549012654, "grad_norm": 0.16116783022880554, "learning_rate": 3.413333209158522e-05, "loss": 0.3715, "num_tokens": 896065522.0, "step": 1415 }, { "epoch": 0.1674352607307556, "grad_norm": 0.1760171353816986, "learning_rate": 3.410229940315251e-05, "loss": 0.3474, "num_tokens": 896697566.0, "step": 1416 }, { "epoch": 0.16755350597138466, "grad_norm": 0.17760524153709412, "learning_rate": 3.4071265256053755e-05, "loss": 0.3651, "num_tokens": 897331944.0, "step": 1417 }, { "epoch": 0.16767175121201372, "grad_norm": 0.1891304850578308, "learning_rate": 3.404022969135628e-05, "loss": 0.4184, "num_tokens": 897965210.0, "step": 1418 }, { "epoch": 0.16778999645264278, "grad_norm": 0.1605445295572281, "learning_rate": 3.400919275012923e-05, "loss": 0.3771, "num_tokens": 898598679.0, "step": 1419 }, { "epoch": 0.16790824169327184, "grad_norm": 0.18207436800003052, "learning_rate": 3.397815447344361e-05, "loss": 0.4081, "num_tokens": 899230065.0, "step": 1420 }, { "epoch": 0.1680264869339009, "grad_norm": 0.1534513235092163, "learning_rate": 3.3947114902372195e-05, "loss": 0.3266, "num_tokens": 899866736.0, "step": 1421 }, { "epoch": 0.16814473217452997, "grad_norm": 0.17347653210163116, "learning_rate": 3.391607407798943e-05, "loss": 0.347, "num_tokens": 900503362.0, "step": 1422 }, { "epoch": 0.16826297741515903, "grad_norm": 0.157567098736763, "learning_rate": 3.3885032041371474e-05, "loss": 0.3707, "num_tokens": 901142459.0, "step": 1423 }, { "epoch": 0.16838122265578812, "grad_norm": 0.1753988415002823, "learning_rate": 3.385398883359605e-05, "loss": 0.3704, "num_tokens": 901768662.0, "step": 1424 }, { "epoch": 0.16849946789641718, "grad_norm": 0.16486535966396332, "learning_rate": 3.3822944495742436e-05, "loss": 0.3642, "num_tokens": 902398246.0, "step": 1425 }, { "epoch": 0.16861771313704624, "grad_norm": 0.16971829533576965, "learning_rate": 3.379189906889144e-05, "loss": 0.3555, "num_tokens": 903032227.0, "step": 1426 }, { "epoch": 0.1687359583776753, "grad_norm": 0.1704730987548828, "learning_rate": 3.376085259412525e-05, "loss": 0.3856, "num_tokens": 903666346.0, "step": 1427 }, { "epoch": 0.16885420361830436, "grad_norm": 0.1611000895500183, "learning_rate": 3.372980511252752e-05, "loss": 0.3937, "num_tokens": 904303678.0, "step": 1428 }, { "epoch": 0.16897244885893342, "grad_norm": 0.18555743992328644, "learning_rate": 3.369875666518314e-05, "loss": 0.3608, "num_tokens": 904930363.0, "step": 1429 }, { "epoch": 0.16909069409956248, "grad_norm": 0.16819705069065094, "learning_rate": 3.366770729317837e-05, "loss": 0.4225, "num_tokens": 905564490.0, "step": 1430 }, { "epoch": 0.16920893934019154, "grad_norm": 0.17546449601650238, "learning_rate": 3.363665703760063e-05, "loss": 0.3882, "num_tokens": 906200069.0, "step": 1431 }, { "epoch": 0.16932718458082063, "grad_norm": 0.16625650227069855, "learning_rate": 3.3605605939538556e-05, "loss": 0.3445, "num_tokens": 906837954.0, "step": 1432 }, { "epoch": 0.1694454298214497, "grad_norm": 0.17123083770275116, "learning_rate": 3.357455404008186e-05, "loss": 0.3372, "num_tokens": 907469285.0, "step": 1433 }, { "epoch": 0.16956367506207876, "grad_norm": 0.1995430290699005, "learning_rate": 3.3543501380321315e-05, "loss": 0.4258, "num_tokens": 908105346.0, "step": 1434 }, { "epoch": 0.16968192030270782, "grad_norm": 0.16351737082004547, "learning_rate": 3.351244800134875e-05, "loss": 0.3553, "num_tokens": 908736680.0, "step": 1435 }, { "epoch": 0.16980016554333688, "grad_norm": 0.16494734585285187, "learning_rate": 3.348139394425687e-05, "loss": 0.3672, "num_tokens": 909376079.0, "step": 1436 }, { "epoch": 0.16991841078396594, "grad_norm": 0.17809386551380157, "learning_rate": 3.3450339250139353e-05, "loss": 0.3834, "num_tokens": 910014511.0, "step": 1437 }, { "epoch": 0.170036656024595, "grad_norm": 0.17304128408432007, "learning_rate": 3.341928396009067e-05, "loss": 0.3539, "num_tokens": 910650479.0, "step": 1438 }, { "epoch": 0.17015490126522406, "grad_norm": 0.1687421351671219, "learning_rate": 3.338822811520609e-05, "loss": 0.3586, "num_tokens": 911289548.0, "step": 1439 }, { "epoch": 0.17027314650585315, "grad_norm": 0.17560400068759918, "learning_rate": 3.335717175658164e-05, "loss": 0.3764, "num_tokens": 911920115.0, "step": 1440 }, { "epoch": 0.1703913917464822, "grad_norm": 0.18659047782421112, "learning_rate": 3.3326114925313995e-05, "loss": 0.3793, "num_tokens": 912552910.0, "step": 1441 }, { "epoch": 0.17050963698711127, "grad_norm": 0.15252403914928436, "learning_rate": 3.329505766250047e-05, "loss": 0.356, "num_tokens": 913186992.0, "step": 1442 }, { "epoch": 0.17062788222774034, "grad_norm": 0.1647413671016693, "learning_rate": 3.326400000923896e-05, "loss": 0.3644, "num_tokens": 913822413.0, "step": 1443 }, { "epoch": 0.1707461274683694, "grad_norm": 0.15846136212348938, "learning_rate": 3.323294200662785e-05, "loss": 0.3376, "num_tokens": 914458604.0, "step": 1444 }, { "epoch": 0.17086437270899846, "grad_norm": 0.16936224699020386, "learning_rate": 3.3201883695766014e-05, "loss": 0.3769, "num_tokens": 915097439.0, "step": 1445 }, { "epoch": 0.17098261794962752, "grad_norm": 0.17426526546478271, "learning_rate": 3.317082511775272e-05, "loss": 0.3862, "num_tokens": 915732434.0, "step": 1446 }, { "epoch": 0.17110086319025658, "grad_norm": 0.18202632665634155, "learning_rate": 3.313976631368759e-05, "loss": 0.3781, "num_tokens": 916361639.0, "step": 1447 }, { "epoch": 0.17121910843088567, "grad_norm": 0.18634334206581116, "learning_rate": 3.310870732467057e-05, "loss": 0.3626, "num_tokens": 916961319.0, "step": 1448 }, { "epoch": 0.17133735367151473, "grad_norm": 0.160066619515419, "learning_rate": 3.3077648191801817e-05, "loss": 0.3788, "num_tokens": 917600686.0, "step": 1449 }, { "epoch": 0.1714555989121438, "grad_norm": 0.18605881929397583, "learning_rate": 3.304658895618168e-05, "loss": 0.3704, "num_tokens": 918239869.0, "step": 1450 }, { "epoch": 0.17157384415277285, "grad_norm": 0.1764983981847763, "learning_rate": 3.3015529658910665e-05, "loss": 0.3624, "num_tokens": 918877611.0, "step": 1451 }, { "epoch": 0.17169208939340191, "grad_norm": 0.1722479909658432, "learning_rate": 3.2984470341089346e-05, "loss": 0.3914, "num_tokens": 919517243.0, "step": 1452 }, { "epoch": 0.17181033463403098, "grad_norm": 0.17516613006591797, "learning_rate": 3.295341104381833e-05, "loss": 0.2958, "num_tokens": 920155279.0, "step": 1453 }, { "epoch": 0.17192857987466004, "grad_norm": 0.18306639790534973, "learning_rate": 3.2922351808198195e-05, "loss": 0.3519, "num_tokens": 920791529.0, "step": 1454 }, { "epoch": 0.1720468251152891, "grad_norm": 0.18455427885055542, "learning_rate": 3.2891292675329434e-05, "loss": 0.4022, "num_tokens": 921421972.0, "step": 1455 }, { "epoch": 0.1721650703559182, "grad_norm": 0.17631007730960846, "learning_rate": 3.286023368631241e-05, "loss": 0.395, "num_tokens": 922059966.0, "step": 1456 }, { "epoch": 0.17228331559654725, "grad_norm": 0.17840716242790222, "learning_rate": 3.2829174882247295e-05, "loss": 0.3539, "num_tokens": 922694070.0, "step": 1457 }, { "epoch": 0.1724015608371763, "grad_norm": 0.1761758029460907, "learning_rate": 3.2798116304234e-05, "loss": 0.3708, "num_tokens": 923326220.0, "step": 1458 }, { "epoch": 0.17251980607780537, "grad_norm": 0.20283976197242737, "learning_rate": 3.276705799337217e-05, "loss": 0.3979, "num_tokens": 923963438.0, "step": 1459 }, { "epoch": 0.17263805131843443, "grad_norm": 0.18546903133392334, "learning_rate": 3.273599999076105e-05, "loss": 0.3961, "num_tokens": 924593547.0, "step": 1460 }, { "epoch": 0.1727562965590635, "grad_norm": 0.16381438076496124, "learning_rate": 3.270494233749953e-05, "loss": 0.3438, "num_tokens": 925228798.0, "step": 1461 }, { "epoch": 0.17287454179969255, "grad_norm": 0.18618075549602509, "learning_rate": 3.267388507468602e-05, "loss": 0.3847, "num_tokens": 925861981.0, "step": 1462 }, { "epoch": 0.17299278704032162, "grad_norm": 0.20563967525959015, "learning_rate": 3.2642828243418365e-05, "loss": 0.3768, "num_tokens": 926497587.0, "step": 1463 }, { "epoch": 0.1731110322809507, "grad_norm": 0.1740288883447647, "learning_rate": 3.261177188479391e-05, "loss": 0.3562, "num_tokens": 927132750.0, "step": 1464 }, { "epoch": 0.17322927752157977, "grad_norm": 0.1666458398103714, "learning_rate": 3.2580716039909335e-05, "loss": 0.3594, "num_tokens": 927764160.0, "step": 1465 }, { "epoch": 0.17334752276220883, "grad_norm": 0.166953906416893, "learning_rate": 3.254966074986065e-05, "loss": 0.3439, "num_tokens": 928403130.0, "step": 1466 }, { "epoch": 0.1734657680028379, "grad_norm": 0.18671217560768127, "learning_rate": 3.251860605574314e-05, "loss": 0.3665, "num_tokens": 929035983.0, "step": 1467 }, { "epoch": 0.17358401324346695, "grad_norm": 0.18403495848178864, "learning_rate": 3.248755199865126e-05, "loss": 0.3999, "num_tokens": 929673737.0, "step": 1468 }, { "epoch": 0.173702258484096, "grad_norm": 0.17461302876472473, "learning_rate": 3.245649861967869e-05, "loss": 0.3818, "num_tokens": 930303540.0, "step": 1469 }, { "epoch": 0.17382050372472507, "grad_norm": 0.2092389613389969, "learning_rate": 3.242544595991815e-05, "loss": 0.4015, "num_tokens": 930934690.0, "step": 1470 }, { "epoch": 0.17393874896535413, "grad_norm": 0.17646804451942444, "learning_rate": 3.239439406046145e-05, "loss": 0.3822, "num_tokens": 931570813.0, "step": 1471 }, { "epoch": 0.1740569942059832, "grad_norm": 0.16729430854320526, "learning_rate": 3.236334296239937e-05, "loss": 0.3666, "num_tokens": 932203713.0, "step": 1472 }, { "epoch": 0.17417523944661228, "grad_norm": 0.16469994187355042, "learning_rate": 3.233229270682163e-05, "loss": 0.3798, "num_tokens": 932842100.0, "step": 1473 }, { "epoch": 0.17429348468724135, "grad_norm": 0.16082432866096497, "learning_rate": 3.230124333481686e-05, "loss": 0.347, "num_tokens": 933475258.0, "step": 1474 }, { "epoch": 0.1744117299278704, "grad_norm": 0.16905388236045837, "learning_rate": 3.227019488747249e-05, "loss": 0.3569, "num_tokens": 934112965.0, "step": 1475 }, { "epoch": 0.17452997516849947, "grad_norm": 0.20290949940681458, "learning_rate": 3.2239147405874745e-05, "loss": 0.3906, "num_tokens": 934737655.0, "step": 1476 }, { "epoch": 0.17464822040912853, "grad_norm": 0.17375296354293823, "learning_rate": 3.220810093110857e-05, "loss": 0.3608, "num_tokens": 935377274.0, "step": 1477 }, { "epoch": 0.1747664656497576, "grad_norm": 0.17994755506515503, "learning_rate": 3.217705550425756e-05, "loss": 0.3519, "num_tokens": 936011871.0, "step": 1478 }, { "epoch": 0.17488471089038665, "grad_norm": 0.1969660371541977, "learning_rate": 3.214601116640396e-05, "loss": 0.4092, "num_tokens": 936647042.0, "step": 1479 }, { "epoch": 0.1750029561310157, "grad_norm": 0.1726323664188385, "learning_rate": 3.211496795862853e-05, "loss": 0.3596, "num_tokens": 937283368.0, "step": 1480 }, { "epoch": 0.1751212013716448, "grad_norm": 0.16877536475658417, "learning_rate": 3.208392592201058e-05, "loss": 0.3688, "num_tokens": 937919785.0, "step": 1481 }, { "epoch": 0.17523944661227386, "grad_norm": 0.17268769443035126, "learning_rate": 3.205288509762781e-05, "loss": 0.36, "num_tokens": 938554025.0, "step": 1482 }, { "epoch": 0.17535769185290292, "grad_norm": 0.17848555743694305, "learning_rate": 3.2021845526556394e-05, "loss": 0.3572, "num_tokens": 939191403.0, "step": 1483 }, { "epoch": 0.175475937093532, "grad_norm": 0.18568621575832367, "learning_rate": 3.199080724987078e-05, "loss": 0.3988, "num_tokens": 939827159.0, "step": 1484 }, { "epoch": 0.17559418233416105, "grad_norm": 0.1646295189857483, "learning_rate": 3.195977030864373e-05, "loss": 0.3475, "num_tokens": 940465371.0, "step": 1485 }, { "epoch": 0.1757124275747901, "grad_norm": 0.17712347209453583, "learning_rate": 3.1928734743946243e-05, "loss": 0.4087, "num_tokens": 941102114.0, "step": 1486 }, { "epoch": 0.17583067281541917, "grad_norm": 0.1864871382713318, "learning_rate": 3.189770059684749e-05, "loss": 0.3831, "num_tokens": 941738142.0, "step": 1487 }, { "epoch": 0.17594891805604823, "grad_norm": 0.16088663041591644, "learning_rate": 3.186666790841478e-05, "loss": 0.3303, "num_tokens": 942366943.0, "step": 1488 }, { "epoch": 0.17606716329667732, "grad_norm": 0.15922009944915771, "learning_rate": 3.183563671971347e-05, "loss": 0.3795, "num_tokens": 942995207.0, "step": 1489 }, { "epoch": 0.17618540853730638, "grad_norm": 0.1968304067850113, "learning_rate": 3.1804607071806916e-05, "loss": 0.3883, "num_tokens": 943631334.0, "step": 1490 }, { "epoch": 0.17630365377793544, "grad_norm": 0.17453576624393463, "learning_rate": 3.17735790057565e-05, "loss": 0.3661, "num_tokens": 944264751.0, "step": 1491 }, { "epoch": 0.1764218990185645, "grad_norm": 0.17541645467281342, "learning_rate": 3.1742552562621454e-05, "loss": 0.3984, "num_tokens": 944901410.0, "step": 1492 }, { "epoch": 0.17654014425919357, "grad_norm": 0.16635823249816895, "learning_rate": 3.171152778345888e-05, "loss": 0.3735, "num_tokens": 945538374.0, "step": 1493 }, { "epoch": 0.17665838949982263, "grad_norm": 0.1661323755979538, "learning_rate": 3.168050470932368e-05, "loss": 0.353, "num_tokens": 946170416.0, "step": 1494 }, { "epoch": 0.1767766347404517, "grad_norm": 0.17597319185733795, "learning_rate": 3.164948338126849e-05, "loss": 0.3577, "num_tokens": 946805320.0, "step": 1495 }, { "epoch": 0.17689487998108075, "grad_norm": 0.1651628017425537, "learning_rate": 3.161846384034366e-05, "loss": 0.4059, "num_tokens": 947437269.0, "step": 1496 }, { "epoch": 0.17701312522170984, "grad_norm": 0.18678541481494904, "learning_rate": 3.1587446127597126e-05, "loss": 0.3983, "num_tokens": 948074252.0, "step": 1497 }, { "epoch": 0.1771313704623389, "grad_norm": 0.15683230757713318, "learning_rate": 3.155643028407447e-05, "loss": 0.3214, "num_tokens": 948701457.0, "step": 1498 }, { "epoch": 0.17724961570296796, "grad_norm": 0.17081496119499207, "learning_rate": 3.152541635081875e-05, "loss": 0.3665, "num_tokens": 949332037.0, "step": 1499 }, { "epoch": 0.17736786094359702, "grad_norm": 0.17161160707473755, "learning_rate": 3.149440436887053e-05, "loss": 0.3598, "num_tokens": 949967678.0, "step": 1500 }, { "epoch": 0.17748610618422608, "grad_norm": 0.15939413011074066, "learning_rate": 3.1463394379267755e-05, "loss": 0.3541, "num_tokens": 950600709.0, "step": 1501 }, { "epoch": 0.17760435142485514, "grad_norm": 0.17879928648471832, "learning_rate": 3.1432386423045756e-05, "loss": 0.3888, "num_tokens": 951227404.0, "step": 1502 }, { "epoch": 0.1777225966654842, "grad_norm": 0.16773827373981476, "learning_rate": 3.1401380541237186e-05, "loss": 0.3721, "num_tokens": 951866625.0, "step": 1503 }, { "epoch": 0.17784084190611327, "grad_norm": 0.17299412190914154, "learning_rate": 3.137037677487193e-05, "loss": 0.3596, "num_tokens": 952502095.0, "step": 1504 }, { "epoch": 0.17795908714674236, "grad_norm": 0.16810157895088196, "learning_rate": 3.133937516497708e-05, "loss": 0.3773, "num_tokens": 953134327.0, "step": 1505 }, { "epoch": 0.17807733238737142, "grad_norm": 0.15980418026447296, "learning_rate": 3.130837575257688e-05, "loss": 0.356, "num_tokens": 953773701.0, "step": 1506 }, { "epoch": 0.17819557762800048, "grad_norm": 0.1758408099412918, "learning_rate": 3.1277378578692645e-05, "loss": 0.3844, "num_tokens": 954406325.0, "step": 1507 }, { "epoch": 0.17831382286862954, "grad_norm": 0.1675221174955368, "learning_rate": 3.124638368434277e-05, "loss": 0.3644, "num_tokens": 955041716.0, "step": 1508 }, { "epoch": 0.1784320681092586, "grad_norm": 0.18556548655033112, "learning_rate": 3.121539111054259e-05, "loss": 0.3729, "num_tokens": 955678383.0, "step": 1509 }, { "epoch": 0.17855031334988766, "grad_norm": 0.17029190063476562, "learning_rate": 3.118440089830441e-05, "loss": 0.3887, "num_tokens": 956315561.0, "step": 1510 }, { "epoch": 0.17866855859051672, "grad_norm": 0.17651323974132538, "learning_rate": 3.115341308863736e-05, "loss": 0.3898, "num_tokens": 956944551.0, "step": 1511 }, { "epoch": 0.17878680383114579, "grad_norm": 0.1759771704673767, "learning_rate": 3.1122427722547435e-05, "loss": 0.3802, "num_tokens": 957583170.0, "step": 1512 }, { "epoch": 0.17890504907177487, "grad_norm": 0.15872859954833984, "learning_rate": 3.109144484103738e-05, "loss": 0.3358, "num_tokens": 958218955.0, "step": 1513 }, { "epoch": 0.17902329431240394, "grad_norm": 0.17943841218948364, "learning_rate": 3.106046448510663e-05, "loss": 0.3519, "num_tokens": 958852938.0, "step": 1514 }, { "epoch": 0.179141539553033, "grad_norm": 0.17151592671871185, "learning_rate": 3.102948669575133e-05, "loss": 0.364, "num_tokens": 959490199.0, "step": 1515 }, { "epoch": 0.17925978479366206, "grad_norm": 0.18046090006828308, "learning_rate": 3.099851151396419e-05, "loss": 0.3769, "num_tokens": 960127283.0, "step": 1516 }, { "epoch": 0.17937803003429112, "grad_norm": 0.16845285892486572, "learning_rate": 3.096753898073447e-05, "loss": 0.3736, "num_tokens": 960756987.0, "step": 1517 }, { "epoch": 0.17949627527492018, "grad_norm": 0.16035690903663635, "learning_rate": 3.093656913704793e-05, "loss": 0.38, "num_tokens": 961395831.0, "step": 1518 }, { "epoch": 0.17961452051554924, "grad_norm": 0.16901935636997223, "learning_rate": 3.0905602023886786e-05, "loss": 0.363, "num_tokens": 962031388.0, "step": 1519 }, { "epoch": 0.1797327657561783, "grad_norm": 0.1605140119791031, "learning_rate": 3.087463768222962e-05, "loss": 0.3202, "num_tokens": 962666836.0, "step": 1520 }, { "epoch": 0.1798510109968074, "grad_norm": 0.17046019434928894, "learning_rate": 3.084367615305136e-05, "loss": 0.3469, "num_tokens": 963305624.0, "step": 1521 }, { "epoch": 0.17996925623743645, "grad_norm": 0.1570424735546112, "learning_rate": 3.08127174773232e-05, "loss": 0.3616, "num_tokens": 963941024.0, "step": 1522 }, { "epoch": 0.18008750147806551, "grad_norm": 0.17264924943447113, "learning_rate": 3.078176169601259e-05, "loss": 0.3659, "num_tokens": 964574609.0, "step": 1523 }, { "epoch": 0.18020574671869458, "grad_norm": 0.21252690255641937, "learning_rate": 3.075080885008308e-05, "loss": 0.4115, "num_tokens": 965214035.0, "step": 1524 }, { "epoch": 0.18032399195932364, "grad_norm": 0.15354739129543304, "learning_rate": 3.071985898049442e-05, "loss": 0.3541, "num_tokens": 965848776.0, "step": 1525 }, { "epoch": 0.1804422371999527, "grad_norm": 0.1739215850830078, "learning_rate": 3.068891212820237e-05, "loss": 0.3533, "num_tokens": 966478626.0, "step": 1526 }, { "epoch": 0.18056048244058176, "grad_norm": 0.19520846009254456, "learning_rate": 3.0657968334158715e-05, "loss": 0.3982, "num_tokens": 967079379.0, "step": 1527 }, { "epoch": 0.18067872768121082, "grad_norm": 0.16799034178256989, "learning_rate": 3.0627027639311165e-05, "loss": 0.3487, "num_tokens": 967715213.0, "step": 1528 }, { "epoch": 0.18079697292183988, "grad_norm": 0.19211645424365997, "learning_rate": 3.059609008460339e-05, "loss": 0.377, "num_tokens": 968326010.0, "step": 1529 }, { "epoch": 0.18091521816246897, "grad_norm": 0.17046518623828888, "learning_rate": 3.0565155710974824e-05, "loss": 0.3554, "num_tokens": 968964068.0, "step": 1530 }, { "epoch": 0.18103346340309803, "grad_norm": 0.167215034365654, "learning_rate": 3.053422455936077e-05, "loss": 0.3903, "num_tokens": 969598545.0, "step": 1531 }, { "epoch": 0.1811517086437271, "grad_norm": 0.18514011800289154, "learning_rate": 3.050329667069221e-05, "loss": 0.3952, "num_tokens": 970231579.0, "step": 1532 }, { "epoch": 0.18126995388435616, "grad_norm": 0.18290914595127106, "learning_rate": 3.047237208589583e-05, "loss": 0.3563, "num_tokens": 970868020.0, "step": 1533 }, { "epoch": 0.18138819912498522, "grad_norm": 0.18019051849842072, "learning_rate": 3.044145084589395e-05, "loss": 0.3801, "num_tokens": 971501577.0, "step": 1534 }, { "epoch": 0.18150644436561428, "grad_norm": 0.18275585770606995, "learning_rate": 3.041053299160446e-05, "loss": 0.4036, "num_tokens": 972136461.0, "step": 1535 }, { "epoch": 0.18162468960624334, "grad_norm": 0.16142059862613678, "learning_rate": 3.0379618563940756e-05, "loss": 0.3384, "num_tokens": 972774594.0, "step": 1536 }, { "epoch": 0.1817429348468724, "grad_norm": 0.20104844868183136, "learning_rate": 3.034870760381172e-05, "loss": 0.3699, "num_tokens": 973410075.0, "step": 1537 }, { "epoch": 0.1818611800875015, "grad_norm": 0.18426941335201263, "learning_rate": 3.0317800152121625e-05, "loss": 0.3835, "num_tokens": 974049163.0, "step": 1538 }, { "epoch": 0.18197942532813055, "grad_norm": 0.1625235378742218, "learning_rate": 3.0286896249770123e-05, "loss": 0.3486, "num_tokens": 974681502.0, "step": 1539 }, { "epoch": 0.1820976705687596, "grad_norm": 0.19007031619548798, "learning_rate": 3.0255995937652153e-05, "loss": 0.392, "num_tokens": 975312361.0, "step": 1540 }, { "epoch": 0.18221591580938867, "grad_norm": 0.1634375900030136, "learning_rate": 3.022509925665791e-05, "loss": 0.3726, "num_tokens": 975944571.0, "step": 1541 }, { "epoch": 0.18233416105001773, "grad_norm": 0.17391811311244965, "learning_rate": 3.0194206247672786e-05, "loss": 0.3431, "num_tokens": 976578990.0, "step": 1542 }, { "epoch": 0.1824524062906468, "grad_norm": 0.17183931171894073, "learning_rate": 3.0163316951577294e-05, "loss": 0.3434, "num_tokens": 977213113.0, "step": 1543 }, { "epoch": 0.18257065153127586, "grad_norm": 0.18079477548599243, "learning_rate": 3.0132431409247073e-05, "loss": 0.3712, "num_tokens": 977846686.0, "step": 1544 }, { "epoch": 0.18268889677190492, "grad_norm": 0.1796652376651764, "learning_rate": 3.0101549661552743e-05, "loss": 0.351, "num_tokens": 978484082.0, "step": 1545 }, { "epoch": 0.182807142012534, "grad_norm": 0.18004387617111206, "learning_rate": 3.0070671749359963e-05, "loss": 0.3715, "num_tokens": 979120007.0, "step": 1546 }, { "epoch": 0.18292538725316307, "grad_norm": 0.1583901345729828, "learning_rate": 3.0039797713529264e-05, "loss": 0.321, "num_tokens": 979754257.0, "step": 1547 }, { "epoch": 0.18304363249379213, "grad_norm": 0.1803177446126938, "learning_rate": 3.000892759491607e-05, "loss": 0.364, "num_tokens": 980379873.0, "step": 1548 }, { "epoch": 0.1831618777344212, "grad_norm": 0.16974784433841705, "learning_rate": 2.9978061434370635e-05, "loss": 0.3492, "num_tokens": 981012470.0, "step": 1549 }, { "epoch": 0.18328012297505025, "grad_norm": 0.17915767431259155, "learning_rate": 2.9947199272737932e-05, "loss": 0.41, "num_tokens": 981645741.0, "step": 1550 }, { "epoch": 0.1833983682156793, "grad_norm": 0.17839612066745758, "learning_rate": 2.9916341150857692e-05, "loss": 0.3728, "num_tokens": 982273942.0, "step": 1551 }, { "epoch": 0.18351661345630838, "grad_norm": 0.17455312609672546, "learning_rate": 2.988548710956428e-05, "loss": 0.3698, "num_tokens": 982905114.0, "step": 1552 }, { "epoch": 0.18363485869693744, "grad_norm": 32.93268966674805, "learning_rate": 2.9854637189686637e-05, "loss": 1.4744, "num_tokens": 983508182.0, "step": 1553 }, { "epoch": 0.18375310393756653, "grad_norm": 0.21559761464595795, "learning_rate": 2.9823791432048297e-05, "loss": 0.3675, "num_tokens": 984147363.0, "step": 1554 }, { "epoch": 0.1838713491781956, "grad_norm": 0.18873609602451324, "learning_rate": 2.9792949877467246e-05, "loss": 0.3667, "num_tokens": 984781250.0, "step": 1555 }, { "epoch": 0.18398959441882465, "grad_norm": 0.17174896597862244, "learning_rate": 2.9762112566755922e-05, "loss": 0.3658, "num_tokens": 985418934.0, "step": 1556 }, { "epoch": 0.1841078396594537, "grad_norm": 0.1909123659133911, "learning_rate": 2.9731279540721175e-05, "loss": 0.3605, "num_tokens": 986058183.0, "step": 1557 }, { "epoch": 0.18422608490008277, "grad_norm": 0.17101429402828217, "learning_rate": 2.970045084016412e-05, "loss": 0.3281, "num_tokens": 986689892.0, "step": 1558 }, { "epoch": 0.18434433014071183, "grad_norm": 0.17448195815086365, "learning_rate": 2.966962650588022e-05, "loss": 0.352, "num_tokens": 987323095.0, "step": 1559 }, { "epoch": 0.1844625753813409, "grad_norm": 0.19304078817367554, "learning_rate": 2.963880657865911e-05, "loss": 0.3399, "num_tokens": 987953264.0, "step": 1560 }, { "epoch": 0.18458082062196995, "grad_norm": 0.16238828003406525, "learning_rate": 2.960799109928461e-05, "loss": 0.3489, "num_tokens": 988590158.0, "step": 1561 }, { "epoch": 0.18469906586259904, "grad_norm": 0.1621323823928833, "learning_rate": 2.9577180108534666e-05, "loss": 0.3777, "num_tokens": 989215297.0, "step": 1562 }, { "epoch": 0.1848173111032281, "grad_norm": 0.17066222429275513, "learning_rate": 2.9546373647181265e-05, "loss": 0.3627, "num_tokens": 989854912.0, "step": 1563 }, { "epoch": 0.18493555634385717, "grad_norm": 0.1540173441171646, "learning_rate": 2.9515571755990414e-05, "loss": 0.3255, "num_tokens": 990494043.0, "step": 1564 }, { "epoch": 0.18505380158448623, "grad_norm": 0.18189725279808044, "learning_rate": 2.9484774475722057e-05, "loss": 0.3697, "num_tokens": 991126843.0, "step": 1565 }, { "epoch": 0.1851720468251153, "grad_norm": 0.17115595936775208, "learning_rate": 2.945398184713005e-05, "loss": 0.3548, "num_tokens": 991761760.0, "step": 1566 }, { "epoch": 0.18529029206574435, "grad_norm": 0.16794586181640625, "learning_rate": 2.9423193910962092e-05, "loss": 0.3356, "num_tokens": 992401240.0, "step": 1567 }, { "epoch": 0.1854085373063734, "grad_norm": 0.16514816880226135, "learning_rate": 2.939241070795967e-05, "loss": 0.3708, "num_tokens": 993035492.0, "step": 1568 }, { "epoch": 0.18552678254700247, "grad_norm": 0.17977920174598694, "learning_rate": 2.9361632278858003e-05, "loss": 0.3992, "num_tokens": 993670701.0, "step": 1569 }, { "epoch": 0.18564502778763156, "grad_norm": 0.17868345975875854, "learning_rate": 2.9330858664385995e-05, "loss": 0.3977, "num_tokens": 994304128.0, "step": 1570 }, { "epoch": 0.18576327302826062, "grad_norm": 0.16653095185756683, "learning_rate": 2.930008990526619e-05, "loss": 0.3277, "num_tokens": 994941330.0, "step": 1571 }, { "epoch": 0.18588151826888968, "grad_norm": 0.17928676307201385, "learning_rate": 2.9269326042214695e-05, "loss": 0.3898, "num_tokens": 995567115.0, "step": 1572 }, { "epoch": 0.18599976350951875, "grad_norm": 0.168831005692482, "learning_rate": 2.923856711594114e-05, "loss": 0.3929, "num_tokens": 996201713.0, "step": 1573 }, { "epoch": 0.1861180087501478, "grad_norm": 0.1701105684041977, "learning_rate": 2.9207813167148624e-05, "loss": 0.3397, "num_tokens": 996834913.0, "step": 1574 }, { "epoch": 0.18623625399077687, "grad_norm": 0.16095875203609467, "learning_rate": 2.917706423653365e-05, "loss": 0.3619, "num_tokens": 997471417.0, "step": 1575 }, { "epoch": 0.18635449923140593, "grad_norm": 0.18966802954673767, "learning_rate": 2.9146320364786106e-05, "loss": 0.4041, "num_tokens": 998105800.0, "step": 1576 }, { "epoch": 0.186472744472035, "grad_norm": 0.15751314163208008, "learning_rate": 2.9115581592589153e-05, "loss": 0.4016, "num_tokens": 998742897.0, "step": 1577 }, { "epoch": 0.18659098971266405, "grad_norm": 0.1764875054359436, "learning_rate": 2.908484796061923e-05, "loss": 0.3892, "num_tokens": 999375870.0, "step": 1578 }, { "epoch": 0.18670923495329314, "grad_norm": 0.16247054934501648, "learning_rate": 2.9054119509545957e-05, "loss": 0.3617, "num_tokens": 1000009533.0, "step": 1579 }, { "epoch": 0.1868274801939222, "grad_norm": 0.16236217319965363, "learning_rate": 2.902339628003211e-05, "loss": 0.3605, "num_tokens": 1000648319.0, "step": 1580 }, { "epoch": 0.18694572543455126, "grad_norm": 0.17573755979537964, "learning_rate": 2.8992678312733547e-05, "loss": 0.3755, "num_tokens": 1001283756.0, "step": 1581 }, { "epoch": 0.18706397067518032, "grad_norm": 0.1554783433675766, "learning_rate": 2.8961965648299166e-05, "loss": 0.3265, "num_tokens": 1001920534.0, "step": 1582 }, { "epoch": 0.18718221591580939, "grad_norm": 0.1593015044927597, "learning_rate": 2.8931258327370845e-05, "loss": 0.339, "num_tokens": 1002538598.0, "step": 1583 }, { "epoch": 0.18730046115643845, "grad_norm": 0.1607053279876709, "learning_rate": 2.8900556390583394e-05, "loss": 0.3348, "num_tokens": 1003174599.0, "step": 1584 }, { "epoch": 0.1874187063970675, "grad_norm": 0.16559194028377533, "learning_rate": 2.8869859878564498e-05, "loss": 0.3475, "num_tokens": 1003810442.0, "step": 1585 }, { "epoch": 0.18753695163769657, "grad_norm": 0.18042126297950745, "learning_rate": 2.883916883193467e-05, "loss": 0.4163, "num_tokens": 1004449210.0, "step": 1586 }, { "epoch": 0.18765519687832566, "grad_norm": 0.153243288397789, "learning_rate": 2.8808483291307167e-05, "loss": 0.3196, "num_tokens": 1005080267.0, "step": 1587 }, { "epoch": 0.18777344211895472, "grad_norm": 0.17950351536273956, "learning_rate": 2.877780329728799e-05, "loss": 0.4136, "num_tokens": 1005714238.0, "step": 1588 }, { "epoch": 0.18789168735958378, "grad_norm": 0.1732146143913269, "learning_rate": 2.8747128890475784e-05, "loss": 0.3666, "num_tokens": 1006353420.0, "step": 1589 }, { "epoch": 0.18800993260021284, "grad_norm": 0.19320537149906158, "learning_rate": 2.8716460111461807e-05, "loss": 0.3835, "num_tokens": 1006983318.0, "step": 1590 }, { "epoch": 0.1881281778408419, "grad_norm": 0.1723458468914032, "learning_rate": 2.8685797000829853e-05, "loss": 0.3519, "num_tokens": 1007615434.0, "step": 1591 }, { "epoch": 0.18824642308147096, "grad_norm": 0.1752302050590515, "learning_rate": 2.8655139599156236e-05, "loss": 0.3644, "num_tokens": 1008247783.0, "step": 1592 }, { "epoch": 0.18836466832210003, "grad_norm": 0.15836386382579803, "learning_rate": 2.8624487947009718e-05, "loss": 0.3466, "num_tokens": 1008884013.0, "step": 1593 }, { "epoch": 0.1884829135627291, "grad_norm": 0.17339007556438446, "learning_rate": 2.8593842084951417e-05, "loss": 0.3779, "num_tokens": 1009517526.0, "step": 1594 }, { "epoch": 0.18860115880335818, "grad_norm": 0.15724654495716095, "learning_rate": 2.8563202053534844e-05, "loss": 0.3636, "num_tokens": 1010147212.0, "step": 1595 }, { "epoch": 0.18871940404398724, "grad_norm": 0.15183372795581818, "learning_rate": 2.8532567893305735e-05, "loss": 0.369, "num_tokens": 1010778276.0, "step": 1596 }, { "epoch": 0.1888376492846163, "grad_norm": 0.17174997925758362, "learning_rate": 2.8501939644802106e-05, "loss": 0.3508, "num_tokens": 1011412350.0, "step": 1597 }, { "epoch": 0.18895589452524536, "grad_norm": 0.18961110711097717, "learning_rate": 2.8471317348554125e-05, "loss": 0.3696, "num_tokens": 1012042617.0, "step": 1598 }, { "epoch": 0.18907413976587442, "grad_norm": 0.16691336035728455, "learning_rate": 2.844070104508409e-05, "loss": 0.3571, "num_tokens": 1012678514.0, "step": 1599 }, { "epoch": 0.18919238500650348, "grad_norm": 0.1767212152481079, "learning_rate": 2.841009077490636e-05, "loss": 0.3939, "num_tokens": 1013317458.0, "step": 1600 }, { "epoch": 0.18931063024713254, "grad_norm": 0.1530785858631134, "learning_rate": 2.8379486578527328e-05, "loss": 0.3187, "num_tokens": 1013951330.0, "step": 1601 }, { "epoch": 0.1894288754877616, "grad_norm": 0.16864624619483948, "learning_rate": 2.8348888496445332e-05, "loss": 0.3559, "num_tokens": 1014583639.0, "step": 1602 }, { "epoch": 0.1895471207283907, "grad_norm": 0.1874200999736786, "learning_rate": 2.831829656915063e-05, "loss": 0.3987, "num_tokens": 1015222884.0, "step": 1603 }, { "epoch": 0.18966536596901976, "grad_norm": 0.16399650275707245, "learning_rate": 2.8287710837125326e-05, "loss": 0.3792, "num_tokens": 1015860526.0, "step": 1604 }, { "epoch": 0.18978361120964882, "grad_norm": 0.16257967054843903, "learning_rate": 2.825713134084334e-05, "loss": 0.3507, "num_tokens": 1016498048.0, "step": 1605 }, { "epoch": 0.18990185645027788, "grad_norm": 0.1760384738445282, "learning_rate": 2.8226558120770317e-05, "loss": 0.3818, "num_tokens": 1017129789.0, "step": 1606 }, { "epoch": 0.19002010169090694, "grad_norm": 0.16825507581233978, "learning_rate": 2.8195991217363634e-05, "loss": 0.4254, "num_tokens": 1017761831.0, "step": 1607 }, { "epoch": 0.190138346931536, "grad_norm": 0.18675942718982697, "learning_rate": 2.8165430671072266e-05, "loss": 0.3746, "num_tokens": 1018397544.0, "step": 1608 }, { "epoch": 0.19025659217216506, "grad_norm": 4.314301013946533, "learning_rate": 2.8134876522336804e-05, "loss": 0.4993, "num_tokens": 1018998279.0, "step": 1609 }, { "epoch": 0.19037483741279412, "grad_norm": 0.19050520658493042, "learning_rate": 2.8104328811589373e-05, "loss": 0.365, "num_tokens": 1019634188.0, "step": 1610 }, { "epoch": 0.1904930826534232, "grad_norm": 0.19670063257217407, "learning_rate": 2.8073787579253554e-05, "loss": 0.3534, "num_tokens": 1020259191.0, "step": 1611 }, { "epoch": 0.19061132789405227, "grad_norm": 0.15332163870334625, "learning_rate": 2.8043252865744388e-05, "loss": 0.3383, "num_tokens": 1020890735.0, "step": 1612 }, { "epoch": 0.19072957313468134, "grad_norm": 0.1597706526517868, "learning_rate": 2.8012724711468257e-05, "loss": 0.3426, "num_tokens": 1021516346.0, "step": 1613 }, { "epoch": 0.1908478183753104, "grad_norm": 0.1752755492925644, "learning_rate": 2.79822031568229e-05, "loss": 0.374, "num_tokens": 1022154101.0, "step": 1614 }, { "epoch": 0.19096606361593946, "grad_norm": 0.18779323995113373, "learning_rate": 2.7951688242197293e-05, "loss": 0.402, "num_tokens": 1022784679.0, "step": 1615 }, { "epoch": 0.19108430885656852, "grad_norm": 0.16711129248142242, "learning_rate": 2.7921180007971632e-05, "loss": 0.335, "num_tokens": 1023419003.0, "step": 1616 }, { "epoch": 0.19120255409719758, "grad_norm": 0.16607694327831268, "learning_rate": 2.7890678494517283e-05, "loss": 0.3741, "num_tokens": 1024053560.0, "step": 1617 }, { "epoch": 0.19132079933782664, "grad_norm": 0.15859146416187286, "learning_rate": 2.7860183742196708e-05, "loss": 0.3197, "num_tokens": 1024689470.0, "step": 1618 }, { "epoch": 0.19143904457845573, "grad_norm": 0.15850317478179932, "learning_rate": 2.7829695791363426e-05, "loss": 0.3432, "num_tokens": 1025315733.0, "step": 1619 }, { "epoch": 0.1915572898190848, "grad_norm": 0.17494311928749084, "learning_rate": 2.7799214682361958e-05, "loss": 0.3673, "num_tokens": 1025951911.0, "step": 1620 }, { "epoch": 0.19167553505971385, "grad_norm": 0.16905559599399567, "learning_rate": 2.7768740455527753e-05, "loss": 0.3741, "num_tokens": 1026586199.0, "step": 1621 }, { "epoch": 0.19179378030034291, "grad_norm": 0.1669166088104248, "learning_rate": 2.7738273151187193e-05, "loss": 0.3583, "num_tokens": 1027217632.0, "step": 1622 }, { "epoch": 0.19191202554097198, "grad_norm": 0.20083673298358917, "learning_rate": 2.7707812809657455e-05, "loss": 0.3712, "num_tokens": 1027847572.0, "step": 1623 }, { "epoch": 0.19203027078160104, "grad_norm": 0.17961816489696503, "learning_rate": 2.7677359471246534e-05, "loss": 0.3693, "num_tokens": 1028475842.0, "step": 1624 }, { "epoch": 0.1921485160222301, "grad_norm": 0.16991600394248962, "learning_rate": 2.7646913176253133e-05, "loss": 0.3568, "num_tokens": 1029111898.0, "step": 1625 }, { "epoch": 0.19226676126285916, "grad_norm": 0.16991819441318512, "learning_rate": 2.7616473964966657e-05, "loss": 0.3727, "num_tokens": 1029743386.0, "step": 1626 }, { "epoch": 0.19238500650348825, "grad_norm": 0.17013666033744812, "learning_rate": 2.758604187766713e-05, "loss": 0.3677, "num_tokens": 1030378115.0, "step": 1627 }, { "epoch": 0.1925032517441173, "grad_norm": 0.18004712462425232, "learning_rate": 2.755561695462513e-05, "loss": 0.4029, "num_tokens": 1031003252.0, "step": 1628 }, { "epoch": 0.19262149698474637, "grad_norm": 0.16978485882282257, "learning_rate": 2.7525199236101797e-05, "loss": 0.3712, "num_tokens": 1031616777.0, "step": 1629 }, { "epoch": 0.19273974222537543, "grad_norm": 0.18321801722049713, "learning_rate": 2.7494788762348694e-05, "loss": 0.3895, "num_tokens": 1032254017.0, "step": 1630 }, { "epoch": 0.1928579874660045, "grad_norm": 0.1728045642375946, "learning_rate": 2.7464385573607808e-05, "loss": 0.4044, "num_tokens": 1032888699.0, "step": 1631 }, { "epoch": 0.19297623270663355, "grad_norm": 0.17345483601093292, "learning_rate": 2.743398971011151e-05, "loss": 0.3602, "num_tokens": 1033518207.0, "step": 1632 }, { "epoch": 0.19309447794726262, "grad_norm": 0.15876762568950653, "learning_rate": 2.7403601212082446e-05, "loss": 0.3591, "num_tokens": 1034156394.0, "step": 1633 }, { "epoch": 0.19321272318789168, "grad_norm": 0.1655876785516739, "learning_rate": 2.737322011973353e-05, "loss": 0.3447, "num_tokens": 1034757861.0, "step": 1634 }, { "epoch": 0.19333096842852074, "grad_norm": 0.15525826811790466, "learning_rate": 2.734284647326788e-05, "loss": 0.3321, "num_tokens": 1035390904.0, "step": 1635 }, { "epoch": 0.19344921366914983, "grad_norm": 0.16308487951755524, "learning_rate": 2.7312480312878753e-05, "loss": 0.3468, "num_tokens": 1036022608.0, "step": 1636 }, { "epoch": 0.1935674589097789, "grad_norm": 0.15808793902397156, "learning_rate": 2.7282121678749508e-05, "loss": 0.3231, "num_tokens": 1036653565.0, "step": 1637 }, { "epoch": 0.19368570415040795, "grad_norm": 0.1726795732975006, "learning_rate": 2.7251770611053532e-05, "loss": 0.3604, "num_tokens": 1037289640.0, "step": 1638 }, { "epoch": 0.193803949391037, "grad_norm": 0.18068532645702362, "learning_rate": 2.7221427149954214e-05, "loss": 0.3619, "num_tokens": 1037923126.0, "step": 1639 }, { "epoch": 0.19392219463166607, "grad_norm": 0.15515606105327606, "learning_rate": 2.7191091335604855e-05, "loss": 0.3135, "num_tokens": 1038552223.0, "step": 1640 }, { "epoch": 0.19404043987229513, "grad_norm": 0.14960326254367828, "learning_rate": 2.7160763208148668e-05, "loss": 0.3557, "num_tokens": 1039187143.0, "step": 1641 }, { "epoch": 0.1941586851129242, "grad_norm": 0.19315950572490692, "learning_rate": 2.713044280771867e-05, "loss": 0.4029, "num_tokens": 1039825115.0, "step": 1642 }, { "epoch": 0.19427693035355326, "grad_norm": 0.16578054428100586, "learning_rate": 2.7100130174437664e-05, "loss": 0.3622, "num_tokens": 1040457103.0, "step": 1643 }, { "epoch": 0.19439517559418235, "grad_norm": 0.16419482231140137, "learning_rate": 2.7069825348418168e-05, "loss": 0.326, "num_tokens": 1041089601.0, "step": 1644 }, { "epoch": 0.1945134208348114, "grad_norm": 0.17337539792060852, "learning_rate": 2.7039528369762364e-05, "loss": 0.3629, "num_tokens": 1041725934.0, "step": 1645 }, { "epoch": 0.19463166607544047, "grad_norm": 0.18786442279815674, "learning_rate": 2.700923927856208e-05, "loss": 0.3456, "num_tokens": 1042365339.0, "step": 1646 }, { "epoch": 0.19474991131606953, "grad_norm": 0.19918936491012573, "learning_rate": 2.6978958114898666e-05, "loss": 0.3638, "num_tokens": 1042996166.0, "step": 1647 }, { "epoch": 0.1948681565566986, "grad_norm": 0.15421392023563385, "learning_rate": 2.6948684918843013e-05, "loss": 0.3328, "num_tokens": 1043623270.0, "step": 1648 }, { "epoch": 0.19498640179732765, "grad_norm": 0.18007415533065796, "learning_rate": 2.6918419730455445e-05, "loss": 0.3468, "num_tokens": 1044254863.0, "step": 1649 }, { "epoch": 0.1951046470379567, "grad_norm": 0.19268766045570374, "learning_rate": 2.6888162589785698e-05, "loss": 0.4225, "num_tokens": 1044894564.0, "step": 1650 }, { "epoch": 0.19522289227858577, "grad_norm": 0.16468672454357147, "learning_rate": 2.6857913536872868e-05, "loss": 0.3486, "num_tokens": 1045527684.0, "step": 1651 }, { "epoch": 0.19534113751921486, "grad_norm": 0.15274105966091156, "learning_rate": 2.682767261174534e-05, "loss": 0.3578, "num_tokens": 1046158648.0, "step": 1652 }, { "epoch": 0.19545938275984392, "grad_norm": 0.16642116010189056, "learning_rate": 2.6797439854420747e-05, "loss": 0.3553, "num_tokens": 1046792143.0, "step": 1653 }, { "epoch": 0.195577628000473, "grad_norm": 0.19157806038856506, "learning_rate": 2.676721530490591e-05, "loss": 0.3833, "num_tokens": 1047425498.0, "step": 1654 }, { "epoch": 0.19569587324110205, "grad_norm": 0.18173323571681976, "learning_rate": 2.6736999003196772e-05, "loss": 0.3635, "num_tokens": 1048059505.0, "step": 1655 }, { "epoch": 0.1958141184817311, "grad_norm": 0.18953822553157806, "learning_rate": 2.6706790989278408e-05, "loss": 0.3783, "num_tokens": 1048693153.0, "step": 1656 }, { "epoch": 0.19593236372236017, "grad_norm": 0.17194925248622894, "learning_rate": 2.667659130312488e-05, "loss": 0.3987, "num_tokens": 1049329857.0, "step": 1657 }, { "epoch": 0.19605060896298923, "grad_norm": 0.16898545622825623, "learning_rate": 2.6646399984699254e-05, "loss": 0.3372, "num_tokens": 1049961195.0, "step": 1658 }, { "epoch": 0.1961688542036183, "grad_norm": 0.21434806287288666, "learning_rate": 2.6616217073953512e-05, "loss": 0.3945, "num_tokens": 1050595023.0, "step": 1659 }, { "epoch": 0.19628709944424738, "grad_norm": 0.18597570061683655, "learning_rate": 2.658604261082851e-05, "loss": 0.3993, "num_tokens": 1051234499.0, "step": 1660 }, { "epoch": 0.19640534468487644, "grad_norm": 0.16699214279651642, "learning_rate": 2.655587663525395e-05, "loss": 0.3743, "num_tokens": 1051865728.0, "step": 1661 }, { "epoch": 0.1965235899255055, "grad_norm": 0.17103753983974457, "learning_rate": 2.6525719187148245e-05, "loss": 0.356, "num_tokens": 1052494691.0, "step": 1662 }, { "epoch": 0.19664183516613457, "grad_norm": 0.1663488894701004, "learning_rate": 2.6495570306418603e-05, "loss": 0.3462, "num_tokens": 1053129843.0, "step": 1663 }, { "epoch": 0.19676008040676363, "grad_norm": 0.18791471421718597, "learning_rate": 2.6465430032960825e-05, "loss": 0.3607, "num_tokens": 1053762168.0, "step": 1664 }, { "epoch": 0.1968783256473927, "grad_norm": 0.18649154901504517, "learning_rate": 2.643529840665936e-05, "loss": 0.3853, "num_tokens": 1054399760.0, "step": 1665 }, { "epoch": 0.19699657088802175, "grad_norm": 0.15763476490974426, "learning_rate": 2.640517546738721e-05, "loss": 0.3716, "num_tokens": 1055028654.0, "step": 1666 }, { "epoch": 0.1971148161286508, "grad_norm": 0.19678330421447754, "learning_rate": 2.637506125500586e-05, "loss": 0.3564, "num_tokens": 1055660000.0, "step": 1667 }, { "epoch": 0.1972330613692799, "grad_norm": 0.17163364589214325, "learning_rate": 2.634495580936528e-05, "loss": 0.3615, "num_tokens": 1056296438.0, "step": 1668 }, { "epoch": 0.19735130660990896, "grad_norm": 0.16406387090682983, "learning_rate": 2.6314859170303796e-05, "loss": 0.3503, "num_tokens": 1056927317.0, "step": 1669 }, { "epoch": 0.19746955185053802, "grad_norm": 0.17279218137264252, "learning_rate": 2.6284771377648132e-05, "loss": 0.3772, "num_tokens": 1057562315.0, "step": 1670 }, { "epoch": 0.19758779709116708, "grad_norm": 0.19749115407466888, "learning_rate": 2.625469247121327e-05, "loss": 0.3677, "num_tokens": 1058196627.0, "step": 1671 }, { "epoch": 0.19770604233179614, "grad_norm": 0.1557900458574295, "learning_rate": 2.6224622490802433e-05, "loss": 0.3297, "num_tokens": 1058830504.0, "step": 1672 }, { "epoch": 0.1978242875724252, "grad_norm": 0.1759173572063446, "learning_rate": 2.6194561476207054e-05, "loss": 0.3722, "num_tokens": 1059458402.0, "step": 1673 }, { "epoch": 0.19794253281305427, "grad_norm": 0.18761344254016876, "learning_rate": 2.6164509467206678e-05, "loss": 0.3305, "num_tokens": 1060092121.0, "step": 1674 }, { "epoch": 0.19806077805368333, "grad_norm": 0.1713910549879074, "learning_rate": 2.613446650356896e-05, "loss": 0.3497, "num_tokens": 1060729280.0, "step": 1675 }, { "epoch": 0.19817902329431242, "grad_norm": 0.15927839279174805, "learning_rate": 2.610443262504956e-05, "loss": 0.3563, "num_tokens": 1061357345.0, "step": 1676 }, { "epoch": 0.19829726853494148, "grad_norm": 0.1804131269454956, "learning_rate": 2.6074407871392115e-05, "loss": 0.3501, "num_tokens": 1061986290.0, "step": 1677 }, { "epoch": 0.19841551377557054, "grad_norm": 0.1799393892288208, "learning_rate": 2.604439228232821e-05, "loss": 0.3674, "num_tokens": 1062616696.0, "step": 1678 }, { "epoch": 0.1985337590161996, "grad_norm": 0.17438676953315735, "learning_rate": 2.6014385897577285e-05, "loss": 0.3641, "num_tokens": 1063253499.0, "step": 1679 }, { "epoch": 0.19865200425682866, "grad_norm": 0.15909676253795624, "learning_rate": 2.5984388756846617e-05, "loss": 0.3865, "num_tokens": 1063890393.0, "step": 1680 }, { "epoch": 0.19877024949745772, "grad_norm": 0.18874254822731018, "learning_rate": 2.595440089983121e-05, "loss": 0.3992, "num_tokens": 1064526042.0, "step": 1681 }, { "epoch": 0.19888849473808679, "grad_norm": 0.1858903169631958, "learning_rate": 2.5924422366213847e-05, "loss": 0.3896, "num_tokens": 1065162440.0, "step": 1682 }, { "epoch": 0.19900673997871585, "grad_norm": 0.15958355367183685, "learning_rate": 2.5894453195664908e-05, "loss": 0.3594, "num_tokens": 1065798256.0, "step": 1683 }, { "epoch": 0.1991249852193449, "grad_norm": 0.18213844299316406, "learning_rate": 2.586449342784242e-05, "loss": 0.3542, "num_tokens": 1066431917.0, "step": 1684 }, { "epoch": 0.199243230459974, "grad_norm": 0.162159264087677, "learning_rate": 2.583454310239197e-05, "loss": 0.3436, "num_tokens": 1067062494.0, "step": 1685 }, { "epoch": 0.19936147570060306, "grad_norm": 0.16742713749408722, "learning_rate": 2.580460225894663e-05, "loss": 0.3506, "num_tokens": 1067697069.0, "step": 1686 }, { "epoch": 0.19947972094123212, "grad_norm": 0.17315392196178436, "learning_rate": 2.5774670937126936e-05, "loss": 0.3955, "num_tokens": 1068295187.0, "step": 1687 }, { "epoch": 0.19959796618186118, "grad_norm": 0.1787833422422409, "learning_rate": 2.5744749176540828e-05, "loss": 0.3472, "num_tokens": 1068932515.0, "step": 1688 }, { "epoch": 0.19971621142249024, "grad_norm": 0.166206493973732, "learning_rate": 2.5714837016783576e-05, "loss": 0.3418, "num_tokens": 1069570945.0, "step": 1689 }, { "epoch": 0.1998344566631193, "grad_norm": 0.1507144272327423, "learning_rate": 2.568493449743777e-05, "loss": 0.3273, "num_tokens": 1070176282.0, "step": 1690 }, { "epoch": 0.19995270190374836, "grad_norm": 0.1695074588060379, "learning_rate": 2.565504165807322e-05, "loss": 0.3839, "num_tokens": 1070811105.0, "step": 1691 }, { "epoch": 0.20007094714437743, "grad_norm": 0.16034561395645142, "learning_rate": 2.5625158538246946e-05, "loss": 0.3275, "num_tokens": 1071447938.0, "step": 1692 }, { "epoch": 0.20018919238500651, "grad_norm": 0.17031002044677734, "learning_rate": 2.5595285177503097e-05, "loss": 0.3471, "num_tokens": 1072084659.0, "step": 1693 }, { "epoch": 0.20030743762563558, "grad_norm": 0.1551053524017334, "learning_rate": 2.556542161537289e-05, "loss": 0.3391, "num_tokens": 1072721591.0, "step": 1694 }, { "epoch": 0.20042568286626464, "grad_norm": 0.1724974513053894, "learning_rate": 2.5535567891374617e-05, "loss": 0.393, "num_tokens": 1073359151.0, "step": 1695 }, { "epoch": 0.2005439281068937, "grad_norm": 0.1494995504617691, "learning_rate": 2.5505724045013514e-05, "loss": 0.3014, "num_tokens": 1073993201.0, "step": 1696 }, { "epoch": 0.20066217334752276, "grad_norm": 0.17537805438041687, "learning_rate": 2.5475890115781768e-05, "loss": 0.3731, "num_tokens": 1074627609.0, "step": 1697 }, { "epoch": 0.20078041858815182, "grad_norm": 0.1633339673280716, "learning_rate": 2.544606614315842e-05, "loss": 0.3697, "num_tokens": 1075266812.0, "step": 1698 }, { "epoch": 0.20089866382878088, "grad_norm": 0.19342614710330963, "learning_rate": 2.5416252166609366e-05, "loss": 0.3752, "num_tokens": 1075904344.0, "step": 1699 }, { "epoch": 0.20101690906940994, "grad_norm": 0.17040970921516418, "learning_rate": 2.5386448225587255e-05, "loss": 0.3567, "num_tokens": 1076539387.0, "step": 1700 }, { "epoch": 0.20113515431003903, "grad_norm": 0.151948943734169, "learning_rate": 2.5356654359531447e-05, "loss": 0.3295, "num_tokens": 1077171298.0, "step": 1701 }, { "epoch": 0.2012533995506681, "grad_norm": 0.16101253032684326, "learning_rate": 2.5326870607868e-05, "loss": 0.3489, "num_tokens": 1077802493.0, "step": 1702 }, { "epoch": 0.20137164479129716, "grad_norm": 0.1759985238313675, "learning_rate": 2.529709701000955e-05, "loss": 0.3765, "num_tokens": 1078441368.0, "step": 1703 }, { "epoch": 0.20148989003192622, "grad_norm": 0.17887002229690552, "learning_rate": 2.526733360535533e-05, "loss": 0.33, "num_tokens": 1079072648.0, "step": 1704 }, { "epoch": 0.20160813527255528, "grad_norm": 0.15795493125915527, "learning_rate": 2.523758043329107e-05, "loss": 0.3403, "num_tokens": 1079706981.0, "step": 1705 }, { "epoch": 0.20172638051318434, "grad_norm": 0.19332772493362427, "learning_rate": 2.5207837533188952e-05, "loss": 0.4124, "num_tokens": 1080339530.0, "step": 1706 }, { "epoch": 0.2018446257538134, "grad_norm": 0.1674606204032898, "learning_rate": 2.5178104944407583e-05, "loss": 0.3376, "num_tokens": 1080970260.0, "step": 1707 }, { "epoch": 0.20196287099444246, "grad_norm": 0.17070183157920837, "learning_rate": 2.5148382706291906e-05, "loss": 0.3715, "num_tokens": 1081590177.0, "step": 1708 }, { "epoch": 0.20208111623507155, "grad_norm": 0.1682654172182083, "learning_rate": 2.5118670858173184e-05, "loss": 0.3623, "num_tokens": 1082225090.0, "step": 1709 }, { "epoch": 0.2021993614757006, "grad_norm": 0.17373022437095642, "learning_rate": 2.5088969439368924e-05, "loss": 0.3347, "num_tokens": 1082857496.0, "step": 1710 }, { "epoch": 0.20231760671632967, "grad_norm": 0.19974088668823242, "learning_rate": 2.5059278489182814e-05, "loss": 0.4124, "num_tokens": 1083488770.0, "step": 1711 }, { "epoch": 0.20243585195695873, "grad_norm": 0.1808067411184311, "learning_rate": 2.5029598046904727e-05, "loss": 0.3373, "num_tokens": 1084124695.0, "step": 1712 }, { "epoch": 0.2025540971975878, "grad_norm": 0.16108886897563934, "learning_rate": 2.49999281518106e-05, "loss": 0.3304, "num_tokens": 1084757117.0, "step": 1713 }, { "epoch": 0.20267234243821686, "grad_norm": 0.17718425393104553, "learning_rate": 2.4970268843162426e-05, "loss": 0.353, "num_tokens": 1085390682.0, "step": 1714 }, { "epoch": 0.20279058767884592, "grad_norm": 0.17533092200756073, "learning_rate": 2.494062016020818e-05, "loss": 0.33, "num_tokens": 1086017829.0, "step": 1715 }, { "epoch": 0.20290883291947498, "grad_norm": 0.1918751746416092, "learning_rate": 2.49109821421818e-05, "loss": 0.3822, "num_tokens": 1086653372.0, "step": 1716 }, { "epoch": 0.20302707816010407, "grad_norm": 0.1925252079963684, "learning_rate": 2.4881354828303063e-05, "loss": 0.3553, "num_tokens": 1087290946.0, "step": 1717 }, { "epoch": 0.20314532340073313, "grad_norm": 0.18237020075321198, "learning_rate": 2.4851738257777637e-05, "loss": 0.3843, "num_tokens": 1087920118.0, "step": 1718 }, { "epoch": 0.2032635686413622, "grad_norm": 0.1720273792743683, "learning_rate": 2.4822132469796943e-05, "loss": 0.3434, "num_tokens": 1088552138.0, "step": 1719 }, { "epoch": 0.20338181388199125, "grad_norm": 0.18255725502967834, "learning_rate": 2.4792537503538127e-05, "loss": 0.3755, "num_tokens": 1089182532.0, "step": 1720 }, { "epoch": 0.2035000591226203, "grad_norm": 0.1829424500465393, "learning_rate": 2.476295339816404e-05, "loss": 0.3441, "num_tokens": 1089818206.0, "step": 1721 }, { "epoch": 0.20361830436324937, "grad_norm": 0.17252077162265778, "learning_rate": 2.473338019282314e-05, "loss": 0.3621, "num_tokens": 1090453385.0, "step": 1722 }, { "epoch": 0.20373654960387844, "grad_norm": 0.17650392651557922, "learning_rate": 2.470381792664947e-05, "loss": 0.3314, "num_tokens": 1091090465.0, "step": 1723 }, { "epoch": 0.2038547948445075, "grad_norm": 0.16138392686843872, "learning_rate": 2.467426663876259e-05, "loss": 0.3463, "num_tokens": 1091724132.0, "step": 1724 }, { "epoch": 0.2039730400851366, "grad_norm": 0.18246984481811523, "learning_rate": 2.4644726368267546e-05, "loss": 0.3902, "num_tokens": 1092359583.0, "step": 1725 }, { "epoch": 0.20409128532576565, "grad_norm": 0.17352929711341858, "learning_rate": 2.461519715425479e-05, "loss": 0.3564, "num_tokens": 1092994941.0, "step": 1726 }, { "epoch": 0.2042095305663947, "grad_norm": 0.16574281454086304, "learning_rate": 2.4585679035800152e-05, "loss": 0.3718, "num_tokens": 1093633821.0, "step": 1727 }, { "epoch": 0.20432777580702377, "grad_norm": 0.15748518705368042, "learning_rate": 2.4556172051964777e-05, "loss": 0.3956, "num_tokens": 1094271599.0, "step": 1728 }, { "epoch": 0.20444602104765283, "grad_norm": 0.1616101861000061, "learning_rate": 2.452667624179508e-05, "loss": 0.3509, "num_tokens": 1094899179.0, "step": 1729 }, { "epoch": 0.2045642662882819, "grad_norm": 0.1773674488067627, "learning_rate": 2.4497191644322678e-05, "loss": 0.3386, "num_tokens": 1095533149.0, "step": 1730 }, { "epoch": 0.20468251152891095, "grad_norm": 0.21328280866146088, "learning_rate": 2.4467718298564364e-05, "loss": 0.4216, "num_tokens": 1096169826.0, "step": 1731 }, { "epoch": 0.20480075676954002, "grad_norm": 0.17868073284626007, "learning_rate": 2.4438256243522022e-05, "loss": 0.3998, "num_tokens": 1096805072.0, "step": 1732 }, { "epoch": 0.2049190020101691, "grad_norm": 0.18678660690784454, "learning_rate": 2.4408805518182634e-05, "loss": 0.3624, "num_tokens": 1097431523.0, "step": 1733 }, { "epoch": 0.20503724725079817, "grad_norm": 0.16271109879016876, "learning_rate": 2.437936616151814e-05, "loss": 0.345, "num_tokens": 1098064979.0, "step": 1734 }, { "epoch": 0.20515549249142723, "grad_norm": 0.1655609905719757, "learning_rate": 2.434993821248547e-05, "loss": 0.3303, "num_tokens": 1098696612.0, "step": 1735 }, { "epoch": 0.2052737377320563, "grad_norm": 0.18166378140449524, "learning_rate": 2.432052171002645e-05, "loss": 0.38, "num_tokens": 1099334882.0, "step": 1736 }, { "epoch": 0.20539198297268535, "grad_norm": 0.1831236183643341, "learning_rate": 2.429111669306775e-05, "loss": 0.3816, "num_tokens": 1099965725.0, "step": 1737 }, { "epoch": 0.2055102282133144, "grad_norm": 0.1645020991563797, "learning_rate": 2.4261723200520855e-05, "loss": 0.3243, "num_tokens": 1100599444.0, "step": 1738 }, { "epoch": 0.20562847345394347, "grad_norm": 0.17984479665756226, "learning_rate": 2.4232341271281997e-05, "loss": 0.3783, "num_tokens": 1101235842.0, "step": 1739 }, { "epoch": 0.20574671869457253, "grad_norm": 0.1687169224023819, "learning_rate": 2.420297094423209e-05, "loss": 0.361, "num_tokens": 1101872512.0, "step": 1740 }, { "epoch": 0.2058649639352016, "grad_norm": 0.18295352160930634, "learning_rate": 2.417361225823671e-05, "loss": 0.3829, "num_tokens": 1102511392.0, "step": 1741 }, { "epoch": 0.20598320917583068, "grad_norm": 0.1548542082309723, "learning_rate": 2.4144265252146034e-05, "loss": 0.3123, "num_tokens": 1103143502.0, "step": 1742 }, { "epoch": 0.20610145441645975, "grad_norm": 0.17851783335208893, "learning_rate": 2.4114929964794767e-05, "loss": 0.3264, "num_tokens": 1103778115.0, "step": 1743 }, { "epoch": 0.2062196996570888, "grad_norm": 0.1701647937297821, "learning_rate": 2.4085606435002127e-05, "loss": 0.339, "num_tokens": 1104417351.0, "step": 1744 }, { "epoch": 0.20633794489771787, "grad_norm": 0.17332208156585693, "learning_rate": 2.405629470157174e-05, "loss": 0.3602, "num_tokens": 1105051447.0, "step": 1745 }, { "epoch": 0.20645619013834693, "grad_norm": 0.1794467568397522, "learning_rate": 2.402699480329167e-05, "loss": 0.3944, "num_tokens": 1105684481.0, "step": 1746 }, { "epoch": 0.206574435378976, "grad_norm": 0.18091942369937897, "learning_rate": 2.3997706778934262e-05, "loss": 0.348, "num_tokens": 1106316069.0, "step": 1747 }, { "epoch": 0.20669268061960505, "grad_norm": 0.16843995451927185, "learning_rate": 2.3968430667256202e-05, "loss": 0.3588, "num_tokens": 1106946321.0, "step": 1748 }, { "epoch": 0.2068109258602341, "grad_norm": 0.1799001395702362, "learning_rate": 2.3939166506998377e-05, "loss": 0.3617, "num_tokens": 1107578837.0, "step": 1749 }, { "epoch": 0.2069291711008632, "grad_norm": 0.1762133687734604, "learning_rate": 2.3909914336885882e-05, "loss": 0.3737, "num_tokens": 1108209790.0, "step": 1750 }, { "epoch": 0.20704741634149226, "grad_norm": 0.1713460087776184, "learning_rate": 2.388067419562792e-05, "loss": 0.3642, "num_tokens": 1108848427.0, "step": 1751 }, { "epoch": 0.20716566158212132, "grad_norm": 0.1797306388616562, "learning_rate": 2.3851446121917798e-05, "loss": 0.3477, "num_tokens": 1109482368.0, "step": 1752 }, { "epoch": 0.20728390682275039, "grad_norm": 0.16762728989124298, "learning_rate": 2.3822230154432852e-05, "loss": 0.3364, "num_tokens": 1110116891.0, "step": 1753 }, { "epoch": 0.20740215206337945, "grad_norm": 0.17379309237003326, "learning_rate": 2.3793026331834378e-05, "loss": 0.3409, "num_tokens": 1110751086.0, "step": 1754 }, { "epoch": 0.2075203973040085, "grad_norm": 0.17598725855350494, "learning_rate": 2.3763834692767625e-05, "loss": 0.3564, "num_tokens": 1111379871.0, "step": 1755 }, { "epoch": 0.20763864254463757, "grad_norm": 0.16308774054050446, "learning_rate": 2.373465527586171e-05, "loss": 0.3693, "num_tokens": 1112019590.0, "step": 1756 }, { "epoch": 0.20775688778526663, "grad_norm": 0.17762301862239838, "learning_rate": 2.370548811972957e-05, "loss": 0.3517, "num_tokens": 1112657954.0, "step": 1757 }, { "epoch": 0.20787513302589572, "grad_norm": 0.16102401912212372, "learning_rate": 2.3676333262967934e-05, "loss": 0.3487, "num_tokens": 1113297065.0, "step": 1758 }, { "epoch": 0.20799337826652478, "grad_norm": 0.16705387830734253, "learning_rate": 2.364719074415723e-05, "loss": 0.3675, "num_tokens": 1113925418.0, "step": 1759 }, { "epoch": 0.20811162350715384, "grad_norm": 0.1803237944841385, "learning_rate": 2.3618060601861592e-05, "loss": 0.3594, "num_tokens": 1114557861.0, "step": 1760 }, { "epoch": 0.2082298687477829, "grad_norm": 0.1534406542778015, "learning_rate": 2.3588942874628747e-05, "loss": 0.3347, "num_tokens": 1115190865.0, "step": 1761 }, { "epoch": 0.20834811398841196, "grad_norm": 0.17831236124038696, "learning_rate": 2.3559837600989997e-05, "loss": 0.3416, "num_tokens": 1115828417.0, "step": 1762 }, { "epoch": 0.20846635922904103, "grad_norm": 0.16847272217273712, "learning_rate": 2.3530744819460194e-05, "loss": 0.3683, "num_tokens": 1116467782.0, "step": 1763 }, { "epoch": 0.2085846044696701, "grad_norm": 0.16194409132003784, "learning_rate": 2.350166456853762e-05, "loss": 0.3391, "num_tokens": 1117103530.0, "step": 1764 }, { "epoch": 0.20870284971029915, "grad_norm": 0.1709127426147461, "learning_rate": 2.3472596886703995e-05, "loss": 0.3738, "num_tokens": 1117737680.0, "step": 1765 }, { "epoch": 0.20882109495092824, "grad_norm": 0.17543098330497742, "learning_rate": 2.344354181242441e-05, "loss": 0.3768, "num_tokens": 1118373673.0, "step": 1766 }, { "epoch": 0.2089393401915573, "grad_norm": 0.18270130455493927, "learning_rate": 2.341449938414726e-05, "loss": 0.3374, "num_tokens": 1118998834.0, "step": 1767 }, { "epoch": 0.20905758543218636, "grad_norm": 0.20289045572280884, "learning_rate": 2.3385469640304216e-05, "loss": 0.3752, "num_tokens": 1119628929.0, "step": 1768 }, { "epoch": 0.20917583067281542, "grad_norm": 0.1586165726184845, "learning_rate": 2.335645261931015e-05, "loss": 0.3196, "num_tokens": 1120265763.0, "step": 1769 }, { "epoch": 0.20929407591344448, "grad_norm": 0.15601249039173126, "learning_rate": 2.3327448359563132e-05, "loss": 0.3641, "num_tokens": 1120900582.0, "step": 1770 }, { "epoch": 0.20941232115407354, "grad_norm": 0.1694789081811905, "learning_rate": 2.329845689944429e-05, "loss": 0.3457, "num_tokens": 1121525988.0, "step": 1771 }, { "epoch": 0.2095305663947026, "grad_norm": 0.17258980870246887, "learning_rate": 2.3269478277317866e-05, "loss": 0.3553, "num_tokens": 1122163965.0, "step": 1772 }, { "epoch": 0.20964881163533167, "grad_norm": 0.17151235044002533, "learning_rate": 2.3240512531531096e-05, "loss": 0.3624, "num_tokens": 1122801406.0, "step": 1773 }, { "epoch": 0.20976705687596076, "grad_norm": 0.16250571608543396, "learning_rate": 2.321155970041415e-05, "loss": 0.3499, "num_tokens": 1123437251.0, "step": 1774 }, { "epoch": 0.20988530211658982, "grad_norm": 0.1781967729330063, "learning_rate": 2.3182619822280153e-05, "loss": 0.3889, "num_tokens": 1124069900.0, "step": 1775 }, { "epoch": 0.21000354735721888, "grad_norm": 0.16175100207328796, "learning_rate": 2.3153692935425053e-05, "loss": 0.3405, "num_tokens": 1124707468.0, "step": 1776 }, { "epoch": 0.21012179259784794, "grad_norm": 0.16345171630382538, "learning_rate": 2.312477907812764e-05, "loss": 0.3356, "num_tokens": 1125310855.0, "step": 1777 }, { "epoch": 0.210240037838477, "grad_norm": 0.17669813334941864, "learning_rate": 2.309587828864943e-05, "loss": 0.3464, "num_tokens": 1125920063.0, "step": 1778 }, { "epoch": 0.21035828307910606, "grad_norm": 0.17977413535118103, "learning_rate": 2.306699060523466e-05, "loss": 0.3255, "num_tokens": 1126553148.0, "step": 1779 }, { "epoch": 0.21047652831973512, "grad_norm": 0.17399907112121582, "learning_rate": 2.3038116066110236e-05, "loss": 0.3508, "num_tokens": 1127180645.0, "step": 1780 }, { "epoch": 0.21059477356036418, "grad_norm": 0.16113130748271942, "learning_rate": 2.300925470948564e-05, "loss": 0.3891, "num_tokens": 1127814988.0, "step": 1781 }, { "epoch": 0.21071301880099327, "grad_norm": 0.16437307000160217, "learning_rate": 2.2980406573552945e-05, "loss": 0.3555, "num_tokens": 1128450513.0, "step": 1782 }, { "epoch": 0.21083126404162233, "grad_norm": 0.17271071672439575, "learning_rate": 2.29515716964867e-05, "loss": 0.3724, "num_tokens": 1129084385.0, "step": 1783 }, { "epoch": 0.2109495092822514, "grad_norm": 0.15812721848487854, "learning_rate": 2.2922750116443927e-05, "loss": 0.3517, "num_tokens": 1129723803.0, "step": 1784 }, { "epoch": 0.21106775452288046, "grad_norm": 0.17772532999515533, "learning_rate": 2.289394187156405e-05, "loss": 0.4023, "num_tokens": 1130361719.0, "step": 1785 }, { "epoch": 0.21118599976350952, "grad_norm": 0.17160187661647797, "learning_rate": 2.286514699996883e-05, "loss": 0.3672, "num_tokens": 1130996067.0, "step": 1786 }, { "epoch": 0.21130424500413858, "grad_norm": 0.1835414469242096, "learning_rate": 2.2836365539762362e-05, "loss": 0.3969, "num_tokens": 1131630388.0, "step": 1787 }, { "epoch": 0.21142249024476764, "grad_norm": 0.16501553356647491, "learning_rate": 2.2807597529030955e-05, "loss": 0.3428, "num_tokens": 1132265944.0, "step": 1788 }, { "epoch": 0.2115407354853967, "grad_norm": 0.16104234755039215, "learning_rate": 2.2778843005843156e-05, "loss": 0.3425, "num_tokens": 1132901366.0, "step": 1789 }, { "epoch": 0.21165898072602576, "grad_norm": 0.16584618389606476, "learning_rate": 2.275010200824964e-05, "loss": 0.3303, "num_tokens": 1133533829.0, "step": 1790 }, { "epoch": 0.21177722596665485, "grad_norm": 0.18711631000041962, "learning_rate": 2.27213745742832e-05, "loss": 0.3629, "num_tokens": 1134167426.0, "step": 1791 }, { "epoch": 0.21189547120728391, "grad_norm": 0.15587639808654785, "learning_rate": 2.2692660741958667e-05, "loss": 0.3351, "num_tokens": 1134798148.0, "step": 1792 }, { "epoch": 0.21201371644791298, "grad_norm": 0.1677485555410385, "learning_rate": 2.2663960549272876e-05, "loss": 0.3583, "num_tokens": 1135435451.0, "step": 1793 }, { "epoch": 0.21213196168854204, "grad_norm": 0.17804494500160217, "learning_rate": 2.2635274034204626e-05, "loss": 0.4105, "num_tokens": 1136074929.0, "step": 1794 }, { "epoch": 0.2122502069291711, "grad_norm": 0.16529813408851624, "learning_rate": 2.260660123471461e-05, "loss": 0.3778, "num_tokens": 1136709588.0, "step": 1795 }, { "epoch": 0.21236845216980016, "grad_norm": 0.15820159018039703, "learning_rate": 2.257794218874535e-05, "loss": 0.3406, "num_tokens": 1137345661.0, "step": 1796 }, { "epoch": 0.21248669741042922, "grad_norm": 0.16220960021018982, "learning_rate": 2.2549296934221193e-05, "loss": 0.3483, "num_tokens": 1137949799.0, "step": 1797 }, { "epoch": 0.21260494265105828, "grad_norm": 0.15993450582027435, "learning_rate": 2.2520665509048228e-05, "loss": 0.347, "num_tokens": 1138575265.0, "step": 1798 }, { "epoch": 0.21272318789168737, "grad_norm": 0.1684616208076477, "learning_rate": 2.2492047951114248e-05, "loss": 0.3428, "num_tokens": 1139211263.0, "step": 1799 }, { "epoch": 0.21284143313231643, "grad_norm": 0.17923472821712494, "learning_rate": 2.246344429828869e-05, "loss": 0.3602, "num_tokens": 1139850606.0, "step": 1800 }, { "epoch": 0.2129596783729455, "grad_norm": 0.1479499489068985, "learning_rate": 2.2434854588422596e-05, "loss": 0.3212, "num_tokens": 1140482025.0, "step": 1801 }, { "epoch": 0.21307792361357455, "grad_norm": 0.15312820672988892, "learning_rate": 2.2406278859348544e-05, "loss": 0.3394, "num_tokens": 1141111370.0, "step": 1802 }, { "epoch": 0.21319616885420362, "grad_norm": 0.1724095493555069, "learning_rate": 2.237771714888062e-05, "loss": 0.3533, "num_tokens": 1141741981.0, "step": 1803 }, { "epoch": 0.21331441409483268, "grad_norm": 0.17572695016860962, "learning_rate": 2.2349169494814382e-05, "loss": 0.369, "num_tokens": 1142381483.0, "step": 1804 }, { "epoch": 0.21343265933546174, "grad_norm": 0.14945538341999054, "learning_rate": 2.232063593492673e-05, "loss": 0.3266, "num_tokens": 1143015721.0, "step": 1805 }, { "epoch": 0.2135509045760908, "grad_norm": 0.15192392468452454, "learning_rate": 2.2292116506975985e-05, "loss": 0.3078, "num_tokens": 1143647685.0, "step": 1806 }, { "epoch": 0.2136691498167199, "grad_norm": 0.1759961098432541, "learning_rate": 2.2263611248701712e-05, "loss": 0.3452, "num_tokens": 1144279423.0, "step": 1807 }, { "epoch": 0.21378739505734895, "grad_norm": 0.1545567512512207, "learning_rate": 2.2235120197824736e-05, "loss": 0.3535, "num_tokens": 1144914752.0, "step": 1808 }, { "epoch": 0.213905640297978, "grad_norm": 0.16601650416851044, "learning_rate": 2.2206643392047105e-05, "loss": 0.373, "num_tokens": 1145548289.0, "step": 1809 }, { "epoch": 0.21402388553860707, "grad_norm": 0.17092914879322052, "learning_rate": 2.2178180869051998e-05, "loss": 0.3989, "num_tokens": 1146177907.0, "step": 1810 }, { "epoch": 0.21414213077923613, "grad_norm": 0.173860102891922, "learning_rate": 2.2149732666503694e-05, "loss": 0.3462, "num_tokens": 1146811669.0, "step": 1811 }, { "epoch": 0.2142603760198652, "grad_norm": 0.15742754936218262, "learning_rate": 2.2121298822047527e-05, "loss": 0.354, "num_tokens": 1147450564.0, "step": 1812 }, { "epoch": 0.21437862126049426, "grad_norm": 0.15500089526176453, "learning_rate": 2.209287937330982e-05, "loss": 0.3549, "num_tokens": 1148081304.0, "step": 1813 }, { "epoch": 0.21449686650112332, "grad_norm": 0.17439618706703186, "learning_rate": 2.206447435789787e-05, "loss": 0.3861, "num_tokens": 1148714735.0, "step": 1814 }, { "epoch": 0.2146151117417524, "grad_norm": 0.1759570837020874, "learning_rate": 2.2036083813399845e-05, "loss": 0.3825, "num_tokens": 1149346233.0, "step": 1815 }, { "epoch": 0.21473335698238147, "grad_norm": 0.15746824443340302, "learning_rate": 2.2007707777384788e-05, "loss": 0.3292, "num_tokens": 1149980039.0, "step": 1816 }, { "epoch": 0.21485160222301053, "grad_norm": 0.17177729308605194, "learning_rate": 2.1979346287402532e-05, "loss": 0.3406, "num_tokens": 1150610366.0, "step": 1817 }, { "epoch": 0.2149698474636396, "grad_norm": 0.1686304658651352, "learning_rate": 2.1950999380983664e-05, "loss": 0.3788, "num_tokens": 1151249431.0, "step": 1818 }, { "epoch": 0.21508809270426865, "grad_norm": 0.16297440230846405, "learning_rate": 2.1922667095639465e-05, "loss": 0.3473, "num_tokens": 1151882110.0, "step": 1819 }, { "epoch": 0.2152063379448977, "grad_norm": 0.15822143852710724, "learning_rate": 2.189434946886187e-05, "loss": 0.3684, "num_tokens": 1152521530.0, "step": 1820 }, { "epoch": 0.21532458318552677, "grad_norm": 0.17296434938907623, "learning_rate": 2.186604653812344e-05, "loss": 0.3491, "num_tokens": 1153156986.0, "step": 1821 }, { "epoch": 0.21544282842615584, "grad_norm": 0.15859577059745789, "learning_rate": 2.1837758340877247e-05, "loss": 0.3857, "num_tokens": 1153794759.0, "step": 1822 }, { "epoch": 0.21556107366678492, "grad_norm": 0.14899693429470062, "learning_rate": 2.1809484914556904e-05, "loss": 0.322, "num_tokens": 1154433046.0, "step": 1823 }, { "epoch": 0.21567931890741399, "grad_norm": 0.15135660767555237, "learning_rate": 2.1781226296576466e-05, "loss": 0.328, "num_tokens": 1155067168.0, "step": 1824 }, { "epoch": 0.21579756414804305, "grad_norm": 0.17024415731430054, "learning_rate": 2.1752982524330366e-05, "loss": 0.3625, "num_tokens": 1155703377.0, "step": 1825 }, { "epoch": 0.2159158093886721, "grad_norm": 0.16312475502490997, "learning_rate": 2.172475363519343e-05, "loss": 0.3427, "num_tokens": 1156333194.0, "step": 1826 }, { "epoch": 0.21603405462930117, "grad_norm": 0.16713650524616241, "learning_rate": 2.169653966652078e-05, "loss": 0.3487, "num_tokens": 1156965595.0, "step": 1827 }, { "epoch": 0.21615229986993023, "grad_norm": 0.1780661642551422, "learning_rate": 2.166834065564777e-05, "loss": 0.3925, "num_tokens": 1157595055.0, "step": 1828 }, { "epoch": 0.2162705451105593, "grad_norm": 0.17596584558486938, "learning_rate": 2.1640156639889993e-05, "loss": 0.397, "num_tokens": 1158227500.0, "step": 1829 }, { "epoch": 0.21638879035118835, "grad_norm": 0.16619384288787842, "learning_rate": 2.1611987656543183e-05, "loss": 0.3714, "num_tokens": 1158863732.0, "step": 1830 }, { "epoch": 0.21650703559181744, "grad_norm": 0.18381381034851074, "learning_rate": 2.1583833742883174e-05, "loss": 0.3617, "num_tokens": 1159489840.0, "step": 1831 }, { "epoch": 0.2166252808324465, "grad_norm": 0.17254766821861267, "learning_rate": 2.155569493616587e-05, "loss": 0.3697, "num_tokens": 1160128980.0, "step": 1832 }, { "epoch": 0.21674352607307557, "grad_norm": 0.1754225194454193, "learning_rate": 2.1527571273627178e-05, "loss": 0.3441, "num_tokens": 1160764145.0, "step": 1833 }, { "epoch": 0.21686177131370463, "grad_norm": 0.17318950593471527, "learning_rate": 2.149946279248298e-05, "loss": 0.3917, "num_tokens": 1161400464.0, "step": 1834 }, { "epoch": 0.2169800165543337, "grad_norm": 0.17255404591560364, "learning_rate": 2.147136952992905e-05, "loss": 0.3627, "num_tokens": 1162027629.0, "step": 1835 }, { "epoch": 0.21709826179496275, "grad_norm": 0.17934633791446686, "learning_rate": 2.1443291523141023e-05, "loss": 0.3925, "num_tokens": 1162661769.0, "step": 1836 }, { "epoch": 0.2172165070355918, "grad_norm": 0.1712150275707245, "learning_rate": 2.1415228809274368e-05, "loss": 0.3511, "num_tokens": 1163294488.0, "step": 1837 }, { "epoch": 0.21733475227622087, "grad_norm": 0.16180448234081268, "learning_rate": 2.1387181425464276e-05, "loss": 0.3528, "num_tokens": 1163907989.0, "step": 1838 }, { "epoch": 0.21745299751684993, "grad_norm": 0.16935303807258606, "learning_rate": 2.1359149408825705e-05, "loss": 0.3569, "num_tokens": 1164541061.0, "step": 1839 }, { "epoch": 0.21757124275747902, "grad_norm": 0.16335564851760864, "learning_rate": 2.1331132796453243e-05, "loss": 0.3667, "num_tokens": 1165172101.0, "step": 1840 }, { "epoch": 0.21768948799810808, "grad_norm": 0.15852032601833344, "learning_rate": 2.1303131625421096e-05, "loss": 0.313, "num_tokens": 1165801010.0, "step": 1841 }, { "epoch": 0.21780773323873714, "grad_norm": 0.15676049888134003, "learning_rate": 2.1275145932783048e-05, "loss": 0.3358, "num_tokens": 1166401559.0, "step": 1842 }, { "epoch": 0.2179259784793662, "grad_norm": 0.14861942827701569, "learning_rate": 2.1247175755572366e-05, "loss": 0.3039, "num_tokens": 1167038615.0, "step": 1843 }, { "epoch": 0.21804422371999527, "grad_norm": 0.17299704253673553, "learning_rate": 2.1219221130801866e-05, "loss": 0.371, "num_tokens": 1167676463.0, "step": 1844 }, { "epoch": 0.21816246896062433, "grad_norm": 0.17603974044322968, "learning_rate": 2.1191282095463685e-05, "loss": 0.3728, "num_tokens": 1168309031.0, "step": 1845 }, { "epoch": 0.2182807142012534, "grad_norm": 0.17879034578800201, "learning_rate": 2.1163358686529412e-05, "loss": 0.3727, "num_tokens": 1168935781.0, "step": 1846 }, { "epoch": 0.21839895944188245, "grad_norm": 0.17116892337799072, "learning_rate": 2.113545094094989e-05, "loss": 0.3907, "num_tokens": 1169567656.0, "step": 1847 }, { "epoch": 0.21851720468251154, "grad_norm": 0.17895719408988953, "learning_rate": 2.1107558895655284e-05, "loss": 0.3965, "num_tokens": 1170206328.0, "step": 1848 }, { "epoch": 0.2186354499231406, "grad_norm": 0.15772230923175812, "learning_rate": 2.107968258755497e-05, "loss": 0.3806, "num_tokens": 1170845768.0, "step": 1849 }, { "epoch": 0.21875369516376966, "grad_norm": 0.1658557802438736, "learning_rate": 2.1051822053537494e-05, "loss": 0.3329, "num_tokens": 1171477604.0, "step": 1850 }, { "epoch": 0.21887194040439872, "grad_norm": 0.1760043352842331, "learning_rate": 2.1023977330470522e-05, "loss": 0.4029, "num_tokens": 1172110395.0, "step": 1851 }, { "epoch": 0.21899018564502779, "grad_norm": 0.16340604424476624, "learning_rate": 2.0996148455200813e-05, "loss": 0.3348, "num_tokens": 1172747360.0, "step": 1852 }, { "epoch": 0.21910843088565685, "grad_norm": 0.15813681483268738, "learning_rate": 2.0968335464554125e-05, "loss": 0.3893, "num_tokens": 1173383816.0, "step": 1853 }, { "epoch": 0.2192266761262859, "grad_norm": 0.1610841453075409, "learning_rate": 2.0940538395335255e-05, "loss": 0.3427, "num_tokens": 1174017986.0, "step": 1854 }, { "epoch": 0.21934492136691497, "grad_norm": 0.16629956662654877, "learning_rate": 2.091275728432787e-05, "loss": 0.327, "num_tokens": 1174648098.0, "step": 1855 }, { "epoch": 0.21946316660754406, "grad_norm": 0.16128353774547577, "learning_rate": 2.088499216829455e-05, "loss": 0.3558, "num_tokens": 1175285162.0, "step": 1856 }, { "epoch": 0.21958141184817312, "grad_norm": 0.16071675717830658, "learning_rate": 2.085724308397668e-05, "loss": 0.3922, "num_tokens": 1175918416.0, "step": 1857 }, { "epoch": 0.21969965708880218, "grad_norm": 0.14863473176956177, "learning_rate": 2.0829510068094495e-05, "loss": 0.3345, "num_tokens": 1176553703.0, "step": 1858 }, { "epoch": 0.21981790232943124, "grad_norm": 0.15749160945415497, "learning_rate": 2.0801793157346902e-05, "loss": 0.3516, "num_tokens": 1177188385.0, "step": 1859 }, { "epoch": 0.2199361475700603, "grad_norm": 0.14628109335899353, "learning_rate": 2.0774092388411518e-05, "loss": 0.3319, "num_tokens": 1177825789.0, "step": 1860 }, { "epoch": 0.22005439281068936, "grad_norm": 0.1442510187625885, "learning_rate": 2.0746407797944604e-05, "loss": 0.3596, "num_tokens": 1178461722.0, "step": 1861 }, { "epoch": 0.22017263805131843, "grad_norm": 0.15561442077159882, "learning_rate": 2.0718739422580996e-05, "loss": 0.3587, "num_tokens": 1179095968.0, "step": 1862 }, { "epoch": 0.2202908832919475, "grad_norm": 0.14741671085357666, "learning_rate": 2.069108729893411e-05, "loss": 0.3306, "num_tokens": 1179730603.0, "step": 1863 }, { "epoch": 0.22040912853257658, "grad_norm": 0.15964746475219727, "learning_rate": 2.066345146359581e-05, "loss": 0.3892, "num_tokens": 1180365636.0, "step": 1864 }, { "epoch": 0.22052737377320564, "grad_norm": 0.1483248472213745, "learning_rate": 2.0635831953136443e-05, "loss": 0.3476, "num_tokens": 1181002112.0, "step": 1865 }, { "epoch": 0.2206456190138347, "grad_norm": 0.1502329409122467, "learning_rate": 2.060822880410472e-05, "loss": 0.3226, "num_tokens": 1181635764.0, "step": 1866 }, { "epoch": 0.22076386425446376, "grad_norm": 0.1646731048822403, "learning_rate": 2.0580642053027724e-05, "loss": 0.3723, "num_tokens": 1182274273.0, "step": 1867 }, { "epoch": 0.22088210949509282, "grad_norm": 0.16638028621673584, "learning_rate": 2.0553071736410846e-05, "loss": 0.351, "num_tokens": 1182907997.0, "step": 1868 }, { "epoch": 0.22100035473572188, "grad_norm": 0.164053276181221, "learning_rate": 2.0525517890737708e-05, "loss": 0.3696, "num_tokens": 1183539711.0, "step": 1869 }, { "epoch": 0.22111859997635094, "grad_norm": 0.1691984087228775, "learning_rate": 2.0497980552470146e-05, "loss": 0.3387, "num_tokens": 1184178482.0, "step": 1870 }, { "epoch": 0.22123684521698, "grad_norm": 0.15557564795017242, "learning_rate": 2.047045975804815e-05, "loss": 0.3557, "num_tokens": 1184815893.0, "step": 1871 }, { "epoch": 0.2213550904576091, "grad_norm": 0.15779171884059906, "learning_rate": 2.04429555438898e-05, "loss": 0.3745, "num_tokens": 1185451168.0, "step": 1872 }, { "epoch": 0.22147333569823816, "grad_norm": 0.17602765560150146, "learning_rate": 2.041546794639129e-05, "loss": 0.3826, "num_tokens": 1186086488.0, "step": 1873 }, { "epoch": 0.22159158093886722, "grad_norm": 0.155890092253685, "learning_rate": 2.038799700192677e-05, "loss": 0.3807, "num_tokens": 1186722050.0, "step": 1874 }, { "epoch": 0.22170982617949628, "grad_norm": 0.16406124830245972, "learning_rate": 2.0360542746848368e-05, "loss": 0.3492, "num_tokens": 1187352361.0, "step": 1875 }, { "epoch": 0.22182807142012534, "grad_norm": 0.15780766308307648, "learning_rate": 2.033310521748614e-05, "loss": 0.3253, "num_tokens": 1187987963.0, "step": 1876 }, { "epoch": 0.2219463166607544, "grad_norm": 0.15500418841838837, "learning_rate": 2.030568445014798e-05, "loss": 0.3191, "num_tokens": 1188622119.0, "step": 1877 }, { "epoch": 0.22206456190138346, "grad_norm": 0.15499471127986908, "learning_rate": 2.0278280481119655e-05, "loss": 0.3668, "num_tokens": 1189257899.0, "step": 1878 }, { "epoch": 0.22218280714201252, "grad_norm": 0.15043438971042633, "learning_rate": 2.0250893346664646e-05, "loss": 0.3691, "num_tokens": 1189890204.0, "step": 1879 }, { "epoch": 0.2223010523826416, "grad_norm": 0.15676124393939972, "learning_rate": 2.022352308302418e-05, "loss": 0.3604, "num_tokens": 1190523164.0, "step": 1880 }, { "epoch": 0.22241929762327067, "grad_norm": 0.15230387449264526, "learning_rate": 2.0196169726417173e-05, "loss": 0.3347, "num_tokens": 1191157689.0, "step": 1881 }, { "epoch": 0.22253754286389973, "grad_norm": 0.1457720249891281, "learning_rate": 2.016883331304013e-05, "loss": 0.2949, "num_tokens": 1191786401.0, "step": 1882 }, { "epoch": 0.2226557881045288, "grad_norm": 0.1585022658109665, "learning_rate": 2.014151387906718e-05, "loss": 0.3152, "num_tokens": 1192420292.0, "step": 1883 }, { "epoch": 0.22277403334515786, "grad_norm": 0.17930035293102264, "learning_rate": 2.011421146064996e-05, "loss": 0.3671, "num_tokens": 1193050347.0, "step": 1884 }, { "epoch": 0.22289227858578692, "grad_norm": 0.14422306418418884, "learning_rate": 2.0086926093917587e-05, "loss": 0.3369, "num_tokens": 1193683618.0, "step": 1885 }, { "epoch": 0.22301052382641598, "grad_norm": 0.1651952862739563, "learning_rate": 2.0059657814976615e-05, "loss": 0.3584, "num_tokens": 1194316482.0, "step": 1886 }, { "epoch": 0.22312876906704504, "grad_norm": 0.1569311022758484, "learning_rate": 2.003240665991099e-05, "loss": 0.3566, "num_tokens": 1194953767.0, "step": 1887 }, { "epoch": 0.22324701430767413, "grad_norm": 0.16590744256973267, "learning_rate": 2.0005172664782014e-05, "loss": 0.3481, "num_tokens": 1195589094.0, "step": 1888 }, { "epoch": 0.2233652595483032, "grad_norm": 0.168272465467453, "learning_rate": 1.9977955865628256e-05, "loss": 0.4074, "num_tokens": 1196219735.0, "step": 1889 }, { "epoch": 0.22348350478893225, "grad_norm": 0.15438778698444366, "learning_rate": 1.9950756298465535e-05, "loss": 0.3393, "num_tokens": 1196849912.0, "step": 1890 }, { "epoch": 0.2236017500295613, "grad_norm": 0.1607561856508255, "learning_rate": 1.9923573999286865e-05, "loss": 0.368, "num_tokens": 1197483986.0, "step": 1891 }, { "epoch": 0.22371999527019037, "grad_norm": 0.1599637120962143, "learning_rate": 1.989640900406244e-05, "loss": 0.3834, "num_tokens": 1198120332.0, "step": 1892 }, { "epoch": 0.22383824051081944, "grad_norm": 0.14852549135684967, "learning_rate": 1.986926134873951e-05, "loss": 0.3055, "num_tokens": 1198750956.0, "step": 1893 }, { "epoch": 0.2239564857514485, "grad_norm": 0.17405691742897034, "learning_rate": 1.9842131069242408e-05, "loss": 0.4235, "num_tokens": 1199388319.0, "step": 1894 }, { "epoch": 0.22407473099207756, "grad_norm": 0.15194599330425262, "learning_rate": 1.9815018201472474e-05, "loss": 0.3375, "num_tokens": 1200026396.0, "step": 1895 }, { "epoch": 0.22419297623270662, "grad_norm": 0.14832772314548492, "learning_rate": 1.978792278130797e-05, "loss": 0.3393, "num_tokens": 1200653214.0, "step": 1896 }, { "epoch": 0.2243112214733357, "grad_norm": 0.16637760400772095, "learning_rate": 1.976084484460413e-05, "loss": 0.3649, "num_tokens": 1201283854.0, "step": 1897 }, { "epoch": 0.22442946671396477, "grad_norm": 0.14979857206344604, "learning_rate": 1.9733784427193017e-05, "loss": 0.3534, "num_tokens": 1201916015.0, "step": 1898 }, { "epoch": 0.22454771195459383, "grad_norm": 0.16878588497638702, "learning_rate": 1.970674156488351e-05, "loss": 0.3841, "num_tokens": 1202554692.0, "step": 1899 }, { "epoch": 0.2246659571952229, "grad_norm": 0.14891672134399414, "learning_rate": 1.9679716293461257e-05, "loss": 0.3472, "num_tokens": 1203191938.0, "step": 1900 }, { "epoch": 0.22478420243585195, "grad_norm": 0.15611931681632996, "learning_rate": 1.965270864868863e-05, "loss": 0.3515, "num_tokens": 1203824362.0, "step": 1901 }, { "epoch": 0.22490244767648102, "grad_norm": 0.1861187368631363, "learning_rate": 1.9625718666304702e-05, "loss": 0.3427, "num_tokens": 1204462936.0, "step": 1902 }, { "epoch": 0.22502069291711008, "grad_norm": 0.16823303699493408, "learning_rate": 1.9598746382025132e-05, "loss": 0.3585, "num_tokens": 1205095441.0, "step": 1903 }, { "epoch": 0.22513893815773914, "grad_norm": 0.14871849119663239, "learning_rate": 1.957179183154219e-05, "loss": 0.3145, "num_tokens": 1205729677.0, "step": 1904 }, { "epoch": 0.22525718339836823, "grad_norm": 0.1572839319705963, "learning_rate": 1.954485505052465e-05, "loss": 0.3257, "num_tokens": 1206366321.0, "step": 1905 }, { "epoch": 0.2253754286389973, "grad_norm": 0.1735132485628128, "learning_rate": 1.95179360746178e-05, "loss": 0.3594, "num_tokens": 1207005752.0, "step": 1906 }, { "epoch": 0.22549367387962635, "grad_norm": 0.16872599720954895, "learning_rate": 1.9491034939443357e-05, "loss": 0.3981, "num_tokens": 1207633804.0, "step": 1907 }, { "epoch": 0.2256119191202554, "grad_norm": 0.16365472972393036, "learning_rate": 1.9464151680599428e-05, "loss": 0.3514, "num_tokens": 1208270549.0, "step": 1908 }, { "epoch": 0.22573016436088447, "grad_norm": 0.15993882715702057, "learning_rate": 1.9437286333660467e-05, "loss": 0.3293, "num_tokens": 1208909763.0, "step": 1909 }, { "epoch": 0.22584840960151353, "grad_norm": 0.14977790415287018, "learning_rate": 1.941043893417721e-05, "loss": 0.3612, "num_tokens": 1209548729.0, "step": 1910 }, { "epoch": 0.2259666548421426, "grad_norm": 0.17881079018115997, "learning_rate": 1.938360951767666e-05, "loss": 0.3851, "num_tokens": 1210184584.0, "step": 1911 }, { "epoch": 0.22608490008277166, "grad_norm": 0.16349364817142487, "learning_rate": 1.935679811966203e-05, "loss": 0.3471, "num_tokens": 1210820281.0, "step": 1912 }, { "epoch": 0.22620314532340074, "grad_norm": 0.16041794419288635, "learning_rate": 1.9330004775612675e-05, "loss": 0.3604, "num_tokens": 1211459450.0, "step": 1913 }, { "epoch": 0.2263213905640298, "grad_norm": 0.15849412977695465, "learning_rate": 1.9303229520984058e-05, "loss": 0.3428, "num_tokens": 1212089281.0, "step": 1914 }, { "epoch": 0.22643963580465887, "grad_norm": 0.16442883014678955, "learning_rate": 1.9276472391207707e-05, "loss": 0.3337, "num_tokens": 1212723355.0, "step": 1915 }, { "epoch": 0.22655788104528793, "grad_norm": 0.17862072587013245, "learning_rate": 1.9249733421691158e-05, "loss": 0.3599, "num_tokens": 1213354384.0, "step": 1916 }, { "epoch": 0.226676126285917, "grad_norm": 0.15677925944328308, "learning_rate": 1.9223012647817955e-05, "loss": 0.3329, "num_tokens": 1213990075.0, "step": 1917 }, { "epoch": 0.22679437152654605, "grad_norm": 0.16935351490974426, "learning_rate": 1.9196310104947513e-05, "loss": 0.3831, "num_tokens": 1214629125.0, "step": 1918 }, { "epoch": 0.2269126167671751, "grad_norm": 0.15546615421772003, "learning_rate": 1.9169625828415145e-05, "loss": 0.345, "num_tokens": 1215264589.0, "step": 1919 }, { "epoch": 0.22703086200780417, "grad_norm": 0.17183943092823029, "learning_rate": 1.9142959853531995e-05, "loss": 0.3633, "num_tokens": 1215902900.0, "step": 1920 }, { "epoch": 0.22714910724843326, "grad_norm": 0.15895579755306244, "learning_rate": 1.9116312215584967e-05, "loss": 0.3658, "num_tokens": 1216535762.0, "step": 1921 }, { "epoch": 0.22726735248906232, "grad_norm": 0.17026469111442566, "learning_rate": 1.9089682949836745e-05, "loss": 0.361, "num_tokens": 1217171423.0, "step": 1922 }, { "epoch": 0.22738559772969139, "grad_norm": 0.1577785164117813, "learning_rate": 1.9063072091525654e-05, "loss": 0.3583, "num_tokens": 1217808427.0, "step": 1923 }, { "epoch": 0.22750384297032045, "grad_norm": 0.16298867762088776, "learning_rate": 1.9036479675865684e-05, "loss": 0.3772, "num_tokens": 1218442317.0, "step": 1924 }, { "epoch": 0.2276220882109495, "grad_norm": 0.1716330200433731, "learning_rate": 1.900990573804641e-05, "loss": 0.3368, "num_tokens": 1219075224.0, "step": 1925 }, { "epoch": 0.22774033345157857, "grad_norm": 0.1755548119544983, "learning_rate": 1.898335031323297e-05, "loss": 0.3707, "num_tokens": 1219706682.0, "step": 1926 }, { "epoch": 0.22785857869220763, "grad_norm": 0.1405853033065796, "learning_rate": 1.8956813436565992e-05, "loss": 0.3538, "num_tokens": 1220346000.0, "step": 1927 }, { "epoch": 0.2279768239328367, "grad_norm": 0.17048880457878113, "learning_rate": 1.893029514316156e-05, "loss": 0.3885, "num_tokens": 1220976247.0, "step": 1928 }, { "epoch": 0.22809506917346578, "grad_norm": 0.15518951416015625, "learning_rate": 1.8903795468111172e-05, "loss": 0.3594, "num_tokens": 1221609494.0, "step": 1929 }, { "epoch": 0.22821331441409484, "grad_norm": 0.15728840231895447, "learning_rate": 1.8877314446481666e-05, "loss": 0.3474, "num_tokens": 1222243620.0, "step": 1930 }, { "epoch": 0.2283315596547239, "grad_norm": 0.16775105893611908, "learning_rate": 1.8850852113315253e-05, "loss": 0.3665, "num_tokens": 1222872003.0, "step": 1931 }, { "epoch": 0.22844980489535296, "grad_norm": 0.15760478377342224, "learning_rate": 1.8824408503629342e-05, "loss": 0.3556, "num_tokens": 1223509301.0, "step": 1932 }, { "epoch": 0.22856805013598203, "grad_norm": 0.15447109937667847, "learning_rate": 1.879798365241662e-05, "loss": 0.3195, "num_tokens": 1224141171.0, "step": 1933 }, { "epoch": 0.2286862953766111, "grad_norm": 0.1555739790201187, "learning_rate": 1.8771577594644917e-05, "loss": 0.3644, "num_tokens": 1224777797.0, "step": 1934 }, { "epoch": 0.22880454061724015, "grad_norm": 0.14799030125141144, "learning_rate": 1.8745190365257206e-05, "loss": 0.3647, "num_tokens": 1225410917.0, "step": 1935 }, { "epoch": 0.2289227858578692, "grad_norm": 0.1485302895307541, "learning_rate": 1.871882199917156e-05, "loss": 0.3351, "num_tokens": 1226044770.0, "step": 1936 }, { "epoch": 0.2290410310984983, "grad_norm": 0.17278270423412323, "learning_rate": 1.869247253128107e-05, "loss": 0.3745, "num_tokens": 1226684345.0, "step": 1937 }, { "epoch": 0.22915927633912736, "grad_norm": 0.15906833112239838, "learning_rate": 1.866614199645382e-05, "loss": 0.3528, "num_tokens": 1227323937.0, "step": 1938 }, { "epoch": 0.22927752157975642, "grad_norm": 0.15691624581813812, "learning_rate": 1.863983042953285e-05, "loss": 0.3271, "num_tokens": 1227957801.0, "step": 1939 }, { "epoch": 0.22939576682038548, "grad_norm": 0.16830727458000183, "learning_rate": 1.861353786533608e-05, "loss": 0.3724, "num_tokens": 1228594724.0, "step": 1940 }, { "epoch": 0.22951401206101454, "grad_norm": 0.16309845447540283, "learning_rate": 1.8587264338656328e-05, "loss": 0.3578, "num_tokens": 1229220164.0, "step": 1941 }, { "epoch": 0.2296322573016436, "grad_norm": 0.1481633484363556, "learning_rate": 1.8561009884261172e-05, "loss": 0.3372, "num_tokens": 1229851524.0, "step": 1942 }, { "epoch": 0.22975050254227267, "grad_norm": 0.16284671425819397, "learning_rate": 1.8534774536892972e-05, "loss": 0.356, "num_tokens": 1230484467.0, "step": 1943 }, { "epoch": 0.22986874778290173, "grad_norm": 0.15934152901172638, "learning_rate": 1.85085583312688e-05, "loss": 0.3263, "num_tokens": 1231116359.0, "step": 1944 }, { "epoch": 0.2299869930235308, "grad_norm": 0.1626678705215454, "learning_rate": 1.8482361302080397e-05, "loss": 0.3768, "num_tokens": 1231753859.0, "step": 1945 }, { "epoch": 0.23010523826415988, "grad_norm": 0.16223514080047607, "learning_rate": 1.8456183483994143e-05, "loss": 0.3259, "num_tokens": 1232388378.0, "step": 1946 }, { "epoch": 0.23022348350478894, "grad_norm": 0.15437988936901093, "learning_rate": 1.8430024911650977e-05, "loss": 0.3622, "num_tokens": 1233026448.0, "step": 1947 }, { "epoch": 0.230341728745418, "grad_norm": 0.16027230024337769, "learning_rate": 1.840388561966638e-05, "loss": 0.3378, "num_tokens": 1233660542.0, "step": 1948 }, { "epoch": 0.23045997398604706, "grad_norm": 0.15265357494354248, "learning_rate": 1.837776564263031e-05, "loss": 0.3416, "num_tokens": 1234287661.0, "step": 1949 }, { "epoch": 0.23057821922667612, "grad_norm": 0.15387766063213348, "learning_rate": 1.835166501510717e-05, "loss": 0.3404, "num_tokens": 1234922170.0, "step": 1950 }, { "epoch": 0.23069646446730518, "grad_norm": 0.15113648772239685, "learning_rate": 1.832558377163578e-05, "loss": 0.3178, "num_tokens": 1235555317.0, "step": 1951 }, { "epoch": 0.23081470970793425, "grad_norm": 0.15565477311611176, "learning_rate": 1.829952194672927e-05, "loss": 0.311, "num_tokens": 1236192199.0, "step": 1952 }, { "epoch": 0.2309329549485633, "grad_norm": 0.16680525243282318, "learning_rate": 1.8273479574875098e-05, "loss": 0.3504, "num_tokens": 1236799053.0, "step": 1953 }, { "epoch": 0.2310512001891924, "grad_norm": 0.14852072298526764, "learning_rate": 1.8247456690534978e-05, "loss": 0.3451, "num_tokens": 1237432544.0, "step": 1954 }, { "epoch": 0.23116944542982146, "grad_norm": 0.17207488417625427, "learning_rate": 1.8221453328144822e-05, "loss": 0.436, "num_tokens": 1238070500.0, "step": 1955 }, { "epoch": 0.23128769067045052, "grad_norm": 0.1915634572505951, "learning_rate": 1.8195469522114735e-05, "loss": 0.351, "num_tokens": 1238698864.0, "step": 1956 }, { "epoch": 0.23140593591107958, "grad_norm": 0.1548355221748352, "learning_rate": 1.816950530682892e-05, "loss": 0.3673, "num_tokens": 1239326142.0, "step": 1957 }, { "epoch": 0.23152418115170864, "grad_norm": 0.15716081857681274, "learning_rate": 1.814356071664567e-05, "loss": 0.3334, "num_tokens": 1239965759.0, "step": 1958 }, { "epoch": 0.2316424263923377, "grad_norm": 0.1694408804178238, "learning_rate": 1.811763578589728e-05, "loss": 0.3517, "num_tokens": 1240593937.0, "step": 1959 }, { "epoch": 0.23176067163296676, "grad_norm": 0.1577712446451187, "learning_rate": 1.8091730548890083e-05, "loss": 0.3392, "num_tokens": 1241232600.0, "step": 1960 }, { "epoch": 0.23187891687359583, "grad_norm": 0.17078039050102234, "learning_rate": 1.8065845039904307e-05, "loss": 0.3795, "num_tokens": 1241868072.0, "step": 1961 }, { "epoch": 0.23199716211422491, "grad_norm": 0.16106127202510834, "learning_rate": 1.803997929319408e-05, "loss": 0.3745, "num_tokens": 1242499329.0, "step": 1962 }, { "epoch": 0.23211540735485398, "grad_norm": 0.14302292466163635, "learning_rate": 1.8014133342987396e-05, "loss": 0.321, "num_tokens": 1243132947.0, "step": 1963 }, { "epoch": 0.23223365259548304, "grad_norm": 0.16162584722042084, "learning_rate": 1.7988307223486026e-05, "loss": 0.3751, "num_tokens": 1243768759.0, "step": 1964 }, { "epoch": 0.2323518978361121, "grad_norm": 0.16313178837299347, "learning_rate": 1.7962500968865538e-05, "loss": 0.3436, "num_tokens": 1244397968.0, "step": 1965 }, { "epoch": 0.23247014307674116, "grad_norm": 0.16243085265159607, "learning_rate": 1.7936714613275173e-05, "loss": 0.3467, "num_tokens": 1245030355.0, "step": 1966 }, { "epoch": 0.23258838831737022, "grad_norm": 0.1638815701007843, "learning_rate": 1.791094819083787e-05, "loss": 0.3549, "num_tokens": 1245652710.0, "step": 1967 }, { "epoch": 0.23270663355799928, "grad_norm": 0.15447312593460083, "learning_rate": 1.7885201735650167e-05, "loss": 0.3206, "num_tokens": 1246288548.0, "step": 1968 }, { "epoch": 0.23282487879862834, "grad_norm": 0.1560949981212616, "learning_rate": 1.7859475281782182e-05, "loss": 0.3352, "num_tokens": 1246925966.0, "step": 1969 }, { "epoch": 0.23294312403925743, "grad_norm": 0.1852051019668579, "learning_rate": 1.7833768863277604e-05, "loss": 0.3462, "num_tokens": 1247554529.0, "step": 1970 }, { "epoch": 0.2330613692798865, "grad_norm": 0.16054436564445496, "learning_rate": 1.7808082514153553e-05, "loss": 0.3926, "num_tokens": 1248186600.0, "step": 1971 }, { "epoch": 0.23317961452051555, "grad_norm": 0.159676194190979, "learning_rate": 1.7782416268400622e-05, "loss": 0.3597, "num_tokens": 1248802763.0, "step": 1972 }, { "epoch": 0.23329785976114462, "grad_norm": 0.17004135251045227, "learning_rate": 1.7756770159982806e-05, "loss": 0.3076, "num_tokens": 1249434773.0, "step": 1973 }, { "epoch": 0.23341610500177368, "grad_norm": 0.159078449010849, "learning_rate": 1.773114422283742e-05, "loss": 0.3524, "num_tokens": 1250071859.0, "step": 1974 }, { "epoch": 0.23353435024240274, "grad_norm": 0.1678972691297531, "learning_rate": 1.770553849087513e-05, "loss": 0.3434, "num_tokens": 1250709705.0, "step": 1975 }, { "epoch": 0.2336525954830318, "grad_norm": 0.16334383189678192, "learning_rate": 1.767995299797985e-05, "loss": 0.3898, "num_tokens": 1251344489.0, "step": 1976 }, { "epoch": 0.23377084072366086, "grad_norm": 0.164784237742424, "learning_rate": 1.7654387778008686e-05, "loss": 0.3489, "num_tokens": 1251982650.0, "step": 1977 }, { "epoch": 0.23388908596428995, "grad_norm": 0.1682564616203308, "learning_rate": 1.7628842864791946e-05, "loss": 0.3622, "num_tokens": 1252618531.0, "step": 1978 }, { "epoch": 0.234007331204919, "grad_norm": 0.1823110282421112, "learning_rate": 1.7603318292133045e-05, "loss": 0.3435, "num_tokens": 1253250959.0, "step": 1979 }, { "epoch": 0.23412557644554807, "grad_norm": 0.16742032766342163, "learning_rate": 1.7577814093808514e-05, "loss": 0.3631, "num_tokens": 1253889968.0, "step": 1980 }, { "epoch": 0.23424382168617713, "grad_norm": 0.16165918111801147, "learning_rate": 1.7552330303567894e-05, "loss": 0.3466, "num_tokens": 1254520645.0, "step": 1981 }, { "epoch": 0.2343620669268062, "grad_norm": 0.16288359463214874, "learning_rate": 1.7526866955133725e-05, "loss": 0.3784, "num_tokens": 1255148745.0, "step": 1982 }, { "epoch": 0.23448031216743526, "grad_norm": 0.1556723266839981, "learning_rate": 1.75014240822015e-05, "loss": 0.3634, "num_tokens": 1255784484.0, "step": 1983 }, { "epoch": 0.23459855740806432, "grad_norm": 0.16116824746131897, "learning_rate": 1.7476001718439606e-05, "loss": 0.3831, "num_tokens": 1256417769.0, "step": 1984 }, { "epoch": 0.23471680264869338, "grad_norm": 0.17379483580589294, "learning_rate": 1.7450599897489317e-05, "loss": 0.3431, "num_tokens": 1257052424.0, "step": 1985 }, { "epoch": 0.23483504788932247, "grad_norm": 0.14859539270401, "learning_rate": 1.742521865296471e-05, "loss": 0.3339, "num_tokens": 1257689558.0, "step": 1986 }, { "epoch": 0.23495329312995153, "grad_norm": 0.16267679631710052, "learning_rate": 1.7399858018452617e-05, "loss": 0.3677, "num_tokens": 1258323494.0, "step": 1987 }, { "epoch": 0.2350715383705806, "grad_norm": 0.16116073727607727, "learning_rate": 1.7374518027512616e-05, "loss": 0.3364, "num_tokens": 1258955356.0, "step": 1988 }, { "epoch": 0.23518978361120965, "grad_norm": 0.15993084013462067, "learning_rate": 1.734919871367695e-05, "loss": 0.3552, "num_tokens": 1259588215.0, "step": 1989 }, { "epoch": 0.2353080288518387, "grad_norm": 0.16909614205360413, "learning_rate": 1.732390011045053e-05, "loss": 0.3785, "num_tokens": 1260226408.0, "step": 1990 }, { "epoch": 0.23542627409246777, "grad_norm": 0.17005151510238647, "learning_rate": 1.729862225131083e-05, "loss": 0.353, "num_tokens": 1260860288.0, "step": 1991 }, { "epoch": 0.23554451933309684, "grad_norm": 0.17822960019111633, "learning_rate": 1.72733651697079e-05, "loss": 0.3646, "num_tokens": 1261494410.0, "step": 1992 }, { "epoch": 0.2356627645737259, "grad_norm": 0.15992970764636993, "learning_rate": 1.724812889906425e-05, "loss": 0.3578, "num_tokens": 1262129492.0, "step": 1993 }, { "epoch": 0.23578100981435499, "grad_norm": 0.1606341302394867, "learning_rate": 1.722291347277492e-05, "loss": 0.3344, "num_tokens": 1262767078.0, "step": 1994 }, { "epoch": 0.23589925505498405, "grad_norm": 0.17137299478054047, "learning_rate": 1.7197718924207304e-05, "loss": 0.3301, "num_tokens": 1263402389.0, "step": 1995 }, { "epoch": 0.2360175002956131, "grad_norm": 0.18807576596736908, "learning_rate": 1.7172545286701207e-05, "loss": 0.407, "num_tokens": 1264035180.0, "step": 1996 }, { "epoch": 0.23613574553624217, "grad_norm": 0.15306997299194336, "learning_rate": 1.714739259356874e-05, "loss": 0.3203, "num_tokens": 1264670288.0, "step": 1997 }, { "epoch": 0.23625399077687123, "grad_norm": 0.17509116232395172, "learning_rate": 1.71222608780943e-05, "loss": 0.3251, "num_tokens": 1265305830.0, "step": 1998 }, { "epoch": 0.2363722360175003, "grad_norm": 0.1641552448272705, "learning_rate": 1.709715017353455e-05, "loss": 0.3425, "num_tokens": 1265939517.0, "step": 1999 }, { "epoch": 0.23649048125812935, "grad_norm": 0.17969214916229248, "learning_rate": 1.7072060513118325e-05, "loss": 0.3889, "num_tokens": 1266577606.0, "step": 2000 }, { "epoch": 0.23660872649875841, "grad_norm": 0.17133449018001556, "learning_rate": 1.7046991930046615e-05, "loss": 0.3745, "num_tokens": 1267214198.0, "step": 2001 }, { "epoch": 0.23672697173938748, "grad_norm": 0.17307010293006897, "learning_rate": 1.7021944457492525e-05, "loss": 0.3641, "num_tokens": 1267844709.0, "step": 2002 }, { "epoch": 0.23684521698001657, "grad_norm": 0.14777572453022003, "learning_rate": 1.6996918128601207e-05, "loss": 0.3265, "num_tokens": 1268478703.0, "step": 2003 }, { "epoch": 0.23696346222064563, "grad_norm": 0.1670704334974289, "learning_rate": 1.6971912976489874e-05, "loss": 0.3539, "num_tokens": 1269115700.0, "step": 2004 }, { "epoch": 0.2370817074612747, "grad_norm": 0.1588928997516632, "learning_rate": 1.6946929034247678e-05, "loss": 0.3508, "num_tokens": 1269747201.0, "step": 2005 }, { "epoch": 0.23719995270190375, "grad_norm": 0.17382748425006866, "learning_rate": 1.6921966334935715e-05, "loss": 0.3284, "num_tokens": 1270382337.0, "step": 2006 }, { "epoch": 0.2373181979425328, "grad_norm": 0.1680542528629303, "learning_rate": 1.689702491158697e-05, "loss": 0.3535, "num_tokens": 1271020341.0, "step": 2007 }, { "epoch": 0.23743644318316187, "grad_norm": 0.16459088027477264, "learning_rate": 1.6872104797206273e-05, "loss": 0.3467, "num_tokens": 1271652135.0, "step": 2008 }, { "epoch": 0.23755468842379093, "grad_norm": 0.14711426198482513, "learning_rate": 1.6847206024770276e-05, "loss": 0.3425, "num_tokens": 1272287068.0, "step": 2009 }, { "epoch": 0.23767293366442, "grad_norm": 0.1500907987356186, "learning_rate": 1.6822328627227352e-05, "loss": 0.3461, "num_tokens": 1272924545.0, "step": 2010 }, { "epoch": 0.23779117890504908, "grad_norm": 0.17159508168697357, "learning_rate": 1.6797472637497623e-05, "loss": 0.3872, "num_tokens": 1273561738.0, "step": 2011 }, { "epoch": 0.23790942414567814, "grad_norm": 0.14653366804122925, "learning_rate": 1.6772638088472852e-05, "loss": 0.3359, "num_tokens": 1274195285.0, "step": 2012 }, { "epoch": 0.2380276693863072, "grad_norm": 0.1653970181941986, "learning_rate": 1.6747825013016444e-05, "loss": 0.3477, "num_tokens": 1274817746.0, "step": 2013 }, { "epoch": 0.23814591462693627, "grad_norm": 0.15158611536026, "learning_rate": 1.672303344396341e-05, "loss": 0.2962, "num_tokens": 1275435439.0, "step": 2014 }, { "epoch": 0.23826415986756533, "grad_norm": 0.1698819398880005, "learning_rate": 1.6698263414120277e-05, "loss": 0.3778, "num_tokens": 1276068813.0, "step": 2015 }, { "epoch": 0.2383824051081944, "grad_norm": 0.16204829514026642, "learning_rate": 1.667351495626506e-05, "loss": 0.3426, "num_tokens": 1276698654.0, "step": 2016 }, { "epoch": 0.23850065034882345, "grad_norm": 0.18355444073677063, "learning_rate": 1.6648788103147243e-05, "loss": 0.3838, "num_tokens": 1277333784.0, "step": 2017 }, { "epoch": 0.2386188955894525, "grad_norm": 0.17248386144638062, "learning_rate": 1.6624082887487718e-05, "loss": 0.3434, "num_tokens": 1277962469.0, "step": 2018 }, { "epoch": 0.2387371408300816, "grad_norm": 0.16043022274971008, "learning_rate": 1.6599399341978764e-05, "loss": 0.3751, "num_tokens": 1278598036.0, "step": 2019 }, { "epoch": 0.23885538607071066, "grad_norm": 0.16142123937606812, "learning_rate": 1.6574737499283947e-05, "loss": 0.3677, "num_tokens": 1279232150.0, "step": 2020 }, { "epoch": 0.23897363131133972, "grad_norm": 0.15865042805671692, "learning_rate": 1.655009739203814e-05, "loss": 0.3495, "num_tokens": 1279866936.0, "step": 2021 }, { "epoch": 0.23909187655196878, "grad_norm": 0.1501854509115219, "learning_rate": 1.6525479052847438e-05, "loss": 0.326, "num_tokens": 1280501697.0, "step": 2022 }, { "epoch": 0.23921012179259785, "grad_norm": 0.18602120876312256, "learning_rate": 1.6500882514289136e-05, "loss": 0.4071, "num_tokens": 1281129166.0, "step": 2023 }, { "epoch": 0.2393283670332269, "grad_norm": 0.16055260598659515, "learning_rate": 1.6476307808911697e-05, "loss": 0.3392, "num_tokens": 1281765313.0, "step": 2024 }, { "epoch": 0.23944661227385597, "grad_norm": 0.16048748791217804, "learning_rate": 1.645175496923467e-05, "loss": 0.3806, "num_tokens": 1282401699.0, "step": 2025 }, { "epoch": 0.23956485751448503, "grad_norm": 0.1903311014175415, "learning_rate": 1.6427224027748674e-05, "loss": 0.3922, "num_tokens": 1283036777.0, "step": 2026 }, { "epoch": 0.23968310275511412, "grad_norm": 0.1615394949913025, "learning_rate": 1.640271501691535e-05, "loss": 0.348, "num_tokens": 1283671025.0, "step": 2027 }, { "epoch": 0.23980134799574318, "grad_norm": 0.14248009026050568, "learning_rate": 1.637822796916734e-05, "loss": 0.3353, "num_tokens": 1284302915.0, "step": 2028 }, { "epoch": 0.23991959323637224, "grad_norm": 0.15840069949626923, "learning_rate": 1.6353762916908183e-05, "loss": 0.3626, "num_tokens": 1284941325.0, "step": 2029 }, { "epoch": 0.2400378384770013, "grad_norm": 0.15821804106235504, "learning_rate": 1.6329319892512347e-05, "loss": 0.3585, "num_tokens": 1285577381.0, "step": 2030 }, { "epoch": 0.24015608371763036, "grad_norm": 0.15751008689403534, "learning_rate": 1.6304898928325127e-05, "loss": 0.3771, "num_tokens": 1286210952.0, "step": 2031 }, { "epoch": 0.24027432895825943, "grad_norm": 0.14285607635974884, "learning_rate": 1.628050005666263e-05, "loss": 0.3395, "num_tokens": 1286839896.0, "step": 2032 }, { "epoch": 0.2403925741988885, "grad_norm": 0.15762567520141602, "learning_rate": 1.625612330981175e-05, "loss": 0.361, "num_tokens": 1287473506.0, "step": 2033 }, { "epoch": 0.24051081943951755, "grad_norm": 0.16265840828418732, "learning_rate": 1.6231768720030072e-05, "loss": 0.3703, "num_tokens": 1288110471.0, "step": 2034 }, { "epoch": 0.24062906468014664, "grad_norm": 0.1618996113538742, "learning_rate": 1.620743631954589e-05, "loss": 0.3638, "num_tokens": 1288746833.0, "step": 2035 }, { "epoch": 0.2407473099207757, "grad_norm": 0.16014817357063293, "learning_rate": 1.6183126140558103e-05, "loss": 0.359, "num_tokens": 1289384237.0, "step": 2036 }, { "epoch": 0.24086555516140476, "grad_norm": 0.16067685186862946, "learning_rate": 1.615883821523622e-05, "loss": 0.3702, "num_tokens": 1290019192.0, "step": 2037 }, { "epoch": 0.24098380040203382, "grad_norm": 0.16710048913955688, "learning_rate": 1.6134572575720325e-05, "loss": 0.3528, "num_tokens": 1290658276.0, "step": 2038 }, { "epoch": 0.24110204564266288, "grad_norm": 0.17060130834579468, "learning_rate": 1.6110329254120973e-05, "loss": 0.3591, "num_tokens": 1291293510.0, "step": 2039 }, { "epoch": 0.24122029088329194, "grad_norm": 0.14449091255664825, "learning_rate": 1.608610828251921e-05, "loss": 0.3289, "num_tokens": 1291917409.0, "step": 2040 }, { "epoch": 0.241338536123921, "grad_norm": 0.1513248085975647, "learning_rate": 1.6061909692966495e-05, "loss": 0.3397, "num_tokens": 1292552987.0, "step": 2041 }, { "epoch": 0.24145678136455007, "grad_norm": 0.16299116611480713, "learning_rate": 1.6037733517484666e-05, "loss": 0.3375, "num_tokens": 1293186477.0, "step": 2042 }, { "epoch": 0.24157502660517916, "grad_norm": 0.17591592669487, "learning_rate": 1.6013579788065923e-05, "loss": 0.3867, "num_tokens": 1293819286.0, "step": 2043 }, { "epoch": 0.24169327184580822, "grad_norm": 0.15682390332221985, "learning_rate": 1.598944853667274e-05, "loss": 0.3087, "num_tokens": 1294453861.0, "step": 2044 }, { "epoch": 0.24181151708643728, "grad_norm": 0.17341744899749756, "learning_rate": 1.5965339795237856e-05, "loss": 0.3969, "num_tokens": 1295089864.0, "step": 2045 }, { "epoch": 0.24192976232706634, "grad_norm": 0.16315826773643494, "learning_rate": 1.594125359566422e-05, "loss": 0.3458, "num_tokens": 1295726569.0, "step": 2046 }, { "epoch": 0.2420480075676954, "grad_norm": 0.1428024023771286, "learning_rate": 1.5917189969824936e-05, "loss": 0.325, "num_tokens": 1296360993.0, "step": 2047 }, { "epoch": 0.24216625280832446, "grad_norm": 0.17544488608837128, "learning_rate": 1.589314894956328e-05, "loss": 0.382, "num_tokens": 1297000395.0, "step": 2048 }, { "epoch": 0.24228449804895352, "grad_norm": 0.1569821685552597, "learning_rate": 1.586913056669257e-05, "loss": 0.3416, "num_tokens": 1297637794.0, "step": 2049 }, { "epoch": 0.24240274328958258, "grad_norm": 0.15297435224056244, "learning_rate": 1.584513485299619e-05, "loss": 0.3497, "num_tokens": 1298274450.0, "step": 2050 }, { "epoch": 0.24252098853021165, "grad_norm": 0.16815827786922455, "learning_rate": 1.5821161840227515e-05, "loss": 0.3923, "num_tokens": 1298912342.0, "step": 2051 }, { "epoch": 0.24263923377084073, "grad_norm": 0.17284280061721802, "learning_rate": 1.5797211560109886e-05, "loss": 0.3817, "num_tokens": 1299542171.0, "step": 2052 }, { "epoch": 0.2427574790114698, "grad_norm": 0.17015445232391357, "learning_rate": 1.5773284044336574e-05, "loss": 0.3735, "num_tokens": 1300176165.0, "step": 2053 }, { "epoch": 0.24287572425209886, "grad_norm": 0.1607237309217453, "learning_rate": 1.5749379324570697e-05, "loss": 0.3621, "num_tokens": 1300812545.0, "step": 2054 }, { "epoch": 0.24299396949272792, "grad_norm": 0.16473503410816193, "learning_rate": 1.5725497432445237e-05, "loss": 0.39, "num_tokens": 1301447175.0, "step": 2055 }, { "epoch": 0.24311221473335698, "grad_norm": 0.16471925377845764, "learning_rate": 1.5701638399562947e-05, "loss": 0.3688, "num_tokens": 1302081362.0, "step": 2056 }, { "epoch": 0.24323045997398604, "grad_norm": 0.15902522206306458, "learning_rate": 1.5677802257496335e-05, "loss": 0.3687, "num_tokens": 1302715490.0, "step": 2057 }, { "epoch": 0.2433487052146151, "grad_norm": 0.1593167930841446, "learning_rate": 1.565398903778763e-05, "loss": 0.3566, "num_tokens": 1303349090.0, "step": 2058 }, { "epoch": 0.24346695045524416, "grad_norm": 0.1661464273929596, "learning_rate": 1.5630198771948724e-05, "loss": 0.4114, "num_tokens": 1303981074.0, "step": 2059 }, { "epoch": 0.24358519569587325, "grad_norm": 0.15248556435108185, "learning_rate": 1.5606431491461118e-05, "loss": 0.3632, "num_tokens": 1304616565.0, "step": 2060 }, { "epoch": 0.2437034409365023, "grad_norm": 0.16896015405654907, "learning_rate": 1.55826872277759e-05, "loss": 0.3243, "num_tokens": 1305250305.0, "step": 2061 }, { "epoch": 0.24382168617713137, "grad_norm": 0.15472634136676788, "learning_rate": 1.555896601231374e-05, "loss": 0.3547, "num_tokens": 1305885171.0, "step": 2062 }, { "epoch": 0.24393993141776044, "grad_norm": 0.18401753902435303, "learning_rate": 1.553526787646475e-05, "loss": 0.3802, "num_tokens": 1306515269.0, "step": 2063 }, { "epoch": 0.2440581766583895, "grad_norm": 0.16186417639255524, "learning_rate": 1.5511592851588545e-05, "loss": 0.3458, "num_tokens": 1307140965.0, "step": 2064 }, { "epoch": 0.24417642189901856, "grad_norm": 0.14521846175193787, "learning_rate": 1.5487940969014125e-05, "loss": 0.3484, "num_tokens": 1307772843.0, "step": 2065 }, { "epoch": 0.24429466713964762, "grad_norm": 0.16249442100524902, "learning_rate": 1.5464312260039874e-05, "loss": 0.3702, "num_tokens": 1308403711.0, "step": 2066 }, { "epoch": 0.24441291238027668, "grad_norm": 0.14522194862365723, "learning_rate": 1.544070675593355e-05, "loss": 0.3237, "num_tokens": 1309036194.0, "step": 2067 }, { "epoch": 0.24453115762090577, "grad_norm": 0.1677798628807068, "learning_rate": 1.541712448793214e-05, "loss": 0.3669, "num_tokens": 1309668349.0, "step": 2068 }, { "epoch": 0.24464940286153483, "grad_norm": 0.15699172019958496, "learning_rate": 1.539356548724193e-05, "loss": 0.3702, "num_tokens": 1310305545.0, "step": 2069 }, { "epoch": 0.2447676481021639, "grad_norm": 0.1510690301656723, "learning_rate": 1.5370029785038402e-05, "loss": 0.3348, "num_tokens": 1310939242.0, "step": 2070 }, { "epoch": 0.24488589334279295, "grad_norm": 0.16037912666797638, "learning_rate": 1.5346517412466186e-05, "loss": 0.3513, "num_tokens": 1311572053.0, "step": 2071 }, { "epoch": 0.24500413858342202, "grad_norm": 0.14442725479602814, "learning_rate": 1.532302840063909e-05, "loss": 0.3478, "num_tokens": 1312205190.0, "step": 2072 }, { "epoch": 0.24512238382405108, "grad_norm": 0.15727399289608002, "learning_rate": 1.529956278063996e-05, "loss": 0.3902, "num_tokens": 1312841010.0, "step": 2073 }, { "epoch": 0.24524062906468014, "grad_norm": 0.16083134710788727, "learning_rate": 1.527612058352072e-05, "loss": 0.3668, "num_tokens": 1313472468.0, "step": 2074 }, { "epoch": 0.2453588743053092, "grad_norm": 0.1568143367767334, "learning_rate": 1.5252701840302268e-05, "loss": 0.3468, "num_tokens": 1314111009.0, "step": 2075 }, { "epoch": 0.2454771195459383, "grad_norm": 0.14847031235694885, "learning_rate": 1.5229306581974486e-05, "loss": 0.3506, "num_tokens": 1314744729.0, "step": 2076 }, { "epoch": 0.24559536478656735, "grad_norm": 0.15530890226364136, "learning_rate": 1.5205934839496191e-05, "loss": 0.3321, "num_tokens": 1315375503.0, "step": 2077 }, { "epoch": 0.2457136100271964, "grad_norm": 0.1590510606765747, "learning_rate": 1.5182586643795055e-05, "loss": 0.3644, "num_tokens": 1316012926.0, "step": 2078 }, { "epoch": 0.24583185526782547, "grad_norm": 0.15811114013195038, "learning_rate": 1.515926202576761e-05, "loss": 0.3715, "num_tokens": 1316650789.0, "step": 2079 }, { "epoch": 0.24595010050845453, "grad_norm": 0.1463797390460968, "learning_rate": 1.5135961016279167e-05, "loss": 0.3414, "num_tokens": 1317282748.0, "step": 2080 }, { "epoch": 0.2460683457490836, "grad_norm": 0.1546652913093567, "learning_rate": 1.5112683646163814e-05, "loss": 0.3419, "num_tokens": 1317922154.0, "step": 2081 }, { "epoch": 0.24618659098971266, "grad_norm": 0.15111781656742096, "learning_rate": 1.5089429946224357e-05, "loss": 0.3415, "num_tokens": 1318555926.0, "step": 2082 }, { "epoch": 0.24630483623034172, "grad_norm": 0.163945734500885, "learning_rate": 1.5066199947232275e-05, "loss": 0.3931, "num_tokens": 1319192513.0, "step": 2083 }, { "epoch": 0.2464230814709708, "grad_norm": 0.14155668020248413, "learning_rate": 1.5042993679927678e-05, "loss": 0.3145, "num_tokens": 1319824850.0, "step": 2084 }, { "epoch": 0.24654132671159987, "grad_norm": 0.16178345680236816, "learning_rate": 1.5019811175019274e-05, "loss": 0.3892, "num_tokens": 1320458462.0, "step": 2085 }, { "epoch": 0.24665957195222893, "grad_norm": 0.1564897894859314, "learning_rate": 1.499665246318433e-05, "loss": 0.344, "num_tokens": 1321092111.0, "step": 2086 }, { "epoch": 0.246777817192858, "grad_norm": 0.14574626088142395, "learning_rate": 1.4973517575068638e-05, "loss": 0.3251, "num_tokens": 1321724270.0, "step": 2087 }, { "epoch": 0.24689606243348705, "grad_norm": 0.16285115480422974, "learning_rate": 1.4950406541286447e-05, "loss": 0.3619, "num_tokens": 1322359786.0, "step": 2088 }, { "epoch": 0.2470143076741161, "grad_norm": 0.1563335806131363, "learning_rate": 1.4927319392420443e-05, "loss": 0.3497, "num_tokens": 1322984880.0, "step": 2089 }, { "epoch": 0.24713255291474517, "grad_norm": 0.15988491475582123, "learning_rate": 1.4904256159021713e-05, "loss": 0.3561, "num_tokens": 1323623217.0, "step": 2090 }, { "epoch": 0.24725079815537424, "grad_norm": 0.1461767852306366, "learning_rate": 1.4881216871609679e-05, "loss": 0.3369, "num_tokens": 1324257199.0, "step": 2091 }, { "epoch": 0.24736904339600332, "grad_norm": 0.16058190166950226, "learning_rate": 1.4858201560672108e-05, "loss": 0.3474, "num_tokens": 1324889167.0, "step": 2092 }, { "epoch": 0.24748728863663239, "grad_norm": 0.15663427114486694, "learning_rate": 1.483521025666501e-05, "loss": 0.351, "num_tokens": 1325522031.0, "step": 2093 }, { "epoch": 0.24760553387726145, "grad_norm": 0.17227518558502197, "learning_rate": 1.4812242990012637e-05, "loss": 0.3813, "num_tokens": 1326153928.0, "step": 2094 }, { "epoch": 0.2477237791178905, "grad_norm": 0.16479241847991943, "learning_rate": 1.4789299791107426e-05, "loss": 0.4066, "num_tokens": 1326785341.0, "step": 2095 }, { "epoch": 0.24784202435851957, "grad_norm": 0.1628458946943283, "learning_rate": 1.4766380690309985e-05, "loss": 0.3918, "num_tokens": 1327423021.0, "step": 2096 }, { "epoch": 0.24796026959914863, "grad_norm": 0.14676113426685333, "learning_rate": 1.474348571794901e-05, "loss": 0.3159, "num_tokens": 1328060086.0, "step": 2097 }, { "epoch": 0.2480785148397777, "grad_norm": 0.14470823109149933, "learning_rate": 1.4720614904321281e-05, "loss": 0.3668, "num_tokens": 1328698549.0, "step": 2098 }, { "epoch": 0.24819676008040675, "grad_norm": 0.14710788428783417, "learning_rate": 1.4697768279691603e-05, "loss": 0.3493, "num_tokens": 1329331694.0, "step": 2099 }, { "epoch": 0.24831500532103584, "grad_norm": 0.16010682284832, "learning_rate": 1.4674945874292765e-05, "loss": 0.3501, "num_tokens": 1329962005.0, "step": 2100 }, { "epoch": 0.2484332505616649, "grad_norm": 0.1616450697183609, "learning_rate": 1.4652147718325535e-05, "loss": 0.3719, "num_tokens": 1330597204.0, "step": 2101 }, { "epoch": 0.24855149580229396, "grad_norm": 0.16321998834609985, "learning_rate": 1.4629373841958563e-05, "loss": 0.3487, "num_tokens": 1331236517.0, "step": 2102 }, { "epoch": 0.24866974104292303, "grad_norm": 0.1300300657749176, "learning_rate": 1.4606624275328379e-05, "loss": 0.2921, "num_tokens": 1331871778.0, "step": 2103 }, { "epoch": 0.2487879862835521, "grad_norm": 0.16807685792446136, "learning_rate": 1.4583899048539347e-05, "loss": 0.3591, "num_tokens": 1332509099.0, "step": 2104 }, { "epoch": 0.24890623152418115, "grad_norm": 0.15710899233818054, "learning_rate": 1.4561198191663608e-05, "loss": 0.3425, "num_tokens": 1333140652.0, "step": 2105 }, { "epoch": 0.2490244767648102, "grad_norm": 0.17277449369430542, "learning_rate": 1.4538521734741089e-05, "loss": 0.4135, "num_tokens": 1333778201.0, "step": 2106 }, { "epoch": 0.24914272200543927, "grad_norm": 0.1700480431318283, "learning_rate": 1.451586970777939e-05, "loss": 0.3538, "num_tokens": 1334415211.0, "step": 2107 }, { "epoch": 0.24926096724606833, "grad_norm": 0.14761683344841003, "learning_rate": 1.4493242140753808e-05, "loss": 0.2949, "num_tokens": 1335048790.0, "step": 2108 }, { "epoch": 0.24937921248669742, "grad_norm": 0.15288735926151276, "learning_rate": 1.4470639063607256e-05, "loss": 0.3646, "num_tokens": 1335683687.0, "step": 2109 }, { "epoch": 0.24949745772732648, "grad_norm": 0.148744598031044, "learning_rate": 1.4448060506250244e-05, "loss": 0.3514, "num_tokens": 1336320525.0, "step": 2110 }, { "epoch": 0.24961570296795554, "grad_norm": 0.14903289079666138, "learning_rate": 1.4425506498560853e-05, "loss": 0.3495, "num_tokens": 1336952088.0, "step": 2111 }, { "epoch": 0.2497339482085846, "grad_norm": 0.15876129269599915, "learning_rate": 1.4402977070384653e-05, "loss": 0.3764, "num_tokens": 1337591066.0, "step": 2112 }, { "epoch": 0.24985219344921367, "grad_norm": 0.14318297803401947, "learning_rate": 1.43804722515347e-05, "loss": 0.308, "num_tokens": 1338227429.0, "step": 2113 }, { "epoch": 0.24997043868984273, "grad_norm": 0.1475268453359604, "learning_rate": 1.4357992071791484e-05, "loss": 0.3364, "num_tokens": 1338857099.0, "step": 2114 }, { "epoch": 0.2500886839304718, "grad_norm": 0.14468343555927277, "learning_rate": 1.4335536560902873e-05, "loss": 0.3265, "num_tokens": 1339491225.0, "step": 2115 }, { "epoch": 0.2502069291711009, "grad_norm": 0.152864471077919, "learning_rate": 1.4313105748584135e-05, "loss": 0.3405, "num_tokens": 1340126659.0, "step": 2116 }, { "epoch": 0.2503251744117299, "grad_norm": 0.15547658503055573, "learning_rate": 1.4290699664517809e-05, "loss": 0.3257, "num_tokens": 1340752701.0, "step": 2117 }, { "epoch": 0.250443419652359, "grad_norm": 0.16664119064807892, "learning_rate": 1.4268318338353732e-05, "loss": 0.3643, "num_tokens": 1341388806.0, "step": 2118 }, { "epoch": 0.25056166489298803, "grad_norm": 0.16034823656082153, "learning_rate": 1.4245961799708973e-05, "loss": 0.3646, "num_tokens": 1342025572.0, "step": 2119 }, { "epoch": 0.2506799101336171, "grad_norm": 0.16009068489074707, "learning_rate": 1.4223630078167796e-05, "loss": 0.3524, "num_tokens": 1342621450.0, "step": 2120 }, { "epoch": 0.2507981553742462, "grad_norm": 0.15944495797157288, "learning_rate": 1.420132320328165e-05, "loss": 0.3561, "num_tokens": 1343251996.0, "step": 2121 }, { "epoch": 0.25091640061487525, "grad_norm": 0.16853056848049164, "learning_rate": 1.4179041204569073e-05, "loss": 0.3782, "num_tokens": 1343888063.0, "step": 2122 }, { "epoch": 0.25103464585550433, "grad_norm": 0.153689906001091, "learning_rate": 1.4156784111515697e-05, "loss": 0.3152, "num_tokens": 1344524355.0, "step": 2123 }, { "epoch": 0.25115289109613337, "grad_norm": 0.16411302983760834, "learning_rate": 1.4134551953574201e-05, "loss": 0.3794, "num_tokens": 1345157130.0, "step": 2124 }, { "epoch": 0.25127113633676246, "grad_norm": 0.149836927652359, "learning_rate": 1.4112344760164249e-05, "loss": 0.3162, "num_tokens": 1345788742.0, "step": 2125 }, { "epoch": 0.2513893815773915, "grad_norm": 0.14406760036945343, "learning_rate": 1.4090162560672506e-05, "loss": 0.3558, "num_tokens": 1346424640.0, "step": 2126 }, { "epoch": 0.2515076268180206, "grad_norm": 0.14408567547798157, "learning_rate": 1.4068005384452529e-05, "loss": 0.3522, "num_tokens": 1347050079.0, "step": 2127 }, { "epoch": 0.2516258720586496, "grad_norm": 0.15319818258285522, "learning_rate": 1.4045873260824772e-05, "loss": 0.3417, "num_tokens": 1347683639.0, "step": 2128 }, { "epoch": 0.2517441172992787, "grad_norm": 0.16909605264663696, "learning_rate": 1.4023766219076532e-05, "loss": 0.4197, "num_tokens": 1348322094.0, "step": 2129 }, { "epoch": 0.2518623625399078, "grad_norm": 0.16063344478607178, "learning_rate": 1.4001684288461939e-05, "loss": 0.3496, "num_tokens": 1348957537.0, "step": 2130 }, { "epoch": 0.2519806077805368, "grad_norm": 0.15000493824481964, "learning_rate": 1.3979627498201864e-05, "loss": 0.3504, "num_tokens": 1349596973.0, "step": 2131 }, { "epoch": 0.2520988530211659, "grad_norm": 0.14872415363788605, "learning_rate": 1.3957595877483925e-05, "loss": 0.3262, "num_tokens": 1350233160.0, "step": 2132 }, { "epoch": 0.25221709826179495, "grad_norm": 0.16195665299892426, "learning_rate": 1.3935589455462432e-05, "loss": 0.3389, "num_tokens": 1350870883.0, "step": 2133 }, { "epoch": 0.25233534350242404, "grad_norm": 0.1620256006717682, "learning_rate": 1.3913608261258338e-05, "loss": 0.3235, "num_tokens": 1351499885.0, "step": 2134 }, { "epoch": 0.25245358874305307, "grad_norm": 0.13652187585830688, "learning_rate": 1.3891652323959228e-05, "loss": 0.3166, "num_tokens": 1352131223.0, "step": 2135 }, { "epoch": 0.25257183398368216, "grad_norm": 0.16615505516529083, "learning_rate": 1.3869721672619274e-05, "loss": 0.3438, "num_tokens": 1352768469.0, "step": 2136 }, { "epoch": 0.25269007922431125, "grad_norm": 0.14930325746536255, "learning_rate": 1.3847816336259156e-05, "loss": 0.3163, "num_tokens": 1353395367.0, "step": 2137 }, { "epoch": 0.2528083244649403, "grad_norm": 0.1527901440858841, "learning_rate": 1.3825936343866075e-05, "loss": 0.3608, "num_tokens": 1354024395.0, "step": 2138 }, { "epoch": 0.25292656970556937, "grad_norm": 0.1636216789484024, "learning_rate": 1.3804081724393691e-05, "loss": 0.3696, "num_tokens": 1354657702.0, "step": 2139 }, { "epoch": 0.2530448149461984, "grad_norm": 0.15648838877677917, "learning_rate": 1.3782252506762083e-05, "loss": 0.3834, "num_tokens": 1355295855.0, "step": 2140 }, { "epoch": 0.2531630601868275, "grad_norm": 0.1640644371509552, "learning_rate": 1.3760448719857729e-05, "loss": 0.3471, "num_tokens": 1355913131.0, "step": 2141 }, { "epoch": 0.2532813054274565, "grad_norm": 0.15252865850925446, "learning_rate": 1.3738670392533444e-05, "loss": 0.3549, "num_tokens": 1356552376.0, "step": 2142 }, { "epoch": 0.2533995506680856, "grad_norm": 0.16179129481315613, "learning_rate": 1.3716917553608351e-05, "loss": 0.3726, "num_tokens": 1357190876.0, "step": 2143 }, { "epoch": 0.25351779590871465, "grad_norm": 0.1642070710659027, "learning_rate": 1.3695190231867853e-05, "loss": 0.3587, "num_tokens": 1357827846.0, "step": 2144 }, { "epoch": 0.25363604114934374, "grad_norm": 0.14839212596416473, "learning_rate": 1.367348845606357e-05, "loss": 0.3342, "num_tokens": 1358460902.0, "step": 2145 }, { "epoch": 0.2537542863899728, "grad_norm": 0.15711148083209991, "learning_rate": 1.3651812254913344e-05, "loss": 0.3558, "num_tokens": 1359097940.0, "step": 2146 }, { "epoch": 0.25387253163060186, "grad_norm": 0.14963391423225403, "learning_rate": 1.3630161657101154e-05, "loss": 0.3304, "num_tokens": 1359727970.0, "step": 2147 }, { "epoch": 0.25399077687123095, "grad_norm": 0.16470035910606384, "learning_rate": 1.3608536691277099e-05, "loss": 0.3534, "num_tokens": 1360363518.0, "step": 2148 }, { "epoch": 0.25410902211186, "grad_norm": 0.18055987358093262, "learning_rate": 1.3586937386057365e-05, "loss": 0.3466, "num_tokens": 1360996423.0, "step": 2149 }, { "epoch": 0.2542272673524891, "grad_norm": 0.16418340802192688, "learning_rate": 1.3565363770024177e-05, "loss": 0.3435, "num_tokens": 1361629975.0, "step": 2150 }, { "epoch": 0.2543455125931181, "grad_norm": 0.16093459725379944, "learning_rate": 1.3543815871725777e-05, "loss": 0.3634, "num_tokens": 1362263332.0, "step": 2151 }, { "epoch": 0.2544637578337472, "grad_norm": 0.16369055211544037, "learning_rate": 1.3522293719676363e-05, "loss": 0.3681, "num_tokens": 1362893971.0, "step": 2152 }, { "epoch": 0.2545820030743763, "grad_norm": 0.14665311574935913, "learning_rate": 1.3500797342356067e-05, "loss": 0.3719, "num_tokens": 1363530574.0, "step": 2153 }, { "epoch": 0.2547002483150053, "grad_norm": 0.1517995148897171, "learning_rate": 1.3479326768210912e-05, "loss": 0.354, "num_tokens": 1364162896.0, "step": 2154 }, { "epoch": 0.2548184935556344, "grad_norm": 0.16105465590953827, "learning_rate": 1.3457882025652765e-05, "loss": 0.3433, "num_tokens": 1364795298.0, "step": 2155 }, { "epoch": 0.25493673879626344, "grad_norm": 0.14719729125499725, "learning_rate": 1.3436463143059343e-05, "loss": 0.3726, "num_tokens": 1365431336.0, "step": 2156 }, { "epoch": 0.25505498403689253, "grad_norm": 0.14745932817459106, "learning_rate": 1.3415070148774115e-05, "loss": 0.3386, "num_tokens": 1366063237.0, "step": 2157 }, { "epoch": 0.25517322927752156, "grad_norm": 0.15282244980335236, "learning_rate": 1.3393703071106295e-05, "loss": 0.3277, "num_tokens": 1366701239.0, "step": 2158 }, { "epoch": 0.25529147451815065, "grad_norm": 0.16405080258846283, "learning_rate": 1.3372361938330806e-05, "loss": 0.3404, "num_tokens": 1367334509.0, "step": 2159 }, { "epoch": 0.2554097197587797, "grad_norm": 0.15155361592769623, "learning_rate": 1.3351046778688231e-05, "loss": 0.3715, "num_tokens": 1367970895.0, "step": 2160 }, { "epoch": 0.2555279649994088, "grad_norm": 0.1698579043149948, "learning_rate": 1.3329757620384806e-05, "loss": 0.3459, "num_tokens": 1368606104.0, "step": 2161 }, { "epoch": 0.25564621024003786, "grad_norm": 0.15005841851234436, "learning_rate": 1.3308494491592337e-05, "loss": 0.35, "num_tokens": 1369242016.0, "step": 2162 }, { "epoch": 0.2557644554806669, "grad_norm": 0.16898460686206818, "learning_rate": 1.328725742044819e-05, "loss": 0.3533, "num_tokens": 1369878992.0, "step": 2163 }, { "epoch": 0.255882700721296, "grad_norm": 0.1578129231929779, "learning_rate": 1.3266046435055247e-05, "loss": 0.3561, "num_tokens": 1370511489.0, "step": 2164 }, { "epoch": 0.256000945961925, "grad_norm": 0.15820948779582977, "learning_rate": 1.3244861563481878e-05, "loss": 0.3651, "num_tokens": 1371142032.0, "step": 2165 }, { "epoch": 0.2561191912025541, "grad_norm": 0.14821572601795197, "learning_rate": 1.32237028337619e-05, "loss": 0.346, "num_tokens": 1371775322.0, "step": 2166 }, { "epoch": 0.25623743644318314, "grad_norm": 0.14930571615695953, "learning_rate": 1.3202570273894522e-05, "loss": 0.3376, "num_tokens": 1372409818.0, "step": 2167 }, { "epoch": 0.25635568168381223, "grad_norm": 0.16223351657390594, "learning_rate": 1.3181463911844337e-05, "loss": 0.3759, "num_tokens": 1373047668.0, "step": 2168 }, { "epoch": 0.25647392692444126, "grad_norm": 0.17227718234062195, "learning_rate": 1.3160383775541251e-05, "loss": 0.3755, "num_tokens": 1373685499.0, "step": 2169 }, { "epoch": 0.25659217216507035, "grad_norm": 0.15489031374454498, "learning_rate": 1.3139329892880495e-05, "loss": 0.3598, "num_tokens": 1374323849.0, "step": 2170 }, { "epoch": 0.25671041740569944, "grad_norm": 0.14238935708999634, "learning_rate": 1.3118302291722535e-05, "loss": 0.2983, "num_tokens": 1374959880.0, "step": 2171 }, { "epoch": 0.2568286626463285, "grad_norm": 0.14943967759609222, "learning_rate": 1.309730099989307e-05, "loss": 0.3409, "num_tokens": 1375595575.0, "step": 2172 }, { "epoch": 0.25694690788695757, "grad_norm": 0.15923093259334564, "learning_rate": 1.3076326045182975e-05, "loss": 0.363, "num_tokens": 1376233493.0, "step": 2173 }, { "epoch": 0.2570651531275866, "grad_norm": 0.17506137490272522, "learning_rate": 1.3055377455348276e-05, "loss": 0.35, "num_tokens": 1376867408.0, "step": 2174 }, { "epoch": 0.2571833983682157, "grad_norm": 0.14500072598457336, "learning_rate": 1.3034455258110127e-05, "loss": 0.3259, "num_tokens": 1377501464.0, "step": 2175 }, { "epoch": 0.2573016436088447, "grad_norm": 0.15769173204898834, "learning_rate": 1.3013559481154734e-05, "loss": 0.3356, "num_tokens": 1378107558.0, "step": 2176 }, { "epoch": 0.2574198888494738, "grad_norm": 0.16178518533706665, "learning_rate": 1.2992690152133351e-05, "loss": 0.3318, "num_tokens": 1378742114.0, "step": 2177 }, { "epoch": 0.2575381340901029, "grad_norm": 0.136235311627388, "learning_rate": 1.2971847298662235e-05, "loss": 0.3182, "num_tokens": 1379380501.0, "step": 2178 }, { "epoch": 0.25765637933073193, "grad_norm": 0.17029152810573578, "learning_rate": 1.2951030948322594e-05, "loss": 0.3309, "num_tokens": 1380013591.0, "step": 2179 }, { "epoch": 0.257774624571361, "grad_norm": 0.14048749208450317, "learning_rate": 1.2930241128660597e-05, "loss": 0.3193, "num_tokens": 1380648061.0, "step": 2180 }, { "epoch": 0.25789286981199006, "grad_norm": 0.15299919247627258, "learning_rate": 1.2909477867187276e-05, "loss": 0.3569, "num_tokens": 1381282667.0, "step": 2181 }, { "epoch": 0.25801111505261914, "grad_norm": 0.15771017968654633, "learning_rate": 1.2888741191378522e-05, "loss": 0.3695, "num_tokens": 1381921709.0, "step": 2182 }, { "epoch": 0.2581293602932482, "grad_norm": 0.15074072778224945, "learning_rate": 1.2868031128675058e-05, "loss": 0.3364, "num_tokens": 1382558484.0, "step": 2183 }, { "epoch": 0.25824760553387727, "grad_norm": 0.1526341289281845, "learning_rate": 1.2847347706482375e-05, "loss": 0.3643, "num_tokens": 1383194599.0, "step": 2184 }, { "epoch": 0.2583658507745063, "grad_norm": 0.15062035620212555, "learning_rate": 1.2826690952170725e-05, "loss": 0.3334, "num_tokens": 1383830599.0, "step": 2185 }, { "epoch": 0.2584840960151354, "grad_norm": 0.15836162865161896, "learning_rate": 1.2806060893075064e-05, "loss": 0.3451, "num_tokens": 1384469453.0, "step": 2186 }, { "epoch": 0.2586023412557645, "grad_norm": 0.1659291684627533, "learning_rate": 1.2785457556495023e-05, "loss": 0.3624, "num_tokens": 1385108151.0, "step": 2187 }, { "epoch": 0.2587205864963935, "grad_norm": 0.1547379344701767, "learning_rate": 1.2764880969694867e-05, "loss": 0.3677, "num_tokens": 1385737537.0, "step": 2188 }, { "epoch": 0.2588388317370226, "grad_norm": 0.16594819724559784, "learning_rate": 1.2744331159903462e-05, "loss": 0.3528, "num_tokens": 1386371978.0, "step": 2189 }, { "epoch": 0.25895707697765163, "grad_norm": 0.1468471735715866, "learning_rate": 1.2723808154314257e-05, "loss": 0.3031, "num_tokens": 1387006855.0, "step": 2190 }, { "epoch": 0.2590753222182807, "grad_norm": 0.14902792870998383, "learning_rate": 1.2703311980085216e-05, "loss": 0.3739, "num_tokens": 1387642922.0, "step": 2191 }, { "epoch": 0.25919356745890976, "grad_norm": 0.16405217349529266, "learning_rate": 1.2682842664338798e-05, "loss": 0.3412, "num_tokens": 1388273872.0, "step": 2192 }, { "epoch": 0.25931181269953885, "grad_norm": 0.1533263772726059, "learning_rate": 1.2662400234161925e-05, "loss": 0.3315, "num_tokens": 1388907732.0, "step": 2193 }, { "epoch": 0.25943005794016794, "grad_norm": 0.18562085926532745, "learning_rate": 1.2641984716605926e-05, "loss": 0.3749, "num_tokens": 1389539553.0, "step": 2194 }, { "epoch": 0.25954830318079697, "grad_norm": 0.1535794585943222, "learning_rate": 1.2621596138686556e-05, "loss": 0.3604, "num_tokens": 1390174842.0, "step": 2195 }, { "epoch": 0.25966654842142606, "grad_norm": 0.16793584823608398, "learning_rate": 1.2601234527383877e-05, "loss": 0.3613, "num_tokens": 1390810002.0, "step": 2196 }, { "epoch": 0.2597847936620551, "grad_norm": 0.15016791224479675, "learning_rate": 1.2580899909642288e-05, "loss": 0.3614, "num_tokens": 1391444325.0, "step": 2197 }, { "epoch": 0.2599030389026842, "grad_norm": 0.14536795020103455, "learning_rate": 1.2560592312370456e-05, "loss": 0.3375, "num_tokens": 1392075518.0, "step": 2198 }, { "epoch": 0.2600212841433132, "grad_norm": 0.16178761422634125, "learning_rate": 1.2540311762441314e-05, "loss": 0.3751, "num_tokens": 1392708819.0, "step": 2199 }, { "epoch": 0.2601395293839423, "grad_norm": 0.17659318447113037, "learning_rate": 1.2520058286691983e-05, "loss": 0.3666, "num_tokens": 1393340694.0, "step": 2200 }, { "epoch": 0.26025777462457134, "grad_norm": 0.14843176305294037, "learning_rate": 1.2499831911923764e-05, "loss": 0.3502, "num_tokens": 1393973875.0, "step": 2201 }, { "epoch": 0.2603760198652004, "grad_norm": 0.15998484194278717, "learning_rate": 1.2479632664902092e-05, "loss": 0.3457, "num_tokens": 1394610492.0, "step": 2202 }, { "epoch": 0.2604942651058295, "grad_norm": 0.16628916561603546, "learning_rate": 1.2459460572356502e-05, "loss": 0.3594, "num_tokens": 1395249777.0, "step": 2203 }, { "epoch": 0.26061251034645855, "grad_norm": 0.15287388861179352, "learning_rate": 1.2439315660980614e-05, "loss": 0.3491, "num_tokens": 1395881158.0, "step": 2204 }, { "epoch": 0.26073075558708764, "grad_norm": 0.1673702746629715, "learning_rate": 1.2419197957432053e-05, "loss": 0.3629, "num_tokens": 1396510147.0, "step": 2205 }, { "epoch": 0.26084900082771667, "grad_norm": 0.15775969624519348, "learning_rate": 1.2399107488332462e-05, "loss": 0.3576, "num_tokens": 1397143032.0, "step": 2206 }, { "epoch": 0.26096724606834576, "grad_norm": 0.1686294972896576, "learning_rate": 1.2379044280267427e-05, "loss": 0.3673, "num_tokens": 1397778901.0, "step": 2207 }, { "epoch": 0.2610854913089748, "grad_norm": 0.1551026701927185, "learning_rate": 1.2359008359786469e-05, "loss": 0.34, "num_tokens": 1398416648.0, "step": 2208 }, { "epoch": 0.2612037365496039, "grad_norm": 0.14781665802001953, "learning_rate": 1.2338999753403002e-05, "loss": 0.338, "num_tokens": 1399055774.0, "step": 2209 }, { "epoch": 0.2613219817902329, "grad_norm": 0.1639232337474823, "learning_rate": 1.2319018487594295e-05, "loss": 0.3909, "num_tokens": 1399679014.0, "step": 2210 }, { "epoch": 0.261440227030862, "grad_norm": 0.16171275079250336, "learning_rate": 1.2299064588801428e-05, "loss": 0.3538, "num_tokens": 1400310496.0, "step": 2211 }, { "epoch": 0.2615584722714911, "grad_norm": 0.1480337232351303, "learning_rate": 1.2279138083429277e-05, "loss": 0.3343, "num_tokens": 1400944987.0, "step": 2212 }, { "epoch": 0.2616767175121201, "grad_norm": 0.15325020253658295, "learning_rate": 1.2259238997846454e-05, "loss": 0.3251, "num_tokens": 1401580390.0, "step": 2213 }, { "epoch": 0.2617949627527492, "grad_norm": 0.16419720649719238, "learning_rate": 1.2239367358385317e-05, "loss": 0.3737, "num_tokens": 1402207325.0, "step": 2214 }, { "epoch": 0.26191320799337825, "grad_norm": 0.15797926485538483, "learning_rate": 1.2219523191341866e-05, "loss": 0.3559, "num_tokens": 1402846619.0, "step": 2215 }, { "epoch": 0.26203145323400734, "grad_norm": 0.15500512719154358, "learning_rate": 1.2199706522975776e-05, "loss": 0.3411, "num_tokens": 1403475238.0, "step": 2216 }, { "epoch": 0.2621496984746364, "grad_norm": 0.1467328667640686, "learning_rate": 1.2179917379510315e-05, "loss": 0.3186, "num_tokens": 1404109414.0, "step": 2217 }, { "epoch": 0.26226794371526546, "grad_norm": 0.15633271634578705, "learning_rate": 1.2160155787132334e-05, "loss": 0.3618, "num_tokens": 1404741649.0, "step": 2218 }, { "epoch": 0.26238618895589455, "grad_norm": 0.1830826699733734, "learning_rate": 1.2140421771992239e-05, "loss": 0.3871, "num_tokens": 1405372004.0, "step": 2219 }, { "epoch": 0.2625044341965236, "grad_norm": 0.15860310196876526, "learning_rate": 1.2120715360203924e-05, "loss": 0.3829, "num_tokens": 1406008107.0, "step": 2220 }, { "epoch": 0.2626226794371527, "grad_norm": 0.156002938747406, "learning_rate": 1.210103657784476e-05, "loss": 0.329, "num_tokens": 1406642830.0, "step": 2221 }, { "epoch": 0.2627409246777817, "grad_norm": 0.15903633832931519, "learning_rate": 1.2081385450955562e-05, "loss": 0.3507, "num_tokens": 1407279706.0, "step": 2222 }, { "epoch": 0.2628591699184108, "grad_norm": 0.14660197496414185, "learning_rate": 1.2061762005540534e-05, "loss": 0.3719, "num_tokens": 1407913980.0, "step": 2223 }, { "epoch": 0.26297741515903983, "grad_norm": 0.1643187403678894, "learning_rate": 1.2042166267567283e-05, "loss": 0.3839, "num_tokens": 1408544218.0, "step": 2224 }, { "epoch": 0.2630956603996689, "grad_norm": 0.1486949622631073, "learning_rate": 1.2022598262966715e-05, "loss": 0.3254, "num_tokens": 1409176076.0, "step": 2225 }, { "epoch": 0.26321390564029795, "grad_norm": 0.147776260972023, "learning_rate": 1.2003058017633055e-05, "loss": 0.3251, "num_tokens": 1409808144.0, "step": 2226 }, { "epoch": 0.26333215088092704, "grad_norm": 0.1610105186700821, "learning_rate": 1.1983545557423785e-05, "loss": 0.3559, "num_tokens": 1410438046.0, "step": 2227 }, { "epoch": 0.26345039612155613, "grad_norm": 0.15127472579479218, "learning_rate": 1.1964060908159617e-05, "loss": 0.3498, "num_tokens": 1411070928.0, "step": 2228 }, { "epoch": 0.26356864136218516, "grad_norm": 0.17238755524158478, "learning_rate": 1.1944604095624487e-05, "loss": 0.3671, "num_tokens": 1411705318.0, "step": 2229 }, { "epoch": 0.26368688660281425, "grad_norm": 0.14921915531158447, "learning_rate": 1.1925175145565464e-05, "loss": 0.3364, "num_tokens": 1412339837.0, "step": 2230 }, { "epoch": 0.2638051318434433, "grad_norm": 0.156773179769516, "learning_rate": 1.190577408369276e-05, "loss": 0.3049, "num_tokens": 1412944973.0, "step": 2231 }, { "epoch": 0.2639233770840724, "grad_norm": 0.15290521085262299, "learning_rate": 1.1886400935679674e-05, "loss": 0.3543, "num_tokens": 1413570089.0, "step": 2232 }, { "epoch": 0.2640416223247014, "grad_norm": 0.1462039202451706, "learning_rate": 1.1867055727162587e-05, "loss": 0.3502, "num_tokens": 1414206557.0, "step": 2233 }, { "epoch": 0.2641598675653305, "grad_norm": 0.15547192096710205, "learning_rate": 1.1847738483740886e-05, "loss": 0.3444, "num_tokens": 1414839263.0, "step": 2234 }, { "epoch": 0.2642781128059596, "grad_norm": 0.14497071504592896, "learning_rate": 1.1828449230976965e-05, "loss": 0.3448, "num_tokens": 1415473529.0, "step": 2235 }, { "epoch": 0.2643963580465886, "grad_norm": 0.14295479655265808, "learning_rate": 1.1809187994396168e-05, "loss": 0.3288, "num_tokens": 1416110661.0, "step": 2236 }, { "epoch": 0.2645146032872177, "grad_norm": 0.17718076705932617, "learning_rate": 1.178995479948677e-05, "loss": 0.3639, "num_tokens": 1416741736.0, "step": 2237 }, { "epoch": 0.26463284852784674, "grad_norm": 0.15077832341194153, "learning_rate": 1.177074967169995e-05, "loss": 0.3182, "num_tokens": 1417367685.0, "step": 2238 }, { "epoch": 0.26475109376847583, "grad_norm": 0.1541740894317627, "learning_rate": 1.175157263644973e-05, "loss": 0.3239, "num_tokens": 1418004275.0, "step": 2239 }, { "epoch": 0.26486933900910486, "grad_norm": 0.16033287346363068, "learning_rate": 1.1732423719112964e-05, "loss": 0.3284, "num_tokens": 1418632155.0, "step": 2240 }, { "epoch": 0.26498758424973395, "grad_norm": 0.15380583703517914, "learning_rate": 1.1713302945029295e-05, "loss": 0.3649, "num_tokens": 1419269496.0, "step": 2241 }, { "epoch": 0.265105829490363, "grad_norm": 0.14381130039691925, "learning_rate": 1.1694210339501118e-05, "loss": 0.3257, "num_tokens": 1419904473.0, "step": 2242 }, { "epoch": 0.2652240747309921, "grad_norm": 0.15737563371658325, "learning_rate": 1.1675145927793579e-05, "loss": 0.3254, "num_tokens": 1420538845.0, "step": 2243 }, { "epoch": 0.26534231997162117, "grad_norm": 0.1575727015733719, "learning_rate": 1.1656109735134482e-05, "loss": 0.3566, "num_tokens": 1421173978.0, "step": 2244 }, { "epoch": 0.2654605652122502, "grad_norm": 0.14479680359363556, "learning_rate": 1.1637101786714315e-05, "loss": 0.3718, "num_tokens": 1421805151.0, "step": 2245 }, { "epoch": 0.2655788104528793, "grad_norm": 0.1669468730688095, "learning_rate": 1.1618122107686168e-05, "loss": 0.3823, "num_tokens": 1422442066.0, "step": 2246 }, { "epoch": 0.2656970556935083, "grad_norm": 0.16510143876075745, "learning_rate": 1.1599170723165734e-05, "loss": 0.327, "num_tokens": 1423075090.0, "step": 2247 }, { "epoch": 0.2658153009341374, "grad_norm": 0.1484033763408661, "learning_rate": 1.1580247658231277e-05, "loss": 0.3169, "num_tokens": 1423675926.0, "step": 2248 }, { "epoch": 0.26593354617476644, "grad_norm": 0.15835952758789062, "learning_rate": 1.1561352937923563e-05, "loss": 0.3288, "num_tokens": 1424296031.0, "step": 2249 }, { "epoch": 0.26605179141539553, "grad_norm": 0.1563296765089035, "learning_rate": 1.154248658724586e-05, "loss": 0.3507, "num_tokens": 1424930538.0, "step": 2250 }, { "epoch": 0.2661700366560246, "grad_norm": 0.16382481157779694, "learning_rate": 1.1523648631163897e-05, "loss": 0.3681, "num_tokens": 1425559363.0, "step": 2251 }, { "epoch": 0.26628828189665366, "grad_norm": 0.15770530700683594, "learning_rate": 1.1504839094605815e-05, "loss": 0.3637, "num_tokens": 1426194887.0, "step": 2252 }, { "epoch": 0.26640652713728274, "grad_norm": 0.16853371262550354, "learning_rate": 1.1486058002462176e-05, "loss": 0.3491, "num_tokens": 1426827620.0, "step": 2253 }, { "epoch": 0.2665247723779118, "grad_norm": 0.1755160689353943, "learning_rate": 1.1467305379585872e-05, "loss": 0.3548, "num_tokens": 1427462285.0, "step": 2254 }, { "epoch": 0.26664301761854087, "grad_norm": 0.1687714010477066, "learning_rate": 1.1448581250792136e-05, "loss": 0.3571, "num_tokens": 1428098823.0, "step": 2255 }, { "epoch": 0.2667612628591699, "grad_norm": 0.14964935183525085, "learning_rate": 1.1429885640858493e-05, "loss": 0.3432, "num_tokens": 1428738293.0, "step": 2256 }, { "epoch": 0.266879508099799, "grad_norm": 0.15902990102767944, "learning_rate": 1.1411218574524726e-05, "loss": 0.3554, "num_tokens": 1429371807.0, "step": 2257 }, { "epoch": 0.266997753340428, "grad_norm": 0.15595631301403046, "learning_rate": 1.1392580076492852e-05, "loss": 0.3519, "num_tokens": 1430008824.0, "step": 2258 }, { "epoch": 0.2671159985810571, "grad_norm": 0.17033277451992035, "learning_rate": 1.1373970171427084e-05, "loss": 0.3639, "num_tokens": 1430648432.0, "step": 2259 }, { "epoch": 0.2672342438216862, "grad_norm": 0.16252721846103668, "learning_rate": 1.1355388883953794e-05, "loss": 0.3266, "num_tokens": 1431274507.0, "step": 2260 }, { "epoch": 0.26735248906231524, "grad_norm": 0.16513614356517792, "learning_rate": 1.133683623866148e-05, "loss": 0.3513, "num_tokens": 1431899378.0, "step": 2261 }, { "epoch": 0.2674707343029443, "grad_norm": 0.16827766597270966, "learning_rate": 1.1318312260100742e-05, "loss": 0.3575, "num_tokens": 1432532765.0, "step": 2262 }, { "epoch": 0.26758897954357336, "grad_norm": 0.156415656208992, "learning_rate": 1.1299816972784262e-05, "loss": 0.3473, "num_tokens": 1433167753.0, "step": 2263 }, { "epoch": 0.26770722478420245, "grad_norm": 0.1539047360420227, "learning_rate": 1.1281350401186727e-05, "loss": 0.3337, "num_tokens": 1433802888.0, "step": 2264 }, { "epoch": 0.2678254700248315, "grad_norm": 0.1548675000667572, "learning_rate": 1.1262912569744841e-05, "loss": 0.3582, "num_tokens": 1434439739.0, "step": 2265 }, { "epoch": 0.26794371526546057, "grad_norm": 0.1661369800567627, "learning_rate": 1.124450350285727e-05, "loss": 0.3617, "num_tokens": 1435075550.0, "step": 2266 }, { "epoch": 0.2680619605060896, "grad_norm": 0.16508784890174866, "learning_rate": 1.1226123224884625e-05, "loss": 0.358, "num_tokens": 1435702450.0, "step": 2267 }, { "epoch": 0.2681802057467187, "grad_norm": 0.1570100486278534, "learning_rate": 1.1207771760149412e-05, "loss": 0.354, "num_tokens": 1436341880.0, "step": 2268 }, { "epoch": 0.2682984509873478, "grad_norm": 0.15311993658542633, "learning_rate": 1.1189449132936016e-05, "loss": 0.3284, "num_tokens": 1436980764.0, "step": 2269 }, { "epoch": 0.2684166962279768, "grad_norm": 0.1685255914926529, "learning_rate": 1.1171155367490653e-05, "loss": 0.3593, "num_tokens": 1437607941.0, "step": 2270 }, { "epoch": 0.2685349414686059, "grad_norm": 0.15958444774150848, "learning_rate": 1.115289048802134e-05, "loss": 0.3655, "num_tokens": 1438242314.0, "step": 2271 }, { "epoch": 0.26865318670923494, "grad_norm": 0.165290966629982, "learning_rate": 1.1134654518697899e-05, "loss": 0.3474, "num_tokens": 1438871363.0, "step": 2272 }, { "epoch": 0.268771431949864, "grad_norm": 0.15361101925373077, "learning_rate": 1.1116447483651872e-05, "loss": 0.344, "num_tokens": 1439499872.0, "step": 2273 }, { "epoch": 0.26888967719049306, "grad_norm": 0.14768876135349274, "learning_rate": 1.1098269406976507e-05, "loss": 0.3208, "num_tokens": 1440133531.0, "step": 2274 }, { "epoch": 0.26900792243112215, "grad_norm": 0.14982637763023376, "learning_rate": 1.1080120312726752e-05, "loss": 0.3184, "num_tokens": 1440768688.0, "step": 2275 }, { "epoch": 0.26912616767175124, "grad_norm": 0.1513940989971161, "learning_rate": 1.1062000224919177e-05, "loss": 0.399, "num_tokens": 1441398656.0, "step": 2276 }, { "epoch": 0.26924441291238027, "grad_norm": 0.15859779715538025, "learning_rate": 1.1043909167532e-05, "loss": 0.3304, "num_tokens": 1442003989.0, "step": 2277 }, { "epoch": 0.26936265815300936, "grad_norm": 0.15063779056072235, "learning_rate": 1.1025847164504997e-05, "loss": 0.3779, "num_tokens": 1442643326.0, "step": 2278 }, { "epoch": 0.2694809033936384, "grad_norm": 0.1525985449552536, "learning_rate": 1.1007814239739506e-05, "loss": 0.3666, "num_tokens": 1443278975.0, "step": 2279 }, { "epoch": 0.2695991486342675, "grad_norm": 0.15241971611976624, "learning_rate": 1.098981041709839e-05, "loss": 0.3297, "num_tokens": 1443913604.0, "step": 2280 }, { "epoch": 0.2697173938748965, "grad_norm": 0.15431083738803864, "learning_rate": 1.0971835720405987e-05, "loss": 0.3506, "num_tokens": 1444549969.0, "step": 2281 }, { "epoch": 0.2698356391155256, "grad_norm": 0.14797011017799377, "learning_rate": 1.0953890173448117e-05, "loss": 0.3456, "num_tokens": 1445189491.0, "step": 2282 }, { "epoch": 0.26995388435615464, "grad_norm": 0.14169208705425262, "learning_rate": 1.0935973799972001e-05, "loss": 0.3292, "num_tokens": 1445820831.0, "step": 2283 }, { "epoch": 0.2700721295967837, "grad_norm": 0.15232716500759125, "learning_rate": 1.0918086623686275e-05, "loss": 0.3351, "num_tokens": 1446457219.0, "step": 2284 }, { "epoch": 0.2701903748374128, "grad_norm": 0.16128025949001312, "learning_rate": 1.0900228668260919e-05, "loss": 0.339, "num_tokens": 1447094737.0, "step": 2285 }, { "epoch": 0.27030862007804185, "grad_norm": 0.1798248142004013, "learning_rate": 1.0882399957327259e-05, "loss": 0.3763, "num_tokens": 1447727826.0, "step": 2286 }, { "epoch": 0.27042686531867094, "grad_norm": 0.15512481331825256, "learning_rate": 1.0864600514477925e-05, "loss": 0.3576, "num_tokens": 1448362348.0, "step": 2287 }, { "epoch": 0.2705451105593, "grad_norm": 0.1553766280412674, "learning_rate": 1.0846830363266808e-05, "loss": 0.3691, "num_tokens": 1448998191.0, "step": 2288 }, { "epoch": 0.27066335579992906, "grad_norm": 0.16342367231845856, "learning_rate": 1.0829089527209038e-05, "loss": 0.3467, "num_tokens": 1449624381.0, "step": 2289 }, { "epoch": 0.2707816010405581, "grad_norm": 0.1577490121126175, "learning_rate": 1.0811378029780954e-05, "loss": 0.3561, "num_tokens": 1450260226.0, "step": 2290 }, { "epoch": 0.2708998462811872, "grad_norm": 0.16117647290229797, "learning_rate": 1.0793695894420068e-05, "loss": 0.3552, "num_tokens": 1450890661.0, "step": 2291 }, { "epoch": 0.2710180915218163, "grad_norm": 0.1617818921804428, "learning_rate": 1.0776043144525048e-05, "loss": 0.3384, "num_tokens": 1451526642.0, "step": 2292 }, { "epoch": 0.2711363367624453, "grad_norm": 0.15790875256061554, "learning_rate": 1.0758419803455669e-05, "loss": 0.3605, "num_tokens": 1452159607.0, "step": 2293 }, { "epoch": 0.2712545820030744, "grad_norm": 0.15148423612117767, "learning_rate": 1.074082589453279e-05, "loss": 0.3745, "num_tokens": 1452797560.0, "step": 2294 }, { "epoch": 0.27137282724370343, "grad_norm": 0.15457701683044434, "learning_rate": 1.0723261441038317e-05, "loss": 0.3468, "num_tokens": 1453434624.0, "step": 2295 }, { "epoch": 0.2714910724843325, "grad_norm": 0.1459672898054123, "learning_rate": 1.0705726466215182e-05, "loss": 0.3503, "num_tokens": 1454067127.0, "step": 2296 }, { "epoch": 0.27160931772496155, "grad_norm": 0.1752711683511734, "learning_rate": 1.0688220993267324e-05, "loss": 0.3656, "num_tokens": 1454700667.0, "step": 2297 }, { "epoch": 0.27172756296559064, "grad_norm": 0.17267681658267975, "learning_rate": 1.0670745045359618e-05, "loss": 0.3873, "num_tokens": 1455337380.0, "step": 2298 }, { "epoch": 0.2718458082062197, "grad_norm": 0.14968270063400269, "learning_rate": 1.0653298645617877e-05, "loss": 0.3483, "num_tokens": 1455974456.0, "step": 2299 }, { "epoch": 0.27196405344684876, "grad_norm": 0.16421633958816528, "learning_rate": 1.063588181712881e-05, "loss": 0.3856, "num_tokens": 1456611249.0, "step": 2300 }, { "epoch": 0.27208229868747785, "grad_norm": 0.16939447820186615, "learning_rate": 1.0618494582940016e-05, "loss": 0.3393, "num_tokens": 1457243118.0, "step": 2301 }, { "epoch": 0.2722005439281069, "grad_norm": 0.15923595428466797, "learning_rate": 1.0601136966059902e-05, "loss": 0.3544, "num_tokens": 1457878878.0, "step": 2302 }, { "epoch": 0.272318789168736, "grad_norm": 0.17342090606689453, "learning_rate": 1.0583808989457697e-05, "loss": 0.3381, "num_tokens": 1458506707.0, "step": 2303 }, { "epoch": 0.272437034409365, "grad_norm": 0.17090991139411926, "learning_rate": 1.0566510676063415e-05, "loss": 0.3591, "num_tokens": 1459136085.0, "step": 2304 }, { "epoch": 0.2725552796499941, "grad_norm": 0.1702851802110672, "learning_rate": 1.0549242048767789e-05, "loss": 0.3738, "num_tokens": 1459775346.0, "step": 2305 }, { "epoch": 0.27267352489062313, "grad_norm": 0.1562279909849167, "learning_rate": 1.053200313042231e-05, "loss": 0.3837, "num_tokens": 1460412291.0, "step": 2306 }, { "epoch": 0.2727917701312522, "grad_norm": 0.14924395084381104, "learning_rate": 1.0514793943839119e-05, "loss": 0.322, "num_tokens": 1461048611.0, "step": 2307 }, { "epoch": 0.2729100153718813, "grad_norm": 0.15389004349708557, "learning_rate": 1.049761451179103e-05, "loss": 0.3586, "num_tokens": 1461677729.0, "step": 2308 }, { "epoch": 0.27302826061251034, "grad_norm": 0.16182515025138855, "learning_rate": 1.0480464857011482e-05, "loss": 0.37, "num_tokens": 1462315617.0, "step": 2309 }, { "epoch": 0.27314650585313943, "grad_norm": 0.14908406138420105, "learning_rate": 1.0463345002194496e-05, "loss": 0.3185, "num_tokens": 1462930982.0, "step": 2310 }, { "epoch": 0.27326475109376847, "grad_norm": 0.16309335827827454, "learning_rate": 1.044625496999469e-05, "loss": 0.3491, "num_tokens": 1463566971.0, "step": 2311 }, { "epoch": 0.27338299633439755, "grad_norm": 0.1465187668800354, "learning_rate": 1.0429194783027187e-05, "loss": 0.3295, "num_tokens": 1464202172.0, "step": 2312 }, { "epoch": 0.2735012415750266, "grad_norm": 0.15645480155944824, "learning_rate": 1.0412164463867636e-05, "loss": 0.3839, "num_tokens": 1464838092.0, "step": 2313 }, { "epoch": 0.2736194868156557, "grad_norm": 0.1486484557390213, "learning_rate": 1.0395164035052143e-05, "loss": 0.3429, "num_tokens": 1465477610.0, "step": 2314 }, { "epoch": 0.2737377320562847, "grad_norm": 0.16061237454414368, "learning_rate": 1.0378193519077273e-05, "loss": 0.3488, "num_tokens": 1466116332.0, "step": 2315 }, { "epoch": 0.2738559772969138, "grad_norm": 0.17339017987251282, "learning_rate": 1.0361252938400012e-05, "loss": 0.3766, "num_tokens": 1466749671.0, "step": 2316 }, { "epoch": 0.2739742225375429, "grad_norm": 0.16877548396587372, "learning_rate": 1.0344342315437721e-05, "loss": 0.3971, "num_tokens": 1467388729.0, "step": 2317 }, { "epoch": 0.2740924677781719, "grad_norm": 0.14575250446796417, "learning_rate": 1.0327461672568127e-05, "loss": 0.3433, "num_tokens": 1468022678.0, "step": 2318 }, { "epoch": 0.274210713018801, "grad_norm": 0.16383600234985352, "learning_rate": 1.0310611032129282e-05, "loss": 0.3782, "num_tokens": 1468653588.0, "step": 2319 }, { "epoch": 0.27432895825943004, "grad_norm": 0.15298399329185486, "learning_rate": 1.0293790416419522e-05, "loss": 0.3774, "num_tokens": 1469292071.0, "step": 2320 }, { "epoch": 0.27444720350005913, "grad_norm": 0.15629802644252777, "learning_rate": 1.0276999847697484e-05, "loss": 0.3509, "num_tokens": 1469926130.0, "step": 2321 }, { "epoch": 0.27456544874068817, "grad_norm": 0.1455887109041214, "learning_rate": 1.0260239348182013e-05, "loss": 0.3298, "num_tokens": 1470565682.0, "step": 2322 }, { "epoch": 0.27468369398131726, "grad_norm": 0.15099728107452393, "learning_rate": 1.0243508940052179e-05, "loss": 0.3377, "num_tokens": 1471199661.0, "step": 2323 }, { "epoch": 0.2748019392219463, "grad_norm": 0.15650267899036407, "learning_rate": 1.0226808645447229e-05, "loss": 0.3179, "num_tokens": 1471837079.0, "step": 2324 }, { "epoch": 0.2749201844625754, "grad_norm": 0.15078777074813843, "learning_rate": 1.0210138486466549e-05, "loss": 0.3531, "num_tokens": 1472470748.0, "step": 2325 }, { "epoch": 0.27503842970320447, "grad_norm": 0.14938890933990479, "learning_rate": 1.0193498485169674e-05, "loss": 0.3335, "num_tokens": 1473076369.0, "step": 2326 }, { "epoch": 0.2751566749438335, "grad_norm": 0.17257998883724213, "learning_rate": 1.0176888663576212e-05, "loss": 0.344, "num_tokens": 1473713052.0, "step": 2327 }, { "epoch": 0.2752749201844626, "grad_norm": 0.16291533410549164, "learning_rate": 1.0160309043665832e-05, "loss": 0.3318, "num_tokens": 1474350921.0, "step": 2328 }, { "epoch": 0.2753931654250916, "grad_norm": 0.15999779105186462, "learning_rate": 1.0143759647378238e-05, "loss": 0.4077, "num_tokens": 1474987408.0, "step": 2329 }, { "epoch": 0.2755114106657207, "grad_norm": 0.1630454659461975, "learning_rate": 1.0127240496613158e-05, "loss": 0.3422, "num_tokens": 1475621528.0, "step": 2330 }, { "epoch": 0.27562965590634975, "grad_norm": 0.15796898305416107, "learning_rate": 1.0110751613230277e-05, "loss": 0.3445, "num_tokens": 1476256968.0, "step": 2331 }, { "epoch": 0.27574790114697884, "grad_norm": 0.16335657238960266, "learning_rate": 1.0094293019049237e-05, "loss": 0.321, "num_tokens": 1476887629.0, "step": 2332 }, { "epoch": 0.2758661463876079, "grad_norm": 0.14884811639785767, "learning_rate": 1.0077864735849589e-05, "loss": 0.3735, "num_tokens": 1477522575.0, "step": 2333 }, { "epoch": 0.27598439162823696, "grad_norm": 0.15638722479343414, "learning_rate": 1.0061466785370775e-05, "loss": 0.3373, "num_tokens": 1478158656.0, "step": 2334 }, { "epoch": 0.27610263686886605, "grad_norm": 0.16069157421588898, "learning_rate": 1.0045099189312113e-05, "loss": 0.3649, "num_tokens": 1478794818.0, "step": 2335 }, { "epoch": 0.2762208821094951, "grad_norm": 0.14972923696041107, "learning_rate": 1.002876196933274e-05, "loss": 0.3375, "num_tokens": 1479428236.0, "step": 2336 }, { "epoch": 0.27633912735012417, "grad_norm": 0.153402641415596, "learning_rate": 1.0012455147051596e-05, "loss": 0.3564, "num_tokens": 1480061891.0, "step": 2337 }, { "epoch": 0.2764573725907532, "grad_norm": 0.16654513776302338, "learning_rate": 9.9961787440474e-06, "loss": 0.3946, "num_tokens": 1480701365.0, "step": 2338 }, { "epoch": 0.2765756178313823, "grad_norm": 0.1553163081407547, "learning_rate": 9.979932781858607e-06, "loss": 0.337, "num_tokens": 1481336637.0, "step": 2339 }, { "epoch": 0.2766938630720113, "grad_norm": 0.1627359241247177, "learning_rate": 9.963717281983415e-06, "loss": 0.3462, "num_tokens": 1481974800.0, "step": 2340 }, { "epoch": 0.2768121083126404, "grad_norm": 0.1517624706029892, "learning_rate": 9.947532265879685e-06, "loss": 0.3314, "num_tokens": 1482607289.0, "step": 2341 }, { "epoch": 0.2769303535532695, "grad_norm": 0.16129785776138306, "learning_rate": 9.931377754964957e-06, "loss": 0.3646, "num_tokens": 1483243948.0, "step": 2342 }, { "epoch": 0.27704859879389854, "grad_norm": 38.480674743652344, "learning_rate": 9.915253770616389e-06, "loss": 1.4308, "num_tokens": 1483845955.0, "step": 2343 }, { "epoch": 0.2771668440345276, "grad_norm": 0.16987748444080353, "learning_rate": 9.899160334170746e-06, "loss": 0.3543, "num_tokens": 1484479960.0, "step": 2344 }, { "epoch": 0.27728508927515666, "grad_norm": 0.1605212539434433, "learning_rate": 9.883097466924388e-06, "loss": 0.3566, "num_tokens": 1485115529.0, "step": 2345 }, { "epoch": 0.27740333451578575, "grad_norm": 0.15978476405143738, "learning_rate": 9.867065190133201e-06, "loss": 0.3562, "num_tokens": 1485751394.0, "step": 2346 }, { "epoch": 0.2775215797564148, "grad_norm": 0.15629255771636963, "learning_rate": 9.851063525012598e-06, "loss": 0.3737, "num_tokens": 1486390225.0, "step": 2347 }, { "epoch": 0.27763982499704387, "grad_norm": 0.1548636257648468, "learning_rate": 9.835092492737483e-06, "loss": 0.3469, "num_tokens": 1486988102.0, "step": 2348 }, { "epoch": 0.27775807023767296, "grad_norm": 0.15224124491214752, "learning_rate": 9.819152114442225e-06, "loss": 0.3431, "num_tokens": 1487623515.0, "step": 2349 }, { "epoch": 0.277876315478302, "grad_norm": 0.18440452218055725, "learning_rate": 9.803242411220631e-06, "loss": 0.3828, "num_tokens": 1488259828.0, "step": 2350 }, { "epoch": 0.2779945607189311, "grad_norm": 0.14934492111206055, "learning_rate": 9.78736340412592e-06, "loss": 0.3422, "num_tokens": 1488891872.0, "step": 2351 }, { "epoch": 0.2781128059595601, "grad_norm": 0.16854149103164673, "learning_rate": 9.771515114170672e-06, "loss": 0.4027, "num_tokens": 1489527101.0, "step": 2352 }, { "epoch": 0.2782310512001892, "grad_norm": 0.1552664041519165, "learning_rate": 9.755697562326836e-06, "loss": 0.3541, "num_tokens": 1490165911.0, "step": 2353 }, { "epoch": 0.27834929644081824, "grad_norm": 0.14984580874443054, "learning_rate": 9.73991076952568e-06, "loss": 0.3422, "num_tokens": 1490802923.0, "step": 2354 }, { "epoch": 0.27846754168144733, "grad_norm": 0.1623169183731079, "learning_rate": 9.724154756657781e-06, "loss": 0.3602, "num_tokens": 1491439721.0, "step": 2355 }, { "epoch": 0.27858578692207636, "grad_norm": 0.15729185938835144, "learning_rate": 9.70842954457296e-06, "loss": 0.3458, "num_tokens": 1492078820.0, "step": 2356 }, { "epoch": 0.27870403216270545, "grad_norm": 0.15174199640750885, "learning_rate": 9.692735154080303e-06, "loss": 0.3309, "num_tokens": 1492714650.0, "step": 2357 }, { "epoch": 0.27882227740333454, "grad_norm": 0.1500135362148285, "learning_rate": 9.677071605948097e-06, "loss": 0.3087, "num_tokens": 1493341798.0, "step": 2358 }, { "epoch": 0.2789405226439636, "grad_norm": 0.16156241297721863, "learning_rate": 9.661438920903808e-06, "loss": 0.3864, "num_tokens": 1493978267.0, "step": 2359 }, { "epoch": 0.27905876788459266, "grad_norm": 0.14532062411308289, "learning_rate": 9.645837119634094e-06, "loss": 0.3069, "num_tokens": 1494612569.0, "step": 2360 }, { "epoch": 0.2791770131252217, "grad_norm": 0.16516432166099548, "learning_rate": 9.630266222784709e-06, "loss": 0.3665, "num_tokens": 1495250024.0, "step": 2361 }, { "epoch": 0.2792952583658508, "grad_norm": 0.15118518471717834, "learning_rate": 9.61472625096053e-06, "loss": 0.3517, "num_tokens": 1495879619.0, "step": 2362 }, { "epoch": 0.2794135036064798, "grad_norm": 0.15301978588104248, "learning_rate": 9.599217224725496e-06, "loss": 0.3815, "num_tokens": 1496509723.0, "step": 2363 }, { "epoch": 0.2795317488471089, "grad_norm": 0.1620192974805832, "learning_rate": 9.583739164602621e-06, "loss": 0.3602, "num_tokens": 1497139086.0, "step": 2364 }, { "epoch": 0.27964999408773794, "grad_norm": 0.14597781002521515, "learning_rate": 9.568292091073922e-06, "loss": 0.3078, "num_tokens": 1497769353.0, "step": 2365 }, { "epoch": 0.27976823932836703, "grad_norm": 0.14736135303974152, "learning_rate": 9.552876024580412e-06, "loss": 0.3148, "num_tokens": 1498406713.0, "step": 2366 }, { "epoch": 0.2798864845689961, "grad_norm": 0.16254974901676178, "learning_rate": 9.537490985522081e-06, "loss": 0.3268, "num_tokens": 1499042860.0, "step": 2367 }, { "epoch": 0.28000472980962515, "grad_norm": 0.14526546001434326, "learning_rate": 9.52213699425785e-06, "loss": 0.3151, "num_tokens": 1499673955.0, "step": 2368 }, { "epoch": 0.28012297505025424, "grad_norm": 0.13753469288349152, "learning_rate": 9.506814071105575e-06, "loss": 0.3189, "num_tokens": 1500308786.0, "step": 2369 }, { "epoch": 0.2802412202908833, "grad_norm": 0.14920324087142944, "learning_rate": 9.491522236341976e-06, "loss": 0.316, "num_tokens": 1500943668.0, "step": 2370 }, { "epoch": 0.28035946553151236, "grad_norm": 0.14196032285690308, "learning_rate": 9.476261510202648e-06, "loss": 0.3167, "num_tokens": 1501575125.0, "step": 2371 }, { "epoch": 0.2804777107721414, "grad_norm": 0.14756308495998383, "learning_rate": 9.461031912882011e-06, "loss": 0.3828, "num_tokens": 1502205209.0, "step": 2372 }, { "epoch": 0.2805959560127705, "grad_norm": 0.15238507091999054, "learning_rate": 9.4458334645333e-06, "loss": 0.3794, "num_tokens": 1502841700.0, "step": 2373 }, { "epoch": 0.2807142012533996, "grad_norm": 0.14375653862953186, "learning_rate": 9.430666185268535e-06, "loss": 0.339, "num_tokens": 1503478757.0, "step": 2374 }, { "epoch": 0.2808324464940286, "grad_norm": 0.14228053390979767, "learning_rate": 9.415530095158473e-06, "loss": 0.3186, "num_tokens": 1504114367.0, "step": 2375 }, { "epoch": 0.2809506917346577, "grad_norm": 0.15334486961364746, "learning_rate": 9.400425214232616e-06, "loss": 0.3335, "num_tokens": 1504750346.0, "step": 2376 }, { "epoch": 0.28106893697528673, "grad_norm": 0.1638639122247696, "learning_rate": 9.385351562479158e-06, "loss": 0.3512, "num_tokens": 1505373996.0, "step": 2377 }, { "epoch": 0.2811871822159158, "grad_norm": 0.14417004585266113, "learning_rate": 9.370309159844967e-06, "loss": 0.3221, "num_tokens": 1506006742.0, "step": 2378 }, { "epoch": 0.28130542745654485, "grad_norm": 0.15065158903598785, "learning_rate": 9.35529802623557e-06, "loss": 0.3403, "num_tokens": 1506642591.0, "step": 2379 }, { "epoch": 0.28142367269717394, "grad_norm": 0.15715816617012024, "learning_rate": 9.3403181815151e-06, "loss": 0.3177, "num_tokens": 1507279859.0, "step": 2380 }, { "epoch": 0.281541917937803, "grad_norm": 0.16456620395183563, "learning_rate": 9.325369645506298e-06, "loss": 0.3342, "num_tokens": 1507913520.0, "step": 2381 }, { "epoch": 0.28166016317843207, "grad_norm": 0.15010853111743927, "learning_rate": 9.31045243799047e-06, "loss": 0.3382, "num_tokens": 1508547624.0, "step": 2382 }, { "epoch": 0.28177840841906115, "grad_norm": 0.16282407939434052, "learning_rate": 9.295566578707456e-06, "loss": 0.3657, "num_tokens": 1509186196.0, "step": 2383 }, { "epoch": 0.2818966536596902, "grad_norm": 0.14777715504169464, "learning_rate": 9.280712087355635e-06, "loss": 0.337, "num_tokens": 1509824371.0, "step": 2384 }, { "epoch": 0.2820148989003193, "grad_norm": 0.1608475148677826, "learning_rate": 9.265888983591861e-06, "loss": 0.3411, "num_tokens": 1510453337.0, "step": 2385 }, { "epoch": 0.2821331441409483, "grad_norm": 0.15267649292945862, "learning_rate": 9.251097287031453e-06, "loss": 0.323, "num_tokens": 1511089984.0, "step": 2386 }, { "epoch": 0.2822513893815774, "grad_norm": 0.14557266235351562, "learning_rate": 9.23633701724817e-06, "loss": 0.3359, "num_tokens": 1511723104.0, "step": 2387 }, { "epoch": 0.28236963462220643, "grad_norm": 0.15417338907718658, "learning_rate": 9.22160819377419e-06, "loss": 0.3459, "num_tokens": 1512361915.0, "step": 2388 }, { "epoch": 0.2824878798628355, "grad_norm": 0.15560688078403473, "learning_rate": 9.206910836100078e-06, "loss": 0.3907, "num_tokens": 1512989309.0, "step": 2389 }, { "epoch": 0.2826061251034646, "grad_norm": 0.1650230437517166, "learning_rate": 9.19224496367475e-06, "loss": 0.3583, "num_tokens": 1513623233.0, "step": 2390 }, { "epoch": 0.28272437034409365, "grad_norm": 0.16392244398593903, "learning_rate": 9.177610595905472e-06, "loss": 0.3566, "num_tokens": 1514257994.0, "step": 2391 }, { "epoch": 0.28284261558472273, "grad_norm": 0.16251103579998016, "learning_rate": 9.16300775215781e-06, "loss": 0.3546, "num_tokens": 1514895129.0, "step": 2392 }, { "epoch": 0.28296086082535177, "grad_norm": 0.15515641868114471, "learning_rate": 9.148436451755609e-06, "loss": 0.3172, "num_tokens": 1515533555.0, "step": 2393 }, { "epoch": 0.28307910606598086, "grad_norm": 0.14485909044742584, "learning_rate": 9.133896713980998e-06, "loss": 0.3138, "num_tokens": 1516170546.0, "step": 2394 }, { "epoch": 0.2831973513066099, "grad_norm": 0.14556768536567688, "learning_rate": 9.119388558074312e-06, "loss": 0.2966, "num_tokens": 1516799730.0, "step": 2395 }, { "epoch": 0.283315596547239, "grad_norm": 0.17382657527923584, "learning_rate": 9.104912003234105e-06, "loss": 0.3483, "num_tokens": 1517436780.0, "step": 2396 }, { "epoch": 0.283433841787868, "grad_norm": 0.15845876932144165, "learning_rate": 9.09046706861711e-06, "loss": 0.3901, "num_tokens": 1518072732.0, "step": 2397 }, { "epoch": 0.2835520870284971, "grad_norm": 0.15543769299983978, "learning_rate": 9.076053773338229e-06, "loss": 0.3586, "num_tokens": 1518710901.0, "step": 2398 }, { "epoch": 0.2836703322691262, "grad_norm": 0.15003569424152374, "learning_rate": 9.061672136470488e-06, "loss": 0.3336, "num_tokens": 1519345313.0, "step": 2399 }, { "epoch": 0.2837885775097552, "grad_norm": 0.1602921187877655, "learning_rate": 9.047322177045008e-06, "loss": 0.3591, "num_tokens": 1519980264.0, "step": 2400 }, { "epoch": 0.2839068227503843, "grad_norm": 0.17690017819404602, "learning_rate": 9.033003914051014e-06, "loss": 0.3711, "num_tokens": 1520615916.0, "step": 2401 }, { "epoch": 0.28402506799101335, "grad_norm": 0.1696273237466812, "learning_rate": 9.018717366435759e-06, "loss": 0.3715, "num_tokens": 1521252102.0, "step": 2402 }, { "epoch": 0.28414331323164244, "grad_norm": 0.14623281359672546, "learning_rate": 9.004462553104562e-06, "loss": 0.3344, "num_tokens": 1521879862.0, "step": 2403 }, { "epoch": 0.28426155847227147, "grad_norm": 0.1635226607322693, "learning_rate": 8.990239492920721e-06, "loss": 0.3781, "num_tokens": 1522505201.0, "step": 2404 }, { "epoch": 0.28437980371290056, "grad_norm": 0.143572598695755, "learning_rate": 8.976048204705527e-06, "loss": 0.3311, "num_tokens": 1523144696.0, "step": 2405 }, { "epoch": 0.28449804895352965, "grad_norm": 0.14635297656059265, "learning_rate": 8.961888707238222e-06, "loss": 0.3456, "num_tokens": 1523776301.0, "step": 2406 }, { "epoch": 0.2846162941941587, "grad_norm": 0.15382792055606842, "learning_rate": 8.94776101925598e-06, "loss": 0.3524, "num_tokens": 1524411665.0, "step": 2407 }, { "epoch": 0.28473453943478777, "grad_norm": 0.15080878138542175, "learning_rate": 8.93366515945389e-06, "loss": 0.3482, "num_tokens": 1525049675.0, "step": 2408 }, { "epoch": 0.2848527846754168, "grad_norm": 0.14847759902477264, "learning_rate": 8.919601146484912e-06, "loss": 0.3085, "num_tokens": 1525681212.0, "step": 2409 }, { "epoch": 0.2849710299160459, "grad_norm": 0.15085552632808685, "learning_rate": 8.90556899895987e-06, "loss": 0.3404, "num_tokens": 1526316616.0, "step": 2410 }, { "epoch": 0.2850892751566749, "grad_norm": 0.15188775956630707, "learning_rate": 8.891568735447421e-06, "loss": 0.3498, "num_tokens": 1526950609.0, "step": 2411 }, { "epoch": 0.285207520397304, "grad_norm": 0.1585957109928131, "learning_rate": 8.877600374474018e-06, "loss": 0.3697, "num_tokens": 1527587441.0, "step": 2412 }, { "epoch": 0.28532576563793305, "grad_norm": 0.15482857823371887, "learning_rate": 8.863663934523919e-06, "loss": 0.3782, "num_tokens": 1528223473.0, "step": 2413 }, { "epoch": 0.28544401087856214, "grad_norm": 0.14563055336475372, "learning_rate": 8.849759434039126e-06, "loss": 0.3582, "num_tokens": 1528862184.0, "step": 2414 }, { "epoch": 0.2855622561191912, "grad_norm": 0.1472221165895462, "learning_rate": 8.835886891419377e-06, "loss": 0.3469, "num_tokens": 1529498134.0, "step": 2415 }, { "epoch": 0.28568050135982026, "grad_norm": 0.15884767472743988, "learning_rate": 8.822046325022123e-06, "loss": 0.3557, "num_tokens": 1530132720.0, "step": 2416 }, { "epoch": 0.28579874660044935, "grad_norm": 0.14727573096752167, "learning_rate": 8.808237753162495e-06, "loss": 0.34, "num_tokens": 1530766312.0, "step": 2417 }, { "epoch": 0.2859169918410784, "grad_norm": 0.15706580877304077, "learning_rate": 8.794461194113302e-06, "loss": 0.3628, "num_tokens": 1531401703.0, "step": 2418 }, { "epoch": 0.28603523708170747, "grad_norm": 0.1499369889497757, "learning_rate": 8.780716666104976e-06, "loss": 0.3531, "num_tokens": 1532033358.0, "step": 2419 }, { "epoch": 0.2861534823223365, "grad_norm": 0.16059350967407227, "learning_rate": 8.767004187325567e-06, "loss": 0.3328, "num_tokens": 1532666946.0, "step": 2420 }, { "epoch": 0.2862717275629656, "grad_norm": 0.15611490607261658, "learning_rate": 8.75332377592071e-06, "loss": 0.3531, "num_tokens": 1533305486.0, "step": 2421 }, { "epoch": 0.28638997280359463, "grad_norm": 0.16063754260540009, "learning_rate": 8.739675449993611e-06, "loss": 0.3369, "num_tokens": 1533935802.0, "step": 2422 }, { "epoch": 0.2865082180442237, "grad_norm": 0.17527389526367188, "learning_rate": 8.726059227605018e-06, "loss": 0.3663, "num_tokens": 1534537936.0, "step": 2423 }, { "epoch": 0.2866264632848528, "grad_norm": 0.15854455530643463, "learning_rate": 8.712475126773192e-06, "loss": 0.3627, "num_tokens": 1535176503.0, "step": 2424 }, { "epoch": 0.28674470852548184, "grad_norm": 0.1538389027118683, "learning_rate": 8.698923165473894e-06, "loss": 0.306, "num_tokens": 1535809878.0, "step": 2425 }, { "epoch": 0.28686295376611093, "grad_norm": 0.14468614757061005, "learning_rate": 8.685403361640349e-06, "loss": 0.3063, "num_tokens": 1536444197.0, "step": 2426 }, { "epoch": 0.28698119900673996, "grad_norm": 0.15404705703258514, "learning_rate": 8.671915733163223e-06, "loss": 0.3336, "num_tokens": 1537077940.0, "step": 2427 }, { "epoch": 0.28709944424736905, "grad_norm": 0.14619845151901245, "learning_rate": 8.658460297890626e-06, "loss": 0.3092, "num_tokens": 1537709405.0, "step": 2428 }, { "epoch": 0.2872176894879981, "grad_norm": 0.1538306325674057, "learning_rate": 8.645037073628046e-06, "loss": 0.3657, "num_tokens": 1538346163.0, "step": 2429 }, { "epoch": 0.2873359347286272, "grad_norm": 0.1490619033575058, "learning_rate": 8.631646078138352e-06, "loss": 0.4068, "num_tokens": 1538982802.0, "step": 2430 }, { "epoch": 0.28745417996925626, "grad_norm": 0.1757480949163437, "learning_rate": 8.618287329141761e-06, "loss": 0.3608, "num_tokens": 1539617968.0, "step": 2431 }, { "epoch": 0.2875724252098853, "grad_norm": 0.14783507585525513, "learning_rate": 8.604960844315839e-06, "loss": 0.3197, "num_tokens": 1540248847.0, "step": 2432 }, { "epoch": 0.2876906704505144, "grad_norm": 0.14284883439540863, "learning_rate": 8.59166664129543e-06, "loss": 0.3541, "num_tokens": 1540884115.0, "step": 2433 }, { "epoch": 0.2878089156911434, "grad_norm": 0.16350941359996796, "learning_rate": 8.578404737672674e-06, "loss": 0.3229, "num_tokens": 1541519078.0, "step": 2434 }, { "epoch": 0.2879271609317725, "grad_norm": 0.1476208120584488, "learning_rate": 8.565175150996966e-06, "loss": 0.3008, "num_tokens": 1542154829.0, "step": 2435 }, { "epoch": 0.28804540617240154, "grad_norm": 0.14756110310554504, "learning_rate": 8.55197789877493e-06, "loss": 0.337, "num_tokens": 1542787615.0, "step": 2436 }, { "epoch": 0.28816365141303063, "grad_norm": 0.14862175285816193, "learning_rate": 8.53881299847042e-06, "loss": 0.3599, "num_tokens": 1543422823.0, "step": 2437 }, { "epoch": 0.28828189665365966, "grad_norm": 0.1573106050491333, "learning_rate": 8.525680467504469e-06, "loss": 0.3862, "num_tokens": 1544061139.0, "step": 2438 }, { "epoch": 0.28840014189428875, "grad_norm": 0.15238477289676666, "learning_rate": 8.512580323255266e-06, "loss": 0.3918, "num_tokens": 1544680188.0, "step": 2439 }, { "epoch": 0.28851838713491784, "grad_norm": 0.17096689343452454, "learning_rate": 8.499512583058153e-06, "loss": 0.4132, "num_tokens": 1545317687.0, "step": 2440 }, { "epoch": 0.2886366323755469, "grad_norm": 0.15198521316051483, "learning_rate": 8.486477264205592e-06, "loss": 0.3426, "num_tokens": 1545950953.0, "step": 2441 }, { "epoch": 0.28875487761617596, "grad_norm": 0.1503327190876007, "learning_rate": 8.473474383947138e-06, "loss": 0.3267, "num_tokens": 1546570745.0, "step": 2442 }, { "epoch": 0.288873122856805, "grad_norm": 0.1454770714044571, "learning_rate": 8.46050395948943e-06, "loss": 0.3253, "num_tokens": 1547208758.0, "step": 2443 }, { "epoch": 0.2889913680974341, "grad_norm": 0.16143880784511566, "learning_rate": 8.447566007996142e-06, "loss": 0.3414, "num_tokens": 1547841621.0, "step": 2444 }, { "epoch": 0.2891096133380631, "grad_norm": 0.16132719814777374, "learning_rate": 8.434660546587994e-06, "loss": 0.3751, "num_tokens": 1548474236.0, "step": 2445 }, { "epoch": 0.2892278585786922, "grad_norm": 0.15949557721614838, "learning_rate": 8.421787592342694e-06, "loss": 0.3318, "num_tokens": 1549102957.0, "step": 2446 }, { "epoch": 0.2893461038193213, "grad_norm": 0.1593041867017746, "learning_rate": 8.40894716229495e-06, "loss": 0.3544, "num_tokens": 1549738034.0, "step": 2447 }, { "epoch": 0.28946434905995033, "grad_norm": 0.15824440121650696, "learning_rate": 8.396139273436427e-06, "loss": 0.3356, "num_tokens": 1550367757.0, "step": 2448 }, { "epoch": 0.2895825943005794, "grad_norm": 0.14293761551380157, "learning_rate": 8.383363942715722e-06, "loss": 0.3418, "num_tokens": 1551002943.0, "step": 2449 }, { "epoch": 0.28970083954120845, "grad_norm": 0.155470073223114, "learning_rate": 8.37062118703835e-06, "loss": 0.332, "num_tokens": 1551640916.0, "step": 2450 }, { "epoch": 0.28981908478183754, "grad_norm": 0.16884839534759521, "learning_rate": 8.357911023266716e-06, "loss": 0.3752, "num_tokens": 1552279719.0, "step": 2451 }, { "epoch": 0.2899373300224666, "grad_norm": 0.15490181744098663, "learning_rate": 8.34523346822012e-06, "loss": 0.3553, "num_tokens": 1552909547.0, "step": 2452 }, { "epoch": 0.29005557526309567, "grad_norm": 0.1494501233100891, "learning_rate": 8.332588538674671e-06, "loss": 0.3218, "num_tokens": 1553547378.0, "step": 2453 }, { "epoch": 0.2901738205037247, "grad_norm": 0.16306337714195251, "learning_rate": 8.319976251363345e-06, "loss": 0.3499, "num_tokens": 1554186991.0, "step": 2454 }, { "epoch": 0.2902920657443538, "grad_norm": 0.16215814650058746, "learning_rate": 8.307396622975894e-06, "loss": 0.3755, "num_tokens": 1554823738.0, "step": 2455 }, { "epoch": 0.2904103109849829, "grad_norm": 0.14529335498809814, "learning_rate": 8.294849670158859e-06, "loss": 0.3713, "num_tokens": 1555463056.0, "step": 2456 }, { "epoch": 0.2905285562256119, "grad_norm": 0.18343862891197205, "learning_rate": 8.282335409515559e-06, "loss": 0.3582, "num_tokens": 1556098586.0, "step": 2457 }, { "epoch": 0.290646801466241, "grad_norm": 0.15133658051490784, "learning_rate": 8.26985385760603e-06, "loss": 0.3536, "num_tokens": 1556734027.0, "step": 2458 }, { "epoch": 0.29076504670687003, "grad_norm": 0.16418562829494476, "learning_rate": 8.257405030947032e-06, "loss": 0.3539, "num_tokens": 1557369690.0, "step": 2459 }, { "epoch": 0.2908832919474991, "grad_norm": 0.1541043072938919, "learning_rate": 8.244988946012026e-06, "loss": 0.3597, "num_tokens": 1557999303.0, "step": 2460 }, { "epoch": 0.29100153718812816, "grad_norm": 0.17861461639404297, "learning_rate": 8.232605619231132e-06, "loss": 0.3616, "num_tokens": 1558634807.0, "step": 2461 }, { "epoch": 0.29111978242875725, "grad_norm": 0.1503981500864029, "learning_rate": 8.220255066991141e-06, "loss": 0.304, "num_tokens": 1559269638.0, "step": 2462 }, { "epoch": 0.29123802766938633, "grad_norm": 0.150724858045578, "learning_rate": 8.207937305635455e-06, "loss": 0.3233, "num_tokens": 1559902736.0, "step": 2463 }, { "epoch": 0.29135627291001537, "grad_norm": 0.1694389432668686, "learning_rate": 8.1956523514641e-06, "loss": 0.3651, "num_tokens": 1560537476.0, "step": 2464 }, { "epoch": 0.29147451815064446, "grad_norm": 0.15489940345287323, "learning_rate": 8.18340022073367e-06, "loss": 0.3424, "num_tokens": 1561173697.0, "step": 2465 }, { "epoch": 0.2915927633912735, "grad_norm": 0.1514696478843689, "learning_rate": 8.171180929657341e-06, "loss": 0.3378, "num_tokens": 1561809371.0, "step": 2466 }, { "epoch": 0.2917110086319026, "grad_norm": 0.14901064336299896, "learning_rate": 8.158994494404828e-06, "loss": 0.3397, "num_tokens": 1562448843.0, "step": 2467 }, { "epoch": 0.2918292538725316, "grad_norm": 0.20931477844715118, "learning_rate": 8.146840931102362e-06, "loss": 0.3752, "num_tokens": 1563085995.0, "step": 2468 }, { "epoch": 0.2919474991131607, "grad_norm": 0.17268788814544678, "learning_rate": 8.13472025583268e-06, "loss": 0.4279, "num_tokens": 1563717068.0, "step": 2469 }, { "epoch": 0.29206574435378974, "grad_norm": 0.16520725190639496, "learning_rate": 8.122632484634991e-06, "loss": 0.3953, "num_tokens": 1564350110.0, "step": 2470 }, { "epoch": 0.2921839895944188, "grad_norm": 0.159401997923851, "learning_rate": 8.110577633504978e-06, "loss": 0.3674, "num_tokens": 1564985340.0, "step": 2471 }, { "epoch": 0.2923022348350479, "grad_norm": 0.14242856204509735, "learning_rate": 8.098555718394746e-06, "loss": 0.3257, "num_tokens": 1565619951.0, "step": 2472 }, { "epoch": 0.29242048007567695, "grad_norm": 0.1541231870651245, "learning_rate": 8.086566755212826e-06, "loss": 0.3472, "num_tokens": 1566247164.0, "step": 2473 }, { "epoch": 0.29253872531630604, "grad_norm": 0.1567128747701645, "learning_rate": 8.074610759824134e-06, "loss": 0.3766, "num_tokens": 1566885564.0, "step": 2474 }, { "epoch": 0.29265697055693507, "grad_norm": 0.14308445155620575, "learning_rate": 8.062687748049966e-06, "loss": 0.3116, "num_tokens": 1567517351.0, "step": 2475 }, { "epoch": 0.29277521579756416, "grad_norm": 0.1461796760559082, "learning_rate": 8.050797735667974e-06, "loss": 0.3298, "num_tokens": 1568152846.0, "step": 2476 }, { "epoch": 0.2928934610381932, "grad_norm": 0.1555767059326172, "learning_rate": 8.038940738412134e-06, "loss": 0.3536, "num_tokens": 1568780581.0, "step": 2477 }, { "epoch": 0.2930117062788223, "grad_norm": 0.14955316483974457, "learning_rate": 8.027116771972743e-06, "loss": 0.331, "num_tokens": 1569415183.0, "step": 2478 }, { "epoch": 0.2931299515194513, "grad_norm": 0.17126750946044922, "learning_rate": 8.015325851996378e-06, "loss": 0.3697, "num_tokens": 1570054656.0, "step": 2479 }, { "epoch": 0.2932481967600804, "grad_norm": 0.15955372154712677, "learning_rate": 8.00356799408589e-06, "loss": 0.3548, "num_tokens": 1570688875.0, "step": 2480 }, { "epoch": 0.2933664420007095, "grad_norm": 0.1520116925239563, "learning_rate": 7.991843213800385e-06, "loss": 0.3196, "num_tokens": 1571324827.0, "step": 2481 }, { "epoch": 0.2934846872413385, "grad_norm": 0.1544649600982666, "learning_rate": 7.980151526655194e-06, "loss": 0.3522, "num_tokens": 1571958663.0, "step": 2482 }, { "epoch": 0.2936029324819676, "grad_norm": 0.14922268688678741, "learning_rate": 7.968492948121852e-06, "loss": 0.3601, "num_tokens": 1572588508.0, "step": 2483 }, { "epoch": 0.29372117772259665, "grad_norm": 0.16230155527591705, "learning_rate": 7.956867493628082e-06, "loss": 0.3358, "num_tokens": 1573222363.0, "step": 2484 }, { "epoch": 0.29383942296322574, "grad_norm": 0.1554425209760666, "learning_rate": 7.945275178557774e-06, "loss": 0.3626, "num_tokens": 1573857267.0, "step": 2485 }, { "epoch": 0.29395766820385477, "grad_norm": 0.153177872300148, "learning_rate": 7.933716018250979e-06, "loss": 0.3245, "num_tokens": 1574489833.0, "step": 2486 }, { "epoch": 0.29407591344448386, "grad_norm": 0.1506984978914261, "learning_rate": 7.92219002800385e-06, "loss": 0.3347, "num_tokens": 1575127826.0, "step": 2487 }, { "epoch": 0.29419415868511295, "grad_norm": 0.16485412418842316, "learning_rate": 7.91069722306867e-06, "loss": 0.379, "num_tokens": 1575764942.0, "step": 2488 }, { "epoch": 0.294312403925742, "grad_norm": 0.131722092628479, "learning_rate": 7.899237618653793e-06, "loss": 0.3212, "num_tokens": 1576396971.0, "step": 2489 }, { "epoch": 0.2944306491663711, "grad_norm": 0.14967596530914307, "learning_rate": 7.887811229923635e-06, "loss": 0.3866, "num_tokens": 1577032933.0, "step": 2490 }, { "epoch": 0.2945488944070001, "grad_norm": 0.14169523119926453, "learning_rate": 7.876418071998677e-06, "loss": 0.3577, "num_tokens": 1577670215.0, "step": 2491 }, { "epoch": 0.2946671396476292, "grad_norm": 0.15427854657173157, "learning_rate": 7.865058159955415e-06, "loss": 0.3404, "num_tokens": 1578304041.0, "step": 2492 }, { "epoch": 0.29478538488825823, "grad_norm": 0.14424122869968414, "learning_rate": 7.853731508826346e-06, "loss": 0.3259, "num_tokens": 1578942969.0, "step": 2493 }, { "epoch": 0.2949036301288873, "grad_norm": 0.15108096599578857, "learning_rate": 7.842438133599956e-06, "loss": 0.3657, "num_tokens": 1579580392.0, "step": 2494 }, { "epoch": 0.29502187536951635, "grad_norm": 0.16395127773284912, "learning_rate": 7.831178049220699e-06, "loss": 0.3409, "num_tokens": 1580212819.0, "step": 2495 }, { "epoch": 0.29514012061014544, "grad_norm": 0.14246340095996857, "learning_rate": 7.819951270588985e-06, "loss": 0.3328, "num_tokens": 1580842095.0, "step": 2496 }, { "epoch": 0.29525836585077453, "grad_norm": 0.14513429999351501, "learning_rate": 7.808757812561132e-06, "loss": 0.3627, "num_tokens": 1581477926.0, "step": 2497 }, { "epoch": 0.29537661109140356, "grad_norm": 0.15380927920341492, "learning_rate": 7.79759768994938e-06, "loss": 0.3452, "num_tokens": 1582109535.0, "step": 2498 }, { "epoch": 0.29549485633203265, "grad_norm": 0.15641339123249054, "learning_rate": 7.786470917521844e-06, "loss": 0.355, "num_tokens": 1582745020.0, "step": 2499 }, { "epoch": 0.2956131015726617, "grad_norm": 0.1374625712633133, "learning_rate": 7.775377510002519e-06, "loss": 0.3431, "num_tokens": 1583382664.0, "step": 2500 }, { "epoch": 0.2957313468132908, "grad_norm": 0.1453300416469574, "learning_rate": 7.764317482071242e-06, "loss": 0.3383, "num_tokens": 1584021873.0, "step": 2501 }, { "epoch": 0.2958495920539198, "grad_norm": 0.1649884134531021, "learning_rate": 7.753290848363678e-06, "loss": 0.3865, "num_tokens": 1584661560.0, "step": 2502 }, { "epoch": 0.2959678372945489, "grad_norm": 5.652665615081787, "learning_rate": 7.742297623471309e-06, "loss": 0.5273, "num_tokens": 1585262729.0, "step": 2503 }, { "epoch": 0.296086082535178, "grad_norm": 0.16333463788032532, "learning_rate": 7.73133782194139e-06, "loss": 0.3621, "num_tokens": 1585884360.0, "step": 2504 }, { "epoch": 0.296204327775807, "grad_norm": 0.15930195152759552, "learning_rate": 7.720411458276974e-06, "loss": 0.392, "num_tokens": 1586513378.0, "step": 2505 }, { "epoch": 0.2963225730164361, "grad_norm": 0.15929850935935974, "learning_rate": 7.709518546936844e-06, "loss": 0.333, "num_tokens": 1587146261.0, "step": 2506 }, { "epoch": 0.29644081825706514, "grad_norm": 0.14589621126651764, "learning_rate": 7.698659102335525e-06, "loss": 0.3441, "num_tokens": 1587784709.0, "step": 2507 }, { "epoch": 0.29655906349769423, "grad_norm": 0.1511000245809555, "learning_rate": 7.687833138843248e-06, "loss": 0.3299, "num_tokens": 1588417863.0, "step": 2508 }, { "epoch": 0.29667730873832326, "grad_norm": 0.14837408065795898, "learning_rate": 7.677040670785942e-06, "loss": 0.3657, "num_tokens": 1589052471.0, "step": 2509 }, { "epoch": 0.29679555397895235, "grad_norm": 0.15598666667938232, "learning_rate": 7.666281712445225e-06, "loss": 0.3694, "num_tokens": 1589685366.0, "step": 2510 }, { "epoch": 0.2969137992195814, "grad_norm": 0.14032559096813202, "learning_rate": 7.655556278058356e-06, "loss": 0.3292, "num_tokens": 1590322671.0, "step": 2511 }, { "epoch": 0.2970320444602105, "grad_norm": 0.15163357555866241, "learning_rate": 7.644864381818234e-06, "loss": 0.3212, "num_tokens": 1590953678.0, "step": 2512 }, { "epoch": 0.29715028970083956, "grad_norm": 0.1445772647857666, "learning_rate": 7.63420603787338e-06, "loss": 0.3457, "num_tokens": 1591588343.0, "step": 2513 }, { "epoch": 0.2972685349414686, "grad_norm": 0.1554974913597107, "learning_rate": 7.623581260327911e-06, "loss": 0.3543, "num_tokens": 1592222393.0, "step": 2514 }, { "epoch": 0.2973867801820977, "grad_norm": 0.16030625998973846, "learning_rate": 7.6129900632415415e-06, "loss": 0.3763, "num_tokens": 1592836737.0, "step": 2515 }, { "epoch": 0.2975050254227267, "grad_norm": 0.15744347870349884, "learning_rate": 7.6024324606295274e-06, "loss": 0.3728, "num_tokens": 1593468479.0, "step": 2516 }, { "epoch": 0.2976232706633558, "grad_norm": 0.1460658609867096, "learning_rate": 7.591908466462682e-06, "loss": 0.3173, "num_tokens": 1594103240.0, "step": 2517 }, { "epoch": 0.29774151590398484, "grad_norm": 0.14362597465515137, "learning_rate": 7.5814180946673465e-06, "loss": 0.3054, "num_tokens": 1594736479.0, "step": 2518 }, { "epoch": 0.29785976114461393, "grad_norm": 0.14968115091323853, "learning_rate": 7.570961359125354e-06, "loss": 0.3409, "num_tokens": 1595369844.0, "step": 2519 }, { "epoch": 0.297978006385243, "grad_norm": 20.09839630126953, "learning_rate": 7.560538273674054e-06, "loss": 0.8587, "num_tokens": 1595969594.0, "step": 2520 }, { "epoch": 0.29809625162587206, "grad_norm": 0.1636546552181244, "learning_rate": 7.550148852106242e-06, "loss": 0.3541, "num_tokens": 1596607685.0, "step": 2521 }, { "epoch": 0.29821449686650114, "grad_norm": 0.1554463803768158, "learning_rate": 7.539793108170178e-06, "loss": 0.338, "num_tokens": 1597246441.0, "step": 2522 }, { "epoch": 0.2983327421071302, "grad_norm": 0.16188600659370422, "learning_rate": 7.529471055569549e-06, "loss": 0.3647, "num_tokens": 1597882201.0, "step": 2523 }, { "epoch": 0.29845098734775927, "grad_norm": 0.158077672123909, "learning_rate": 7.519182707963472e-06, "loss": 0.3499, "num_tokens": 1598512865.0, "step": 2524 }, { "epoch": 0.2985692325883883, "grad_norm": 0.14716975390911102, "learning_rate": 7.508928078966451e-06, "loss": 0.3353, "num_tokens": 1599149984.0, "step": 2525 }, { "epoch": 0.2986874778290174, "grad_norm": 0.15012039244174957, "learning_rate": 7.498707182148375e-06, "loss": 0.334, "num_tokens": 1599780457.0, "step": 2526 }, { "epoch": 0.2988057230696464, "grad_norm": 0.1462370604276657, "learning_rate": 7.488520031034491e-06, "loss": 0.3484, "num_tokens": 1600414611.0, "step": 2527 }, { "epoch": 0.2989239683102755, "grad_norm": 0.14537328481674194, "learning_rate": 7.478366639105394e-06, "loss": 0.3615, "num_tokens": 1601051962.0, "step": 2528 }, { "epoch": 0.2990422135509046, "grad_norm": 0.1465734839439392, "learning_rate": 7.468247019797004e-06, "loss": 0.3504, "num_tokens": 1601682372.0, "step": 2529 }, { "epoch": 0.29916045879153363, "grad_norm": 0.16139309108257294, "learning_rate": 7.458161186500556e-06, "loss": 0.3698, "num_tokens": 1602307670.0, "step": 2530 }, { "epoch": 0.2992787040321627, "grad_norm": 0.14881828427314758, "learning_rate": 7.448109152562571e-06, "loss": 0.3622, "num_tokens": 1602943700.0, "step": 2531 }, { "epoch": 0.29939694927279176, "grad_norm": 0.16552668809890747, "learning_rate": 7.438090931284837e-06, "loss": 0.353, "num_tokens": 1603576967.0, "step": 2532 }, { "epoch": 0.29951519451342085, "grad_norm": 0.14624199271202087, "learning_rate": 7.428106535924409e-06, "loss": 0.3338, "num_tokens": 1604215712.0, "step": 2533 }, { "epoch": 0.2996334397540499, "grad_norm": 0.1505880057811737, "learning_rate": 7.418155979693578e-06, "loss": 0.3329, "num_tokens": 1604816424.0, "step": 2534 }, { "epoch": 0.29975168499467897, "grad_norm": 0.1545705497264862, "learning_rate": 7.408239275759853e-06, "loss": 0.3326, "num_tokens": 1605422776.0, "step": 2535 }, { "epoch": 0.299869930235308, "grad_norm": 0.1565445214509964, "learning_rate": 7.398356437245944e-06, "loss": 0.3406, "num_tokens": 1606055073.0, "step": 2536 }, { "epoch": 0.2999881754759371, "grad_norm": 0.16159765422344208, "learning_rate": 7.388507477229758e-06, "loss": 0.3546, "num_tokens": 1606691741.0, "step": 2537 }, { "epoch": 0.3001064207165662, "grad_norm": 0.15116626024246216, "learning_rate": 7.3786924087443494e-06, "loss": 0.3564, "num_tokens": 1607324599.0, "step": 2538 }, { "epoch": 0.3002246659571952, "grad_norm": 0.1525968313217163, "learning_rate": 7.368911244777954e-06, "loss": 0.3124, "num_tokens": 1607960797.0, "step": 2539 }, { "epoch": 0.3003429111978243, "grad_norm": 0.1497633010149002, "learning_rate": 7.359163998273918e-06, "loss": 0.3612, "num_tokens": 1608569909.0, "step": 2540 }, { "epoch": 0.30046115643845334, "grad_norm": 0.15262489020824432, "learning_rate": 7.349450682130711e-06, "loss": 0.3599, "num_tokens": 1609206721.0, "step": 2541 }, { "epoch": 0.3005794016790824, "grad_norm": 0.15345512330532074, "learning_rate": 7.339771309201911e-06, "loss": 0.3594, "num_tokens": 1609836379.0, "step": 2542 }, { "epoch": 0.30069764691971146, "grad_norm": 0.15568511188030243, "learning_rate": 7.330125892296162e-06, "loss": 0.3374, "num_tokens": 1610467723.0, "step": 2543 }, { "epoch": 0.30081589216034055, "grad_norm": 0.1619885116815567, "learning_rate": 7.320514444177199e-06, "loss": 0.3578, "num_tokens": 1611102219.0, "step": 2544 }, { "epoch": 0.30093413740096964, "grad_norm": 0.15388430655002594, "learning_rate": 7.3109369775637855e-06, "loss": 0.3487, "num_tokens": 1611737217.0, "step": 2545 }, { "epoch": 0.30105238264159867, "grad_norm": 0.15204574167728424, "learning_rate": 7.301393505129721e-06, "loss": 0.3263, "num_tokens": 1612372064.0, "step": 2546 }, { "epoch": 0.30117062788222776, "grad_norm": 0.14779198169708252, "learning_rate": 7.291884039503831e-06, "loss": 0.3464, "num_tokens": 1613007358.0, "step": 2547 }, { "epoch": 0.3012888731228568, "grad_norm": 0.16607439517974854, "learning_rate": 7.282408593269922e-06, "loss": 0.3447, "num_tokens": 1613639636.0, "step": 2548 }, { "epoch": 0.3014071183634859, "grad_norm": 0.14590926468372345, "learning_rate": 7.272967178966806e-06, "loss": 0.3287, "num_tokens": 1614272364.0, "step": 2549 }, { "epoch": 0.3015253636041149, "grad_norm": 0.1410803347826004, "learning_rate": 7.263559809088245e-06, "loss": 0.3463, "num_tokens": 1614901916.0, "step": 2550 }, { "epoch": 0.301643608844744, "grad_norm": 0.14510096609592438, "learning_rate": 7.254186496082951e-06, "loss": 0.3264, "num_tokens": 1615538690.0, "step": 2551 }, { "epoch": 0.30176185408537304, "grad_norm": 0.15616920590400696, "learning_rate": 7.244847252354571e-06, "loss": 0.3539, "num_tokens": 1616173042.0, "step": 2552 }, { "epoch": 0.3018800993260021, "grad_norm": 0.15855540335178375, "learning_rate": 7.23554209026167e-06, "loss": 0.3964, "num_tokens": 1616809845.0, "step": 2553 }, { "epoch": 0.3019983445666312, "grad_norm": 0.1399592161178589, "learning_rate": 7.226271022117714e-06, "loss": 0.3345, "num_tokens": 1617446340.0, "step": 2554 }, { "epoch": 0.30211658980726025, "grad_norm": 0.14557190239429474, "learning_rate": 7.217034060191049e-06, "loss": 0.3508, "num_tokens": 1618075970.0, "step": 2555 }, { "epoch": 0.30223483504788934, "grad_norm": 0.15270313620567322, "learning_rate": 7.20783121670489e-06, "loss": 0.3556, "num_tokens": 1618702541.0, "step": 2556 }, { "epoch": 0.30235308028851837, "grad_norm": 0.15719246864318848, "learning_rate": 7.198662503837305e-06, "loss": 0.3453, "num_tokens": 1619339972.0, "step": 2557 }, { "epoch": 0.30247132552914746, "grad_norm": 0.14748413860797882, "learning_rate": 7.18952793372119e-06, "loss": 0.3571, "num_tokens": 1619975087.0, "step": 2558 }, { "epoch": 0.3025895707697765, "grad_norm": 0.16462109982967377, "learning_rate": 7.180427518444273e-06, "loss": 0.363, "num_tokens": 1620598247.0, "step": 2559 }, { "epoch": 0.3027078160104056, "grad_norm": 0.15592196583747864, "learning_rate": 7.1713612700490745e-06, "loss": 0.3595, "num_tokens": 1621233754.0, "step": 2560 }, { "epoch": 0.3028260612510347, "grad_norm": 0.16148506104946136, "learning_rate": 7.162329200532902e-06, "loss": 0.3835, "num_tokens": 1621865701.0, "step": 2561 }, { "epoch": 0.3029443064916637, "grad_norm": 0.14827576279640198, "learning_rate": 7.153331321847842e-06, "loss": 0.3607, "num_tokens": 1622502297.0, "step": 2562 }, { "epoch": 0.3030625517322928, "grad_norm": 0.15333227813243866, "learning_rate": 7.144367645900729e-06, "loss": 0.3723, "num_tokens": 1623133709.0, "step": 2563 }, { "epoch": 0.30318079697292183, "grad_norm": 0.14636582136154175, "learning_rate": 7.135438184553143e-06, "loss": 0.3669, "num_tokens": 1623765687.0, "step": 2564 }, { "epoch": 0.3032990422135509, "grad_norm": 0.15707650780677795, "learning_rate": 7.126542949621384e-06, "loss": 0.3607, "num_tokens": 1624390373.0, "step": 2565 }, { "epoch": 0.30341728745417995, "grad_norm": 0.15035566687583923, "learning_rate": 7.1176819528764625e-06, "loss": 0.3441, "num_tokens": 1625025113.0, "step": 2566 }, { "epoch": 0.30353553269480904, "grad_norm": 0.15434648096561432, "learning_rate": 7.108855206044077e-06, "loss": 0.3414, "num_tokens": 1625657956.0, "step": 2567 }, { "epoch": 0.3036537779354381, "grad_norm": 0.1429792046546936, "learning_rate": 7.100062720804617e-06, "loss": 0.3331, "num_tokens": 1626290658.0, "step": 2568 }, { "epoch": 0.30377202317606716, "grad_norm": 0.14561037719249725, "learning_rate": 7.091304508793116e-06, "loss": 0.3643, "num_tokens": 1626927675.0, "step": 2569 }, { "epoch": 0.30389026841669625, "grad_norm": 0.1610872894525528, "learning_rate": 7.082580581599263e-06, "loss": 0.3487, "num_tokens": 1627561965.0, "step": 2570 }, { "epoch": 0.3040085136573253, "grad_norm": 0.15274368226528168, "learning_rate": 7.073890950767382e-06, "loss": 0.302, "num_tokens": 1628198126.0, "step": 2571 }, { "epoch": 0.3041267588979544, "grad_norm": 0.1400538682937622, "learning_rate": 7.0652356277963985e-06, "loss": 0.3281, "num_tokens": 1628832165.0, "step": 2572 }, { "epoch": 0.3042450041385834, "grad_norm": 0.15818189084529877, "learning_rate": 7.056614624139859e-06, "loss": 0.3515, "num_tokens": 1629466351.0, "step": 2573 }, { "epoch": 0.3043632493792125, "grad_norm": 0.1607942134141922, "learning_rate": 7.048027951205882e-06, "loss": 0.3882, "num_tokens": 1630103503.0, "step": 2574 }, { "epoch": 0.30448149461984153, "grad_norm": 0.17156703770160675, "learning_rate": 7.039475620357156e-06, "loss": 0.3725, "num_tokens": 1630741258.0, "step": 2575 }, { "epoch": 0.3045997398604706, "grad_norm": 0.15256287157535553, "learning_rate": 7.030957642910928e-06, "loss": 0.335, "num_tokens": 1631378278.0, "step": 2576 }, { "epoch": 0.30471798510109965, "grad_norm": 0.1651080846786499, "learning_rate": 7.022474030138984e-06, "loss": 0.3239, "num_tokens": 1632012029.0, "step": 2577 }, { "epoch": 0.30483623034172874, "grad_norm": 0.14645303785800934, "learning_rate": 7.014024793267643e-06, "loss": 0.3164, "num_tokens": 1632649835.0, "step": 2578 }, { "epoch": 0.30495447558235783, "grad_norm": 0.1718732863664627, "learning_rate": 7.005609943477723e-06, "loss": 0.359, "num_tokens": 1633279626.0, "step": 2579 }, { "epoch": 0.30507272082298686, "grad_norm": 0.16827356815338135, "learning_rate": 6.9972294919045465e-06, "loss": 0.3314, "num_tokens": 1633913718.0, "step": 2580 }, { "epoch": 0.30519096606361595, "grad_norm": 0.1514851301908493, "learning_rate": 6.9888834496379095e-06, "loss": 0.3225, "num_tokens": 1634549519.0, "step": 2581 }, { "epoch": 0.305309211304245, "grad_norm": 0.13561248779296875, "learning_rate": 6.98057182772208e-06, "loss": 0.3311, "num_tokens": 1635181293.0, "step": 2582 }, { "epoch": 0.3054274565448741, "grad_norm": 0.1461554765701294, "learning_rate": 6.972294637155777e-06, "loss": 0.3516, "num_tokens": 1635817022.0, "step": 2583 }, { "epoch": 0.3055457017855031, "grad_norm": 0.1553107649087906, "learning_rate": 6.964051888892156e-06, "loss": 0.3586, "num_tokens": 1636452163.0, "step": 2584 }, { "epoch": 0.3056639470261322, "grad_norm": 0.17162953317165375, "learning_rate": 6.955843593838796e-06, "loss": 0.3717, "num_tokens": 1637091122.0, "step": 2585 }, { "epoch": 0.3057821922667613, "grad_norm": 0.15595267713069916, "learning_rate": 6.9476697628576825e-06, "loss": 0.3752, "num_tokens": 1637730453.0, "step": 2586 }, { "epoch": 0.3059004375073903, "grad_norm": 0.14371612668037415, "learning_rate": 6.939530406765194e-06, "loss": 0.3042, "num_tokens": 1638367703.0, "step": 2587 }, { "epoch": 0.3060186827480194, "grad_norm": 0.16365359723567963, "learning_rate": 6.931425536332091e-06, "loss": 0.3877, "num_tokens": 1638999767.0, "step": 2588 }, { "epoch": 0.30613692798864844, "grad_norm": 0.15286149084568024, "learning_rate": 6.923355162283503e-06, "loss": 0.3677, "num_tokens": 1639639175.0, "step": 2589 }, { "epoch": 0.30625517322927753, "grad_norm": 0.16202640533447266, "learning_rate": 6.915319295298903e-06, "loss": 0.3488, "num_tokens": 1640268937.0, "step": 2590 }, { "epoch": 0.30637341846990657, "grad_norm": 0.1711316555738449, "learning_rate": 6.907317946012105e-06, "loss": 0.3854, "num_tokens": 1640905144.0, "step": 2591 }, { "epoch": 0.30649166371053566, "grad_norm": 0.15269654989242554, "learning_rate": 6.89935112501124e-06, "loss": 0.3579, "num_tokens": 1641535980.0, "step": 2592 }, { "epoch": 0.3066099089511647, "grad_norm": 0.14511872828006744, "learning_rate": 6.891418842838766e-06, "loss": 0.3148, "num_tokens": 1642164823.0, "step": 2593 }, { "epoch": 0.3067281541917938, "grad_norm": 0.1409059464931488, "learning_rate": 6.883521109991411e-06, "loss": 0.3365, "num_tokens": 1642792813.0, "step": 2594 }, { "epoch": 0.30684639943242287, "grad_norm": 0.14985384047031403, "learning_rate": 6.875657936920204e-06, "loss": 0.3078, "num_tokens": 1643418555.0, "step": 2595 }, { "epoch": 0.3069646446730519, "grad_norm": 0.16204628348350525, "learning_rate": 6.86782933403043e-06, "loss": 0.3503, "num_tokens": 1644034211.0, "step": 2596 }, { "epoch": 0.307082889913681, "grad_norm": 0.14266403019428253, "learning_rate": 6.860035311681629e-06, "loss": 0.3332, "num_tokens": 1644670523.0, "step": 2597 }, { "epoch": 0.30720113515431, "grad_norm": 0.1625642627477646, "learning_rate": 6.852275880187587e-06, "loss": 0.3591, "num_tokens": 1645306242.0, "step": 2598 }, { "epoch": 0.3073193803949391, "grad_norm": 0.13715782761573792, "learning_rate": 6.844551049816312e-06, "loss": 0.3074, "num_tokens": 1645938779.0, "step": 2599 }, { "epoch": 0.30743762563556815, "grad_norm": 0.16159267723560333, "learning_rate": 6.8368608307900235e-06, "loss": 0.3655, "num_tokens": 1646575837.0, "step": 2600 }, { "epoch": 0.30755587087619723, "grad_norm": 0.1451670527458191, "learning_rate": 6.8292052332851405e-06, "loss": 0.3118, "num_tokens": 1647211588.0, "step": 2601 }, { "epoch": 0.3076741161168263, "grad_norm": 0.15132470428943634, "learning_rate": 6.821584267432268e-06, "loss": 0.3341, "num_tokens": 1647847594.0, "step": 2602 }, { "epoch": 0.30779236135745536, "grad_norm": 0.15491485595703125, "learning_rate": 6.813997943316185e-06, "loss": 0.3666, "num_tokens": 1648480650.0, "step": 2603 }, { "epoch": 0.30791060659808445, "grad_norm": 0.1458674818277359, "learning_rate": 6.8064462709758295e-06, "loss": 0.3328, "num_tokens": 1649117682.0, "step": 2604 }, { "epoch": 0.3080288518387135, "grad_norm": 0.14040695130825043, "learning_rate": 6.798929260404277e-06, "loss": 0.295, "num_tokens": 1649751752.0, "step": 2605 }, { "epoch": 0.30814709707934257, "grad_norm": 0.15380658209323883, "learning_rate": 6.791446921548744e-06, "loss": 0.3448, "num_tokens": 1650391077.0, "step": 2606 }, { "epoch": 0.3082653423199716, "grad_norm": 0.1441585123538971, "learning_rate": 6.783999264310565e-06, "loss": 0.3339, "num_tokens": 1651025851.0, "step": 2607 }, { "epoch": 0.3083835875606007, "grad_norm": 0.1481962352991104, "learning_rate": 6.776586298545181e-06, "loss": 0.3348, "num_tokens": 1651663895.0, "step": 2608 }, { "epoch": 0.3085018328012297, "grad_norm": 0.1441476196050644, "learning_rate": 6.769208034062119e-06, "loss": 0.3345, "num_tokens": 1652297334.0, "step": 2609 }, { "epoch": 0.3086200780418588, "grad_norm": 0.15080349147319794, "learning_rate": 6.761864480624994e-06, "loss": 0.3585, "num_tokens": 1652930319.0, "step": 2610 }, { "epoch": 0.3087383232824879, "grad_norm": 0.16040252149105072, "learning_rate": 6.754555647951481e-06, "loss": 0.3402, "num_tokens": 1653558106.0, "step": 2611 }, { "epoch": 0.30885656852311694, "grad_norm": 0.1390003263950348, "learning_rate": 6.747281545713321e-06, "loss": 0.2883, "num_tokens": 1654190622.0, "step": 2612 }, { "epoch": 0.308974813763746, "grad_norm": 0.1431875079870224, "learning_rate": 6.740042183536288e-06, "loss": 0.3487, "num_tokens": 1654804468.0, "step": 2613 }, { "epoch": 0.30909305900437506, "grad_norm": 0.15379014611244202, "learning_rate": 6.73283757100018e-06, "loss": 0.3333, "num_tokens": 1655436872.0, "step": 2614 }, { "epoch": 0.30921130424500415, "grad_norm": 0.17064590752124786, "learning_rate": 6.725667717638816e-06, "loss": 0.371, "num_tokens": 1656073073.0, "step": 2615 }, { "epoch": 0.3093295494856332, "grad_norm": 0.14026252925395966, "learning_rate": 6.718532632940025e-06, "loss": 0.3295, "num_tokens": 1656704728.0, "step": 2616 }, { "epoch": 0.30944779472626227, "grad_norm": 0.1622316539287567, "learning_rate": 6.711432326345615e-06, "loss": 0.3299, "num_tokens": 1657332271.0, "step": 2617 }, { "epoch": 0.30956603996689136, "grad_norm": 0.16228297352790833, "learning_rate": 6.704366807251383e-06, "loss": 0.3521, "num_tokens": 1657948359.0, "step": 2618 }, { "epoch": 0.3096842852075204, "grad_norm": 0.17386050522327423, "learning_rate": 6.697336085007086e-06, "loss": 0.4092, "num_tokens": 1658582924.0, "step": 2619 }, { "epoch": 0.3098025304481495, "grad_norm": 0.15338410437107086, "learning_rate": 6.690340168916435e-06, "loss": 0.3586, "num_tokens": 1659208653.0, "step": 2620 }, { "epoch": 0.3099207756887785, "grad_norm": 0.14601671695709229, "learning_rate": 6.683379068237077e-06, "loss": 0.3084, "num_tokens": 1659845271.0, "step": 2621 }, { "epoch": 0.3100390209294076, "grad_norm": 0.16629400849342346, "learning_rate": 6.676452792180601e-06, "loss": 0.3571, "num_tokens": 1660468585.0, "step": 2622 }, { "epoch": 0.31015726617003664, "grad_norm": 0.1492847353219986, "learning_rate": 6.669561349912505e-06, "loss": 0.3345, "num_tokens": 1661101485.0, "step": 2623 }, { "epoch": 0.3102755114106657, "grad_norm": 0.14829352498054504, "learning_rate": 6.66270475055219e-06, "loss": 0.319, "num_tokens": 1661735662.0, "step": 2624 }, { "epoch": 0.31039375665129476, "grad_norm": 0.15923385322093964, "learning_rate": 6.655883003172954e-06, "loss": 0.4076, "num_tokens": 1662373879.0, "step": 2625 }, { "epoch": 0.31051200189192385, "grad_norm": 0.15548096597194672, "learning_rate": 6.649096116801971e-06, "loss": 0.3547, "num_tokens": 1663008427.0, "step": 2626 }, { "epoch": 0.31063024713255294, "grad_norm": 0.15542833507061005, "learning_rate": 6.642344100420285e-06, "loss": 0.3544, "num_tokens": 1663638900.0, "step": 2627 }, { "epoch": 0.310748492373182, "grad_norm": 0.14559157192707062, "learning_rate": 6.635626962962804e-06, "loss": 0.3098, "num_tokens": 1664272324.0, "step": 2628 }, { "epoch": 0.31086673761381106, "grad_norm": 0.17403483390808105, "learning_rate": 6.62894471331827e-06, "loss": 0.3971, "num_tokens": 1664904228.0, "step": 2629 }, { "epoch": 0.3109849828544401, "grad_norm": 0.15496373176574707, "learning_rate": 6.6222973603292636e-06, "loss": 0.3408, "num_tokens": 1665537753.0, "step": 2630 }, { "epoch": 0.3111032280950692, "grad_norm": 0.16460204124450684, "learning_rate": 6.61568491279218e-06, "loss": 0.3652, "num_tokens": 1666174169.0, "step": 2631 }, { "epoch": 0.3112214733356982, "grad_norm": 0.16071675717830658, "learning_rate": 6.6091073794572425e-06, "loss": 0.3514, "num_tokens": 1666803754.0, "step": 2632 }, { "epoch": 0.3113397185763273, "grad_norm": 0.1600765883922577, "learning_rate": 6.602564769028456e-06, "loss": 0.3304, "num_tokens": 1667438241.0, "step": 2633 }, { "epoch": 0.31145796381695634, "grad_norm": 0.15328559279441833, "learning_rate": 6.596057090163614e-06, "loss": 0.3284, "num_tokens": 1668076450.0, "step": 2634 }, { "epoch": 0.31157620905758543, "grad_norm": 0.154999241232872, "learning_rate": 6.589584351474286e-06, "loss": 0.3584, "num_tokens": 1668712082.0, "step": 2635 }, { "epoch": 0.3116944542982145, "grad_norm": 0.14657165110111237, "learning_rate": 6.583146561525815e-06, "loss": 0.3807, "num_tokens": 1669348399.0, "step": 2636 }, { "epoch": 0.31181269953884355, "grad_norm": 0.15919843316078186, "learning_rate": 6.576743728837283e-06, "loss": 0.3351, "num_tokens": 1669984836.0, "step": 2637 }, { "epoch": 0.31193094477947264, "grad_norm": 0.15095235407352448, "learning_rate": 6.570375861881518e-06, "loss": 0.3035, "num_tokens": 1670619936.0, "step": 2638 }, { "epoch": 0.3120491900201017, "grad_norm": 0.16317443549633026, "learning_rate": 6.564042969085087e-06, "loss": 0.3509, "num_tokens": 1671255448.0, "step": 2639 }, { "epoch": 0.31216743526073076, "grad_norm": 0.1617879569530487, "learning_rate": 6.557745058828257e-06, "loss": 0.3665, "num_tokens": 1671888642.0, "step": 2640 }, { "epoch": 0.3122856805013598, "grad_norm": 0.1533808708190918, "learning_rate": 6.551482139445026e-06, "loss": 0.3438, "num_tokens": 1672528375.0, "step": 2641 }, { "epoch": 0.3124039257419889, "grad_norm": 0.15954667329788208, "learning_rate": 6.54525421922307e-06, "loss": 0.3904, "num_tokens": 1673167791.0, "step": 2642 }, { "epoch": 0.312522170982618, "grad_norm": 0.15365970134735107, "learning_rate": 6.5390613064037576e-06, "loss": 0.332, "num_tokens": 1673807365.0, "step": 2643 }, { "epoch": 0.312640416223247, "grad_norm": 0.15740928053855896, "learning_rate": 6.532903409182135e-06, "loss": 0.3659, "num_tokens": 1674442513.0, "step": 2644 }, { "epoch": 0.3127586614638761, "grad_norm": 0.15962831676006317, "learning_rate": 6.526780535706904e-06, "loss": 0.3486, "num_tokens": 1675075724.0, "step": 2645 }, { "epoch": 0.31287690670450513, "grad_norm": 0.16220061480998993, "learning_rate": 6.52069269408043e-06, "loss": 0.3534, "num_tokens": 1675710105.0, "step": 2646 }, { "epoch": 0.3129951519451342, "grad_norm": 0.15352794528007507, "learning_rate": 6.51463989235872e-06, "loss": 0.3395, "num_tokens": 1676349650.0, "step": 2647 }, { "epoch": 0.31311339718576325, "grad_norm": 0.1553868055343628, "learning_rate": 6.508622138551402e-06, "loss": 0.3394, "num_tokens": 1676989186.0, "step": 2648 }, { "epoch": 0.31323164242639234, "grad_norm": 0.15505124628543854, "learning_rate": 6.502639440621739e-06, "loss": 0.3812, "num_tokens": 1677628500.0, "step": 2649 }, { "epoch": 0.3133498876670214, "grad_norm": 0.14550581574440002, "learning_rate": 6.49669180648659e-06, "loss": 0.3103, "num_tokens": 1678243011.0, "step": 2650 }, { "epoch": 0.31346813290765047, "grad_norm": 0.16432583332061768, "learning_rate": 6.490779244016433e-06, "loss": 0.3595, "num_tokens": 1678877133.0, "step": 2651 }, { "epoch": 0.31358637814827955, "grad_norm": 0.14240172505378723, "learning_rate": 6.484901761035319e-06, "loss": 0.3482, "num_tokens": 1679511112.0, "step": 2652 }, { "epoch": 0.3137046233889086, "grad_norm": 0.1719961315393448, "learning_rate": 6.479059365320886e-06, "loss": 0.3884, "num_tokens": 1680144682.0, "step": 2653 }, { "epoch": 0.3138228686295377, "grad_norm": 0.15486003458499908, "learning_rate": 6.473252064604342e-06, "loss": 0.3532, "num_tokens": 1680769049.0, "step": 2654 }, { "epoch": 0.3139411138701667, "grad_norm": 0.14725635945796967, "learning_rate": 6.46747986657045e-06, "loss": 0.3594, "num_tokens": 1681408352.0, "step": 2655 }, { "epoch": 0.3140593591107958, "grad_norm": 0.15648604929447174, "learning_rate": 6.46174277885753e-06, "loss": 0.3512, "num_tokens": 1682038448.0, "step": 2656 }, { "epoch": 0.31417760435142483, "grad_norm": 0.15705391764640808, "learning_rate": 6.456040809057429e-06, "loss": 0.3592, "num_tokens": 1682676961.0, "step": 2657 }, { "epoch": 0.3142958495920539, "grad_norm": 0.15460099279880524, "learning_rate": 6.450373964715527e-06, "loss": 0.3493, "num_tokens": 1683310802.0, "step": 2658 }, { "epoch": 0.314414094832683, "grad_norm": 0.14110355079174042, "learning_rate": 6.444742253330732e-06, "loss": 0.356, "num_tokens": 1683948414.0, "step": 2659 }, { "epoch": 0.31453234007331204, "grad_norm": 0.15325553715229034, "learning_rate": 6.439145682355447e-06, "loss": 0.3738, "num_tokens": 1684582469.0, "step": 2660 }, { "epoch": 0.31465058531394113, "grad_norm": 0.15117822587490082, "learning_rate": 6.433584259195583e-06, "loss": 0.3557, "num_tokens": 1685219110.0, "step": 2661 }, { "epoch": 0.31476883055457017, "grad_norm": 0.15664592385292053, "learning_rate": 6.428057991210534e-06, "loss": 0.3209, "num_tokens": 1685852803.0, "step": 2662 }, { "epoch": 0.31488707579519926, "grad_norm": 0.1503203809261322, "learning_rate": 6.422566885713179e-06, "loss": 0.3275, "num_tokens": 1686488656.0, "step": 2663 }, { "epoch": 0.3150053210358283, "grad_norm": 0.16129250824451447, "learning_rate": 6.417110949969865e-06, "loss": 0.3434, "num_tokens": 1687123786.0, "step": 2664 }, { "epoch": 0.3151235662764574, "grad_norm": 0.15377874672412872, "learning_rate": 6.411690191200393e-06, "loss": 0.3625, "num_tokens": 1687756907.0, "step": 2665 }, { "epoch": 0.3152418115170864, "grad_norm": 0.1461174190044403, "learning_rate": 6.406304616578026e-06, "loss": 0.3234, "num_tokens": 1688393115.0, "step": 2666 }, { "epoch": 0.3153600567577155, "grad_norm": 0.14871680736541748, "learning_rate": 6.400954233229461e-06, "loss": 0.3529, "num_tokens": 1689028874.0, "step": 2667 }, { "epoch": 0.3154783019983446, "grad_norm": 0.16240054368972778, "learning_rate": 6.395639048234825e-06, "loss": 0.3768, "num_tokens": 1689668481.0, "step": 2668 }, { "epoch": 0.3155965472389736, "grad_norm": 0.14038841426372528, "learning_rate": 6.390359068627669e-06, "loss": 0.3306, "num_tokens": 1690307233.0, "step": 2669 }, { "epoch": 0.3157147924796027, "grad_norm": 0.15858112275600433, "learning_rate": 6.38511430139496e-06, "loss": 0.3381, "num_tokens": 1690936000.0, "step": 2670 }, { "epoch": 0.31583303772023175, "grad_norm": 0.14911122620105743, "learning_rate": 6.379904753477067e-06, "loss": 0.3699, "num_tokens": 1691574884.0, "step": 2671 }, { "epoch": 0.31595128296086084, "grad_norm": 0.14863522350788116, "learning_rate": 6.374730431767749e-06, "loss": 0.3198, "num_tokens": 1692206801.0, "step": 2672 }, { "epoch": 0.31606952820148987, "grad_norm": 0.14785444736480713, "learning_rate": 6.369591343114158e-06, "loss": 0.3431, "num_tokens": 1692843392.0, "step": 2673 }, { "epoch": 0.31618777344211896, "grad_norm": 0.156451016664505, "learning_rate": 6.364487494316815e-06, "loss": 0.3979, "num_tokens": 1693481747.0, "step": 2674 }, { "epoch": 0.31630601868274805, "grad_norm": 0.15187658369541168, "learning_rate": 6.359418892129616e-06, "loss": 0.3667, "num_tokens": 1694120880.0, "step": 2675 }, { "epoch": 0.3164242639233771, "grad_norm": 0.15651914477348328, "learning_rate": 6.354385543259807e-06, "loss": 0.3522, "num_tokens": 1694755717.0, "step": 2676 }, { "epoch": 0.31654250916400617, "grad_norm": 0.14550632238388062, "learning_rate": 6.349387454367989e-06, "loss": 0.3614, "num_tokens": 1695389815.0, "step": 2677 }, { "epoch": 0.3166607544046352, "grad_norm": 0.1500931680202484, "learning_rate": 6.344424632068104e-06, "loss": 0.3344, "num_tokens": 1696025843.0, "step": 2678 }, { "epoch": 0.3167789996452643, "grad_norm": 0.1742960661649704, "learning_rate": 6.339497082927423e-06, "loss": 0.355, "num_tokens": 1696659035.0, "step": 2679 }, { "epoch": 0.3168972448858933, "grad_norm": 0.15291573107242584, "learning_rate": 6.334604813466541e-06, "loss": 0.3375, "num_tokens": 1697289201.0, "step": 2680 }, { "epoch": 0.3170154901265224, "grad_norm": 0.15118835866451263, "learning_rate": 6.32974783015937e-06, "loss": 0.3301, "num_tokens": 1697925113.0, "step": 2681 }, { "epoch": 0.31713373536715145, "grad_norm": 0.1467960923910141, "learning_rate": 6.324926139433123e-06, "loss": 0.3395, "num_tokens": 1698558395.0, "step": 2682 }, { "epoch": 0.31725198060778054, "grad_norm": 0.15137924253940582, "learning_rate": 6.320139747668318e-06, "loss": 0.3641, "num_tokens": 1699196086.0, "step": 2683 }, { "epoch": 0.3173702258484096, "grad_norm": 0.13839225471019745, "learning_rate": 6.315388661198751e-06, "loss": 0.2986, "num_tokens": 1699829880.0, "step": 2684 }, { "epoch": 0.31748847108903866, "grad_norm": 0.16591571271419525, "learning_rate": 6.310672886311513e-06, "loss": 0.3798, "num_tokens": 1700459160.0, "step": 2685 }, { "epoch": 0.31760671632966775, "grad_norm": 0.14456845819950104, "learning_rate": 6.305992429246957e-06, "loss": 0.3326, "num_tokens": 1701090510.0, "step": 2686 }, { "epoch": 0.3177249615702968, "grad_norm": 0.15700989961624146, "learning_rate": 6.301347296198703e-06, "loss": 0.3672, "num_tokens": 1701726174.0, "step": 2687 }, { "epoch": 0.31784320681092587, "grad_norm": 0.175731360912323, "learning_rate": 6.2967374933136255e-06, "loss": 0.3796, "num_tokens": 1702361400.0, "step": 2688 }, { "epoch": 0.3179614520515549, "grad_norm": 0.1563163846731186, "learning_rate": 6.292163026691851e-06, "loss": 0.3667, "num_tokens": 1702993806.0, "step": 2689 }, { "epoch": 0.318079697292184, "grad_norm": 0.1482599675655365, "learning_rate": 6.28762390238674e-06, "loss": 0.3112, "num_tokens": 1703632945.0, "step": 2690 }, { "epoch": 0.318197942532813, "grad_norm": 0.15436552464962006, "learning_rate": 6.283120126404893e-06, "loss": 0.3446, "num_tokens": 1704265862.0, "step": 2691 }, { "epoch": 0.3183161877734421, "grad_norm": 0.1484438180923462, "learning_rate": 6.278651704706125e-06, "loss": 0.3433, "num_tokens": 1704866810.0, "step": 2692 }, { "epoch": 0.3184344330140712, "grad_norm": 0.15252937376499176, "learning_rate": 6.274218643203471e-06, "loss": 0.3451, "num_tokens": 1705501981.0, "step": 2693 }, { "epoch": 0.31855267825470024, "grad_norm": 0.16562378406524658, "learning_rate": 6.2698209477631745e-06, "loss": 0.355, "num_tokens": 1706139459.0, "step": 2694 }, { "epoch": 0.31867092349532933, "grad_norm": 0.15192921459674835, "learning_rate": 6.265458624204681e-06, "loss": 0.3204, "num_tokens": 1706776278.0, "step": 2695 }, { "epoch": 0.31878916873595836, "grad_norm": 0.1544736921787262, "learning_rate": 6.261131678300624e-06, "loss": 0.3347, "num_tokens": 1707408737.0, "step": 2696 }, { "epoch": 0.31890741397658745, "grad_norm": 0.17091700434684753, "learning_rate": 6.256840115776828e-06, "loss": 0.394, "num_tokens": 1708040485.0, "step": 2697 }, { "epoch": 0.3190256592172165, "grad_norm": 0.16079741716384888, "learning_rate": 6.252583942312284e-06, "loss": 0.3696, "num_tokens": 1708667258.0, "step": 2698 }, { "epoch": 0.3191439044578456, "grad_norm": 0.1671343594789505, "learning_rate": 6.248363163539164e-06, "loss": 0.3792, "num_tokens": 1709306576.0, "step": 2699 }, { "epoch": 0.31926214969847466, "grad_norm": 0.1549573540687561, "learning_rate": 6.2441777850428045e-06, "loss": 0.3605, "num_tokens": 1709931524.0, "step": 2700 }, { "epoch": 0.3193803949391037, "grad_norm": 0.15157970786094666, "learning_rate": 6.240027812361683e-06, "loss": 0.326, "num_tokens": 1710563229.0, "step": 2701 }, { "epoch": 0.3194986401797328, "grad_norm": 0.17513641715049744, "learning_rate": 6.235913250987438e-06, "loss": 0.3655, "num_tokens": 1711197349.0, "step": 2702 }, { "epoch": 0.3196168854203618, "grad_norm": 0.14705513417720795, "learning_rate": 6.231834106364838e-06, "loss": 0.3039, "num_tokens": 1711832054.0, "step": 2703 }, { "epoch": 0.3197351306609909, "grad_norm": 0.15651701390743256, "learning_rate": 6.227790383891798e-06, "loss": 0.3549, "num_tokens": 1712466588.0, "step": 2704 }, { "epoch": 0.31985337590161994, "grad_norm": 0.1544400155544281, "learning_rate": 6.223782088919343e-06, "loss": 0.333, "num_tokens": 1713104055.0, "step": 2705 }, { "epoch": 0.31997162114224903, "grad_norm": 0.15615926682949066, "learning_rate": 6.219809226751629e-06, "loss": 0.3661, "num_tokens": 1713741405.0, "step": 2706 }, { "epoch": 0.32008986638287806, "grad_norm": 0.14605312049388885, "learning_rate": 6.2158718026459194e-06, "loss": 0.3552, "num_tokens": 1714376139.0, "step": 2707 }, { "epoch": 0.32020811162350715, "grad_norm": 0.14911401271820068, "learning_rate": 6.211969821812582e-06, "loss": 0.3391, "num_tokens": 1715010719.0, "step": 2708 }, { "epoch": 0.32032635686413624, "grad_norm": 0.17062732577323914, "learning_rate": 6.20810328941509e-06, "loss": 0.3724, "num_tokens": 1715642575.0, "step": 2709 }, { "epoch": 0.3204446021047653, "grad_norm": 0.16017664968967438, "learning_rate": 6.204272210569996e-06, "loss": 0.3558, "num_tokens": 1716275068.0, "step": 2710 }, { "epoch": 0.32056284734539436, "grad_norm": 0.16315767168998718, "learning_rate": 6.200476590346943e-06, "loss": 0.3842, "num_tokens": 1716914533.0, "step": 2711 }, { "epoch": 0.3206810925860234, "grad_norm": 0.15205085277557373, "learning_rate": 6.196716433768656e-06, "loss": 0.3443, "num_tokens": 1717515924.0, "step": 2712 }, { "epoch": 0.3207993378266525, "grad_norm": 0.13858628273010254, "learning_rate": 6.19299174581092e-06, "loss": 0.3262, "num_tokens": 1718149602.0, "step": 2713 }, { "epoch": 0.3209175830672815, "grad_norm": 0.1503458470106125, "learning_rate": 6.189302531402601e-06, "loss": 0.35, "num_tokens": 1718783402.0, "step": 2714 }, { "epoch": 0.3210358283079106, "grad_norm": 0.14808592200279236, "learning_rate": 6.185648795425611e-06, "loss": 0.3547, "num_tokens": 1719417133.0, "step": 2715 }, { "epoch": 0.3211540735485397, "grad_norm": 0.14757515490055084, "learning_rate": 6.1820305427149105e-06, "loss": 0.3212, "num_tokens": 1720055957.0, "step": 2716 }, { "epoch": 0.32127231878916873, "grad_norm": 0.14903753995895386, "learning_rate": 6.178447778058516e-06, "loss": 0.3389, "num_tokens": 1720672344.0, "step": 2717 }, { "epoch": 0.3213905640297978, "grad_norm": 0.14562296867370605, "learning_rate": 6.174900506197477e-06, "loss": 0.3312, "num_tokens": 1721301318.0, "step": 2718 }, { "epoch": 0.32150880927042685, "grad_norm": 0.15241187810897827, "learning_rate": 6.171388731825877e-06, "loss": 0.3683, "num_tokens": 1721927703.0, "step": 2719 }, { "epoch": 0.32162705451105594, "grad_norm": 0.14671999216079712, "learning_rate": 6.167912459590827e-06, "loss": 0.3284, "num_tokens": 1722565193.0, "step": 2720 }, { "epoch": 0.321745299751685, "grad_norm": 0.15789209306240082, "learning_rate": 6.164471694092456e-06, "loss": 0.3568, "num_tokens": 1723200696.0, "step": 2721 }, { "epoch": 0.32186354499231407, "grad_norm": 0.16588889062404633, "learning_rate": 6.161066439883903e-06, "loss": 0.3705, "num_tokens": 1723839772.0, "step": 2722 }, { "epoch": 0.3219817902329431, "grad_norm": 0.16096794605255127, "learning_rate": 6.157696701471324e-06, "loss": 0.382, "num_tokens": 1724477404.0, "step": 2723 }, { "epoch": 0.3221000354735722, "grad_norm": 0.14348092675209045, "learning_rate": 6.154362483313876e-06, "loss": 0.3303, "num_tokens": 1725115337.0, "step": 2724 }, { "epoch": 0.3222182807142013, "grad_norm": 0.14821667969226837, "learning_rate": 6.151063789823705e-06, "loss": 0.3286, "num_tokens": 1725751875.0, "step": 2725 }, { "epoch": 0.3223365259548303, "grad_norm": 0.16842487454414368, "learning_rate": 6.1478006253659545e-06, "loss": 0.3153, "num_tokens": 1726386668.0, "step": 2726 }, { "epoch": 0.3224547711954594, "grad_norm": 0.14876829087734222, "learning_rate": 6.144572994258746e-06, "loss": 0.3477, "num_tokens": 1727024471.0, "step": 2727 }, { "epoch": 0.32257301643608843, "grad_norm": 0.16405583918094635, "learning_rate": 6.141380900773187e-06, "loss": 0.3453, "num_tokens": 1727657575.0, "step": 2728 }, { "epoch": 0.3226912616767175, "grad_norm": 0.16747678816318512, "learning_rate": 6.138224349133355e-06, "loss": 0.3412, "num_tokens": 1728294968.0, "step": 2729 }, { "epoch": 0.32280950691734656, "grad_norm": 0.14372828602790833, "learning_rate": 6.135103343516297e-06, "loss": 0.3232, "num_tokens": 1728933844.0, "step": 2730 }, { "epoch": 0.32292775215797564, "grad_norm": 0.1462886780500412, "learning_rate": 6.132017888052019e-06, "loss": 0.3362, "num_tokens": 1729563923.0, "step": 2731 }, { "epoch": 0.3230459973986047, "grad_norm": 0.15404145419597626, "learning_rate": 6.128967986823482e-06, "loss": 0.3458, "num_tokens": 1730193072.0, "step": 2732 }, { "epoch": 0.32316424263923377, "grad_norm": 0.15265800058841705, "learning_rate": 6.125953643866606e-06, "loss": 0.3348, "num_tokens": 1730826376.0, "step": 2733 }, { "epoch": 0.32328248787986286, "grad_norm": 0.17396466434001923, "learning_rate": 6.122974863170252e-06, "loss": 0.3679, "num_tokens": 1731457578.0, "step": 2734 }, { "epoch": 0.3234007331204919, "grad_norm": 0.1429223269224167, "learning_rate": 6.120031648676221e-06, "loss": 0.3076, "num_tokens": 1732094125.0, "step": 2735 }, { "epoch": 0.323518978361121, "grad_norm": 0.17876330018043518, "learning_rate": 6.1171240042792495e-06, "loss": 0.4053, "num_tokens": 1732695672.0, "step": 2736 }, { "epoch": 0.32363722360175, "grad_norm": 0.15872971713542938, "learning_rate": 6.114251933827009e-06, "loss": 0.3741, "num_tokens": 1733327345.0, "step": 2737 }, { "epoch": 0.3237554688423791, "grad_norm": 0.14188595116138458, "learning_rate": 6.111415441120089e-06, "loss": 0.3303, "num_tokens": 1733965576.0, "step": 2738 }, { "epoch": 0.32387371408300814, "grad_norm": 0.148431196808815, "learning_rate": 6.108614529912007e-06, "loss": 0.339, "num_tokens": 1734592394.0, "step": 2739 }, { "epoch": 0.3239919593236372, "grad_norm": 0.14060525596141815, "learning_rate": 6.10584920390919e-06, "loss": 0.3421, "num_tokens": 1735221890.0, "step": 2740 }, { "epoch": 0.3241102045642663, "grad_norm": 0.16482548415660858, "learning_rate": 6.103119466770971e-06, "loss": 0.3863, "num_tokens": 1735857891.0, "step": 2741 }, { "epoch": 0.32422844980489535, "grad_norm": 0.16050606966018677, "learning_rate": 6.100425322109603e-06, "loss": 0.3649, "num_tokens": 1736492956.0, "step": 2742 }, { "epoch": 0.32434669504552444, "grad_norm": 0.15739870071411133, "learning_rate": 6.097766773490227e-06, "loss": 0.3793, "num_tokens": 1737125568.0, "step": 2743 }, { "epoch": 0.32446494028615347, "grad_norm": 0.15872135758399963, "learning_rate": 6.095143824430883e-06, "loss": 0.3695, "num_tokens": 1737736959.0, "step": 2744 }, { "epoch": 0.32458318552678256, "grad_norm": 0.1529282182455063, "learning_rate": 6.0925564784025065e-06, "loss": 0.3506, "num_tokens": 1738364118.0, "step": 2745 }, { "epoch": 0.3247014307674116, "grad_norm": 0.1539255529642105, "learning_rate": 6.090004738828917e-06, "loss": 0.3245, "num_tokens": 1739000213.0, "step": 2746 }, { "epoch": 0.3248196760080407, "grad_norm": 0.16003718972206116, "learning_rate": 6.087488609086811e-06, "loss": 0.3712, "num_tokens": 1739635841.0, "step": 2747 }, { "epoch": 0.3249379212486697, "grad_norm": 0.14386966824531555, "learning_rate": 6.085008092505771e-06, "loss": 0.3046, "num_tokens": 1740270727.0, "step": 2748 }, { "epoch": 0.3250561664892988, "grad_norm": 0.14387185871601105, "learning_rate": 6.082563192368251e-06, "loss": 0.3092, "num_tokens": 1740910145.0, "step": 2749 }, { "epoch": 0.3251744117299279, "grad_norm": 0.1531536877155304, "learning_rate": 6.0801539119095686e-06, "loss": 0.3296, "num_tokens": 1741545955.0, "step": 2750 }, { "epoch": 0.3252926569705569, "grad_norm": 0.1645767241716385, "learning_rate": 6.077780254317911e-06, "loss": 0.3373, "num_tokens": 1742182116.0, "step": 2751 }, { "epoch": 0.325410902211186, "grad_norm": 0.15716831386089325, "learning_rate": 6.075442222734327e-06, "loss": 0.3461, "num_tokens": 1742817038.0, "step": 2752 }, { "epoch": 0.32552914745181505, "grad_norm": 0.15224866569042206, "learning_rate": 6.073139820252716e-06, "loss": 0.3191, "num_tokens": 1743447171.0, "step": 2753 }, { "epoch": 0.32564739269244414, "grad_norm": 0.137272447347641, "learning_rate": 6.070873049919833e-06, "loss": 0.3014, "num_tokens": 1744082013.0, "step": 2754 }, { "epoch": 0.32576563793307317, "grad_norm": 0.1579388976097107, "learning_rate": 6.068641914735285e-06, "loss": 0.3649, "num_tokens": 1744718914.0, "step": 2755 }, { "epoch": 0.32588388317370226, "grad_norm": 0.160564124584198, "learning_rate": 6.0664464176515185e-06, "loss": 0.3511, "num_tokens": 1745347427.0, "step": 2756 }, { "epoch": 0.32600212841433135, "grad_norm": 0.15549030900001526, "learning_rate": 6.064286561573819e-06, "loss": 0.35, "num_tokens": 1745987038.0, "step": 2757 }, { "epoch": 0.3261203736549604, "grad_norm": 0.1522926241159439, "learning_rate": 6.0621623493603116e-06, "loss": 0.3607, "num_tokens": 1746619830.0, "step": 2758 }, { "epoch": 0.32623861889558947, "grad_norm": 0.1506992131471634, "learning_rate": 6.060073783821954e-06, "loss": 0.3916, "num_tokens": 1747252651.0, "step": 2759 }, { "epoch": 0.3263568641362185, "grad_norm": 0.15200266242027283, "learning_rate": 6.05802086772253e-06, "loss": 0.3717, "num_tokens": 1747887273.0, "step": 2760 }, { "epoch": 0.3264751093768476, "grad_norm": 0.14301730692386627, "learning_rate": 6.056003603778654e-06, "loss": 0.3351, "num_tokens": 1748523868.0, "step": 2761 }, { "epoch": 0.32659335461747663, "grad_norm": 0.15410567820072174, "learning_rate": 6.054021994659758e-06, "loss": 0.34, "num_tokens": 1749155294.0, "step": 2762 }, { "epoch": 0.3267115998581057, "grad_norm": 0.15273074805736542, "learning_rate": 6.052076042988093e-06, "loss": 0.3257, "num_tokens": 1749791054.0, "step": 2763 }, { "epoch": 0.32682984509873475, "grad_norm": 0.1528049111366272, "learning_rate": 6.050165751338724e-06, "loss": 0.3165, "num_tokens": 1750428935.0, "step": 2764 }, { "epoch": 0.32694809033936384, "grad_norm": 0.1597285270690918, "learning_rate": 6.0482911222395316e-06, "loss": 0.3743, "num_tokens": 1751065445.0, "step": 2765 }, { "epoch": 0.32706633557999293, "grad_norm": 0.15127848088741302, "learning_rate": 6.046452158171194e-06, "loss": 0.3488, "num_tokens": 1751702074.0, "step": 2766 }, { "epoch": 0.32718458082062196, "grad_norm": 0.1535509079694748, "learning_rate": 6.044648861567207e-06, "loss": 0.3532, "num_tokens": 1752338344.0, "step": 2767 }, { "epoch": 0.32730282606125105, "grad_norm": 0.16607314348220825, "learning_rate": 6.042881234813861e-06, "loss": 0.3621, "num_tokens": 1752976568.0, "step": 2768 }, { "epoch": 0.3274210713018801, "grad_norm": 0.14180459082126617, "learning_rate": 6.041149280250246e-06, "loss": 0.332, "num_tokens": 1753603003.0, "step": 2769 }, { "epoch": 0.3275393165425092, "grad_norm": 0.1491679847240448, "learning_rate": 6.039453000168244e-06, "loss": 0.345, "num_tokens": 1754241935.0, "step": 2770 }, { "epoch": 0.3276575617831382, "grad_norm": 0.14957982301712036, "learning_rate": 6.037792396812535e-06, "loss": 0.366, "num_tokens": 1754872359.0, "step": 2771 }, { "epoch": 0.3277758070237673, "grad_norm": 0.14234739542007446, "learning_rate": 6.036167472380583e-06, "loss": 0.2863, "num_tokens": 1755511603.0, "step": 2772 }, { "epoch": 0.3278940522643964, "grad_norm": 0.16358140110969543, "learning_rate": 6.034578229022643e-06, "loss": 0.3651, "num_tokens": 1756143176.0, "step": 2773 }, { "epoch": 0.3280122975050254, "grad_norm": 0.16191521286964417, "learning_rate": 6.033024668841748e-06, "loss": 0.3497, "num_tokens": 1756756307.0, "step": 2774 }, { "epoch": 0.3281305427456545, "grad_norm": 0.14113369584083557, "learning_rate": 6.031506793893716e-06, "loss": 0.3439, "num_tokens": 1757390678.0, "step": 2775 }, { "epoch": 0.32824878798628354, "grad_norm": 0.14521457254886627, "learning_rate": 6.030024606187143e-06, "loss": 0.3371, "num_tokens": 1758029292.0, "step": 2776 }, { "epoch": 0.32836703322691263, "grad_norm": 0.1455880105495453, "learning_rate": 6.028578107683398e-06, "loss": 0.3538, "num_tokens": 1758663414.0, "step": 2777 }, { "epoch": 0.32848527846754166, "grad_norm": 0.143596813082695, "learning_rate": 6.0271673002966225e-06, "loss": 0.3284, "num_tokens": 1759297216.0, "step": 2778 }, { "epoch": 0.32860352370817075, "grad_norm": 0.14440219104290009, "learning_rate": 6.025792185893728e-06, "loss": 0.3409, "num_tokens": 1759927411.0, "step": 2779 }, { "epoch": 0.3287217689487998, "grad_norm": 0.16369512677192688, "learning_rate": 6.0244527662944e-06, "loss": 0.3611, "num_tokens": 1760561865.0, "step": 2780 }, { "epoch": 0.3288400141894289, "grad_norm": 0.160583034157753, "learning_rate": 6.023149043271077e-06, "loss": 0.3806, "num_tokens": 1761198485.0, "step": 2781 }, { "epoch": 0.32895825943005796, "grad_norm": 0.15042239427566528, "learning_rate": 6.021881018548975e-06, "loss": 0.301, "num_tokens": 1761832996.0, "step": 2782 }, { "epoch": 0.329076504670687, "grad_norm": 0.15653792023658752, "learning_rate": 6.020648693806062e-06, "loss": 0.3781, "num_tokens": 1762469073.0, "step": 2783 }, { "epoch": 0.3291947499113161, "grad_norm": 0.14766980707645416, "learning_rate": 6.0194520706730605e-06, "loss": 0.33, "num_tokens": 1763102453.0, "step": 2784 }, { "epoch": 0.3293129951519451, "grad_norm": 0.1543278843164444, "learning_rate": 6.018291150733459e-06, "loss": 0.3735, "num_tokens": 1763740288.0, "step": 2785 }, { "epoch": 0.3294312403925742, "grad_norm": 0.16179020702838898, "learning_rate": 6.017165935523497e-06, "loss": 0.3704, "num_tokens": 1764368292.0, "step": 2786 }, { "epoch": 0.32954948563320324, "grad_norm": 0.1551685631275177, "learning_rate": 6.01607642653216e-06, "loss": 0.3604, "num_tokens": 1764998177.0, "step": 2787 }, { "epoch": 0.32966773087383233, "grad_norm": 0.13794450461864471, "learning_rate": 6.015022625201192e-06, "loss": 0.3112, "num_tokens": 1765629891.0, "step": 2788 }, { "epoch": 0.32978597611446137, "grad_norm": 0.14764420688152313, "learning_rate": 6.0140045329250805e-06, "loss": 0.348, "num_tokens": 1766266964.0, "step": 2789 }, { "epoch": 0.32990422135509045, "grad_norm": 0.15834534168243408, "learning_rate": 6.013022151051061e-06, "loss": 0.3447, "num_tokens": 1766904529.0, "step": 2790 }, { "epoch": 0.33002246659571954, "grad_norm": 0.14628948271274567, "learning_rate": 6.012075480879112e-06, "loss": 0.3693, "num_tokens": 1767537573.0, "step": 2791 }, { "epoch": 0.3301407118363486, "grad_norm": 0.15087901055812836, "learning_rate": 6.011164523661962e-06, "loss": 0.3558, "num_tokens": 1768173295.0, "step": 2792 }, { "epoch": 0.33025895707697767, "grad_norm": 0.1705261468887329, "learning_rate": 6.010289280605068e-06, "loss": 0.3896, "num_tokens": 1768810003.0, "step": 2793 }, { "epoch": 0.3303772023176067, "grad_norm": 0.15986499190330505, "learning_rate": 6.00944975286664e-06, "loss": 0.3592, "num_tokens": 1769440243.0, "step": 2794 }, { "epoch": 0.3304954475582358, "grad_norm": 0.14154967665672302, "learning_rate": 6.0086459415576145e-06, "loss": 0.3463, "num_tokens": 1770079311.0, "step": 2795 }, { "epoch": 0.3306136927988648, "grad_norm": 0.15516899526119232, "learning_rate": 6.00787784774167e-06, "loss": 0.3453, "num_tokens": 1770715195.0, "step": 2796 }, { "epoch": 0.3307319380394939, "grad_norm": 0.15446634590625763, "learning_rate": 6.007145472435228e-06, "loss": 0.33, "num_tokens": 1771349599.0, "step": 2797 }, { "epoch": 0.330850183280123, "grad_norm": 0.15723447501659393, "learning_rate": 6.0064488166074275e-06, "loss": 0.3421, "num_tokens": 1771943463.0, "step": 2798 }, { "epoch": 0.33096842852075203, "grad_norm": 0.14613540470600128, "learning_rate": 6.005787881180151e-06, "loss": 0.3381, "num_tokens": 1772581089.0, "step": 2799 }, { "epoch": 0.3310866737613811, "grad_norm": 0.15577061474323273, "learning_rate": 6.005162667028012e-06, "loss": 0.3441, "num_tokens": 1773213267.0, "step": 2800 }, { "epoch": 0.33120491900201016, "grad_norm": 0.15103386342525482, "learning_rate": 6.004573174978353e-06, "loss": 0.3228, "num_tokens": 1773843997.0, "step": 2801 }, { "epoch": 0.33132316424263925, "grad_norm": 0.1580219715833664, "learning_rate": 6.004019405811245e-06, "loss": 0.3462, "num_tokens": 1774443566.0, "step": 2802 }, { "epoch": 0.3314414094832683, "grad_norm": 0.1480458378791809, "learning_rate": 6.003501360259485e-06, "loss": 0.3636, "num_tokens": 1775076095.0, "step": 2803 }, { "epoch": 0.33155965472389737, "grad_norm": 0.16500131785869598, "learning_rate": 6.003019039008604e-06, "loss": 0.3803, "num_tokens": 1775705995.0, "step": 2804 }, { "epoch": 0.3316778999645264, "grad_norm": 0.14276215434074402, "learning_rate": 6.0025724426968495e-06, "loss": 0.345, "num_tokens": 1776340157.0, "step": 2805 }, { "epoch": 0.3317961452051555, "grad_norm": 0.14830231666564941, "learning_rate": 6.002161571915202e-06, "loss": 0.3383, "num_tokens": 1776975868.0, "step": 2806 }, { "epoch": 0.3319143904457846, "grad_norm": 0.14599831402301788, "learning_rate": 6.001786427207366e-06, "loss": 0.3453, "num_tokens": 1777609915.0, "step": 2807 }, { "epoch": 0.3320326356864136, "grad_norm": 0.15465782582759857, "learning_rate": 6.0014470090697654e-06, "loss": 0.3297, "num_tokens": 1778243872.0, "step": 2808 }, { "epoch": 0.3321508809270427, "grad_norm": 0.1415361762046814, "learning_rate": 6.00114331795155e-06, "loss": 0.3511, "num_tokens": 1778876930.0, "step": 2809 }, { "epoch": 0.33226912616767174, "grad_norm": 0.1687016785144806, "learning_rate": 6.000875354254595e-06, "loss": 0.3536, "num_tokens": 1779509817.0, "step": 2810 }, { "epoch": 0.3323873714083008, "grad_norm": 0.1572532057762146, "learning_rate": 6.000643118333494e-06, "loss": 0.3789, "num_tokens": 1780144087.0, "step": 2811 }, { "epoch": 0.33250561664892986, "grad_norm": 0.15837253630161285, "learning_rate": 6.000446610495562e-06, "loss": 0.3558, "num_tokens": 1780778006.0, "step": 2812 }, { "epoch": 0.33262386188955895, "grad_norm": 0.1413705050945282, "learning_rate": 6.0002858310008385e-06, "loss": 0.3238, "num_tokens": 1781416088.0, "step": 2813 }, { "epoch": 0.33274210713018804, "grad_norm": 0.17787781357765198, "learning_rate": 6.00016078006208e-06, "loss": 0.3605, "num_tokens": 1782052246.0, "step": 2814 }, { "epoch": 0.33286035237081707, "grad_norm": 0.15967637300491333, "learning_rate": 6.000071457844771e-06, "loss": 0.3456, "num_tokens": 1782691473.0, "step": 2815 }, { "epoch": 0.33297859761144616, "grad_norm": 0.14508619904518127, "learning_rate": 6.000017864467102e-06, "loss": 0.3088, "num_tokens": 1783330384.0, "step": 2816 } ], "logging_steps": 1.0, "max_steps": 2816, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2816, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5080662592441549e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }