{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 8241, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012134449702705982, "grad_norm": 0.8723246455192566, "learning_rate": 0.0, "loss": 0.8186817169189453, "step": 1 }, { "epoch": 0.00024268899405411964, "grad_norm": 0.3477829396724701, "learning_rate": 2.0000000000000002e-07, "loss": 0.16060441732406616, "step": 2 }, { "epoch": 0.00036403349108117945, "grad_norm": 0.7758898735046387, "learning_rate": 4.0000000000000003e-07, "loss": 0.2928085923194885, "step": 3 }, { "epoch": 0.00048537798810823927, "grad_norm": 0.7930212616920471, "learning_rate": 6.000000000000001e-07, "loss": 0.4934349060058594, "step": 4 }, { "epoch": 0.0006067224851352991, "grad_norm": 0.49778178334236145, "learning_rate": 8.000000000000001e-07, "loss": 0.1982222944498062, "step": 5 }, { "epoch": 0.0007280669821623589, "grad_norm": 0.9954462051391602, "learning_rate": 1.0000000000000002e-06, "loss": 0.779515266418457, "step": 6 }, { "epoch": 0.0008494114791894187, "grad_norm": 0.7579694986343384, "learning_rate": 1.2000000000000002e-06, "loss": 0.8047744035720825, "step": 7 }, { "epoch": 0.0009707559762164785, "grad_norm": 1.1304748058319092, "learning_rate": 1.4000000000000001e-06, "loss": 0.5158179998397827, "step": 8 }, { "epoch": 0.0010921004732435385, "grad_norm": 0.6553785800933838, "learning_rate": 1.6000000000000001e-06, "loss": 0.5927454233169556, "step": 9 }, { "epoch": 0.0012134449702705982, "grad_norm": 0.6902636885643005, "learning_rate": 1.8000000000000001e-06, "loss": 0.5002865791320801, "step": 10 }, { "epoch": 0.001334789467297658, "grad_norm": 0.8518681526184082, "learning_rate": 2.0000000000000003e-06, "loss": 0.6353203654289246, "step": 11 }, { "epoch": 0.0014561339643247178, "grad_norm": 0.7608593702316284, "learning_rate": 2.2e-06, "loss": 0.5984488129615784, "step": 12 }, { "epoch": 0.0015774784613517777, "grad_norm": 0.523412823677063, "learning_rate": 2.4000000000000003e-06, "loss": 0.2241007536649704, "step": 13 }, { "epoch": 0.0016988229583788375, "grad_norm": 0.5840254426002502, "learning_rate": 2.6e-06, "loss": 0.1746365875005722, "step": 14 }, { "epoch": 0.0018201674554058974, "grad_norm": 1.0090986490249634, "learning_rate": 2.8000000000000003e-06, "loss": 0.5960265398025513, "step": 15 }, { "epoch": 0.001941511952432957, "grad_norm": 0.5519396066665649, "learning_rate": 3e-06, "loss": 0.11730103194713593, "step": 16 }, { "epoch": 0.002062856449460017, "grad_norm": 0.6301160454750061, "learning_rate": 3.2000000000000003e-06, "loss": 0.3237765431404114, "step": 17 }, { "epoch": 0.002184200946487077, "grad_norm": 0.6448126435279846, "learning_rate": 3.4000000000000005e-06, "loss": 0.36336758732795715, "step": 18 }, { "epoch": 0.0023055454435141364, "grad_norm": 1.1270724534988403, "learning_rate": 3.6000000000000003e-06, "loss": 0.9434967041015625, "step": 19 }, { "epoch": 0.0024268899405411964, "grad_norm": 0.8342574238777161, "learning_rate": 3.8000000000000005e-06, "loss": 0.3132971525192261, "step": 20 }, { "epoch": 0.0025482344375682563, "grad_norm": 0.6736285090446472, "learning_rate": 4.000000000000001e-06, "loss": 0.2724347412586212, "step": 21 }, { "epoch": 0.002669578934595316, "grad_norm": 0.831219494342804, "learning_rate": 4.2000000000000004e-06, "loss": 0.5868111848831177, "step": 22 }, { "epoch": 0.002790923431622376, "grad_norm": 0.7777939438819885, "learning_rate": 4.4e-06, "loss": 0.1734337955713272, "step": 23 }, { "epoch": 0.0029122679286494356, "grad_norm": 0.37845781445503235, "learning_rate": 4.600000000000001e-06, "loss": 0.7677285075187683, "step": 24 }, { "epoch": 0.0030336124256764956, "grad_norm": 0.9251959323883057, "learning_rate": 4.800000000000001e-06, "loss": 0.6172207593917847, "step": 25 }, { "epoch": 0.0031549569227035555, "grad_norm": 0.9809829592704773, "learning_rate": 5e-06, "loss": 0.906048059463501, "step": 26 }, { "epoch": 0.0032763014197306154, "grad_norm": 0.6690176725387573, "learning_rate": 5.2e-06, "loss": 0.4358732998371124, "step": 27 }, { "epoch": 0.003397645916757675, "grad_norm": 0.7016831636428833, "learning_rate": 5.400000000000001e-06, "loss": 0.7462010979652405, "step": 28 }, { "epoch": 0.003518990413784735, "grad_norm": 1.2273560762405396, "learning_rate": 5.600000000000001e-06, "loss": 0.4869852662086487, "step": 29 }, { "epoch": 0.0036403349108117948, "grad_norm": 0.6704553365707397, "learning_rate": 5.8e-06, "loss": 0.65860915184021, "step": 30 }, { "epoch": 0.0037616794078388547, "grad_norm": 0.6678578853607178, "learning_rate": 6e-06, "loss": 0.22405284643173218, "step": 31 }, { "epoch": 0.003883023904865914, "grad_norm": 0.875632107257843, "learning_rate": 6.200000000000001e-06, "loss": 0.2586999237537384, "step": 32 }, { "epoch": 0.004004368401892974, "grad_norm": 0.8029634356498718, "learning_rate": 6.4000000000000006e-06, "loss": 0.37517064809799194, "step": 33 }, { "epoch": 0.004125712898920034, "grad_norm": 0.9273082613945007, "learning_rate": 6.600000000000001e-06, "loss": 0.5981779098510742, "step": 34 }, { "epoch": 0.004247057395947094, "grad_norm": 0.8178271055221558, "learning_rate": 6.800000000000001e-06, "loss": 0.6380455493927002, "step": 35 }, { "epoch": 0.004368401892974154, "grad_norm": 0.7530136108398438, "learning_rate": 7e-06, "loss": 0.559657096862793, "step": 36 }, { "epoch": 0.004489746390001214, "grad_norm": 0.8515595197677612, "learning_rate": 7.2000000000000005e-06, "loss": 0.5521618127822876, "step": 37 }, { "epoch": 0.004611090887028273, "grad_norm": 0.937356173992157, "learning_rate": 7.4e-06, "loss": 0.764209508895874, "step": 38 }, { "epoch": 0.004732435384055333, "grad_norm": 0.8871638178825378, "learning_rate": 7.600000000000001e-06, "loss": 0.3783448040485382, "step": 39 }, { "epoch": 0.004853779881082393, "grad_norm": 0.5699480772018433, "learning_rate": 7.800000000000002e-06, "loss": 0.45090657472610474, "step": 40 }, { "epoch": 0.004975124378109453, "grad_norm": 0.7399379014968872, "learning_rate": 8.000000000000001e-06, "loss": 0.22258038818836212, "step": 41 }, { "epoch": 0.005096468875136513, "grad_norm": 0.8235787749290466, "learning_rate": 8.2e-06, "loss": 0.4367460608482361, "step": 42 }, { "epoch": 0.0052178133721635725, "grad_norm": 0.5692397952079773, "learning_rate": 8.400000000000001e-06, "loss": 0.497313916683197, "step": 43 }, { "epoch": 0.005339157869190632, "grad_norm": 0.8195589780807495, "learning_rate": 8.6e-06, "loss": 0.7818059325218201, "step": 44 }, { "epoch": 0.005460502366217692, "grad_norm": 1.0632492303848267, "learning_rate": 8.8e-06, "loss": 0.5494105219841003, "step": 45 }, { "epoch": 0.005581846863244752, "grad_norm": 0.6694285869598389, "learning_rate": 9e-06, "loss": 0.35489311814308167, "step": 46 }, { "epoch": 0.005703191360271811, "grad_norm": 0.9679849147796631, "learning_rate": 9.200000000000002e-06, "loss": 0.9104418754577637, "step": 47 }, { "epoch": 0.005824535857298871, "grad_norm": 0.8836600184440613, "learning_rate": 9.4e-06, "loss": 0.33178386092185974, "step": 48 }, { "epoch": 0.005945880354325931, "grad_norm": 0.9069057106971741, "learning_rate": 9.600000000000001e-06, "loss": 0.47104474902153015, "step": 49 }, { "epoch": 0.006067224851352991, "grad_norm": 0.9322941899299622, "learning_rate": 9.800000000000001e-06, "loss": 0.8761706948280334, "step": 50 }, { "epoch": 0.006188569348380051, "grad_norm": 0.96770840883255, "learning_rate": 1e-05, "loss": 0.7080501317977905, "step": 51 }, { "epoch": 0.006309913845407111, "grad_norm": 0.8596216440200806, "learning_rate": 1.02e-05, "loss": 0.6395684480667114, "step": 52 }, { "epoch": 0.006431258342434171, "grad_norm": 0.9308010339736938, "learning_rate": 1.04e-05, "loss": 0.776948869228363, "step": 53 }, { "epoch": 0.006552602839461231, "grad_norm": 0.9452096223831177, "learning_rate": 1.0600000000000002e-05, "loss": 0.5925815105438232, "step": 54 }, { "epoch": 0.00667394733648829, "grad_norm": 0.7136799693107605, "learning_rate": 1.0800000000000002e-05, "loss": 0.4017346203327179, "step": 55 }, { "epoch": 0.00679529183351535, "grad_norm": 0.8862737417221069, "learning_rate": 1.1000000000000001e-05, "loss": 0.7030301094055176, "step": 56 }, { "epoch": 0.00691663633054241, "grad_norm": 0.8768438696861267, "learning_rate": 1.1200000000000001e-05, "loss": 0.1365959346294403, "step": 57 }, { "epoch": 0.00703798082756947, "grad_norm": 0.6508520245552063, "learning_rate": 1.14e-05, "loss": 0.14912734925746918, "step": 58 }, { "epoch": 0.00715932532459653, "grad_norm": 0.942901074886322, "learning_rate": 1.16e-05, "loss": 0.5415380001068115, "step": 59 }, { "epoch": 0.0072806698216235895, "grad_norm": 1.0844477415084839, "learning_rate": 1.18e-05, "loss": 0.20876353979110718, "step": 60 }, { "epoch": 0.007402014318650649, "grad_norm": 0.9388653039932251, "learning_rate": 1.2e-05, "loss": 0.4371647238731384, "step": 61 }, { "epoch": 0.007523358815677709, "grad_norm": 0.8884531855583191, "learning_rate": 1.22e-05, "loss": 0.2126459777355194, "step": 62 }, { "epoch": 0.007644703312704768, "grad_norm": 0.9780679941177368, "learning_rate": 1.2400000000000002e-05, "loss": 0.5204879641532898, "step": 63 }, { "epoch": 0.007766047809731828, "grad_norm": 0.9438526630401611, "learning_rate": 1.2600000000000001e-05, "loss": 0.6052622199058533, "step": 64 }, { "epoch": 0.00788739230675889, "grad_norm": 0.6636860370635986, "learning_rate": 1.2800000000000001e-05, "loss": 0.4711700975894928, "step": 65 }, { "epoch": 0.008008736803785948, "grad_norm": 0.9902396202087402, "learning_rate": 1.3000000000000001e-05, "loss": 0.21103627979755402, "step": 66 }, { "epoch": 0.008130081300813009, "grad_norm": 0.6545515060424805, "learning_rate": 1.3200000000000002e-05, "loss": 0.22432354092597961, "step": 67 }, { "epoch": 0.008251425797840068, "grad_norm": 0.7995378971099854, "learning_rate": 1.3400000000000002e-05, "loss": 0.44884270429611206, "step": 68 }, { "epoch": 0.008372770294867127, "grad_norm": 0.9875938296318054, "learning_rate": 1.3600000000000002e-05, "loss": 0.5456703305244446, "step": 69 }, { "epoch": 0.008494114791894188, "grad_norm": 1.2119238376617432, "learning_rate": 1.38e-05, "loss": 0.5117138624191284, "step": 70 }, { "epoch": 0.008615459288921247, "grad_norm": 0.7160502672195435, "learning_rate": 1.4e-05, "loss": 0.4587688446044922, "step": 71 }, { "epoch": 0.008736803785948308, "grad_norm": 0.8775944113731384, "learning_rate": 1.4200000000000001e-05, "loss": 0.181992307305336, "step": 72 }, { "epoch": 0.008858148282975367, "grad_norm": 1.1906511783599854, "learning_rate": 1.4400000000000001e-05, "loss": 0.8971315622329712, "step": 73 }, { "epoch": 0.008979492780002428, "grad_norm": 1.5473064184188843, "learning_rate": 1.46e-05, "loss": 0.5879963636398315, "step": 74 }, { "epoch": 0.009100837277029487, "grad_norm": 1.413874864578247, "learning_rate": 1.48e-05, "loss": 0.4934973120689392, "step": 75 }, { "epoch": 0.009222181774056546, "grad_norm": 0.9657158851623535, "learning_rate": 1.5000000000000002e-05, "loss": 0.3402983248233795, "step": 76 }, { "epoch": 0.009343526271083607, "grad_norm": 0.8786820769309998, "learning_rate": 1.5200000000000002e-05, "loss": 0.31571996212005615, "step": 77 }, { "epoch": 0.009464870768110666, "grad_norm": 0.981968343257904, "learning_rate": 1.54e-05, "loss": 0.6843433380126953, "step": 78 }, { "epoch": 0.009586215265137726, "grad_norm": 1.0725715160369873, "learning_rate": 1.5600000000000003e-05, "loss": 0.34444135427474976, "step": 79 }, { "epoch": 0.009707559762164785, "grad_norm": 1.2156447172164917, "learning_rate": 1.58e-05, "loss": 0.5834903120994568, "step": 80 }, { "epoch": 0.009828904259191846, "grad_norm": 1.1125974655151367, "learning_rate": 1.6000000000000003e-05, "loss": 0.34024637937545776, "step": 81 }, { "epoch": 0.009950248756218905, "grad_norm": 0.6233447790145874, "learning_rate": 1.62e-05, "loss": 0.46968546509742737, "step": 82 }, { "epoch": 0.010071593253245966, "grad_norm": 1.2955931425094604, "learning_rate": 1.64e-05, "loss": 0.3877994418144226, "step": 83 }, { "epoch": 0.010192937750273025, "grad_norm": 1.098732829093933, "learning_rate": 1.66e-05, "loss": 0.5838992595672607, "step": 84 }, { "epoch": 0.010314282247300084, "grad_norm": 0.6143050789833069, "learning_rate": 1.6800000000000002e-05, "loss": 0.16620726883411407, "step": 85 }, { "epoch": 0.010435626744327145, "grad_norm": 0.8536654710769653, "learning_rate": 1.7e-05, "loss": 0.46331706643104553, "step": 86 }, { "epoch": 0.010556971241354204, "grad_norm": 0.9140154719352722, "learning_rate": 1.72e-05, "loss": 0.46669042110443115, "step": 87 }, { "epoch": 0.010678315738381265, "grad_norm": 0.8299479484558105, "learning_rate": 1.7400000000000003e-05, "loss": 0.39312833547592163, "step": 88 }, { "epoch": 0.010799660235408324, "grad_norm": 1.2406340837478638, "learning_rate": 1.76e-05, "loss": 0.670952320098877, "step": 89 }, { "epoch": 0.010921004732435385, "grad_norm": 0.7150036692619324, "learning_rate": 1.7800000000000002e-05, "loss": 0.49945852160453796, "step": 90 }, { "epoch": 0.011042349229462444, "grad_norm": 1.02498197555542, "learning_rate": 1.8e-05, "loss": 0.4261772036552429, "step": 91 }, { "epoch": 0.011163693726489505, "grad_norm": 0.849663496017456, "learning_rate": 1.8200000000000002e-05, "loss": 0.5062468647956848, "step": 92 }, { "epoch": 0.011285038223516564, "grad_norm": 0.9148527979850769, "learning_rate": 1.8400000000000003e-05, "loss": 0.7943180799484253, "step": 93 }, { "epoch": 0.011406382720543623, "grad_norm": 0.6996797323226929, "learning_rate": 1.86e-05, "loss": 0.453007310628891, "step": 94 }, { "epoch": 0.011527727217570683, "grad_norm": 1.9976884126663208, "learning_rate": 1.88e-05, "loss": 0.3840827941894531, "step": 95 }, { "epoch": 0.011649071714597743, "grad_norm": 0.7963491082191467, "learning_rate": 1.9e-05, "loss": 0.3547995686531067, "step": 96 }, { "epoch": 0.011770416211624803, "grad_norm": 0.6255000233650208, "learning_rate": 1.9200000000000003e-05, "loss": 0.1267634928226471, "step": 97 }, { "epoch": 0.011891760708651862, "grad_norm": 1.1216952800750732, "learning_rate": 1.94e-05, "loss": 0.5351840853691101, "step": 98 }, { "epoch": 0.012013105205678923, "grad_norm": 0.933724045753479, "learning_rate": 1.9600000000000002e-05, "loss": 0.28443726897239685, "step": 99 }, { "epoch": 0.012134449702705982, "grad_norm": 1.23550283908844, "learning_rate": 1.98e-05, "loss": 0.9422104954719543, "step": 100 }, { "epoch": 0.012255794199733041, "grad_norm": 0.6336556077003479, "learning_rate": 2e-05, "loss": 0.12934568524360657, "step": 101 }, { "epoch": 0.012377138696760102, "grad_norm": 1.0921475887298584, "learning_rate": 1.9997543299348976e-05, "loss": 0.5496861338615417, "step": 102 }, { "epoch": 0.012498483193787161, "grad_norm": 1.4995360374450684, "learning_rate": 1.999508659869795e-05, "loss": 0.450344443321228, "step": 103 }, { "epoch": 0.012619827690814222, "grad_norm": 1.3966925144195557, "learning_rate": 1.9992629898046924e-05, "loss": 0.4021853804588318, "step": 104 }, { "epoch": 0.012741172187841281, "grad_norm": 0.6302288770675659, "learning_rate": 1.99901731973959e-05, "loss": 0.16195248067378998, "step": 105 }, { "epoch": 0.012862516684868342, "grad_norm": 0.8286706209182739, "learning_rate": 1.9987716496744873e-05, "loss": 0.5685713291168213, "step": 106 }, { "epoch": 0.0129838611818954, "grad_norm": 1.2312484979629517, "learning_rate": 1.9985259796093847e-05, "loss": 0.4577338993549347, "step": 107 }, { "epoch": 0.013105205678922462, "grad_norm": 0.7556170225143433, "learning_rate": 1.998280309544282e-05, "loss": 0.47135791182518005, "step": 108 }, { "epoch": 0.01322655017594952, "grad_norm": 0.9953917860984802, "learning_rate": 1.9980346394791796e-05, "loss": 0.8566328883171082, "step": 109 }, { "epoch": 0.01334789467297658, "grad_norm": 0.43752583861351013, "learning_rate": 1.997788969414077e-05, "loss": 0.03474525734782219, "step": 110 }, { "epoch": 0.01346923917000364, "grad_norm": 0.8211821913719177, "learning_rate": 1.9975432993489744e-05, "loss": 0.5059197545051575, "step": 111 }, { "epoch": 0.0135905836670307, "grad_norm": 1.6312147378921509, "learning_rate": 1.997297629283872e-05, "loss": 0.44580140709877014, "step": 112 }, { "epoch": 0.01371192816405776, "grad_norm": 1.2254769802093506, "learning_rate": 1.9970519592187693e-05, "loss": 0.5274494886398315, "step": 113 }, { "epoch": 0.01383327266108482, "grad_norm": 0.9368826150894165, "learning_rate": 1.9968062891536667e-05, "loss": 0.4496898651123047, "step": 114 }, { "epoch": 0.01395461715811188, "grad_norm": 0.7070733308792114, "learning_rate": 1.996560619088564e-05, "loss": 0.3537105619907379, "step": 115 }, { "epoch": 0.01407596165513894, "grad_norm": 1.085019826889038, "learning_rate": 1.9963149490234616e-05, "loss": 0.5918015837669373, "step": 116 }, { "epoch": 0.014197306152166, "grad_norm": 0.8331693410873413, "learning_rate": 1.996069278958359e-05, "loss": 0.7372145652770996, "step": 117 }, { "epoch": 0.01431865064919306, "grad_norm": 1.0224517583847046, "learning_rate": 1.9958236088932564e-05, "loss": 0.6304808855056763, "step": 118 }, { "epoch": 0.014439995146220118, "grad_norm": 1.0307385921478271, "learning_rate": 1.9955779388281538e-05, "loss": 0.4330332279205322, "step": 119 }, { "epoch": 0.014561339643247179, "grad_norm": 1.1663846969604492, "learning_rate": 1.9953322687630513e-05, "loss": 1.0179262161254883, "step": 120 }, { "epoch": 0.014682684140274238, "grad_norm": 1.2464661598205566, "learning_rate": 1.995086598697949e-05, "loss": 0.4001690149307251, "step": 121 }, { "epoch": 0.014804028637301299, "grad_norm": 0.6119124889373779, "learning_rate": 1.9948409286328464e-05, "loss": 0.5289167761802673, "step": 122 }, { "epoch": 0.014925373134328358, "grad_norm": 0.7121961712837219, "learning_rate": 1.994595258567744e-05, "loss": 0.39589789509773254, "step": 123 }, { "epoch": 0.015046717631355419, "grad_norm": 0.8262454867362976, "learning_rate": 1.9943495885026413e-05, "loss": 0.3565620481967926, "step": 124 }, { "epoch": 0.015168062128382478, "grad_norm": 0.9075021743774414, "learning_rate": 1.9941039184375387e-05, "loss": 0.2919665575027466, "step": 125 }, { "epoch": 0.015289406625409537, "grad_norm": 1.4302308559417725, "learning_rate": 1.993858248372436e-05, "loss": 0.5871132612228394, "step": 126 }, { "epoch": 0.015410751122436598, "grad_norm": 1.2167500257492065, "learning_rate": 1.9936125783073336e-05, "loss": 0.2407664954662323, "step": 127 }, { "epoch": 0.015532095619463657, "grad_norm": 1.038041353225708, "learning_rate": 1.993366908242231e-05, "loss": 0.8497026562690735, "step": 128 }, { "epoch": 0.015653440116490717, "grad_norm": 1.1356406211853027, "learning_rate": 1.9931212381771284e-05, "loss": 0.4753378927707672, "step": 129 }, { "epoch": 0.01577478461351778, "grad_norm": 1.1320428848266602, "learning_rate": 1.992875568112026e-05, "loss": 0.4532396197319031, "step": 130 }, { "epoch": 0.015896129110544836, "grad_norm": 0.9516323804855347, "learning_rate": 1.9926298980469233e-05, "loss": 0.38531002402305603, "step": 131 }, { "epoch": 0.016017473607571896, "grad_norm": 1.3494288921356201, "learning_rate": 1.9923842279818207e-05, "loss": 0.7311023473739624, "step": 132 }, { "epoch": 0.016138818104598957, "grad_norm": 1.1651663780212402, "learning_rate": 1.992138557916718e-05, "loss": 0.4712047278881073, "step": 133 }, { "epoch": 0.016260162601626018, "grad_norm": 1.558090090751648, "learning_rate": 1.9918928878516156e-05, "loss": 0.5418444871902466, "step": 134 }, { "epoch": 0.016381507098653075, "grad_norm": 0.792945384979248, "learning_rate": 1.991647217786513e-05, "loss": 0.2568298876285553, "step": 135 }, { "epoch": 0.016502851595680136, "grad_norm": 0.7723349928855896, "learning_rate": 1.9914015477214104e-05, "loss": 0.6914761066436768, "step": 136 }, { "epoch": 0.016624196092707197, "grad_norm": 0.8072736859321594, "learning_rate": 1.991155877656308e-05, "loss": 0.5070819854736328, "step": 137 }, { "epoch": 0.016745540589734254, "grad_norm": 0.8866456747055054, "learning_rate": 1.9909102075912053e-05, "loss": 0.48820972442626953, "step": 138 }, { "epoch": 0.016866885086761315, "grad_norm": 0.6745986342430115, "learning_rate": 1.9906645375261027e-05, "loss": 0.4791775643825531, "step": 139 }, { "epoch": 0.016988229583788376, "grad_norm": 0.7663500905036926, "learning_rate": 1.990418867461e-05, "loss": 0.27651312947273254, "step": 140 }, { "epoch": 0.017109574080815437, "grad_norm": 0.7036702632904053, "learning_rate": 1.9901731973958975e-05, "loss": 0.34517037868499756, "step": 141 }, { "epoch": 0.017230918577842494, "grad_norm": 1.1952064037322998, "learning_rate": 1.989927527330795e-05, "loss": 0.7066012024879456, "step": 142 }, { "epoch": 0.017352263074869555, "grad_norm": 1.4770616292953491, "learning_rate": 1.9896818572656924e-05, "loss": 0.5686489343643188, "step": 143 }, { "epoch": 0.017473607571896616, "grad_norm": 0.9231040477752686, "learning_rate": 1.9894361872005898e-05, "loss": 0.5055479407310486, "step": 144 }, { "epoch": 0.017594952068923673, "grad_norm": 2.1276888847351074, "learning_rate": 1.9891905171354872e-05, "loss": 0.676661491394043, "step": 145 }, { "epoch": 0.017716296565950734, "grad_norm": 1.249248743057251, "learning_rate": 1.9889448470703847e-05, "loss": 0.7343191504478455, "step": 146 }, { "epoch": 0.017837641062977794, "grad_norm": 0.7005143761634827, "learning_rate": 1.988699177005282e-05, "loss": 0.1592637598514557, "step": 147 }, { "epoch": 0.017958985560004855, "grad_norm": 1.0925079584121704, "learning_rate": 1.9884535069401795e-05, "loss": 0.46038728952407837, "step": 148 }, { "epoch": 0.018080330057031913, "grad_norm": 1.1379766464233398, "learning_rate": 1.988207836875077e-05, "loss": 0.21679271757602692, "step": 149 }, { "epoch": 0.018201674554058973, "grad_norm": 1.6844356060028076, "learning_rate": 1.9879621668099744e-05, "loss": 0.460208535194397, "step": 150 }, { "epoch": 0.018323019051086034, "grad_norm": 0.9418531060218811, "learning_rate": 1.9877164967448718e-05, "loss": 0.2926967442035675, "step": 151 }, { "epoch": 0.01844436354811309, "grad_norm": 1.0456334352493286, "learning_rate": 1.9874708266797692e-05, "loss": 0.6844074726104736, "step": 152 }, { "epoch": 0.018565708045140152, "grad_norm": 0.8679888248443604, "learning_rate": 1.9872251566146666e-05, "loss": 0.5056055188179016, "step": 153 }, { "epoch": 0.018687052542167213, "grad_norm": 0.8799395561218262, "learning_rate": 1.986979486549564e-05, "loss": 0.3345467448234558, "step": 154 }, { "epoch": 0.018808397039194274, "grad_norm": 1.5138970613479614, "learning_rate": 1.9867338164844615e-05, "loss": 0.46227532625198364, "step": 155 }, { "epoch": 0.01892974153622133, "grad_norm": 0.959060788154602, "learning_rate": 1.986488146419359e-05, "loss": 0.598260223865509, "step": 156 }, { "epoch": 0.019051086033248392, "grad_norm": 0.9521774649620056, "learning_rate": 1.9862424763542563e-05, "loss": 0.4968634843826294, "step": 157 }, { "epoch": 0.019172430530275453, "grad_norm": 1.074791669845581, "learning_rate": 1.9859968062891538e-05, "loss": 0.3660491704940796, "step": 158 }, { "epoch": 0.019293775027302514, "grad_norm": 0.7658405303955078, "learning_rate": 1.9857511362240512e-05, "loss": 0.31504422426223755, "step": 159 }, { "epoch": 0.01941511952432957, "grad_norm": 1.2017768621444702, "learning_rate": 1.9855054661589486e-05, "loss": 0.2452397644519806, "step": 160 }, { "epoch": 0.01953646402135663, "grad_norm": 0.8670822978019714, "learning_rate": 1.9852597960938464e-05, "loss": 0.2822229862213135, "step": 161 }, { "epoch": 0.019657808518383692, "grad_norm": 1.3473888635635376, "learning_rate": 1.9850141260287438e-05, "loss": 0.349812388420105, "step": 162 }, { "epoch": 0.01977915301541075, "grad_norm": 0.9460965394973755, "learning_rate": 1.9847684559636412e-05, "loss": 0.3580651879310608, "step": 163 }, { "epoch": 0.01990049751243781, "grad_norm": 1.454413652420044, "learning_rate": 1.9845227858985387e-05, "loss": 0.3063688278198242, "step": 164 }, { "epoch": 0.02002184200946487, "grad_norm": 1.1844851970672607, "learning_rate": 1.984277115833436e-05, "loss": 0.3315426707267761, "step": 165 }, { "epoch": 0.020143186506491932, "grad_norm": 1.5113213062286377, "learning_rate": 1.9840314457683335e-05, "loss": 0.2392674684524536, "step": 166 }, { "epoch": 0.02026453100351899, "grad_norm": 0.9833793640136719, "learning_rate": 1.983785775703231e-05, "loss": 0.5257890820503235, "step": 167 }, { "epoch": 0.02038587550054605, "grad_norm": 1.514121651649475, "learning_rate": 1.9835401056381284e-05, "loss": 0.6917684078216553, "step": 168 }, { "epoch": 0.02050721999757311, "grad_norm": 0.9638065099716187, "learning_rate": 1.9832944355730255e-05, "loss": 0.41345733404159546, "step": 169 }, { "epoch": 0.02062856449460017, "grad_norm": 1.2704676389694214, "learning_rate": 1.983048765507923e-05, "loss": 0.6704689264297485, "step": 170 }, { "epoch": 0.02074990899162723, "grad_norm": 0.9627780914306641, "learning_rate": 1.9828030954428203e-05, "loss": 0.3117220997810364, "step": 171 }, { "epoch": 0.02087125348865429, "grad_norm": 0.8880823850631714, "learning_rate": 1.9825574253777177e-05, "loss": 0.5640937089920044, "step": 172 }, { "epoch": 0.02099259798568135, "grad_norm": 1.2231365442276, "learning_rate": 1.982311755312615e-05, "loss": 0.3630719780921936, "step": 173 }, { "epoch": 0.021113942482708408, "grad_norm": 1.5474482774734497, "learning_rate": 1.9820660852475126e-05, "loss": 0.7506128549575806, "step": 174 }, { "epoch": 0.02123528697973547, "grad_norm": 1.6173280477523804, "learning_rate": 1.98182041518241e-05, "loss": 0.4840516746044159, "step": 175 }, { "epoch": 0.02135663147676253, "grad_norm": 0.610375702381134, "learning_rate": 1.9815747451173074e-05, "loss": 0.12637916207313538, "step": 176 }, { "epoch": 0.021477975973789587, "grad_norm": 0.8489571809768677, "learning_rate": 1.981329075052205e-05, "loss": 0.19583982229232788, "step": 177 }, { "epoch": 0.021599320470816648, "grad_norm": 1.4588679075241089, "learning_rate": 1.9810834049871023e-05, "loss": 0.6651678085327148, "step": 178 }, { "epoch": 0.02172066496784371, "grad_norm": 0.778512716293335, "learning_rate": 1.9808377349219997e-05, "loss": 0.37274667620658875, "step": 179 }, { "epoch": 0.02184200946487077, "grad_norm": 1.3927290439605713, "learning_rate": 1.980592064856897e-05, "loss": 0.4347939193248749, "step": 180 }, { "epoch": 0.021963353961897827, "grad_norm": 1.2548933029174805, "learning_rate": 1.9803463947917946e-05, "loss": 0.36539730429649353, "step": 181 }, { "epoch": 0.022084698458924888, "grad_norm": 11.010635375976562, "learning_rate": 1.980100724726692e-05, "loss": 0.44999217987060547, "step": 182 }, { "epoch": 0.02220604295595195, "grad_norm": 1.3671001195907593, "learning_rate": 1.9798550546615894e-05, "loss": 0.18597112596035004, "step": 183 }, { "epoch": 0.02232738745297901, "grad_norm": 1.1180782318115234, "learning_rate": 1.979609384596487e-05, "loss": 0.2359408438205719, "step": 184 }, { "epoch": 0.022448731950006066, "grad_norm": 0.8790715932846069, "learning_rate": 1.9793637145313843e-05, "loss": 0.22656098008155823, "step": 185 }, { "epoch": 0.022570076447033127, "grad_norm": 1.5535168647766113, "learning_rate": 1.9791180444662817e-05, "loss": 0.2677188813686371, "step": 186 }, { "epoch": 0.022691420944060188, "grad_norm": 0.7048156261444092, "learning_rate": 1.9788723744011795e-05, "loss": 0.15088553726673126, "step": 187 }, { "epoch": 0.022812765441087245, "grad_norm": 0.8741711378097534, "learning_rate": 1.978626704336077e-05, "loss": 0.2621510922908783, "step": 188 }, { "epoch": 0.022934109938114306, "grad_norm": 1.5229653120040894, "learning_rate": 1.9783810342709743e-05, "loss": 0.6171689033508301, "step": 189 }, { "epoch": 0.023055454435141367, "grad_norm": 1.302876353263855, "learning_rate": 1.9781353642058717e-05, "loss": 0.7471094131469727, "step": 190 }, { "epoch": 0.023176798932168428, "grad_norm": 1.594721794128418, "learning_rate": 1.977889694140769e-05, "loss": 0.40545159578323364, "step": 191 }, { "epoch": 0.023298143429195485, "grad_norm": 1.2041152715682983, "learning_rate": 1.9776440240756666e-05, "loss": 0.538151204586029, "step": 192 }, { "epoch": 0.023419487926222546, "grad_norm": 1.349916934967041, "learning_rate": 1.977398354010564e-05, "loss": 0.5056011080741882, "step": 193 }, { "epoch": 0.023540832423249607, "grad_norm": 1.6573007106781006, "learning_rate": 1.9771526839454614e-05, "loss": 0.679570734500885, "step": 194 }, { "epoch": 0.023662176920276664, "grad_norm": 1.1572545766830444, "learning_rate": 1.976907013880359e-05, "loss": 0.18730156123638153, "step": 195 }, { "epoch": 0.023783521417303725, "grad_norm": 0.9210532903671265, "learning_rate": 1.9766613438152563e-05, "loss": 0.18758049607276917, "step": 196 }, { "epoch": 0.023904865914330786, "grad_norm": 1.0380648374557495, "learning_rate": 1.9764156737501537e-05, "loss": 0.4383860230445862, "step": 197 }, { "epoch": 0.024026210411357846, "grad_norm": 1.1509276628494263, "learning_rate": 1.976170003685051e-05, "loss": 0.5359134078025818, "step": 198 }, { "epoch": 0.024147554908384904, "grad_norm": 1.693115234375, "learning_rate": 1.9759243336199486e-05, "loss": 0.13348811864852905, "step": 199 }, { "epoch": 0.024268899405411964, "grad_norm": 1.350980281829834, "learning_rate": 1.975678663554846e-05, "loss": 0.32777535915374756, "step": 200 }, { "epoch": 0.024390243902439025, "grad_norm": 1.4759892225265503, "learning_rate": 1.9754329934897434e-05, "loss": 0.810895562171936, "step": 201 }, { "epoch": 0.024511588399466083, "grad_norm": 1.1393275260925293, "learning_rate": 1.975187323424641e-05, "loss": 0.1503686010837555, "step": 202 }, { "epoch": 0.024632932896493143, "grad_norm": 0.7875057458877563, "learning_rate": 1.9749416533595383e-05, "loss": 0.07484026253223419, "step": 203 }, { "epoch": 0.024754277393520204, "grad_norm": 0.8852109909057617, "learning_rate": 1.9746959832944357e-05, "loss": 0.2887484133243561, "step": 204 }, { "epoch": 0.024875621890547265, "grad_norm": 1.01993727684021, "learning_rate": 1.974450313229333e-05, "loss": 0.43222254514694214, "step": 205 }, { "epoch": 0.024996966387574322, "grad_norm": 0.8183336853981018, "learning_rate": 1.9742046431642306e-05, "loss": 0.19282624125480652, "step": 206 }, { "epoch": 0.025118310884601383, "grad_norm": 1.5417038202285767, "learning_rate": 1.973958973099128e-05, "loss": 0.46405136585235596, "step": 207 }, { "epoch": 0.025239655381628444, "grad_norm": 1.2917892932891846, "learning_rate": 1.9737133030340254e-05, "loss": 0.23757943511009216, "step": 208 }, { "epoch": 0.025360999878655505, "grad_norm": 1.2024424076080322, "learning_rate": 1.9734676329689228e-05, "loss": 0.06261929869651794, "step": 209 }, { "epoch": 0.025482344375682562, "grad_norm": 1.0761046409606934, "learning_rate": 1.9732219629038203e-05, "loss": 0.2930389940738678, "step": 210 }, { "epoch": 0.025603688872709623, "grad_norm": 1.4010992050170898, "learning_rate": 1.9729762928387177e-05, "loss": 0.42875391244888306, "step": 211 }, { "epoch": 0.025725033369736684, "grad_norm": 1.1493103504180908, "learning_rate": 1.972730622773615e-05, "loss": 0.665216326713562, "step": 212 }, { "epoch": 0.02584637786676374, "grad_norm": 1.3462070226669312, "learning_rate": 1.9724849527085125e-05, "loss": 0.547272264957428, "step": 213 }, { "epoch": 0.0259677223637908, "grad_norm": 0.8287025690078735, "learning_rate": 1.97223928264341e-05, "loss": 0.1717478632926941, "step": 214 }, { "epoch": 0.026089066860817862, "grad_norm": 1.4044225215911865, "learning_rate": 1.9719936125783074e-05, "loss": 0.5439479351043701, "step": 215 }, { "epoch": 0.026210411357844923, "grad_norm": 1.362339973449707, "learning_rate": 1.9717479425132048e-05, "loss": 0.38482236862182617, "step": 216 }, { "epoch": 0.02633175585487198, "grad_norm": 1.0870776176452637, "learning_rate": 1.9715022724481022e-05, "loss": 0.2305067777633667, "step": 217 }, { "epoch": 0.02645310035189904, "grad_norm": 1.8639475107192993, "learning_rate": 1.9712566023829997e-05, "loss": 0.7351633906364441, "step": 218 }, { "epoch": 0.026574444848926102, "grad_norm": 1.2605034112930298, "learning_rate": 1.971010932317897e-05, "loss": 0.3830586373806, "step": 219 }, { "epoch": 0.02669578934595316, "grad_norm": 1.146794080734253, "learning_rate": 1.9707652622527945e-05, "loss": 0.4568835198879242, "step": 220 }, { "epoch": 0.02681713384298022, "grad_norm": 1.3597760200500488, "learning_rate": 1.970519592187692e-05, "loss": 0.3955010175704956, "step": 221 }, { "epoch": 0.02693847834000728, "grad_norm": 1.0351749658584595, "learning_rate": 1.9702739221225894e-05, "loss": 0.24032047390937805, "step": 222 }, { "epoch": 0.027059822837034342, "grad_norm": 1.7738534212112427, "learning_rate": 1.9700282520574868e-05, "loss": 0.4054880142211914, "step": 223 }, { "epoch": 0.0271811673340614, "grad_norm": 1.0380606651306152, "learning_rate": 1.9697825819923842e-05, "loss": 0.3499588668346405, "step": 224 }, { "epoch": 0.02730251183108846, "grad_norm": 0.9284490942955017, "learning_rate": 1.9695369119272816e-05, "loss": 0.1194787323474884, "step": 225 }, { "epoch": 0.02742385632811552, "grad_norm": 1.1300830841064453, "learning_rate": 1.969291241862179e-05, "loss": 0.2504207193851471, "step": 226 }, { "epoch": 0.027545200825142578, "grad_norm": 1.2859063148498535, "learning_rate": 1.969045571797077e-05, "loss": 0.7922459840774536, "step": 227 }, { "epoch": 0.02766654532216964, "grad_norm": 0.8715733289718628, "learning_rate": 1.9687999017319743e-05, "loss": 0.157515287399292, "step": 228 }, { "epoch": 0.0277878898191967, "grad_norm": 0.7158194184303284, "learning_rate": 1.9685542316668717e-05, "loss": 0.0951254814863205, "step": 229 }, { "epoch": 0.02790923431622376, "grad_norm": 1.2676968574523926, "learning_rate": 1.968308561601769e-05, "loss": 0.3762540817260742, "step": 230 }, { "epoch": 0.028030578813250818, "grad_norm": 1.142686367034912, "learning_rate": 1.9680628915366665e-05, "loss": 0.4209720194339752, "step": 231 }, { "epoch": 0.02815192331027788, "grad_norm": 0.9411739706993103, "learning_rate": 1.967817221471564e-05, "loss": 0.19837401807308197, "step": 232 }, { "epoch": 0.02827326780730494, "grad_norm": 1.1360442638397217, "learning_rate": 1.9675715514064614e-05, "loss": 0.2968640923500061, "step": 233 }, { "epoch": 0.028394612304332, "grad_norm": 1.573232650756836, "learning_rate": 1.9673258813413588e-05, "loss": 0.45530423521995544, "step": 234 }, { "epoch": 0.028515956801359058, "grad_norm": 1.6560251712799072, "learning_rate": 1.9670802112762562e-05, "loss": 0.3799901604652405, "step": 235 }, { "epoch": 0.02863730129838612, "grad_norm": 0.7339390516281128, "learning_rate": 1.9668345412111537e-05, "loss": 0.3144574463367462, "step": 236 }, { "epoch": 0.02875864579541318, "grad_norm": 1.104168176651001, "learning_rate": 1.966588871146051e-05, "loss": 0.3688885569572449, "step": 237 }, { "epoch": 0.028879990292440236, "grad_norm": 0.6204283237457275, "learning_rate": 1.9663432010809485e-05, "loss": 0.07376033812761307, "step": 238 }, { "epoch": 0.029001334789467297, "grad_norm": 1.2197209596633911, "learning_rate": 1.966097531015846e-05, "loss": 0.6477288603782654, "step": 239 }, { "epoch": 0.029122679286494358, "grad_norm": 1.0721914768218994, "learning_rate": 1.9658518609507434e-05, "loss": 0.2640606164932251, "step": 240 }, { "epoch": 0.02924402378352142, "grad_norm": 0.9805510640144348, "learning_rate": 1.9656061908856408e-05, "loss": 0.4014909267425537, "step": 241 }, { "epoch": 0.029365368280548476, "grad_norm": 0.8440432548522949, "learning_rate": 1.9653605208205382e-05, "loss": 0.0517299547791481, "step": 242 }, { "epoch": 0.029486712777575537, "grad_norm": 1.2538248300552368, "learning_rate": 1.9651148507554356e-05, "loss": 0.46744751930236816, "step": 243 }, { "epoch": 0.029608057274602598, "grad_norm": 1.3388352394104004, "learning_rate": 1.964869180690333e-05, "loss": 0.6701927185058594, "step": 244 }, { "epoch": 0.029729401771629655, "grad_norm": 1.1601953506469727, "learning_rate": 1.9646235106252305e-05, "loss": 0.5082125067710876, "step": 245 }, { "epoch": 0.029850746268656716, "grad_norm": 1.445131778717041, "learning_rate": 1.964377840560128e-05, "loss": 0.5758916139602661, "step": 246 }, { "epoch": 0.029972090765683777, "grad_norm": 0.9608274698257446, "learning_rate": 1.9641321704950253e-05, "loss": 0.47571951150894165, "step": 247 }, { "epoch": 0.030093435262710837, "grad_norm": 1.0206998586654663, "learning_rate": 1.9638865004299228e-05, "loss": 0.16756445169448853, "step": 248 }, { "epoch": 0.030214779759737895, "grad_norm": 1.00753653049469, "learning_rate": 1.9636408303648202e-05, "loss": 0.2088935673236847, "step": 249 }, { "epoch": 0.030336124256764956, "grad_norm": 1.2297927141189575, "learning_rate": 1.9633951602997176e-05, "loss": 0.904977023601532, "step": 250 }, { "epoch": 0.030457468753792016, "grad_norm": 0.9755082726478577, "learning_rate": 1.963149490234615e-05, "loss": 0.15337421000003815, "step": 251 }, { "epoch": 0.030578813250819074, "grad_norm": 1.074012279510498, "learning_rate": 1.9629038201695125e-05, "loss": 0.6006253957748413, "step": 252 }, { "epoch": 0.030700157747846134, "grad_norm": 0.9824368953704834, "learning_rate": 1.96265815010441e-05, "loss": 0.379146933555603, "step": 253 }, { "epoch": 0.030821502244873195, "grad_norm": 1.5277190208435059, "learning_rate": 1.9624124800393073e-05, "loss": 0.6424251198768616, "step": 254 }, { "epoch": 0.030942846741900256, "grad_norm": 0.8885117769241333, "learning_rate": 1.9621668099742048e-05, "loss": 0.28905048966407776, "step": 255 }, { "epoch": 0.031064191238927313, "grad_norm": 0.633774995803833, "learning_rate": 1.9619211399091022e-05, "loss": 0.045177094638347626, "step": 256 }, { "epoch": 0.031185535735954374, "grad_norm": 1.156493902206421, "learning_rate": 1.9616754698439996e-05, "loss": 0.2632194757461548, "step": 257 }, { "epoch": 0.031306880232981435, "grad_norm": 1.1284003257751465, "learning_rate": 1.961429799778897e-05, "loss": 0.3057049512863159, "step": 258 }, { "epoch": 0.031428224730008496, "grad_norm": 0.7986247539520264, "learning_rate": 1.9611841297137945e-05, "loss": 0.20919805765151978, "step": 259 }, { "epoch": 0.03154956922703556, "grad_norm": 0.9055368304252625, "learning_rate": 1.960938459648692e-05, "loss": 0.39165881276130676, "step": 260 }, { "epoch": 0.03167091372406261, "grad_norm": 1.911102056503296, "learning_rate": 1.9606927895835893e-05, "loss": 0.45982038974761963, "step": 261 }, { "epoch": 0.03179225822108967, "grad_norm": 0.8879384398460388, "learning_rate": 1.9604471195184867e-05, "loss": 0.29178065061569214, "step": 262 }, { "epoch": 0.03191360271811673, "grad_norm": 1.4164674282073975, "learning_rate": 1.960201449453384e-05, "loss": 0.16232866048812866, "step": 263 }, { "epoch": 0.03203494721514379, "grad_norm": 1.9253220558166504, "learning_rate": 1.9599557793882816e-05, "loss": 0.42316365242004395, "step": 264 }, { "epoch": 0.032156291712170854, "grad_norm": 0.7398253083229065, "learning_rate": 1.959710109323179e-05, "loss": 0.23177435994148254, "step": 265 }, { "epoch": 0.032277636209197914, "grad_norm": 1.0191659927368164, "learning_rate": 1.9594644392580768e-05, "loss": 0.26261794567108154, "step": 266 }, { "epoch": 0.032398980706224975, "grad_norm": 1.2973541021347046, "learning_rate": 1.9592187691929742e-05, "loss": 0.3316439986228943, "step": 267 }, { "epoch": 0.032520325203252036, "grad_norm": 1.374732255935669, "learning_rate": 1.9589730991278716e-05, "loss": 0.4202871322631836, "step": 268 }, { "epoch": 0.03264166970027909, "grad_norm": 1.5805230140686035, "learning_rate": 1.958727429062769e-05, "loss": 0.5804644823074341, "step": 269 }, { "epoch": 0.03276301419730615, "grad_norm": 0.008758433163166046, "learning_rate": 1.9584817589976665e-05, "loss": 0.00015381905541289598, "step": 270 }, { "epoch": 0.03288435869433321, "grad_norm": 0.9753175377845764, "learning_rate": 1.958236088932564e-05, "loss": 0.2967366576194763, "step": 271 }, { "epoch": 0.03300570319136027, "grad_norm": 1.6118595600128174, "learning_rate": 1.9579904188674613e-05, "loss": 0.33840176463127136, "step": 272 }, { "epoch": 0.03312704768838733, "grad_norm": 1.6024789810180664, "learning_rate": 1.9577447488023588e-05, "loss": 0.6967238187789917, "step": 273 }, { "epoch": 0.033248392185414394, "grad_norm": 1.214158535003662, "learning_rate": 1.9574990787372562e-05, "loss": 0.5965083837509155, "step": 274 }, { "epoch": 0.033369736682441455, "grad_norm": 1.519809603691101, "learning_rate": 1.9572534086721536e-05, "loss": 0.4016053080558777, "step": 275 }, { "epoch": 0.03349108117946851, "grad_norm": 1.6006823778152466, "learning_rate": 1.957007738607051e-05, "loss": 0.39769408106803894, "step": 276 }, { "epoch": 0.03361242567649557, "grad_norm": 0.9429724216461182, "learning_rate": 1.9567620685419485e-05, "loss": 0.20001475512981415, "step": 277 }, { "epoch": 0.03373377017352263, "grad_norm": 1.666814923286438, "learning_rate": 1.956516398476846e-05, "loss": 0.34259510040283203, "step": 278 }, { "epoch": 0.03385511467054969, "grad_norm": 0.3574555814266205, "learning_rate": 1.9562707284117433e-05, "loss": 0.017914820462465286, "step": 279 }, { "epoch": 0.03397645916757675, "grad_norm": 1.5245598554611206, "learning_rate": 1.9560250583466407e-05, "loss": 0.5376090407371521, "step": 280 }, { "epoch": 0.03409780366460381, "grad_norm": 1.0760517120361328, "learning_rate": 1.955779388281538e-05, "loss": 0.2445524036884308, "step": 281 }, { "epoch": 0.03421914816163087, "grad_norm": 1.4953891038894653, "learning_rate": 1.9555337182164356e-05, "loss": 0.5643976330757141, "step": 282 }, { "epoch": 0.03434049265865793, "grad_norm": 1.616189956665039, "learning_rate": 1.955288048151333e-05, "loss": 0.2937336564064026, "step": 283 }, { "epoch": 0.03446183715568499, "grad_norm": 0.9644538164138794, "learning_rate": 1.9550423780862304e-05, "loss": 0.125191330909729, "step": 284 }, { "epoch": 0.03458318165271205, "grad_norm": 1.649443507194519, "learning_rate": 1.954796708021128e-05, "loss": 0.5455986857414246, "step": 285 }, { "epoch": 0.03470452614973911, "grad_norm": 1.6259334087371826, "learning_rate": 1.9545510379560253e-05, "loss": 0.42649298906326294, "step": 286 }, { "epoch": 0.03482587064676617, "grad_norm": 1.2836439609527588, "learning_rate": 1.9543053678909227e-05, "loss": 0.5799545645713806, "step": 287 }, { "epoch": 0.03494721514379323, "grad_norm": 0.9867308735847473, "learning_rate": 1.95405969782582e-05, "loss": 0.39529839158058167, "step": 288 }, { "epoch": 0.03506855964082029, "grad_norm": 1.5423862934112549, "learning_rate": 1.9538140277607176e-05, "loss": 0.35383814573287964, "step": 289 }, { "epoch": 0.035189904137847346, "grad_norm": 1.2744529247283936, "learning_rate": 1.953568357695615e-05, "loss": 0.22410523891448975, "step": 290 }, { "epoch": 0.035311248634874406, "grad_norm": 1.787878394126892, "learning_rate": 1.9533226876305124e-05, "loss": 0.6131466031074524, "step": 291 }, { "epoch": 0.03543259313190147, "grad_norm": 1.1000661849975586, "learning_rate": 1.95307701756541e-05, "loss": 0.3855549097061157, "step": 292 }, { "epoch": 0.03555393762892853, "grad_norm": 1.2115724086761475, "learning_rate": 1.9528313475003073e-05, "loss": 0.4426603317260742, "step": 293 }, { "epoch": 0.03567528212595559, "grad_norm": 1.5347511768341064, "learning_rate": 1.9525856774352047e-05, "loss": 0.4702164828777313, "step": 294 }, { "epoch": 0.03579662662298265, "grad_norm": 4.0347466468811035, "learning_rate": 1.952340007370102e-05, "loss": 0.2603279948234558, "step": 295 }, { "epoch": 0.03591797112000971, "grad_norm": 1.5106102228164673, "learning_rate": 1.9520943373049996e-05, "loss": 0.5465511083602905, "step": 296 }, { "epoch": 0.036039315617036764, "grad_norm": 1.0082039833068848, "learning_rate": 1.951848667239897e-05, "loss": 0.05971769616007805, "step": 297 }, { "epoch": 0.036160660114063825, "grad_norm": 1.33002769947052, "learning_rate": 1.9516029971747944e-05, "loss": 0.44476959109306335, "step": 298 }, { "epoch": 0.036282004611090886, "grad_norm": 1.1616014242172241, "learning_rate": 1.951357327109692e-05, "loss": 0.29045650362968445, "step": 299 }, { "epoch": 0.03640334910811795, "grad_norm": 1.1139581203460693, "learning_rate": 1.9511116570445893e-05, "loss": 0.1310575008392334, "step": 300 }, { "epoch": 0.03652469360514501, "grad_norm": 1.2292546033859253, "learning_rate": 1.9508659869794867e-05, "loss": 0.2641526162624359, "step": 301 }, { "epoch": 0.03664603810217207, "grad_norm": 0.9400305151939392, "learning_rate": 1.950620316914384e-05, "loss": 0.19729545712471008, "step": 302 }, { "epoch": 0.03676738259919913, "grad_norm": 1.266480565071106, "learning_rate": 1.9503746468492815e-05, "loss": 0.44374752044677734, "step": 303 }, { "epoch": 0.03688872709622618, "grad_norm": 0.6891433596611023, "learning_rate": 1.950128976784179e-05, "loss": 0.055492326617240906, "step": 304 }, { "epoch": 0.037010071593253244, "grad_norm": 1.0153636932373047, "learning_rate": 1.9498833067190764e-05, "loss": 0.16541236639022827, "step": 305 }, { "epoch": 0.037131416090280304, "grad_norm": 1.5183488130569458, "learning_rate": 1.9496376366539738e-05, "loss": 0.1984182596206665, "step": 306 }, { "epoch": 0.037252760587307365, "grad_norm": 0.5728133320808411, "learning_rate": 1.9493919665888712e-05, "loss": 0.04827238991856575, "step": 307 }, { "epoch": 0.037374105084334426, "grad_norm": 1.2158844470977783, "learning_rate": 1.9491462965237687e-05, "loss": 0.20424340665340424, "step": 308 }, { "epoch": 0.03749544958136149, "grad_norm": 1.7594895362854004, "learning_rate": 1.948900626458666e-05, "loss": 0.3363400995731354, "step": 309 }, { "epoch": 0.03761679407838855, "grad_norm": 1.7387893199920654, "learning_rate": 1.9486549563935635e-05, "loss": 0.5342078804969788, "step": 310 }, { "epoch": 0.0377381385754156, "grad_norm": 0.7087361812591553, "learning_rate": 1.948409286328461e-05, "loss": 0.054663654416799545, "step": 311 }, { "epoch": 0.03785948307244266, "grad_norm": 1.1876829862594604, "learning_rate": 1.9481636162633584e-05, "loss": 0.2173326164484024, "step": 312 }, { "epoch": 0.03798082756946972, "grad_norm": 1.8285133838653564, "learning_rate": 1.9479179461982558e-05, "loss": 0.2650899589061737, "step": 313 }, { "epoch": 0.038102172066496784, "grad_norm": 1.2233879566192627, "learning_rate": 1.9476722761331532e-05, "loss": 0.36404329538345337, "step": 314 }, { "epoch": 0.038223516563523845, "grad_norm": 1.1021192073822021, "learning_rate": 1.9474266060680506e-05, "loss": 0.5485677123069763, "step": 315 }, { "epoch": 0.038344861060550905, "grad_norm": 1.15021812915802, "learning_rate": 1.947180936002948e-05, "loss": 0.39669936895370483, "step": 316 }, { "epoch": 0.038466205557577966, "grad_norm": 1.1393232345581055, "learning_rate": 1.9469352659378455e-05, "loss": 0.2991783618927002, "step": 317 }, { "epoch": 0.03858755005460503, "grad_norm": 0.939648449420929, "learning_rate": 1.946689595872743e-05, "loss": 0.15160952508449554, "step": 318 }, { "epoch": 0.03870889455163208, "grad_norm": 1.3451627492904663, "learning_rate": 1.9464439258076403e-05, "loss": 0.5237815380096436, "step": 319 }, { "epoch": 0.03883023904865914, "grad_norm": 1.3838374614715576, "learning_rate": 1.9461982557425378e-05, "loss": 0.19870781898498535, "step": 320 }, { "epoch": 0.0389515835456862, "grad_norm": 1.3146677017211914, "learning_rate": 1.9459525856774352e-05, "loss": 0.33396434783935547, "step": 321 }, { "epoch": 0.03907292804271326, "grad_norm": 0.8067727088928223, "learning_rate": 1.9457069156123326e-05, "loss": 0.49019211530685425, "step": 322 }, { "epoch": 0.039194272539740324, "grad_norm": 1.2528276443481445, "learning_rate": 1.94546124554723e-05, "loss": 0.2535683810710907, "step": 323 }, { "epoch": 0.039315617036767385, "grad_norm": 1.3783961534500122, "learning_rate": 1.9452155754821275e-05, "loss": 0.5087883472442627, "step": 324 }, { "epoch": 0.039436961533794446, "grad_norm": 1.6668391227722168, "learning_rate": 1.944969905417025e-05, "loss": 0.5472003817558289, "step": 325 }, { "epoch": 0.0395583060308215, "grad_norm": 1.0295048952102661, "learning_rate": 1.9447242353519223e-05, "loss": 0.40367716550827026, "step": 326 }, { "epoch": 0.03967965052784856, "grad_norm": 1.3721429109573364, "learning_rate": 1.9444785652868198e-05, "loss": 0.6061164736747742, "step": 327 }, { "epoch": 0.03980099502487562, "grad_norm": 1.2872403860092163, "learning_rate": 1.9442328952217172e-05, "loss": 0.23954078555107117, "step": 328 }, { "epoch": 0.03992233952190268, "grad_norm": 1.9958724975585938, "learning_rate": 1.9439872251566146e-05, "loss": 0.6404703855514526, "step": 329 }, { "epoch": 0.04004368401892974, "grad_norm": 1.2831591367721558, "learning_rate": 1.943741555091512e-05, "loss": 0.5384237766265869, "step": 330 }, { "epoch": 0.040165028515956804, "grad_norm": 0.716844379901886, "learning_rate": 1.9434958850264095e-05, "loss": 0.0433930829167366, "step": 331 }, { "epoch": 0.040286373012983864, "grad_norm": 1.186511754989624, "learning_rate": 1.9432502149613072e-05, "loss": 0.5770034193992615, "step": 332 }, { "epoch": 0.04040771751001092, "grad_norm": 1.5067001581192017, "learning_rate": 1.9430045448962046e-05, "loss": 0.4589945673942566, "step": 333 }, { "epoch": 0.04052906200703798, "grad_norm": 0.7389973402023315, "learning_rate": 1.942758874831102e-05, "loss": 0.17588192224502563, "step": 334 }, { "epoch": 0.04065040650406504, "grad_norm": 1.0256229639053345, "learning_rate": 1.9425132047659995e-05, "loss": 0.27561086416244507, "step": 335 }, { "epoch": 0.0407717510010921, "grad_norm": 1.3332164287567139, "learning_rate": 1.942267534700897e-05, "loss": 0.681479275226593, "step": 336 }, { "epoch": 0.04089309549811916, "grad_norm": 1.355659008026123, "learning_rate": 1.9420218646357944e-05, "loss": 0.2441406548023224, "step": 337 }, { "epoch": 0.04101443999514622, "grad_norm": 0.6369838714599609, "learning_rate": 1.9417761945706918e-05, "loss": 0.13781212270259857, "step": 338 }, { "epoch": 0.04113578449217328, "grad_norm": 0.9141871333122253, "learning_rate": 1.9415305245055892e-05, "loss": 0.41506925225257874, "step": 339 }, { "epoch": 0.04125712898920034, "grad_norm": 0.7715675830841064, "learning_rate": 1.9412848544404866e-05, "loss": 0.2426312416791916, "step": 340 }, { "epoch": 0.0413784734862274, "grad_norm": 1.2251585721969604, "learning_rate": 1.941039184375384e-05, "loss": 0.5912874937057495, "step": 341 }, { "epoch": 0.04149981798325446, "grad_norm": 1.3260804414749146, "learning_rate": 1.9407935143102815e-05, "loss": 0.40357863903045654, "step": 342 }, { "epoch": 0.04162116248028152, "grad_norm": 1.37641179561615, "learning_rate": 1.940547844245179e-05, "loss": 0.4066475033760071, "step": 343 }, { "epoch": 0.04174250697730858, "grad_norm": 1.1889725923538208, "learning_rate": 1.9403021741800763e-05, "loss": 0.5459408760070801, "step": 344 }, { "epoch": 0.04186385147433564, "grad_norm": 1.1155644655227661, "learning_rate": 1.9400565041149738e-05, "loss": 0.41826558113098145, "step": 345 }, { "epoch": 0.0419851959713627, "grad_norm": 1.9572199583053589, "learning_rate": 1.9398108340498712e-05, "loss": 0.505934476852417, "step": 346 }, { "epoch": 0.042106540468389755, "grad_norm": 1.1355222463607788, "learning_rate": 1.9395651639847686e-05, "loss": 0.3404514193534851, "step": 347 }, { "epoch": 0.042227884965416816, "grad_norm": 0.9914278388023376, "learning_rate": 1.939319493919666e-05, "loss": 0.3612911105155945, "step": 348 }, { "epoch": 0.04234922946244388, "grad_norm": 1.6539616584777832, "learning_rate": 1.9390738238545635e-05, "loss": 0.31920090317726135, "step": 349 }, { "epoch": 0.04247057395947094, "grad_norm": 1.2495417594909668, "learning_rate": 1.938828153789461e-05, "loss": 0.2993244230747223, "step": 350 }, { "epoch": 0.042591918456498, "grad_norm": 1.154098629951477, "learning_rate": 1.9385824837243583e-05, "loss": 0.27929073572158813, "step": 351 }, { "epoch": 0.04271326295352506, "grad_norm": 1.6769806146621704, "learning_rate": 1.9383368136592557e-05, "loss": 0.31991660594940186, "step": 352 }, { "epoch": 0.04283460745055212, "grad_norm": 1.692519187927246, "learning_rate": 1.938091143594153e-05, "loss": 0.6428519487380981, "step": 353 }, { "epoch": 0.042955951947579174, "grad_norm": 1.7793716192245483, "learning_rate": 1.9378454735290506e-05, "loss": 0.2733398973941803, "step": 354 }, { "epoch": 0.043077296444606235, "grad_norm": 1.1902222633361816, "learning_rate": 1.937599803463948e-05, "loss": 0.20086917281150818, "step": 355 }, { "epoch": 0.043198640941633296, "grad_norm": 0.530981183052063, "learning_rate": 1.9373541333988454e-05, "loss": 0.03699813410639763, "step": 356 }, { "epoch": 0.043319985438660356, "grad_norm": 1.1668962240219116, "learning_rate": 1.937108463333743e-05, "loss": 0.6546061635017395, "step": 357 }, { "epoch": 0.04344132993568742, "grad_norm": 1.5194002389907837, "learning_rate": 1.9368627932686403e-05, "loss": 0.3529921770095825, "step": 358 }, { "epoch": 0.04356267443271448, "grad_norm": 1.5513792037963867, "learning_rate": 1.9366171232035377e-05, "loss": 0.3908025324344635, "step": 359 }, { "epoch": 0.04368401892974154, "grad_norm": 1.0645341873168945, "learning_rate": 1.936371453138435e-05, "loss": 0.25461000204086304, "step": 360 }, { "epoch": 0.04380536342676859, "grad_norm": 1.077723503112793, "learning_rate": 1.9361257830733326e-05, "loss": 0.5458009243011475, "step": 361 }, { "epoch": 0.04392670792379565, "grad_norm": 1.0532881021499634, "learning_rate": 1.93588011300823e-05, "loss": 0.31512653827667236, "step": 362 }, { "epoch": 0.044048052420822714, "grad_norm": 1.0603703260421753, "learning_rate": 1.9356344429431274e-05, "loss": 0.3389180302619934, "step": 363 }, { "epoch": 0.044169396917849775, "grad_norm": 1.0187443494796753, "learning_rate": 1.935388772878025e-05, "loss": 0.5617249011993408, "step": 364 }, { "epoch": 0.044290741414876836, "grad_norm": 1.0340298414230347, "learning_rate": 1.9351431028129223e-05, "loss": 0.22403107583522797, "step": 365 }, { "epoch": 0.0444120859119039, "grad_norm": 1.4951913356781006, "learning_rate": 1.9348974327478197e-05, "loss": 0.31615158915519714, "step": 366 }, { "epoch": 0.04453343040893096, "grad_norm": 1.050989031791687, "learning_rate": 1.934651762682717e-05, "loss": 0.46508675813674927, "step": 367 }, { "epoch": 0.04465477490595802, "grad_norm": 1.4088681936264038, "learning_rate": 1.9344060926176146e-05, "loss": 0.6532362103462219, "step": 368 }, { "epoch": 0.04477611940298507, "grad_norm": 0.6320997476577759, "learning_rate": 1.934160422552512e-05, "loss": 0.2274395227432251, "step": 369 }, { "epoch": 0.04489746390001213, "grad_norm": 1.3754754066467285, "learning_rate": 1.9339147524874094e-05, "loss": 0.7371792793273926, "step": 370 }, { "epoch": 0.045018808397039194, "grad_norm": 0.7076073884963989, "learning_rate": 1.9336690824223068e-05, "loss": 0.06037794053554535, "step": 371 }, { "epoch": 0.045140152894066254, "grad_norm": 0.9627287983894348, "learning_rate": 1.9334234123572046e-05, "loss": 0.33090823888778687, "step": 372 }, { "epoch": 0.045261497391093315, "grad_norm": 1.2749892473220825, "learning_rate": 1.933177742292102e-05, "loss": 0.17351853847503662, "step": 373 }, { "epoch": 0.045382841888120376, "grad_norm": 1.4868316650390625, "learning_rate": 1.9329320722269994e-05, "loss": 0.4319830536842346, "step": 374 }, { "epoch": 0.04550418638514744, "grad_norm": 1.2494465112686157, "learning_rate": 1.932686402161897e-05, "loss": 0.26532214879989624, "step": 375 }, { "epoch": 0.04562553088217449, "grad_norm": 1.0503991842269897, "learning_rate": 1.9324407320967943e-05, "loss": 0.2953014671802521, "step": 376 }, { "epoch": 0.04574687537920155, "grad_norm": 1.8432469367980957, "learning_rate": 1.9321950620316917e-05, "loss": 0.4219951629638672, "step": 377 }, { "epoch": 0.04586821987622861, "grad_norm": 1.6262578964233398, "learning_rate": 1.931949391966589e-05, "loss": 0.2898225784301758, "step": 378 }, { "epoch": 0.04598956437325567, "grad_norm": 0.6830425262451172, "learning_rate": 1.9317037219014866e-05, "loss": 0.07195363193750381, "step": 379 }, { "epoch": 0.046110908870282734, "grad_norm": 2.3727402687072754, "learning_rate": 1.931458051836384e-05, "loss": 0.5569823980331421, "step": 380 }, { "epoch": 0.046232253367309795, "grad_norm": 1.520284652709961, "learning_rate": 1.9312123817712814e-05, "loss": 0.5095877647399902, "step": 381 }, { "epoch": 0.046353597864336855, "grad_norm": 1.143632411956787, "learning_rate": 1.930966711706179e-05, "loss": 0.2604987621307373, "step": 382 }, { "epoch": 0.04647494236136391, "grad_norm": 1.01264226436615, "learning_rate": 1.9307210416410763e-05, "loss": 0.3068877160549164, "step": 383 }, { "epoch": 0.04659628685839097, "grad_norm": 1.8846100568771362, "learning_rate": 1.9304753715759737e-05, "loss": 0.3760281503200531, "step": 384 }, { "epoch": 0.04671763135541803, "grad_norm": 1.457566261291504, "learning_rate": 1.930229701510871e-05, "loss": 0.36060136556625366, "step": 385 }, { "epoch": 0.04683897585244509, "grad_norm": 1.2625572681427002, "learning_rate": 1.9299840314457686e-05, "loss": 0.35702618956565857, "step": 386 }, { "epoch": 0.04696032034947215, "grad_norm": 1.4238474369049072, "learning_rate": 1.929738361380666e-05, "loss": 0.4716323912143707, "step": 387 }, { "epoch": 0.04708166484649921, "grad_norm": 1.07540762424469, "learning_rate": 1.9294926913155634e-05, "loss": 0.3101731538772583, "step": 388 }, { "epoch": 0.047203009343526274, "grad_norm": 0.9744298458099365, "learning_rate": 1.929247021250461e-05, "loss": 0.12369873374700546, "step": 389 }, { "epoch": 0.04732435384055333, "grad_norm": 0.677290678024292, "learning_rate": 1.9290013511853583e-05, "loss": 0.13228675723075867, "step": 390 }, { "epoch": 0.04744569833758039, "grad_norm": 1.0706666707992554, "learning_rate": 1.9287556811202557e-05, "loss": 0.171414315700531, "step": 391 }, { "epoch": 0.04756704283460745, "grad_norm": 1.5353080034255981, "learning_rate": 1.928510011055153e-05, "loss": 0.24053412675857544, "step": 392 }, { "epoch": 0.04768838733163451, "grad_norm": 2.8229808807373047, "learning_rate": 1.9282643409900505e-05, "loss": 0.4611489176750183, "step": 393 }, { "epoch": 0.04780973182866157, "grad_norm": 1.421183466911316, "learning_rate": 1.928018670924948e-05, "loss": 0.26332008838653564, "step": 394 }, { "epoch": 0.04793107632568863, "grad_norm": 1.5224473476409912, "learning_rate": 1.9277730008598454e-05, "loss": 0.6890032291412354, "step": 395 }, { "epoch": 0.04805242082271569, "grad_norm": 1.2183220386505127, "learning_rate": 1.9275273307947428e-05, "loss": 0.24116888642311096, "step": 396 }, { "epoch": 0.048173765319742747, "grad_norm": 1.4274941682815552, "learning_rate": 1.9272816607296402e-05, "loss": 0.686172604560852, "step": 397 }, { "epoch": 0.04829510981676981, "grad_norm": 1.118133306503296, "learning_rate": 1.9270359906645377e-05, "loss": 0.2652670443058014, "step": 398 }, { "epoch": 0.04841645431379687, "grad_norm": 1.4742660522460938, "learning_rate": 1.926790320599435e-05, "loss": 0.4737281799316406, "step": 399 }, { "epoch": 0.04853779881082393, "grad_norm": 1.1788684129714966, "learning_rate": 1.9265446505343325e-05, "loss": 0.21296805143356323, "step": 400 }, { "epoch": 0.04865914330785099, "grad_norm": 1.2983324527740479, "learning_rate": 1.92629898046923e-05, "loss": 0.16385263204574585, "step": 401 }, { "epoch": 0.04878048780487805, "grad_norm": 0.004901287145912647, "learning_rate": 1.9260533104041274e-05, "loss": 9.251898882212117e-05, "step": 402 }, { "epoch": 0.04890183230190511, "grad_norm": 1.0808979272842407, "learning_rate": 1.9258076403390248e-05, "loss": 0.28296127915382385, "step": 403 }, { "epoch": 0.049023176798932165, "grad_norm": 1.846316933631897, "learning_rate": 1.9255619702739222e-05, "loss": 0.3567678928375244, "step": 404 }, { "epoch": 0.049144521295959226, "grad_norm": 1.4769459962844849, "learning_rate": 1.9253163002088196e-05, "loss": 0.6017166972160339, "step": 405 }, { "epoch": 0.04926586579298629, "grad_norm": 1.3384523391723633, "learning_rate": 1.925070630143717e-05, "loss": 0.3126193881034851, "step": 406 }, { "epoch": 0.04938721029001335, "grad_norm": 1.2163547277450562, "learning_rate": 1.9248249600786145e-05, "loss": 0.25492674112319946, "step": 407 }, { "epoch": 0.04950855478704041, "grad_norm": 1.340456247329712, "learning_rate": 1.924579290013512e-05, "loss": 0.6409405469894409, "step": 408 }, { "epoch": 0.04962989928406747, "grad_norm": 1.3869006633758545, "learning_rate": 1.9243336199484093e-05, "loss": 0.3362444043159485, "step": 409 }, { "epoch": 0.04975124378109453, "grad_norm": 1.314749002456665, "learning_rate": 1.9240879498833068e-05, "loss": 0.5342424511909485, "step": 410 }, { "epoch": 0.049872588278121584, "grad_norm": 2.0399301052093506, "learning_rate": 1.9238422798182045e-05, "loss": 0.19150543212890625, "step": 411 }, { "epoch": 0.049993932775148645, "grad_norm": 1.3778923749923706, "learning_rate": 1.923596609753102e-05, "loss": 0.40402331948280334, "step": 412 }, { "epoch": 0.050115277272175705, "grad_norm": 1.1802595853805542, "learning_rate": 1.9233509396879994e-05, "loss": 0.45945459604263306, "step": 413 }, { "epoch": 0.050236621769202766, "grad_norm": 1.1052002906799316, "learning_rate": 1.9231052696228968e-05, "loss": 0.08993005007505417, "step": 414 }, { "epoch": 0.05035796626622983, "grad_norm": 1.3626540899276733, "learning_rate": 1.9228595995577942e-05, "loss": 0.6543693542480469, "step": 415 }, { "epoch": 0.05047931076325689, "grad_norm": 1.4075640439987183, "learning_rate": 1.9226139294926917e-05, "loss": 0.24491381645202637, "step": 416 }, { "epoch": 0.05060065526028395, "grad_norm": 1.5025800466537476, "learning_rate": 1.922368259427589e-05, "loss": 0.5007286071777344, "step": 417 }, { "epoch": 0.05072199975731101, "grad_norm": 2.165350914001465, "learning_rate": 1.9221225893624865e-05, "loss": 0.5292732119560242, "step": 418 }, { "epoch": 0.05084334425433806, "grad_norm": 1.0646313428878784, "learning_rate": 1.921876919297384e-05, "loss": 0.31505468487739563, "step": 419 }, { "epoch": 0.050964688751365124, "grad_norm": 1.194830298423767, "learning_rate": 1.9216312492322814e-05, "loss": 0.2772429883480072, "step": 420 }, { "epoch": 0.051086033248392185, "grad_norm": 1.535117745399475, "learning_rate": 1.9213855791671788e-05, "loss": 0.4633476734161377, "step": 421 }, { "epoch": 0.051207377745419246, "grad_norm": 1.5908533334732056, "learning_rate": 1.9211399091020762e-05, "loss": 0.6686984300613403, "step": 422 }, { "epoch": 0.051328722242446306, "grad_norm": 1.5840030908584595, "learning_rate": 1.9208942390369736e-05, "loss": 0.34995588660240173, "step": 423 }, { "epoch": 0.05145006673947337, "grad_norm": 1.0218498706817627, "learning_rate": 1.920648568971871e-05, "loss": 0.16546282172203064, "step": 424 }, { "epoch": 0.05157141123650043, "grad_norm": 1.6098021268844604, "learning_rate": 1.9204028989067685e-05, "loss": 0.5376899242401123, "step": 425 }, { "epoch": 0.05169275573352748, "grad_norm": 1.1683999300003052, "learning_rate": 1.920157228841666e-05, "loss": 0.31663447618484497, "step": 426 }, { "epoch": 0.05181410023055454, "grad_norm": 0.8926824927330017, "learning_rate": 1.9199115587765634e-05, "loss": 0.08985061198472977, "step": 427 }, { "epoch": 0.0519354447275816, "grad_norm": 0.5471736192703247, "learning_rate": 1.9196658887114608e-05, "loss": 0.05214562639594078, "step": 428 }, { "epoch": 0.052056789224608664, "grad_norm": 1.3294402360916138, "learning_rate": 1.9194202186463582e-05, "loss": 0.2623544931411743, "step": 429 }, { "epoch": 0.052178133721635725, "grad_norm": 1.1779325008392334, "learning_rate": 1.9191745485812556e-05, "loss": 0.2422659695148468, "step": 430 }, { "epoch": 0.052299478218662786, "grad_norm": 1.0145118236541748, "learning_rate": 1.918928878516153e-05, "loss": 0.490875780582428, "step": 431 }, { "epoch": 0.052420822715689847, "grad_norm": 1.2716591358184814, "learning_rate": 1.9186832084510505e-05, "loss": 0.39811912178993225, "step": 432 }, { "epoch": 0.0525421672127169, "grad_norm": 0.8617557287216187, "learning_rate": 1.918437538385948e-05, "loss": 0.1596330851316452, "step": 433 }, { "epoch": 0.05266351170974396, "grad_norm": 1.282023310661316, "learning_rate": 1.9181918683208453e-05, "loss": 0.1750062257051468, "step": 434 }, { "epoch": 0.05278485620677102, "grad_norm": 1.3805968761444092, "learning_rate": 1.9179461982557428e-05, "loss": 0.6116123795509338, "step": 435 }, { "epoch": 0.05290620070379808, "grad_norm": 1.3073794841766357, "learning_rate": 1.9177005281906402e-05, "loss": 0.40776562690734863, "step": 436 }, { "epoch": 0.053027545200825144, "grad_norm": 1.3929219245910645, "learning_rate": 1.9174548581255376e-05, "loss": 0.2514992952346802, "step": 437 }, { "epoch": 0.053148889697852204, "grad_norm": 1.6806739568710327, "learning_rate": 1.917209188060435e-05, "loss": 0.4892512559890747, "step": 438 }, { "epoch": 0.053270234194879265, "grad_norm": 1.1705715656280518, "learning_rate": 1.9169635179953325e-05, "loss": 0.44782862067222595, "step": 439 }, { "epoch": 0.05339157869190632, "grad_norm": 1.4459282159805298, "learning_rate": 1.91671784793023e-05, "loss": 0.2908768951892853, "step": 440 }, { "epoch": 0.05351292318893338, "grad_norm": 1.9081748723983765, "learning_rate": 1.9164721778651273e-05, "loss": 0.5691676139831543, "step": 441 }, { "epoch": 0.05363426768596044, "grad_norm": 1.0076907873153687, "learning_rate": 1.9162265078000247e-05, "loss": 0.2442394495010376, "step": 442 }, { "epoch": 0.0537556121829875, "grad_norm": 1.482526421546936, "learning_rate": 1.915980837734922e-05, "loss": 0.07226604223251343, "step": 443 }, { "epoch": 0.05387695668001456, "grad_norm": 1.7005579471588135, "learning_rate": 1.9157351676698196e-05, "loss": 0.6055192351341248, "step": 444 }, { "epoch": 0.05399830117704162, "grad_norm": 1.4745745658874512, "learning_rate": 1.915489497604717e-05, "loss": 0.7092527151107788, "step": 445 }, { "epoch": 0.054119645674068684, "grad_norm": 1.3547440767288208, "learning_rate": 1.9152438275396144e-05, "loss": 0.596627414226532, "step": 446 }, { "epoch": 0.05424099017109574, "grad_norm": 1.1186338663101196, "learning_rate": 1.914998157474512e-05, "loss": 0.1992674469947815, "step": 447 }, { "epoch": 0.0543623346681228, "grad_norm": 0.933089017868042, "learning_rate": 1.9147524874094093e-05, "loss": 0.11702962219715118, "step": 448 }, { "epoch": 0.05448367916514986, "grad_norm": 1.5945713520050049, "learning_rate": 1.9145068173443067e-05, "loss": 0.5891073942184448, "step": 449 }, { "epoch": 0.05460502366217692, "grad_norm": 1.8687334060668945, "learning_rate": 1.914261147279204e-05, "loss": 0.5784303545951843, "step": 450 }, { "epoch": 0.05472636815920398, "grad_norm": 1.6527996063232422, "learning_rate": 1.9140154772141016e-05, "loss": 0.5086016654968262, "step": 451 }, { "epoch": 0.05484771265623104, "grad_norm": 1.261507272720337, "learning_rate": 1.913769807148999e-05, "loss": 0.5271745324134827, "step": 452 }, { "epoch": 0.0549690571532581, "grad_norm": 1.0135947465896606, "learning_rate": 1.9135241370838964e-05, "loss": 0.5260915160179138, "step": 453 }, { "epoch": 0.055090401650285156, "grad_norm": 1.6660957336425781, "learning_rate": 1.913278467018794e-05, "loss": 0.3362421989440918, "step": 454 }, { "epoch": 0.05521174614731222, "grad_norm": 1.1020255088806152, "learning_rate": 1.9130327969536913e-05, "loss": 0.09704497456550598, "step": 455 }, { "epoch": 0.05533309064433928, "grad_norm": 2.270763874053955, "learning_rate": 1.9127871268885887e-05, "loss": 0.6879133582115173, "step": 456 }, { "epoch": 0.05545443514136634, "grad_norm": 1.3732212781906128, "learning_rate": 1.912541456823486e-05, "loss": 0.3480108976364136, "step": 457 }, { "epoch": 0.0555757796383934, "grad_norm": 1.5962785482406616, "learning_rate": 1.9122957867583836e-05, "loss": 0.31269481778144836, "step": 458 }, { "epoch": 0.05569712413542046, "grad_norm": 1.1583328247070312, "learning_rate": 1.912050116693281e-05, "loss": 0.22761228680610657, "step": 459 }, { "epoch": 0.05581846863244752, "grad_norm": 0.7482461333274841, "learning_rate": 1.9118044466281784e-05, "loss": 0.062126971781253815, "step": 460 }, { "epoch": 0.05593981312947458, "grad_norm": 1.0668491125106812, "learning_rate": 1.9115587765630758e-05, "loss": 0.7171897888183594, "step": 461 }, { "epoch": 0.056061157626501636, "grad_norm": 1.107728362083435, "learning_rate": 1.9113131064979733e-05, "loss": 0.24725040793418884, "step": 462 }, { "epoch": 0.056182502123528696, "grad_norm": 1.5959255695343018, "learning_rate": 1.9110674364328707e-05, "loss": 0.6176046133041382, "step": 463 }, { "epoch": 0.05630384662055576, "grad_norm": 0.48865923285484314, "learning_rate": 1.910821766367768e-05, "loss": 0.10571181029081345, "step": 464 }, { "epoch": 0.05642519111758282, "grad_norm": 0.04387793317437172, "learning_rate": 1.9105760963026655e-05, "loss": 0.001076234970241785, "step": 465 }, { "epoch": 0.05654653561460988, "grad_norm": 1.5911579132080078, "learning_rate": 1.910330426237563e-05, "loss": 0.4412878751754761, "step": 466 }, { "epoch": 0.05666788011163694, "grad_norm": 1.2079088687896729, "learning_rate": 1.9100847561724604e-05, "loss": 0.1528819501399994, "step": 467 }, { "epoch": 0.056789224608664, "grad_norm": 1.6561167240142822, "learning_rate": 1.9098390861073578e-05, "loss": 0.39373156428337097, "step": 468 }, { "epoch": 0.056910569105691054, "grad_norm": 1.4517277479171753, "learning_rate": 1.9095934160422552e-05, "loss": 0.45171377062797546, "step": 469 }, { "epoch": 0.057031913602718115, "grad_norm": 1.3444980382919312, "learning_rate": 1.9093477459771527e-05, "loss": 0.3764626681804657, "step": 470 }, { "epoch": 0.057153258099745176, "grad_norm": 1.5290117263793945, "learning_rate": 1.90910207591205e-05, "loss": 0.41870003938674927, "step": 471 }, { "epoch": 0.05727460259677224, "grad_norm": 0.6672114133834839, "learning_rate": 1.9088564058469475e-05, "loss": 0.10688811540603638, "step": 472 }, { "epoch": 0.0573959470937993, "grad_norm": 1.296738624572754, "learning_rate": 1.908610735781845e-05, "loss": 0.4453229606151581, "step": 473 }, { "epoch": 0.05751729159082636, "grad_norm": 1.166654109954834, "learning_rate": 1.9083650657167424e-05, "loss": 0.3140110373497009, "step": 474 }, { "epoch": 0.05763863608785342, "grad_norm": 1.5547395944595337, "learning_rate": 1.9081193956516398e-05, "loss": 0.38892969489097595, "step": 475 }, { "epoch": 0.05775998058488047, "grad_norm": 1.580997109413147, "learning_rate": 1.9078737255865372e-05, "loss": 0.5940026640892029, "step": 476 }, { "epoch": 0.057881325081907534, "grad_norm": 1.5121760368347168, "learning_rate": 1.907628055521435e-05, "loss": 0.6734681725502014, "step": 477 }, { "epoch": 0.058002669578934594, "grad_norm": 1.4761507511138916, "learning_rate": 1.9073823854563324e-05, "loss": 0.3817492127418518, "step": 478 }, { "epoch": 0.058124014075961655, "grad_norm": 0.8833889961242676, "learning_rate": 1.90713671539123e-05, "loss": 0.11671547591686249, "step": 479 }, { "epoch": 0.058245358572988716, "grad_norm": 1.3200422525405884, "learning_rate": 1.9068910453261273e-05, "loss": 0.4326450228691101, "step": 480 }, { "epoch": 0.05836670307001578, "grad_norm": 1.7077932357788086, "learning_rate": 1.9066453752610247e-05, "loss": 0.2545737326145172, "step": 481 }, { "epoch": 0.05848804756704284, "grad_norm": 0.9082516431808472, "learning_rate": 1.906399705195922e-05, "loss": 0.3537253141403198, "step": 482 }, { "epoch": 0.05860939206406989, "grad_norm": 2.068510055541992, "learning_rate": 1.9061540351308195e-05, "loss": 0.6128970384597778, "step": 483 }, { "epoch": 0.05873073656109695, "grad_norm": 1.1531740427017212, "learning_rate": 1.905908365065717e-05, "loss": 0.1987893432378769, "step": 484 }, { "epoch": 0.05885208105812401, "grad_norm": 1.568968653678894, "learning_rate": 1.9056626950006144e-05, "loss": 0.21139925718307495, "step": 485 }, { "epoch": 0.058973425555151074, "grad_norm": 1.657418131828308, "learning_rate": 1.9054170249355118e-05, "loss": 0.5464147329330444, "step": 486 }, { "epoch": 0.059094770052178135, "grad_norm": 1.0412527322769165, "learning_rate": 1.9051713548704092e-05, "loss": 0.21365782618522644, "step": 487 }, { "epoch": 0.059216114549205195, "grad_norm": 0.047710780054330826, "learning_rate": 1.9049256848053067e-05, "loss": 0.0005216295248828828, "step": 488 }, { "epoch": 0.059337459046232256, "grad_norm": 1.0616328716278076, "learning_rate": 1.904680014740204e-05, "loss": 0.2274220585823059, "step": 489 }, { "epoch": 0.05945880354325931, "grad_norm": 0.7739994525909424, "learning_rate": 1.9044343446751015e-05, "loss": 0.13946875929832458, "step": 490 }, { "epoch": 0.05958014804028637, "grad_norm": 1.4279123544692993, "learning_rate": 1.904188674609999e-05, "loss": 0.20513373613357544, "step": 491 }, { "epoch": 0.05970149253731343, "grad_norm": 2.026980400085449, "learning_rate": 1.9039430045448964e-05, "loss": 0.31660470366477966, "step": 492 }, { "epoch": 0.05982283703434049, "grad_norm": 1.2501922845840454, "learning_rate": 1.9036973344797938e-05, "loss": 0.13497872650623322, "step": 493 }, { "epoch": 0.05994418153136755, "grad_norm": 2.2237708568573, "learning_rate": 1.9034516644146912e-05, "loss": 0.6203042268753052, "step": 494 }, { "epoch": 0.060065526028394614, "grad_norm": 1.9448051452636719, "learning_rate": 1.9032059943495886e-05, "loss": 0.23030954599380493, "step": 495 }, { "epoch": 0.060186870525421675, "grad_norm": 1.8851776123046875, "learning_rate": 1.902960324284486e-05, "loss": 0.3695673942565918, "step": 496 }, { "epoch": 0.06030821502244873, "grad_norm": 1.1489198207855225, "learning_rate": 1.9027146542193835e-05, "loss": 0.407935231924057, "step": 497 }, { "epoch": 0.06042955951947579, "grad_norm": 1.5059272050857544, "learning_rate": 1.902468984154281e-05, "loss": 0.46162354946136475, "step": 498 }, { "epoch": 0.06055090401650285, "grad_norm": 2.244837760925293, "learning_rate": 1.9022233140891784e-05, "loss": 0.5662449598312378, "step": 499 }, { "epoch": 0.06067224851352991, "grad_norm": 1.3596556186676025, "learning_rate": 1.9019776440240758e-05, "loss": 0.3611029386520386, "step": 500 }, { "epoch": 0.06079359301055697, "grad_norm": 1.878268837928772, "learning_rate": 1.9017319739589732e-05, "loss": 0.48691171407699585, "step": 501 }, { "epoch": 0.06091493750758403, "grad_norm": 1.615587830543518, "learning_rate": 1.9014863038938706e-05, "loss": 0.1986699253320694, "step": 502 }, { "epoch": 0.061036282004611093, "grad_norm": 1.384652018547058, "learning_rate": 1.901240633828768e-05, "loss": 0.4047502875328064, "step": 503 }, { "epoch": 0.06115762650163815, "grad_norm": 0.8918592929840088, "learning_rate": 1.9009949637636655e-05, "loss": 0.08136077225208282, "step": 504 }, { "epoch": 0.06127897099866521, "grad_norm": 1.2311476469039917, "learning_rate": 1.900749293698563e-05, "loss": 0.1510145664215088, "step": 505 }, { "epoch": 0.06140031549569227, "grad_norm": 1.1738781929016113, "learning_rate": 1.9005036236334603e-05, "loss": 0.14017607271671295, "step": 506 }, { "epoch": 0.06152165999271933, "grad_norm": 1.477210283279419, "learning_rate": 1.9002579535683578e-05, "loss": 0.38299548625946045, "step": 507 }, { "epoch": 0.06164300448974639, "grad_norm": 1.4116266965866089, "learning_rate": 1.9000122835032552e-05, "loss": 0.4138404130935669, "step": 508 }, { "epoch": 0.06176434898677345, "grad_norm": 1.5390279293060303, "learning_rate": 1.8997666134381526e-05, "loss": 0.13937437534332275, "step": 509 }, { "epoch": 0.06188569348380051, "grad_norm": 2.1501710414886475, "learning_rate": 1.89952094337305e-05, "loss": 0.30845504999160767, "step": 510 }, { "epoch": 0.06200703798082757, "grad_norm": 1.35932457447052, "learning_rate": 1.8992752733079475e-05, "loss": 0.2177773267030716, "step": 511 }, { "epoch": 0.06212838247785463, "grad_norm": 1.361790418624878, "learning_rate": 1.899029603242845e-05, "loss": 0.2292359620332718, "step": 512 }, { "epoch": 0.06224972697488169, "grad_norm": 1.9305003881454468, "learning_rate": 1.8987839331777423e-05, "loss": 0.37796324491500854, "step": 513 }, { "epoch": 0.06237107147190875, "grad_norm": 1.1280158758163452, "learning_rate": 1.8985382631126397e-05, "loss": 0.150230273604393, "step": 514 }, { "epoch": 0.06249241596893581, "grad_norm": 2.7915799617767334, "learning_rate": 1.898292593047537e-05, "loss": 0.4558163285255432, "step": 515 }, { "epoch": 0.06261376046596287, "grad_norm": 1.4745959043502808, "learning_rate": 1.898046922982435e-05, "loss": 0.4183858633041382, "step": 516 }, { "epoch": 0.06273510496298992, "grad_norm": 1.6241668462753296, "learning_rate": 1.8978012529173324e-05, "loss": 0.5954026579856873, "step": 517 }, { "epoch": 0.06285644946001699, "grad_norm": 2.7014975547790527, "learning_rate": 1.8975555828522298e-05, "loss": 0.3073278069496155, "step": 518 }, { "epoch": 0.06297779395704405, "grad_norm": 1.5124610662460327, "learning_rate": 1.8973099127871272e-05, "loss": 0.4629092812538147, "step": 519 }, { "epoch": 0.06309913845407111, "grad_norm": 1.3076839447021484, "learning_rate": 1.8970642427220246e-05, "loss": 0.359159380197525, "step": 520 }, { "epoch": 0.06322048295109817, "grad_norm": 2.850494146347046, "learning_rate": 1.896818572656922e-05, "loss": 0.5875416398048401, "step": 521 }, { "epoch": 0.06334182744812522, "grad_norm": 1.2282150983810425, "learning_rate": 1.8965729025918195e-05, "loss": 0.6359026432037354, "step": 522 }, { "epoch": 0.06346317194515229, "grad_norm": 2.336590528488159, "learning_rate": 1.896327232526717e-05, "loss": 0.29532766342163086, "step": 523 }, { "epoch": 0.06358451644217934, "grad_norm": 1.3243014812469482, "learning_rate": 1.8960815624616143e-05, "loss": 0.2546826899051666, "step": 524 }, { "epoch": 0.06370586093920641, "grad_norm": 0.884636402130127, "learning_rate": 1.8958358923965118e-05, "loss": 0.07391630858182907, "step": 525 }, { "epoch": 0.06382720543623346, "grad_norm": 1.5499517917633057, "learning_rate": 1.8955902223314092e-05, "loss": 0.38295266032218933, "step": 526 }, { "epoch": 0.06394854993326053, "grad_norm": 1.8321927785873413, "learning_rate": 1.8953445522663066e-05, "loss": 0.2729780673980713, "step": 527 }, { "epoch": 0.06406989443028759, "grad_norm": 1.900599718093872, "learning_rate": 1.895098882201204e-05, "loss": 0.5558912754058838, "step": 528 }, { "epoch": 0.06419123892731464, "grad_norm": 1.706315279006958, "learning_rate": 1.8948532121361015e-05, "loss": 0.7064175605773926, "step": 529 }, { "epoch": 0.06431258342434171, "grad_norm": 1.0284161567687988, "learning_rate": 1.894607542070999e-05, "loss": 0.36064383387565613, "step": 530 }, { "epoch": 0.06443392792136876, "grad_norm": 1.0576647520065308, "learning_rate": 1.8943618720058963e-05, "loss": 0.14544187486171722, "step": 531 }, { "epoch": 0.06455527241839583, "grad_norm": 1.206278681755066, "learning_rate": 1.8941162019407937e-05, "loss": 0.743944525718689, "step": 532 }, { "epoch": 0.06467661691542288, "grad_norm": 1.364727258682251, "learning_rate": 1.893870531875691e-05, "loss": 0.4518415927886963, "step": 533 }, { "epoch": 0.06479796141244995, "grad_norm": 1.4188811779022217, "learning_rate": 1.8936248618105886e-05, "loss": 0.37862730026245117, "step": 534 }, { "epoch": 0.064919305909477, "grad_norm": 1.7733440399169922, "learning_rate": 1.893379191745486e-05, "loss": 0.3728104829788208, "step": 535 }, { "epoch": 0.06504065040650407, "grad_norm": 1.6150974035263062, "learning_rate": 1.8931335216803834e-05, "loss": 0.4845805764198303, "step": 536 }, { "epoch": 0.06516199490353113, "grad_norm": 1.8303987979888916, "learning_rate": 1.892887851615281e-05, "loss": 0.1986956149339676, "step": 537 }, { "epoch": 0.06528333940055818, "grad_norm": 1.9609991312026978, "learning_rate": 1.8926421815501783e-05, "loss": 0.3362962603569031, "step": 538 }, { "epoch": 0.06540468389758525, "grad_norm": 1.5091760158538818, "learning_rate": 1.8923965114850757e-05, "loss": 0.43894076347351074, "step": 539 }, { "epoch": 0.0655260283946123, "grad_norm": 1.83540678024292, "learning_rate": 1.892150841419973e-05, "loss": 0.5007857084274292, "step": 540 }, { "epoch": 0.06564737289163937, "grad_norm": 1.6493889093399048, "learning_rate": 1.8919051713548706e-05, "loss": 0.31600168347358704, "step": 541 }, { "epoch": 0.06576871738866642, "grad_norm": 1.0712192058563232, "learning_rate": 1.891659501289768e-05, "loss": 0.23161625862121582, "step": 542 }, { "epoch": 0.06589006188569349, "grad_norm": 2.0619916915893555, "learning_rate": 1.8914138312246654e-05, "loss": 0.6339962482452393, "step": 543 }, { "epoch": 0.06601140638272054, "grad_norm": 1.6993929147720337, "learning_rate": 1.891168161159563e-05, "loss": 0.5062494874000549, "step": 544 }, { "epoch": 0.0661327508797476, "grad_norm": 1.5165525674819946, "learning_rate": 1.8909224910944603e-05, "loss": 0.29236191511154175, "step": 545 }, { "epoch": 0.06625409537677467, "grad_norm": 0.7951221466064453, "learning_rate": 1.8906768210293577e-05, "loss": 0.11672580987215042, "step": 546 }, { "epoch": 0.06637543987380172, "grad_norm": 1.2992923259735107, "learning_rate": 1.890431150964255e-05, "loss": 0.3086448907852173, "step": 547 }, { "epoch": 0.06649678437082879, "grad_norm": 1.2831530570983887, "learning_rate": 1.8901854808991526e-05, "loss": 0.29797589778900146, "step": 548 }, { "epoch": 0.06661812886785584, "grad_norm": 1.8516546487808228, "learning_rate": 1.88993981083405e-05, "loss": 0.42842912673950195, "step": 549 }, { "epoch": 0.06673947336488291, "grad_norm": 1.7039152383804321, "learning_rate": 1.8896941407689474e-05, "loss": 0.383547842502594, "step": 550 }, { "epoch": 0.06686081786190996, "grad_norm": 1.2621033191680908, "learning_rate": 1.889448470703845e-05, "loss": 0.19925151765346527, "step": 551 }, { "epoch": 0.06698216235893702, "grad_norm": 1.5435742139816284, "learning_rate": 1.8892028006387423e-05, "loss": 0.37353450059890747, "step": 552 }, { "epoch": 0.06710350685596408, "grad_norm": 2.185764789581299, "learning_rate": 1.8889571305736397e-05, "loss": 0.5865182876586914, "step": 553 }, { "epoch": 0.06722485135299114, "grad_norm": 1.4764949083328247, "learning_rate": 1.888711460508537e-05, "loss": 0.3307309150695801, "step": 554 }, { "epoch": 0.0673461958500182, "grad_norm": 1.880235195159912, "learning_rate": 1.8884657904434345e-05, "loss": 0.7467969059944153, "step": 555 }, { "epoch": 0.06746754034704526, "grad_norm": 2.191826581954956, "learning_rate": 1.8882201203783323e-05, "loss": 0.4978898763656616, "step": 556 }, { "epoch": 0.06758888484407233, "grad_norm": 0.9400010108947754, "learning_rate": 1.8879744503132297e-05, "loss": 0.29659247398376465, "step": 557 }, { "epoch": 0.06771022934109938, "grad_norm": 1.570605754852295, "learning_rate": 1.887728780248127e-05, "loss": 0.28735125064849854, "step": 558 }, { "epoch": 0.06783157383812644, "grad_norm": 1.8483541011810303, "learning_rate": 1.8874831101830246e-05, "loss": 0.3445549011230469, "step": 559 }, { "epoch": 0.0679529183351535, "grad_norm": 1.9162882566452026, "learning_rate": 1.887237440117922e-05, "loss": 0.5881408452987671, "step": 560 }, { "epoch": 0.06807426283218056, "grad_norm": 1.7977851629257202, "learning_rate": 1.8869917700528194e-05, "loss": 0.41886040568351746, "step": 561 }, { "epoch": 0.06819560732920762, "grad_norm": 1.3679685592651367, "learning_rate": 1.886746099987717e-05, "loss": 0.26222050189971924, "step": 562 }, { "epoch": 0.06831695182623468, "grad_norm": 1.60178804397583, "learning_rate": 1.8865004299226143e-05, "loss": 0.341381311416626, "step": 563 }, { "epoch": 0.06843829632326175, "grad_norm": 0.8967300653457642, "learning_rate": 1.8862547598575117e-05, "loss": 0.29784804582595825, "step": 564 }, { "epoch": 0.0685596408202888, "grad_norm": 2.0774638652801514, "learning_rate": 1.886009089792409e-05, "loss": 0.3782203793525696, "step": 565 }, { "epoch": 0.06868098531731585, "grad_norm": 1.6538910865783691, "learning_rate": 1.8857634197273066e-05, "loss": 0.7248138785362244, "step": 566 }, { "epoch": 0.06880232981434292, "grad_norm": 1.3195281028747559, "learning_rate": 1.885517749662204e-05, "loss": 0.44836243987083435, "step": 567 }, { "epoch": 0.06892367431136998, "grad_norm": 0.2957918345928192, "learning_rate": 1.8852720795971014e-05, "loss": 0.004815039690583944, "step": 568 }, { "epoch": 0.06904501880839704, "grad_norm": 1.6919680833816528, "learning_rate": 1.885026409531999e-05, "loss": 0.48703718185424805, "step": 569 }, { "epoch": 0.0691663633054241, "grad_norm": 2.1297340393066406, "learning_rate": 1.8847807394668963e-05, "loss": 0.4740438163280487, "step": 570 }, { "epoch": 0.06928770780245117, "grad_norm": 1.4757106304168701, "learning_rate": 1.8845350694017937e-05, "loss": 0.42466428875923157, "step": 571 }, { "epoch": 0.06940905229947822, "grad_norm": 0.9064832329750061, "learning_rate": 1.884289399336691e-05, "loss": 0.1427098661661148, "step": 572 }, { "epoch": 0.06953039679650527, "grad_norm": 1.0294671058654785, "learning_rate": 1.8840437292715885e-05, "loss": 0.2024490237236023, "step": 573 }, { "epoch": 0.06965174129353234, "grad_norm": 1.66569185256958, "learning_rate": 1.883798059206486e-05, "loss": 0.9233149290084839, "step": 574 }, { "epoch": 0.0697730857905594, "grad_norm": 1.395765781402588, "learning_rate": 1.8835523891413834e-05, "loss": 0.24765007197856903, "step": 575 }, { "epoch": 0.06989443028758646, "grad_norm": 1.773474097251892, "learning_rate": 1.8833067190762805e-05, "loss": 0.38082554936408997, "step": 576 }, { "epoch": 0.07001577478461352, "grad_norm": 1.2505320310592651, "learning_rate": 1.883061049011178e-05, "loss": 0.348379909992218, "step": 577 }, { "epoch": 0.07013711928164058, "grad_norm": 1.428821086883545, "learning_rate": 1.8828153789460753e-05, "loss": 0.33820322155952454, "step": 578 }, { "epoch": 0.07025846377866764, "grad_norm": 1.0174955129623413, "learning_rate": 1.8825697088809728e-05, "loss": 0.250951886177063, "step": 579 }, { "epoch": 0.07037980827569469, "grad_norm": 1.4117581844329834, "learning_rate": 1.8823240388158702e-05, "loss": 0.36001020669937134, "step": 580 }, { "epoch": 0.07050115277272176, "grad_norm": 0.23253233730793, "learning_rate": 1.8820783687507676e-05, "loss": 0.010078919120132923, "step": 581 }, { "epoch": 0.07062249726974881, "grad_norm": 1.21126127243042, "learning_rate": 1.8818326986856654e-05, "loss": 0.402252733707428, "step": 582 }, { "epoch": 0.07074384176677588, "grad_norm": 1.4796817302703857, "learning_rate": 1.8815870286205628e-05, "loss": 0.3913116753101349, "step": 583 }, { "epoch": 0.07086518626380293, "grad_norm": 1.842236042022705, "learning_rate": 1.8813413585554602e-05, "loss": 0.7778733968734741, "step": 584 }, { "epoch": 0.07098653076083, "grad_norm": 1.782017707824707, "learning_rate": 1.8810956884903576e-05, "loss": 0.3612639904022217, "step": 585 }, { "epoch": 0.07110787525785706, "grad_norm": 1.6680972576141357, "learning_rate": 1.880850018425255e-05, "loss": 0.762682318687439, "step": 586 }, { "epoch": 0.07122921975488411, "grad_norm": 1.2227903604507446, "learning_rate": 1.8806043483601525e-05, "loss": 0.5214573740959167, "step": 587 }, { "epoch": 0.07135056425191118, "grad_norm": 1.5926989316940308, "learning_rate": 1.88035867829505e-05, "loss": 0.5479337573051453, "step": 588 }, { "epoch": 0.07147190874893823, "grad_norm": 1.989459753036499, "learning_rate": 1.8801130082299474e-05, "loss": 0.3255896270275116, "step": 589 }, { "epoch": 0.0715932532459653, "grad_norm": 1.7789111137390137, "learning_rate": 1.8798673381648448e-05, "loss": 0.5009081363677979, "step": 590 }, { "epoch": 0.07171459774299235, "grad_norm": 1.1982905864715576, "learning_rate": 1.8796216680997422e-05, "loss": 0.6694433093070984, "step": 591 }, { "epoch": 0.07183594224001942, "grad_norm": 1.6150325536727905, "learning_rate": 1.8793759980346396e-05, "loss": 0.4211908280849457, "step": 592 }, { "epoch": 0.07195728673704647, "grad_norm": 1.140662431716919, "learning_rate": 1.879130327969537e-05, "loss": 0.28169333934783936, "step": 593 }, { "epoch": 0.07207863123407353, "grad_norm": 2.1310620307922363, "learning_rate": 1.8788846579044345e-05, "loss": 0.5164771676063538, "step": 594 }, { "epoch": 0.0721999757311006, "grad_norm": 1.233970046043396, "learning_rate": 1.878638987839332e-05, "loss": 0.3783186376094818, "step": 595 }, { "epoch": 0.07232132022812765, "grad_norm": 1.430479645729065, "learning_rate": 1.8783933177742293e-05, "loss": 0.5629549622535706, "step": 596 }, { "epoch": 0.07244266472515472, "grad_norm": 1.5569998025894165, "learning_rate": 1.8781476477091268e-05, "loss": 0.27930858731269836, "step": 597 }, { "epoch": 0.07256400922218177, "grad_norm": 1.2450876235961914, "learning_rate": 1.8779019776440242e-05, "loss": 0.7581778764724731, "step": 598 }, { "epoch": 0.07268535371920884, "grad_norm": 2.200653076171875, "learning_rate": 1.8776563075789216e-05, "loss": 0.24638301134109497, "step": 599 }, { "epoch": 0.0728066982162359, "grad_norm": 0.1584484577178955, "learning_rate": 1.877410637513819e-05, "loss": 0.0029435211326926947, "step": 600 }, { "epoch": 0.07292804271326295, "grad_norm": 1.201370358467102, "learning_rate": 1.8771649674487165e-05, "loss": 0.17360961437225342, "step": 601 }, { "epoch": 0.07304938721029001, "grad_norm": 1.5583597421646118, "learning_rate": 1.876919297383614e-05, "loss": 0.19166655838489532, "step": 602 }, { "epoch": 0.07317073170731707, "grad_norm": 1.9324877262115479, "learning_rate": 1.8766736273185113e-05, "loss": 0.5034867525100708, "step": 603 }, { "epoch": 0.07329207620434414, "grad_norm": 1.4319108724594116, "learning_rate": 1.8764279572534087e-05, "loss": 0.16094264388084412, "step": 604 }, { "epoch": 0.07341342070137119, "grad_norm": 2.0198020935058594, "learning_rate": 1.876182287188306e-05, "loss": 0.8958789706230164, "step": 605 }, { "epoch": 0.07353476519839826, "grad_norm": 1.1970410346984863, "learning_rate": 1.8759366171232036e-05, "loss": 0.456455796957016, "step": 606 }, { "epoch": 0.07365610969542531, "grad_norm": 1.285672903060913, "learning_rate": 1.875690947058101e-05, "loss": 0.6159220933914185, "step": 607 }, { "epoch": 0.07377745419245237, "grad_norm": 1.3117607831954956, "learning_rate": 1.8754452769929984e-05, "loss": 0.40198272466659546, "step": 608 }, { "epoch": 0.07389879868947943, "grad_norm": 1.767972469329834, "learning_rate": 1.875199606927896e-05, "loss": 0.7003971934318542, "step": 609 }, { "epoch": 0.07402014318650649, "grad_norm": 1.3937331438064575, "learning_rate": 1.8749539368627933e-05, "loss": 0.11055199801921844, "step": 610 }, { "epoch": 0.07414148768353356, "grad_norm": 1.1846213340759277, "learning_rate": 1.8747082667976907e-05, "loss": 0.819743812084198, "step": 611 }, { "epoch": 0.07426283218056061, "grad_norm": 1.818548560142517, "learning_rate": 1.874462596732588e-05, "loss": 0.6363766193389893, "step": 612 }, { "epoch": 0.07438417667758768, "grad_norm": 2.3553943634033203, "learning_rate": 1.8742169266674856e-05, "loss": 0.8139269351959229, "step": 613 }, { "epoch": 0.07450552117461473, "grad_norm": 0.9787309169769287, "learning_rate": 1.873971256602383e-05, "loss": 0.17067642509937286, "step": 614 }, { "epoch": 0.07462686567164178, "grad_norm": 2.027261257171631, "learning_rate": 1.8737255865372804e-05, "loss": 0.17268799245357513, "step": 615 }, { "epoch": 0.07474821016866885, "grad_norm": 1.1048139333724976, "learning_rate": 1.873479916472178e-05, "loss": 0.33995521068573, "step": 616 }, { "epoch": 0.0748695546656959, "grad_norm": 1.7998343706130981, "learning_rate": 1.8732342464070753e-05, "loss": 0.15454964339733124, "step": 617 }, { "epoch": 0.07499089916272297, "grad_norm": 1.2843570709228516, "learning_rate": 1.8729885763419727e-05, "loss": 0.14955607056617737, "step": 618 }, { "epoch": 0.07511224365975003, "grad_norm": 1.1684657335281372, "learning_rate": 1.87274290627687e-05, "loss": 0.5342341661453247, "step": 619 }, { "epoch": 0.0752335881567771, "grad_norm": 1.2346407175064087, "learning_rate": 1.8724972362117676e-05, "loss": 0.18371909856796265, "step": 620 }, { "epoch": 0.07535493265380415, "grad_norm": 1.3623450994491577, "learning_rate": 1.872251566146665e-05, "loss": 0.30086177587509155, "step": 621 }, { "epoch": 0.0754762771508312, "grad_norm": 1.979787826538086, "learning_rate": 1.8720058960815627e-05, "loss": 0.7226313352584839, "step": 622 }, { "epoch": 0.07559762164785827, "grad_norm": 1.101216435432434, "learning_rate": 1.87176022601646e-05, "loss": 0.1416693776845932, "step": 623 }, { "epoch": 0.07571896614488532, "grad_norm": 1.8791908025741577, "learning_rate": 1.8715145559513576e-05, "loss": 0.5394376516342163, "step": 624 }, { "epoch": 0.07584031064191239, "grad_norm": 1.2553093433380127, "learning_rate": 1.871268885886255e-05, "loss": 0.19903355836868286, "step": 625 }, { "epoch": 0.07596165513893945, "grad_norm": 1.5131609439849854, "learning_rate": 1.8710232158211524e-05, "loss": 0.2551720142364502, "step": 626 }, { "epoch": 0.07608299963596651, "grad_norm": 0.9879768490791321, "learning_rate": 1.87077754575605e-05, "loss": 0.09607790410518646, "step": 627 }, { "epoch": 0.07620434413299357, "grad_norm": 1.5272071361541748, "learning_rate": 1.8705318756909473e-05, "loss": 0.16435827314853668, "step": 628 }, { "epoch": 0.07632568863002062, "grad_norm": 1.7799582481384277, "learning_rate": 1.8702862056258447e-05, "loss": 0.3401604890823364, "step": 629 }, { "epoch": 0.07644703312704769, "grad_norm": 1.4967886209487915, "learning_rate": 1.870040535560742e-05, "loss": 0.41344839334487915, "step": 630 }, { "epoch": 0.07656837762407474, "grad_norm": 1.9690594673156738, "learning_rate": 1.8697948654956396e-05, "loss": 0.8188158273696899, "step": 631 }, { "epoch": 0.07668972212110181, "grad_norm": 1.2859214544296265, "learning_rate": 1.869549195430537e-05, "loss": 0.21575604379177094, "step": 632 }, { "epoch": 0.07681106661812886, "grad_norm": 1.5824931859970093, "learning_rate": 1.8693035253654344e-05, "loss": 0.6719478368759155, "step": 633 }, { "epoch": 0.07693241111515593, "grad_norm": 2.319385290145874, "learning_rate": 1.869057855300332e-05, "loss": 0.3536515235900879, "step": 634 }, { "epoch": 0.07705375561218299, "grad_norm": 1.7407270669937134, "learning_rate": 1.8688121852352293e-05, "loss": 0.4539559781551361, "step": 635 }, { "epoch": 0.07717510010921005, "grad_norm": 1.8825663328170776, "learning_rate": 1.8685665151701267e-05, "loss": 0.37786608934402466, "step": 636 }, { "epoch": 0.07729644460623711, "grad_norm": 1.678596019744873, "learning_rate": 1.868320845105024e-05, "loss": 0.42622077465057373, "step": 637 }, { "epoch": 0.07741778910326416, "grad_norm": 1.5039141178131104, "learning_rate": 1.8680751750399216e-05, "loss": 0.22307798266410828, "step": 638 }, { "epoch": 0.07753913360029123, "grad_norm": 1.364247441291809, "learning_rate": 1.867829504974819e-05, "loss": 0.15877319872379303, "step": 639 }, { "epoch": 0.07766047809731828, "grad_norm": 1.4648646116256714, "learning_rate": 1.8675838349097164e-05, "loss": 0.3406936228275299, "step": 640 }, { "epoch": 0.07778182259434535, "grad_norm": 1.3261477947235107, "learning_rate": 1.867338164844614e-05, "loss": 0.12792298197746277, "step": 641 }, { "epoch": 0.0779031670913724, "grad_norm": 1.1395256519317627, "learning_rate": 1.8670924947795113e-05, "loss": 0.3013390600681305, "step": 642 }, { "epoch": 0.07802451158839947, "grad_norm": 1.2113020420074463, "learning_rate": 1.8668468247144087e-05, "loss": 0.13082636892795563, "step": 643 }, { "epoch": 0.07814585608542653, "grad_norm": 1.6144537925720215, "learning_rate": 1.866601154649306e-05, "loss": 0.16165593266487122, "step": 644 }, { "epoch": 0.07826720058245358, "grad_norm": 1.3433409929275513, "learning_rate": 1.8663554845842035e-05, "loss": 0.1441866159439087, "step": 645 }, { "epoch": 0.07838854507948065, "grad_norm": 1.0517584085464478, "learning_rate": 1.866109814519101e-05, "loss": 0.2606707215309143, "step": 646 }, { "epoch": 0.0785098895765077, "grad_norm": 1.180669903755188, "learning_rate": 1.8658641444539984e-05, "loss": 0.21883589029312134, "step": 647 }, { "epoch": 0.07863123407353477, "grad_norm": 0.658696174621582, "learning_rate": 1.8656184743888958e-05, "loss": 0.03140142932534218, "step": 648 }, { "epoch": 0.07875257857056182, "grad_norm": 0.051501210778951645, "learning_rate": 1.8653728043237932e-05, "loss": 0.0010449824621900916, "step": 649 }, { "epoch": 0.07887392306758889, "grad_norm": 1.6505728960037231, "learning_rate": 1.8651271342586907e-05, "loss": 0.6298208236694336, "step": 650 }, { "epoch": 0.07899526756461595, "grad_norm": 1.3531675338745117, "learning_rate": 1.864881464193588e-05, "loss": 0.26334941387176514, "step": 651 }, { "epoch": 0.079116612061643, "grad_norm": 1.7941267490386963, "learning_rate": 1.8646357941284855e-05, "loss": 0.3610610067844391, "step": 652 }, { "epoch": 0.07923795655867007, "grad_norm": 2.6597185134887695, "learning_rate": 1.864390124063383e-05, "loss": 0.5727459788322449, "step": 653 }, { "epoch": 0.07935930105569712, "grad_norm": 1.6264383792877197, "learning_rate": 1.8641444539982804e-05, "loss": 0.23900170624256134, "step": 654 }, { "epoch": 0.07948064555272419, "grad_norm": 3.0904436111450195, "learning_rate": 1.8638987839331778e-05, "loss": 0.5079998970031738, "step": 655 }, { "epoch": 0.07960199004975124, "grad_norm": 1.3061164617538452, "learning_rate": 1.8636531138680752e-05, "loss": 0.19908404350280762, "step": 656 }, { "epoch": 0.07972333454677831, "grad_norm": 1.316514492034912, "learning_rate": 1.8634074438029726e-05, "loss": 0.3161211609840393, "step": 657 }, { "epoch": 0.07984467904380536, "grad_norm": 1.1303502321243286, "learning_rate": 1.86316177373787e-05, "loss": 0.29433509707450867, "step": 658 }, { "epoch": 0.07996602354083242, "grad_norm": 0.0029176683165133, "learning_rate": 1.8629161036727675e-05, "loss": 3.159634070470929e-05, "step": 659 }, { "epoch": 0.08008736803785949, "grad_norm": 1.445120096206665, "learning_rate": 1.862670433607665e-05, "loss": 0.31950703263282776, "step": 660 }, { "epoch": 0.08020871253488654, "grad_norm": 1.4325833320617676, "learning_rate": 1.8624247635425627e-05, "loss": 0.3625190556049347, "step": 661 }, { "epoch": 0.08033005703191361, "grad_norm": 2.9001710414886475, "learning_rate": 1.86217909347746e-05, "loss": 0.17414988577365875, "step": 662 }, { "epoch": 0.08045140152894066, "grad_norm": 1.1585091352462769, "learning_rate": 1.8619334234123575e-05, "loss": 0.2450495958328247, "step": 663 }, { "epoch": 0.08057274602596773, "grad_norm": 1.7768579721450806, "learning_rate": 1.861687753347255e-05, "loss": 0.2746257781982422, "step": 664 }, { "epoch": 0.08069409052299478, "grad_norm": 1.5646682977676392, "learning_rate": 1.8614420832821524e-05, "loss": 0.3015173375606537, "step": 665 }, { "epoch": 0.08081543502002184, "grad_norm": 1.926649570465088, "learning_rate": 1.8611964132170498e-05, "loss": 0.38973182439804077, "step": 666 }, { "epoch": 0.0809367795170489, "grad_norm": 1.0687040090560913, "learning_rate": 1.8609507431519472e-05, "loss": 0.21271148324012756, "step": 667 }, { "epoch": 0.08105812401407596, "grad_norm": 2.043708086013794, "learning_rate": 1.8607050730868447e-05, "loss": 0.1781885027885437, "step": 668 }, { "epoch": 0.08117946851110303, "grad_norm": 1.3621066808700562, "learning_rate": 1.860459403021742e-05, "loss": 0.39263036847114563, "step": 669 }, { "epoch": 0.08130081300813008, "grad_norm": 1.9370335340499878, "learning_rate": 1.8602137329566395e-05, "loss": 0.454712450504303, "step": 670 }, { "epoch": 0.08142215750515715, "grad_norm": 0.988834023475647, "learning_rate": 1.859968062891537e-05, "loss": 0.0948018729686737, "step": 671 }, { "epoch": 0.0815435020021842, "grad_norm": 1.7850199937820435, "learning_rate": 1.8597223928264344e-05, "loss": 0.43955737352371216, "step": 672 }, { "epoch": 0.08166484649921125, "grad_norm": 1.6654014587402344, "learning_rate": 1.8594767227613318e-05, "loss": 0.3758952021598816, "step": 673 }, { "epoch": 0.08178619099623832, "grad_norm": 2.2570245265960693, "learning_rate": 1.8592310526962292e-05, "loss": 0.46082913875579834, "step": 674 }, { "epoch": 0.08190753549326538, "grad_norm": 1.4857622385025024, "learning_rate": 1.8589853826311266e-05, "loss": 0.6366814374923706, "step": 675 }, { "epoch": 0.08202887999029244, "grad_norm": 0.6030565500259399, "learning_rate": 1.858739712566024e-05, "loss": 0.0605991892516613, "step": 676 }, { "epoch": 0.0821502244873195, "grad_norm": 2.023913621902466, "learning_rate": 1.8584940425009215e-05, "loss": 0.2816411554813385, "step": 677 }, { "epoch": 0.08227156898434657, "grad_norm": 1.27947199344635, "learning_rate": 1.858248372435819e-05, "loss": 0.5554491877555847, "step": 678 }, { "epoch": 0.08239291348137362, "grad_norm": 1.3732556104660034, "learning_rate": 1.8580027023707164e-05, "loss": 0.26525643467903137, "step": 679 }, { "epoch": 0.08251425797840067, "grad_norm": 0.8106752038002014, "learning_rate": 1.8577570323056138e-05, "loss": 0.05604014918208122, "step": 680 }, { "epoch": 0.08263560247542774, "grad_norm": 1.0731443166732788, "learning_rate": 1.8575113622405112e-05, "loss": 0.2855074107646942, "step": 681 }, { "epoch": 0.0827569469724548, "grad_norm": 1.1271634101867676, "learning_rate": 1.8572656921754086e-05, "loss": 0.42854225635528564, "step": 682 }, { "epoch": 0.08287829146948186, "grad_norm": 1.4456641674041748, "learning_rate": 1.857020022110306e-05, "loss": 0.24991053342819214, "step": 683 }, { "epoch": 0.08299963596650892, "grad_norm": 2.6240429878234863, "learning_rate": 1.8567743520452035e-05, "loss": 0.6288108229637146, "step": 684 }, { "epoch": 0.08312098046353598, "grad_norm": 1.276291012763977, "learning_rate": 1.856528681980101e-05, "loss": 0.08958227187395096, "step": 685 }, { "epoch": 0.08324232496056304, "grad_norm": 1.562274694442749, "learning_rate": 1.8562830119149983e-05, "loss": 0.2525687515735626, "step": 686 }, { "epoch": 0.08336366945759009, "grad_norm": 1.218352198600769, "learning_rate": 1.8560373418498958e-05, "loss": 0.24451139569282532, "step": 687 }, { "epoch": 0.08348501395461716, "grad_norm": 1.408584713935852, "learning_rate": 1.8557916717847932e-05, "loss": 0.4832595884799957, "step": 688 }, { "epoch": 0.08360635845164421, "grad_norm": 2.0641961097717285, "learning_rate": 1.8555460017196906e-05, "loss": 0.3223777413368225, "step": 689 }, { "epoch": 0.08372770294867128, "grad_norm": 1.7355111837387085, "learning_rate": 1.855300331654588e-05, "loss": 0.4831840991973877, "step": 690 }, { "epoch": 0.08384904744569834, "grad_norm": 2.660064935684204, "learning_rate": 1.8550546615894855e-05, "loss": 0.48859894275665283, "step": 691 }, { "epoch": 0.0839703919427254, "grad_norm": 17.630550384521484, "learning_rate": 1.854808991524383e-05, "loss": 0.24826133251190186, "step": 692 }, { "epoch": 0.08409173643975246, "grad_norm": 1.8000401258468628, "learning_rate": 1.8545633214592803e-05, "loss": 0.4145761728286743, "step": 693 }, { "epoch": 0.08421308093677951, "grad_norm": 1.4511135816574097, "learning_rate": 1.8543176513941777e-05, "loss": 0.417000949382782, "step": 694 }, { "epoch": 0.08433442543380658, "grad_norm": 2.309809446334839, "learning_rate": 1.854071981329075e-05, "loss": 0.38240236043930054, "step": 695 }, { "epoch": 0.08445576993083363, "grad_norm": 1.9691797494888306, "learning_rate": 1.8538263112639726e-05, "loss": 0.5611581206321716, "step": 696 }, { "epoch": 0.0845771144278607, "grad_norm": 0.88741135597229, "learning_rate": 1.85358064119887e-05, "loss": 0.2746793031692505, "step": 697 }, { "epoch": 0.08469845892488775, "grad_norm": 1.727298378944397, "learning_rate": 1.8533349711337674e-05, "loss": 0.42077144980430603, "step": 698 }, { "epoch": 0.08481980342191482, "grad_norm": 1.5723243951797485, "learning_rate": 1.853089301068665e-05, "loss": 0.30194780230522156, "step": 699 }, { "epoch": 0.08494114791894188, "grad_norm": 0.6778169274330139, "learning_rate": 1.8528436310035623e-05, "loss": 0.06341211497783661, "step": 700 }, { "epoch": 0.08506249241596893, "grad_norm": 1.1526869535446167, "learning_rate": 1.85259796093846e-05, "loss": 0.14544203877449036, "step": 701 }, { "epoch": 0.085183836912996, "grad_norm": 1.8126407861709595, "learning_rate": 1.8523522908733575e-05, "loss": 0.5493510961532593, "step": 702 }, { "epoch": 0.08530518141002305, "grad_norm": 1.4785159826278687, "learning_rate": 1.852106620808255e-05, "loss": 0.28663206100463867, "step": 703 }, { "epoch": 0.08542652590705012, "grad_norm": 1.6556025743484497, "learning_rate": 1.8518609507431523e-05, "loss": 0.2000901848077774, "step": 704 }, { "epoch": 0.08554787040407717, "grad_norm": 0.6264957785606384, "learning_rate": 1.8516152806780498e-05, "loss": 0.0764586552977562, "step": 705 }, { "epoch": 0.08566921490110424, "grad_norm": 1.0878223180770874, "learning_rate": 1.8513696106129472e-05, "loss": 0.21222439408302307, "step": 706 }, { "epoch": 0.0857905593981313, "grad_norm": 2.753580331802368, "learning_rate": 1.8511239405478446e-05, "loss": 0.5297750234603882, "step": 707 }, { "epoch": 0.08591190389515835, "grad_norm": 1.854968786239624, "learning_rate": 1.850878270482742e-05, "loss": 0.3501977324485779, "step": 708 }, { "epoch": 0.08603324839218542, "grad_norm": 0.9170989990234375, "learning_rate": 1.8506326004176395e-05, "loss": 0.45474666357040405, "step": 709 }, { "epoch": 0.08615459288921247, "grad_norm": 1.7949663400650024, "learning_rate": 1.850386930352537e-05, "loss": 0.26724478602409363, "step": 710 }, { "epoch": 0.08627593738623954, "grad_norm": 1.7690072059631348, "learning_rate": 1.8501412602874343e-05, "loss": 0.18915711343288422, "step": 711 }, { "epoch": 0.08639728188326659, "grad_norm": 1.5598441362380981, "learning_rate": 1.8498955902223314e-05, "loss": 0.22758185863494873, "step": 712 }, { "epoch": 0.08651862638029366, "grad_norm": 1.995851993560791, "learning_rate": 1.849649920157229e-05, "loss": 0.38368889689445496, "step": 713 }, { "epoch": 0.08663997087732071, "grad_norm": 2.4046647548675537, "learning_rate": 1.8494042500921263e-05, "loss": 0.37635087966918945, "step": 714 }, { "epoch": 0.08676131537434777, "grad_norm": 1.9158389568328857, "learning_rate": 1.8491585800270237e-05, "loss": 0.4280529320240021, "step": 715 }, { "epoch": 0.08688265987137483, "grad_norm": 1.2061840295791626, "learning_rate": 1.848912909961921e-05, "loss": 0.21197004616260529, "step": 716 }, { "epoch": 0.08700400436840189, "grad_norm": 2.140751600265503, "learning_rate": 1.8486672398968185e-05, "loss": 0.18690326809883118, "step": 717 }, { "epoch": 0.08712534886542896, "grad_norm": 0.8397735357284546, "learning_rate": 1.848421569831716e-05, "loss": 0.10207987576723099, "step": 718 }, { "epoch": 0.08724669336245601, "grad_norm": 1.7140696048736572, "learning_rate": 1.8481758997666134e-05, "loss": 0.41563165187835693, "step": 719 }, { "epoch": 0.08736803785948308, "grad_norm": 1.6332520246505737, "learning_rate": 1.8479302297015108e-05, "loss": 0.272539883852005, "step": 720 }, { "epoch": 0.08748938235651013, "grad_norm": 1.8010307550430298, "learning_rate": 1.8476845596364082e-05, "loss": 0.6407032608985901, "step": 721 }, { "epoch": 0.08761072685353719, "grad_norm": 1.55129873752594, "learning_rate": 1.8474388895713057e-05, "loss": 0.23682965338230133, "step": 722 }, { "epoch": 0.08773207135056425, "grad_norm": 2.135488986968994, "learning_rate": 1.847193219506203e-05, "loss": 0.7710912227630615, "step": 723 }, { "epoch": 0.0878534158475913, "grad_norm": 2.4005861282348633, "learning_rate": 1.8469475494411005e-05, "loss": 0.564693808555603, "step": 724 }, { "epoch": 0.08797476034461837, "grad_norm": 1.3816473484039307, "learning_rate": 1.846701879375998e-05, "loss": 0.4674697518348694, "step": 725 }, { "epoch": 0.08809610484164543, "grad_norm": 1.5146907567977905, "learning_rate": 1.8464562093108954e-05, "loss": 0.5922137498855591, "step": 726 }, { "epoch": 0.0882174493386725, "grad_norm": 2.8482632637023926, "learning_rate": 1.846210539245793e-05, "loss": 0.8533211350440979, "step": 727 }, { "epoch": 0.08833879383569955, "grad_norm": 1.5686261653900146, "learning_rate": 1.8459648691806906e-05, "loss": 0.31927329301834106, "step": 728 }, { "epoch": 0.08846013833272662, "grad_norm": 1.3741048574447632, "learning_rate": 1.845719199115588e-05, "loss": 0.30205488204956055, "step": 729 }, { "epoch": 0.08858148282975367, "grad_norm": 1.9760974645614624, "learning_rate": 1.8454735290504854e-05, "loss": 0.27656230330467224, "step": 730 }, { "epoch": 0.08870282732678073, "grad_norm": 1.5433790683746338, "learning_rate": 1.845227858985383e-05, "loss": 0.5273905992507935, "step": 731 }, { "epoch": 0.0888241718238078, "grad_norm": 1.6777325868606567, "learning_rate": 1.8449821889202803e-05, "loss": 0.17607736587524414, "step": 732 }, { "epoch": 0.08894551632083485, "grad_norm": 1.780245065689087, "learning_rate": 1.8447365188551777e-05, "loss": 0.31574001908302307, "step": 733 }, { "epoch": 0.08906686081786191, "grad_norm": 1.0304222106933594, "learning_rate": 1.844490848790075e-05, "loss": 0.12235292792320251, "step": 734 }, { "epoch": 0.08918820531488897, "grad_norm": 2.0634799003601074, "learning_rate": 1.8442451787249725e-05, "loss": 0.4336833953857422, "step": 735 }, { "epoch": 0.08930954981191604, "grad_norm": 1.5077424049377441, "learning_rate": 1.84399950865987e-05, "loss": 0.3145570755004883, "step": 736 }, { "epoch": 0.08943089430894309, "grad_norm": 1.5537257194519043, "learning_rate": 1.8437538385947674e-05, "loss": 0.32689836621284485, "step": 737 }, { "epoch": 0.08955223880597014, "grad_norm": 1.7519437074661255, "learning_rate": 1.8435081685296648e-05, "loss": 0.8935326933860779, "step": 738 }, { "epoch": 0.08967358330299721, "grad_norm": 1.947508454322815, "learning_rate": 1.8432624984645622e-05, "loss": 0.3590155839920044, "step": 739 }, { "epoch": 0.08979492780002427, "grad_norm": 1.9229471683502197, "learning_rate": 1.8430168283994597e-05, "loss": 0.3991833031177521, "step": 740 }, { "epoch": 0.08991627229705133, "grad_norm": 1.3160052299499512, "learning_rate": 1.842771158334357e-05, "loss": 0.2500799894332886, "step": 741 }, { "epoch": 0.09003761679407839, "grad_norm": 1.176512598991394, "learning_rate": 1.8425254882692545e-05, "loss": 0.09526380151510239, "step": 742 }, { "epoch": 0.09015896129110545, "grad_norm": 1.6869382858276367, "learning_rate": 1.842279818204152e-05, "loss": 0.26154953241348267, "step": 743 }, { "epoch": 0.09028030578813251, "grad_norm": 2.0682740211486816, "learning_rate": 1.8420341481390494e-05, "loss": 0.3846953511238098, "step": 744 }, { "epoch": 0.09040165028515956, "grad_norm": 1.7865413427352905, "learning_rate": 1.8417884780739468e-05, "loss": 0.32650160789489746, "step": 745 }, { "epoch": 0.09052299478218663, "grad_norm": 2.0560121536254883, "learning_rate": 1.8415428080088442e-05, "loss": 0.2540816068649292, "step": 746 }, { "epoch": 0.09064433927921368, "grad_norm": 1.3295836448669434, "learning_rate": 1.8412971379437416e-05, "loss": 0.29928016662597656, "step": 747 }, { "epoch": 0.09076568377624075, "grad_norm": 1.530879020690918, "learning_rate": 1.841051467878639e-05, "loss": 0.40534618496894836, "step": 748 }, { "epoch": 0.0908870282732678, "grad_norm": 1.5177699327468872, "learning_rate": 1.8408057978135365e-05, "loss": 0.39622828364372253, "step": 749 }, { "epoch": 0.09100837277029487, "grad_norm": 1.8274325132369995, "learning_rate": 1.840560127748434e-05, "loss": 0.24163588881492615, "step": 750 }, { "epoch": 0.09112971726732193, "grad_norm": 1.2737239599227905, "learning_rate": 1.8403144576833314e-05, "loss": 0.174016535282135, "step": 751 }, { "epoch": 0.09125106176434898, "grad_norm": 2.5806877613067627, "learning_rate": 1.8400687876182288e-05, "loss": 0.5132497549057007, "step": 752 }, { "epoch": 0.09137240626137605, "grad_norm": 2.5546791553497314, "learning_rate": 1.8398231175531262e-05, "loss": 0.43177586793899536, "step": 753 }, { "epoch": 0.0914937507584031, "grad_norm": 1.690147042274475, "learning_rate": 1.8395774474880236e-05, "loss": 0.24541637301445007, "step": 754 }, { "epoch": 0.09161509525543017, "grad_norm": 1.4954241514205933, "learning_rate": 1.839331777422921e-05, "loss": 0.16959670186042786, "step": 755 }, { "epoch": 0.09173643975245722, "grad_norm": 0.9505935907363892, "learning_rate": 1.8390861073578185e-05, "loss": 0.12421949207782745, "step": 756 }, { "epoch": 0.09185778424948429, "grad_norm": 1.4919354915618896, "learning_rate": 1.838840437292716e-05, "loss": 0.3456411361694336, "step": 757 }, { "epoch": 0.09197912874651135, "grad_norm": 1.6589082479476929, "learning_rate": 1.8385947672276133e-05, "loss": 0.24717597663402557, "step": 758 }, { "epoch": 0.0921004732435384, "grad_norm": 1.4445219039916992, "learning_rate": 1.8383490971625108e-05, "loss": 0.3239576816558838, "step": 759 }, { "epoch": 0.09222181774056547, "grad_norm": 1.030391812324524, "learning_rate": 1.8381034270974082e-05, "loss": 0.09199359267950058, "step": 760 }, { "epoch": 0.09234316223759252, "grad_norm": 1.5500389337539673, "learning_rate": 1.8378577570323056e-05, "loss": 0.5370050668716431, "step": 761 }, { "epoch": 0.09246450673461959, "grad_norm": 1.5590590238571167, "learning_rate": 1.837612086967203e-05, "loss": 0.31459516286849976, "step": 762 }, { "epoch": 0.09258585123164664, "grad_norm": 2.197721481323242, "learning_rate": 1.8373664169021005e-05, "loss": 0.674180269241333, "step": 763 }, { "epoch": 0.09270719572867371, "grad_norm": 1.6125683784484863, "learning_rate": 1.837120746836998e-05, "loss": 0.3430609405040741, "step": 764 }, { "epoch": 0.09282854022570076, "grad_norm": 1.7196494340896606, "learning_rate": 1.8368750767718953e-05, "loss": 0.22308464348316193, "step": 765 }, { "epoch": 0.09294988472272782, "grad_norm": 2.1390604972839355, "learning_rate": 1.8366294067067927e-05, "loss": 0.5569056272506714, "step": 766 }, { "epoch": 0.09307122921975489, "grad_norm": 1.9109262228012085, "learning_rate": 1.8363837366416905e-05, "loss": 0.5384218692779541, "step": 767 }, { "epoch": 0.09319257371678194, "grad_norm": 2.4339253902435303, "learning_rate": 1.836138066576588e-05, "loss": 0.4181733727455139, "step": 768 }, { "epoch": 0.09331391821380901, "grad_norm": 1.388688325881958, "learning_rate": 1.8358923965114854e-05, "loss": 0.052905499935150146, "step": 769 }, { "epoch": 0.09343526271083606, "grad_norm": 1.6281681060791016, "learning_rate": 1.8356467264463828e-05, "loss": 0.6276299357414246, "step": 770 }, { "epoch": 0.09355660720786313, "grad_norm": 1.908575177192688, "learning_rate": 1.8354010563812802e-05, "loss": 0.376204252243042, "step": 771 }, { "epoch": 0.09367795170489018, "grad_norm": 1.5560134649276733, "learning_rate": 1.8351553863161776e-05, "loss": 0.2944219410419464, "step": 772 }, { "epoch": 0.09379929620191724, "grad_norm": 1.2017306089401245, "learning_rate": 1.834909716251075e-05, "loss": 0.15335910022258759, "step": 773 }, { "epoch": 0.0939206406989443, "grad_norm": 1.7475948333740234, "learning_rate": 1.8346640461859725e-05, "loss": 0.3523990511894226, "step": 774 }, { "epoch": 0.09404198519597136, "grad_norm": 2.778636932373047, "learning_rate": 1.83441837612087e-05, "loss": 0.14939619600772858, "step": 775 }, { "epoch": 0.09416332969299843, "grad_norm": 1.7566347122192383, "learning_rate": 1.8341727060557673e-05, "loss": 0.534525454044342, "step": 776 }, { "epoch": 0.09428467419002548, "grad_norm": 1.0987921953201294, "learning_rate": 1.8339270359906648e-05, "loss": 0.45171016454696655, "step": 777 }, { "epoch": 0.09440601868705255, "grad_norm": 1.7017377614974976, "learning_rate": 1.8336813659255622e-05, "loss": 0.3392978310585022, "step": 778 }, { "epoch": 0.0945273631840796, "grad_norm": 1.220464825630188, "learning_rate": 1.8334356958604596e-05, "loss": 0.1805679053068161, "step": 779 }, { "epoch": 0.09464870768110666, "grad_norm": 1.404594898223877, "learning_rate": 1.833190025795357e-05, "loss": 0.3082903027534485, "step": 780 }, { "epoch": 0.09477005217813372, "grad_norm": 1.8245242834091187, "learning_rate": 1.8329443557302545e-05, "loss": 0.41371673345565796, "step": 781 }, { "epoch": 0.09489139667516078, "grad_norm": 2.1793110370635986, "learning_rate": 1.832698685665152e-05, "loss": 0.548283040523529, "step": 782 }, { "epoch": 0.09501274117218785, "grad_norm": 2.5097601413726807, "learning_rate": 1.8324530156000493e-05, "loss": 0.8257855176925659, "step": 783 }, { "epoch": 0.0951340856692149, "grad_norm": 1.4109855890274048, "learning_rate": 1.8322073455349467e-05, "loss": 0.3920208811759949, "step": 784 }, { "epoch": 0.09525543016624197, "grad_norm": 1.6185084581375122, "learning_rate": 1.831961675469844e-05, "loss": 0.5085690021514893, "step": 785 }, { "epoch": 0.09537677466326902, "grad_norm": 1.548444151878357, "learning_rate": 1.8317160054047416e-05, "loss": 0.16307635605335236, "step": 786 }, { "epoch": 0.09549811916029607, "grad_norm": 1.5586305856704712, "learning_rate": 1.831470335339639e-05, "loss": 0.3123549520969391, "step": 787 }, { "epoch": 0.09561946365732314, "grad_norm": 1.7862480878829956, "learning_rate": 1.8312246652745364e-05, "loss": 0.5024250149726868, "step": 788 }, { "epoch": 0.0957408081543502, "grad_norm": 1.647065281867981, "learning_rate": 1.830978995209434e-05, "loss": 0.26289641857147217, "step": 789 }, { "epoch": 0.09586215265137726, "grad_norm": 1.5449492931365967, "learning_rate": 1.8307333251443313e-05, "loss": 0.20891058444976807, "step": 790 }, { "epoch": 0.09598349714840432, "grad_norm": 2.2280101776123047, "learning_rate": 1.8304876550792287e-05, "loss": 0.1946217268705368, "step": 791 }, { "epoch": 0.09610484164543139, "grad_norm": 1.7998099327087402, "learning_rate": 1.830241985014126e-05, "loss": 0.5536755323410034, "step": 792 }, { "epoch": 0.09622618614245844, "grad_norm": 1.2248331308364868, "learning_rate": 1.8299963149490236e-05, "loss": 0.1123380959033966, "step": 793 }, { "epoch": 0.09634753063948549, "grad_norm": 1.9093798398971558, "learning_rate": 1.829750644883921e-05, "loss": 0.2815214991569519, "step": 794 }, { "epoch": 0.09646887513651256, "grad_norm": 1.4137870073318481, "learning_rate": 1.8295049748188184e-05, "loss": 0.1948762834072113, "step": 795 }, { "epoch": 0.09659021963353961, "grad_norm": 1.9202245473861694, "learning_rate": 1.829259304753716e-05, "loss": 0.22015176713466644, "step": 796 }, { "epoch": 0.09671156413056668, "grad_norm": 1.1859745979309082, "learning_rate": 1.8290136346886133e-05, "loss": 0.3085130751132965, "step": 797 }, { "epoch": 0.09683290862759374, "grad_norm": 1.4333667755126953, "learning_rate": 1.8287679646235107e-05, "loss": 0.18858560919761658, "step": 798 }, { "epoch": 0.0969542531246208, "grad_norm": 1.2532932758331299, "learning_rate": 1.828522294558408e-05, "loss": 0.15305699408054352, "step": 799 }, { "epoch": 0.09707559762164786, "grad_norm": 1.6238371133804321, "learning_rate": 1.8282766244933056e-05, "loss": 0.10041648149490356, "step": 800 }, { "epoch": 0.09719694211867491, "grad_norm": 1.7450344562530518, "learning_rate": 1.828030954428203e-05, "loss": 0.12939795851707458, "step": 801 }, { "epoch": 0.09731828661570198, "grad_norm": 1.793636441230774, "learning_rate": 1.8277852843631004e-05, "loss": 0.6190224885940552, "step": 802 }, { "epoch": 0.09743963111272903, "grad_norm": 1.11686110496521, "learning_rate": 1.827539614297998e-05, "loss": 0.21365734934806824, "step": 803 }, { "epoch": 0.0975609756097561, "grad_norm": 1.3621420860290527, "learning_rate": 1.8272939442328953e-05, "loss": 0.15825393795967102, "step": 804 }, { "epoch": 0.09768232010678315, "grad_norm": 1.776959776878357, "learning_rate": 1.8270482741677927e-05, "loss": 0.3757792115211487, "step": 805 }, { "epoch": 0.09780366460381022, "grad_norm": 1.7959625720977783, "learning_rate": 1.8268026041026904e-05, "loss": 0.3489462435245514, "step": 806 }, { "epoch": 0.09792500910083728, "grad_norm": 2.1753323078155518, "learning_rate": 1.826556934037588e-05, "loss": 0.3603140413761139, "step": 807 }, { "epoch": 0.09804635359786433, "grad_norm": 1.625357747077942, "learning_rate": 1.8263112639724853e-05, "loss": 0.3582020699977875, "step": 808 }, { "epoch": 0.0981676980948914, "grad_norm": 2.369624376296997, "learning_rate": 1.8260655939073827e-05, "loss": 0.60666424036026, "step": 809 }, { "epoch": 0.09828904259191845, "grad_norm": 1.7844915390014648, "learning_rate": 1.82581992384228e-05, "loss": 0.15997028350830078, "step": 810 }, { "epoch": 0.09841038708894552, "grad_norm": 1.6896699666976929, "learning_rate": 1.8255742537771776e-05, "loss": 0.329281210899353, "step": 811 }, { "epoch": 0.09853173158597257, "grad_norm": 1.1069085597991943, "learning_rate": 1.825328583712075e-05, "loss": 0.49478620290756226, "step": 812 }, { "epoch": 0.09865307608299964, "grad_norm": 1.5067551136016846, "learning_rate": 1.8250829136469724e-05, "loss": 0.42493683099746704, "step": 813 }, { "epoch": 0.0987744205800267, "grad_norm": 1.9894484281539917, "learning_rate": 1.82483724358187e-05, "loss": 0.33686619997024536, "step": 814 }, { "epoch": 0.09889576507705375, "grad_norm": 1.79850172996521, "learning_rate": 1.8245915735167673e-05, "loss": 0.4596613645553589, "step": 815 }, { "epoch": 0.09901710957408082, "grad_norm": 1.3043336868286133, "learning_rate": 1.8243459034516647e-05, "loss": 0.39217016100883484, "step": 816 }, { "epoch": 0.09913845407110787, "grad_norm": 1.2223256826400757, "learning_rate": 1.824100233386562e-05, "loss": 0.17283861339092255, "step": 817 }, { "epoch": 0.09925979856813494, "grad_norm": 1.9951136112213135, "learning_rate": 1.8238545633214596e-05, "loss": 0.37487325072288513, "step": 818 }, { "epoch": 0.09938114306516199, "grad_norm": 1.6043702363967896, "learning_rate": 1.823608893256357e-05, "loss": 0.3740643858909607, "step": 819 }, { "epoch": 0.09950248756218906, "grad_norm": 1.5967137813568115, "learning_rate": 1.8233632231912544e-05, "loss": 0.4586668610572815, "step": 820 }, { "epoch": 0.09962383205921611, "grad_norm": 2.1203699111938477, "learning_rate": 1.823117553126152e-05, "loss": 0.43344107270240784, "step": 821 }, { "epoch": 0.09974517655624317, "grad_norm": 2.03326678276062, "learning_rate": 1.8228718830610493e-05, "loss": 0.2968045473098755, "step": 822 }, { "epoch": 0.09986652105327024, "grad_norm": 2.2074062824249268, "learning_rate": 1.8226262129959467e-05, "loss": 0.4776389002799988, "step": 823 }, { "epoch": 0.09998786555029729, "grad_norm": 1.5093281269073486, "learning_rate": 1.822380542930844e-05, "loss": 0.30169808864593506, "step": 824 }, { "epoch": 0.10010921004732436, "grad_norm": 1.4926306009292603, "learning_rate": 1.8221348728657415e-05, "loss": 0.36792951822280884, "step": 825 }, { "epoch": 0.10023055454435141, "grad_norm": 1.9243760108947754, "learning_rate": 1.821889202800639e-05, "loss": 0.3194350600242615, "step": 826 }, { "epoch": 0.10035189904137848, "grad_norm": 2.0720691680908203, "learning_rate": 1.8216435327355364e-05, "loss": 0.5761525630950928, "step": 827 }, { "epoch": 0.10047324353840553, "grad_norm": 1.1331757307052612, "learning_rate": 1.8213978626704338e-05, "loss": 0.12145671993494034, "step": 828 }, { "epoch": 0.1005945880354326, "grad_norm": 2.4218826293945312, "learning_rate": 1.8211521926053312e-05, "loss": 0.30834150314331055, "step": 829 }, { "epoch": 0.10071593253245965, "grad_norm": 1.6021833419799805, "learning_rate": 1.8209065225402287e-05, "loss": 0.24986329674720764, "step": 830 }, { "epoch": 0.10083727702948671, "grad_norm": 1.436381220817566, "learning_rate": 1.820660852475126e-05, "loss": 0.3261716961860657, "step": 831 }, { "epoch": 0.10095862152651378, "grad_norm": 1.4951610565185547, "learning_rate": 1.8204151824100235e-05, "loss": 0.2016591727733612, "step": 832 }, { "epoch": 0.10107996602354083, "grad_norm": 1.6854465007781982, "learning_rate": 1.820169512344921e-05, "loss": 0.18829268217086792, "step": 833 }, { "epoch": 0.1012013105205679, "grad_norm": 1.5968453884124756, "learning_rate": 1.8199238422798184e-05, "loss": 0.27957627177238464, "step": 834 }, { "epoch": 0.10132265501759495, "grad_norm": 1.384221076965332, "learning_rate": 1.8196781722147158e-05, "loss": 0.11764024198055267, "step": 835 }, { "epoch": 0.10144399951462202, "grad_norm": 1.5695202350616455, "learning_rate": 1.8194325021496132e-05, "loss": 0.5740670561790466, "step": 836 }, { "epoch": 0.10156534401164907, "grad_norm": 1.2498831748962402, "learning_rate": 1.8191868320845106e-05, "loss": 0.4904257655143738, "step": 837 }, { "epoch": 0.10168668850867613, "grad_norm": 1.4824366569519043, "learning_rate": 1.818941162019408e-05, "loss": 0.19962450861930847, "step": 838 }, { "epoch": 0.1018080330057032, "grad_norm": 0.6617300510406494, "learning_rate": 1.8186954919543055e-05, "loss": 0.036515962332487106, "step": 839 }, { "epoch": 0.10192937750273025, "grad_norm": 1.7322537899017334, "learning_rate": 1.818449821889203e-05, "loss": 0.3243387043476105, "step": 840 }, { "epoch": 0.10205072199975732, "grad_norm": 2.164475917816162, "learning_rate": 1.8182041518241004e-05, "loss": 0.4636027216911316, "step": 841 }, { "epoch": 0.10217206649678437, "grad_norm": 1.594534993171692, "learning_rate": 1.8179584817589978e-05, "loss": 0.08477664738893509, "step": 842 }, { "epoch": 0.10229341099381144, "grad_norm": 2.2461769580841064, "learning_rate": 1.8177128116938952e-05, "loss": 0.4827083647251129, "step": 843 }, { "epoch": 0.10241475549083849, "grad_norm": 1.0204533338546753, "learning_rate": 1.8174671416287926e-05, "loss": 0.017575763165950775, "step": 844 }, { "epoch": 0.10253609998786554, "grad_norm": 2.0975234508514404, "learning_rate": 1.81722147156369e-05, "loss": 0.1643562614917755, "step": 845 }, { "epoch": 0.10265744448489261, "grad_norm": 2.3310747146606445, "learning_rate": 1.8169758014985878e-05, "loss": 0.5198527574539185, "step": 846 }, { "epoch": 0.10277878898191967, "grad_norm": 1.4509798288345337, "learning_rate": 1.8167301314334852e-05, "loss": 0.38531947135925293, "step": 847 }, { "epoch": 0.10290013347894673, "grad_norm": 1.9895381927490234, "learning_rate": 1.8164844613683823e-05, "loss": 0.3467338979244232, "step": 848 }, { "epoch": 0.10302147797597379, "grad_norm": 2.63995361328125, "learning_rate": 1.8162387913032798e-05, "loss": 0.6168738603591919, "step": 849 }, { "epoch": 0.10314282247300086, "grad_norm": 1.148128628730774, "learning_rate": 1.8159931212381772e-05, "loss": 0.20436763763427734, "step": 850 }, { "epoch": 0.10326416697002791, "grad_norm": 1.5788880586624146, "learning_rate": 1.8157474511730746e-05, "loss": 0.6665417551994324, "step": 851 }, { "epoch": 0.10338551146705496, "grad_norm": 1.841687560081482, "learning_rate": 1.815501781107972e-05, "loss": 0.917460560798645, "step": 852 }, { "epoch": 0.10350685596408203, "grad_norm": 1.3689830303192139, "learning_rate": 1.8152561110428695e-05, "loss": 0.0624203160405159, "step": 853 }, { "epoch": 0.10362820046110909, "grad_norm": 0.030568469315767288, "learning_rate": 1.815010440977767e-05, "loss": 0.000579987361561507, "step": 854 }, { "epoch": 0.10374954495813615, "grad_norm": 1.6430957317352295, "learning_rate": 1.8147647709126643e-05, "loss": 0.34169209003448486, "step": 855 }, { "epoch": 0.1038708894551632, "grad_norm": 0.8496302962303162, "learning_rate": 1.8145191008475617e-05, "loss": 0.1893075406551361, "step": 856 }, { "epoch": 0.10399223395219027, "grad_norm": 1.5164453983306885, "learning_rate": 1.814273430782459e-05, "loss": 0.14207033812999725, "step": 857 }, { "epoch": 0.10411357844921733, "grad_norm": 1.661000370979309, "learning_rate": 1.8140277607173566e-05, "loss": 0.302262544631958, "step": 858 }, { "epoch": 0.10423492294624438, "grad_norm": 1.6371593475341797, "learning_rate": 1.813782090652254e-05, "loss": 0.3569563627243042, "step": 859 }, { "epoch": 0.10435626744327145, "grad_norm": 3.8244004249572754, "learning_rate": 1.8135364205871514e-05, "loss": 0.4229733645915985, "step": 860 }, { "epoch": 0.1044776119402985, "grad_norm": 2.6182684898376465, "learning_rate": 1.813290750522049e-05, "loss": 0.28717556595802307, "step": 861 }, { "epoch": 0.10459895643732557, "grad_norm": 1.6072068214416504, "learning_rate": 1.8130450804569463e-05, "loss": 0.27383649349212646, "step": 862 }, { "epoch": 0.10472030093435263, "grad_norm": 1.6131926774978638, "learning_rate": 1.8127994103918437e-05, "loss": 0.36529356241226196, "step": 863 }, { "epoch": 0.10484164543137969, "grad_norm": 1.0998895168304443, "learning_rate": 1.812553740326741e-05, "loss": 0.08332019299268723, "step": 864 }, { "epoch": 0.10496298992840675, "grad_norm": 1.5871561765670776, "learning_rate": 1.8123080702616386e-05, "loss": 0.4199519753456116, "step": 865 }, { "epoch": 0.1050843344254338, "grad_norm": 1.2270817756652832, "learning_rate": 1.812062400196536e-05, "loss": 0.22014616429805756, "step": 866 }, { "epoch": 0.10520567892246087, "grad_norm": 1.2801979780197144, "learning_rate": 1.8118167301314334e-05, "loss": 0.23065850138664246, "step": 867 }, { "epoch": 0.10532702341948792, "grad_norm": 1.5992664098739624, "learning_rate": 1.811571060066331e-05, "loss": 0.34659886360168457, "step": 868 }, { "epoch": 0.10544836791651499, "grad_norm": 1.710518479347229, "learning_rate": 1.8113253900012283e-05, "loss": 0.1520070731639862, "step": 869 }, { "epoch": 0.10556971241354204, "grad_norm": 1.231435775756836, "learning_rate": 1.8110797199361257e-05, "loss": 0.4307411313056946, "step": 870 }, { "epoch": 0.10569105691056911, "grad_norm": 1.7171838283538818, "learning_rate": 1.810834049871023e-05, "loss": 0.6533911228179932, "step": 871 }, { "epoch": 0.10581240140759617, "grad_norm": 2.1740949153900146, "learning_rate": 1.810588379805921e-05, "loss": 0.36171165108680725, "step": 872 }, { "epoch": 0.10593374590462322, "grad_norm": 0.6744754910469055, "learning_rate": 1.8103427097408183e-05, "loss": 0.025561649352312088, "step": 873 }, { "epoch": 0.10605509040165029, "grad_norm": 2.0431101322174072, "learning_rate": 1.8100970396757157e-05, "loss": 0.190989151597023, "step": 874 }, { "epoch": 0.10617643489867734, "grad_norm": 1.504240870475769, "learning_rate": 1.809851369610613e-05, "loss": 0.22664177417755127, "step": 875 }, { "epoch": 0.10629777939570441, "grad_norm": 1.855709433555603, "learning_rate": 1.8096056995455106e-05, "loss": 0.48425769805908203, "step": 876 }, { "epoch": 0.10641912389273146, "grad_norm": 1.0872446298599243, "learning_rate": 1.809360029480408e-05, "loss": 0.36054062843322754, "step": 877 }, { "epoch": 0.10654046838975853, "grad_norm": 1.4773401021957397, "learning_rate": 1.8091143594153054e-05, "loss": 0.16998085379600525, "step": 878 }, { "epoch": 0.10666181288678558, "grad_norm": 1.5159122943878174, "learning_rate": 1.808868689350203e-05, "loss": 0.350002646446228, "step": 879 }, { "epoch": 0.10678315738381264, "grad_norm": 1.9150190353393555, "learning_rate": 1.8086230192851003e-05, "loss": 0.3344387710094452, "step": 880 }, { "epoch": 0.1069045018808397, "grad_norm": 1.570078730583191, "learning_rate": 1.8083773492199977e-05, "loss": 0.13790318369865417, "step": 881 }, { "epoch": 0.10702584637786676, "grad_norm": 1.8442866802215576, "learning_rate": 1.808131679154895e-05, "loss": 0.3512876033782959, "step": 882 }, { "epoch": 0.10714719087489383, "grad_norm": 1.1014479398727417, "learning_rate": 1.8078860090897926e-05, "loss": 0.28830617666244507, "step": 883 }, { "epoch": 0.10726853537192088, "grad_norm": 1.95488440990448, "learning_rate": 1.80764033902469e-05, "loss": 0.5615525245666504, "step": 884 }, { "epoch": 0.10738987986894795, "grad_norm": 1.3948452472686768, "learning_rate": 1.8073946689595874e-05, "loss": 0.20246197283267975, "step": 885 }, { "epoch": 0.107511224365975, "grad_norm": 1.8468222618103027, "learning_rate": 1.807148998894485e-05, "loss": 0.5539135932922363, "step": 886 }, { "epoch": 0.10763256886300206, "grad_norm": 1.8323395252227783, "learning_rate": 1.8069033288293823e-05, "loss": 0.6240039467811584, "step": 887 }, { "epoch": 0.10775391336002912, "grad_norm": 1.7520233392715454, "learning_rate": 1.8066576587642797e-05, "loss": 0.3012508153915405, "step": 888 }, { "epoch": 0.10787525785705618, "grad_norm": 3.212730884552002, "learning_rate": 1.806411988699177e-05, "loss": 0.4331943392753601, "step": 889 }, { "epoch": 0.10799660235408325, "grad_norm": 3.032711982727051, "learning_rate": 1.8061663186340746e-05, "loss": 0.391502320766449, "step": 890 }, { "epoch": 0.1081179468511103, "grad_norm": 1.2783416509628296, "learning_rate": 1.805920648568972e-05, "loss": 0.2393539398908615, "step": 891 }, { "epoch": 0.10823929134813737, "grad_norm": 1.7031818628311157, "learning_rate": 1.8056749785038694e-05, "loss": 0.2811068892478943, "step": 892 }, { "epoch": 0.10836063584516442, "grad_norm": 3.197322368621826, "learning_rate": 1.805429308438767e-05, "loss": 0.4427284300327301, "step": 893 }, { "epoch": 0.10848198034219148, "grad_norm": 1.8068060874938965, "learning_rate": 1.8051836383736643e-05, "loss": 0.5168130397796631, "step": 894 }, { "epoch": 0.10860332483921854, "grad_norm": 2.21925950050354, "learning_rate": 1.8049379683085617e-05, "loss": 1.1132900714874268, "step": 895 }, { "epoch": 0.1087246693362456, "grad_norm": 1.7533857822418213, "learning_rate": 1.804692298243459e-05, "loss": 0.5228627324104309, "step": 896 }, { "epoch": 0.10884601383327266, "grad_norm": 2.0163865089416504, "learning_rate": 1.8044466281783565e-05, "loss": 0.2808172106742859, "step": 897 }, { "epoch": 0.10896735833029972, "grad_norm": 2.2207977771759033, "learning_rate": 1.804200958113254e-05, "loss": 0.4984392821788788, "step": 898 }, { "epoch": 0.10908870282732679, "grad_norm": 1.315753698348999, "learning_rate": 1.8039552880481514e-05, "loss": 0.05970459431409836, "step": 899 }, { "epoch": 0.10921004732435384, "grad_norm": 1.4315860271453857, "learning_rate": 1.8037096179830488e-05, "loss": 0.1566053330898285, "step": 900 }, { "epoch": 0.1093313918213809, "grad_norm": 1.2822933197021484, "learning_rate": 1.8034639479179462e-05, "loss": 0.09656253457069397, "step": 901 }, { "epoch": 0.10945273631840796, "grad_norm": 1.3101221323013306, "learning_rate": 1.8032182778528437e-05, "loss": 0.16047993302345276, "step": 902 }, { "epoch": 0.10957408081543502, "grad_norm": 1.6088323593139648, "learning_rate": 1.802972607787741e-05, "loss": 0.3874964118003845, "step": 903 }, { "epoch": 0.10969542531246208, "grad_norm": 2.3265113830566406, "learning_rate": 1.8027269377226385e-05, "loss": 0.32157737016677856, "step": 904 }, { "epoch": 0.10981676980948914, "grad_norm": 1.6004971265792847, "learning_rate": 1.802481267657536e-05, "loss": 0.3022279441356659, "step": 905 }, { "epoch": 0.1099381143065162, "grad_norm": 2.5602383613586426, "learning_rate": 1.8022355975924334e-05, "loss": 0.21964332461357117, "step": 906 }, { "epoch": 0.11005945880354326, "grad_norm": 1.7562570571899414, "learning_rate": 1.8019899275273308e-05, "loss": 0.4960334002971649, "step": 907 }, { "epoch": 0.11018080330057031, "grad_norm": 2.157813549041748, "learning_rate": 1.8017442574622282e-05, "loss": 0.4423237144947052, "step": 908 }, { "epoch": 0.11030214779759738, "grad_norm": 1.567229986190796, "learning_rate": 1.8014985873971256e-05, "loss": 0.37426209449768066, "step": 909 }, { "epoch": 0.11042349229462443, "grad_norm": 0.5419113636016846, "learning_rate": 1.801252917332023e-05, "loss": 0.024737078696489334, "step": 910 }, { "epoch": 0.1105448367916515, "grad_norm": 1.8386318683624268, "learning_rate": 1.8010072472669205e-05, "loss": 0.38826245069503784, "step": 911 }, { "epoch": 0.11066618128867856, "grad_norm": 2.213700532913208, "learning_rate": 1.8007615772018183e-05, "loss": 0.384283185005188, "step": 912 }, { "epoch": 0.11078752578570562, "grad_norm": 1.9804258346557617, "learning_rate": 1.8005159071367157e-05, "loss": 0.3135190010070801, "step": 913 }, { "epoch": 0.11090887028273268, "grad_norm": 1.7234359979629517, "learning_rate": 1.800270237071613e-05, "loss": 0.3256887197494507, "step": 914 }, { "epoch": 0.11103021477975973, "grad_norm": 1.073075532913208, "learning_rate": 1.8000245670065105e-05, "loss": 0.3075997829437256, "step": 915 }, { "epoch": 0.1111515592767868, "grad_norm": 1.4961111545562744, "learning_rate": 1.799778896941408e-05, "loss": 0.6745727062225342, "step": 916 }, { "epoch": 0.11127290377381385, "grad_norm": 1.4811803102493286, "learning_rate": 1.7995332268763054e-05, "loss": 0.18449625372886658, "step": 917 }, { "epoch": 0.11139424827084092, "grad_norm": 2.448620319366455, "learning_rate": 1.7992875568112028e-05, "loss": 0.4953969120979309, "step": 918 }, { "epoch": 0.11151559276786797, "grad_norm": 2.133965253829956, "learning_rate": 1.7990418867461002e-05, "loss": 0.21984288096427917, "step": 919 }, { "epoch": 0.11163693726489504, "grad_norm": 2.0249030590057373, "learning_rate": 1.7987962166809977e-05, "loss": 0.5486893057823181, "step": 920 }, { "epoch": 0.1117582817619221, "grad_norm": 1.5349080562591553, "learning_rate": 1.798550546615895e-05, "loss": 0.08272730559110641, "step": 921 }, { "epoch": 0.11187962625894916, "grad_norm": 1.4977649450302124, "learning_rate": 1.7983048765507925e-05, "loss": 0.202124685049057, "step": 922 }, { "epoch": 0.11200097075597622, "grad_norm": 1.1629031896591187, "learning_rate": 1.79805920648569e-05, "loss": 0.2228085845708847, "step": 923 }, { "epoch": 0.11212231525300327, "grad_norm": 1.4893561601638794, "learning_rate": 1.7978135364205874e-05, "loss": 0.3735879957675934, "step": 924 }, { "epoch": 0.11224365975003034, "grad_norm": 1.5193450450897217, "learning_rate": 1.7975678663554848e-05, "loss": 0.23259831964969635, "step": 925 }, { "epoch": 0.11236500424705739, "grad_norm": 1.7775638103485107, "learning_rate": 1.7973221962903822e-05, "loss": 0.4484153985977173, "step": 926 }, { "epoch": 0.11248634874408446, "grad_norm": 2.208301305770874, "learning_rate": 1.7970765262252797e-05, "loss": 0.9797415733337402, "step": 927 }, { "epoch": 0.11260769324111151, "grad_norm": 2.22806453704834, "learning_rate": 1.796830856160177e-05, "loss": 0.2920798063278198, "step": 928 }, { "epoch": 0.11272903773813858, "grad_norm": 1.8676373958587646, "learning_rate": 1.7965851860950745e-05, "loss": 0.6016289591789246, "step": 929 }, { "epoch": 0.11285038223516564, "grad_norm": 1.7921056747436523, "learning_rate": 1.796339516029972e-05, "loss": 0.3958570957183838, "step": 930 }, { "epoch": 0.11297172673219269, "grad_norm": 1.4786041975021362, "learning_rate": 1.7960938459648694e-05, "loss": 0.42081084847450256, "step": 931 }, { "epoch": 0.11309307122921976, "grad_norm": 2.0670037269592285, "learning_rate": 1.7958481758997668e-05, "loss": 0.5663632750511169, "step": 932 }, { "epoch": 0.11321441572624681, "grad_norm": 3.5581676959991455, "learning_rate": 1.7956025058346642e-05, "loss": 0.42889904975891113, "step": 933 }, { "epoch": 0.11333576022327388, "grad_norm": 1.748302698135376, "learning_rate": 1.7953568357695616e-05, "loss": 0.22019340097904205, "step": 934 }, { "epoch": 0.11345710472030093, "grad_norm": 1.2991547584533691, "learning_rate": 1.795111165704459e-05, "loss": 0.14706727862358093, "step": 935 }, { "epoch": 0.113578449217328, "grad_norm": 1.810490369796753, "learning_rate": 1.7948654956393565e-05, "loss": 0.39529550075531006, "step": 936 }, { "epoch": 0.11369979371435505, "grad_norm": 1.8873316049575806, "learning_rate": 1.794619825574254e-05, "loss": 0.17087635397911072, "step": 937 }, { "epoch": 0.11382113821138211, "grad_norm": 2.495636463165283, "learning_rate": 1.7943741555091513e-05, "loss": 0.21028999984264374, "step": 938 }, { "epoch": 0.11394248270840918, "grad_norm": 1.333327054977417, "learning_rate": 1.7941284854440488e-05, "loss": 0.14832647144794464, "step": 939 }, { "epoch": 0.11406382720543623, "grad_norm": 1.412825107574463, "learning_rate": 1.7938828153789462e-05, "loss": 0.08243104070425034, "step": 940 }, { "epoch": 0.1141851717024633, "grad_norm": 2.038456439971924, "learning_rate": 1.7936371453138436e-05, "loss": 0.26321762800216675, "step": 941 }, { "epoch": 0.11430651619949035, "grad_norm": 1.5243250131607056, "learning_rate": 1.793391475248741e-05, "loss": 0.24915413558483124, "step": 942 }, { "epoch": 0.11442786069651742, "grad_norm": 2.2460672855377197, "learning_rate": 1.7931458051836385e-05, "loss": 0.25913840532302856, "step": 943 }, { "epoch": 0.11454920519354447, "grad_norm": 1.7053711414337158, "learning_rate": 1.792900135118536e-05, "loss": 0.43879109621047974, "step": 944 }, { "epoch": 0.11467054969057153, "grad_norm": 1.959026575088501, "learning_rate": 1.7926544650534333e-05, "loss": 0.3923461437225342, "step": 945 }, { "epoch": 0.1147918941875986, "grad_norm": 1.6823830604553223, "learning_rate": 1.7924087949883307e-05, "loss": 0.2293175607919693, "step": 946 }, { "epoch": 0.11491323868462565, "grad_norm": 2.255762815475464, "learning_rate": 1.792163124923228e-05, "loss": 0.3342325687408447, "step": 947 }, { "epoch": 0.11503458318165272, "grad_norm": 0.44634830951690674, "learning_rate": 1.7919174548581256e-05, "loss": 0.012504082173109055, "step": 948 }, { "epoch": 0.11515592767867977, "grad_norm": 1.1759107112884521, "learning_rate": 1.791671784793023e-05, "loss": 0.18447083234786987, "step": 949 }, { "epoch": 0.11527727217570684, "grad_norm": 2.7203738689422607, "learning_rate": 1.7914261147279204e-05, "loss": 0.2561550736427307, "step": 950 }, { "epoch": 0.11539861667273389, "grad_norm": 1.4413387775421143, "learning_rate": 1.7911804446628182e-05, "loss": 0.40774011611938477, "step": 951 }, { "epoch": 0.11551996116976095, "grad_norm": 2.1084396839141846, "learning_rate": 1.7909347745977156e-05, "loss": 0.19283291697502136, "step": 952 }, { "epoch": 0.11564130566678801, "grad_norm": 2.9261622428894043, "learning_rate": 1.790689104532613e-05, "loss": 0.6466803550720215, "step": 953 }, { "epoch": 0.11576265016381507, "grad_norm": 3.529693841934204, "learning_rate": 1.7904434344675105e-05, "loss": 0.2920832335948944, "step": 954 }, { "epoch": 0.11588399466084214, "grad_norm": 2.1412620544433594, "learning_rate": 1.790197764402408e-05, "loss": 0.33081355690956116, "step": 955 }, { "epoch": 0.11600533915786919, "grad_norm": 1.9772106409072876, "learning_rate": 1.7899520943373053e-05, "loss": 0.49043765664100647, "step": 956 }, { "epoch": 0.11612668365489626, "grad_norm": 1.2700328826904297, "learning_rate": 1.7897064242722028e-05, "loss": 0.3909747898578644, "step": 957 }, { "epoch": 0.11624802815192331, "grad_norm": 1.4992220401763916, "learning_rate": 1.7894607542071002e-05, "loss": 0.11110074073076248, "step": 958 }, { "epoch": 0.11636937264895036, "grad_norm": 1.2691748142242432, "learning_rate": 1.7892150841419976e-05, "loss": 0.1457030028104782, "step": 959 }, { "epoch": 0.11649071714597743, "grad_norm": 2.2520852088928223, "learning_rate": 1.788969414076895e-05, "loss": 0.4143160581588745, "step": 960 }, { "epoch": 0.11661206164300449, "grad_norm": 2.852043390274048, "learning_rate": 1.7887237440117925e-05, "loss": 0.45678117871284485, "step": 961 }, { "epoch": 0.11673340614003155, "grad_norm": 1.3313668966293335, "learning_rate": 1.78847807394669e-05, "loss": 0.16556109488010406, "step": 962 }, { "epoch": 0.11685475063705861, "grad_norm": 1.356170654296875, "learning_rate": 1.7882324038815873e-05, "loss": 0.2957010567188263, "step": 963 }, { "epoch": 0.11697609513408568, "grad_norm": 2.6344823837280273, "learning_rate": 1.7879867338164847e-05, "loss": 0.6716679334640503, "step": 964 }, { "epoch": 0.11709743963111273, "grad_norm": 1.3099849224090576, "learning_rate": 1.7877410637513822e-05, "loss": 0.18413875997066498, "step": 965 }, { "epoch": 0.11721878412813978, "grad_norm": 1.4843673706054688, "learning_rate": 1.7874953936862796e-05, "loss": 0.12193383276462555, "step": 966 }, { "epoch": 0.11734012862516685, "grad_norm": 1.3463475704193115, "learning_rate": 1.787249723621177e-05, "loss": 0.49620726704597473, "step": 967 }, { "epoch": 0.1174614731221939, "grad_norm": 1.257489800453186, "learning_rate": 1.7870040535560744e-05, "loss": 0.0774109736084938, "step": 968 }, { "epoch": 0.11758281761922097, "grad_norm": 1.8335658311843872, "learning_rate": 1.786758383490972e-05, "loss": 0.5049456357955933, "step": 969 }, { "epoch": 0.11770416211624803, "grad_norm": 1.2735215425491333, "learning_rate": 1.7865127134258693e-05, "loss": 0.5225581526756287, "step": 970 }, { "epoch": 0.1178255066132751, "grad_norm": 2.409795045852661, "learning_rate": 1.7862670433607667e-05, "loss": 0.32925283908843994, "step": 971 }, { "epoch": 0.11794685111030215, "grad_norm": 1.4397212266921997, "learning_rate": 1.786021373295664e-05, "loss": 0.3886275291442871, "step": 972 }, { "epoch": 0.1180681956073292, "grad_norm": 2.468719720840454, "learning_rate": 1.7857757032305616e-05, "loss": 0.40028074383735657, "step": 973 }, { "epoch": 0.11818954010435627, "grad_norm": 2.085083246231079, "learning_rate": 1.785530033165459e-05, "loss": 0.5391139984130859, "step": 974 }, { "epoch": 0.11831088460138332, "grad_norm": 1.8129987716674805, "learning_rate": 1.7852843631003564e-05, "loss": 0.12351949512958527, "step": 975 }, { "epoch": 0.11843222909841039, "grad_norm": 2.5870203971862793, "learning_rate": 1.785038693035254e-05, "loss": 0.5224878191947937, "step": 976 }, { "epoch": 0.11855357359543744, "grad_norm": 1.4589635133743286, "learning_rate": 1.7847930229701513e-05, "loss": 0.21820476651191711, "step": 977 }, { "epoch": 0.11867491809246451, "grad_norm": 1.6059958934783936, "learning_rate": 1.7845473529050487e-05, "loss": 0.21393930912017822, "step": 978 }, { "epoch": 0.11879626258949157, "grad_norm": 1.6004008054733276, "learning_rate": 1.784301682839946e-05, "loss": 0.26858049631118774, "step": 979 }, { "epoch": 0.11891760708651862, "grad_norm": 2.0756053924560547, "learning_rate": 1.7840560127748436e-05, "loss": 0.30814725160598755, "step": 980 }, { "epoch": 0.11903895158354569, "grad_norm": 1.4374176263809204, "learning_rate": 1.783810342709741e-05, "loss": 0.3138670325279236, "step": 981 }, { "epoch": 0.11916029608057274, "grad_norm": 1.748745083808899, "learning_rate": 1.7835646726446384e-05, "loss": 0.5385026931762695, "step": 982 }, { "epoch": 0.11928164057759981, "grad_norm": 1.986444115638733, "learning_rate": 1.783319002579536e-05, "loss": 0.3967476785182953, "step": 983 }, { "epoch": 0.11940298507462686, "grad_norm": 2.7463040351867676, "learning_rate": 1.7830733325144333e-05, "loss": 0.19557945430278778, "step": 984 }, { "epoch": 0.11952432957165393, "grad_norm": 2.5255661010742188, "learning_rate": 1.7828276624493307e-05, "loss": 0.22208239138126373, "step": 985 }, { "epoch": 0.11964567406868098, "grad_norm": 2.8263087272644043, "learning_rate": 1.782581992384228e-05, "loss": 0.5413483381271362, "step": 986 }, { "epoch": 0.11976701856570804, "grad_norm": 2.419537305831909, "learning_rate": 1.7823363223191255e-05, "loss": 0.4241293668746948, "step": 987 }, { "epoch": 0.1198883630627351, "grad_norm": 1.8854936361312866, "learning_rate": 1.782090652254023e-05, "loss": 0.33829793334007263, "step": 988 }, { "epoch": 0.12000970755976216, "grad_norm": 1.954064130783081, "learning_rate": 1.7818449821889204e-05, "loss": 0.2632986307144165, "step": 989 }, { "epoch": 0.12013105205678923, "grad_norm": 2.0794637203216553, "learning_rate": 1.7815993121238178e-05, "loss": 0.4402806758880615, "step": 990 }, { "epoch": 0.12025239655381628, "grad_norm": 1.2783046960830688, "learning_rate": 1.7813536420587152e-05, "loss": 0.45365461707115173, "step": 991 }, { "epoch": 0.12037374105084335, "grad_norm": 2.022775411605835, "learning_rate": 1.7811079719936127e-05, "loss": 0.17468230426311493, "step": 992 }, { "epoch": 0.1204950855478704, "grad_norm": 1.8082324266433716, "learning_rate": 1.78086230192851e-05, "loss": 0.2507702112197876, "step": 993 }, { "epoch": 0.12061643004489746, "grad_norm": 2.072026252746582, "learning_rate": 1.7806166318634075e-05, "loss": 0.39948770403862, "step": 994 }, { "epoch": 0.12073777454192453, "grad_norm": 0.9502598643302917, "learning_rate": 1.780370961798305e-05, "loss": 0.4071555733680725, "step": 995 }, { "epoch": 0.12085911903895158, "grad_norm": 1.7930656671524048, "learning_rate": 1.7801252917332024e-05, "loss": 0.3525886535644531, "step": 996 }, { "epoch": 0.12098046353597865, "grad_norm": 2.9511780738830566, "learning_rate": 1.7798796216680998e-05, "loss": 0.4926954507827759, "step": 997 }, { "epoch": 0.1211018080330057, "grad_norm": 2.4663655757904053, "learning_rate": 1.7796339516029972e-05, "loss": 0.4453226923942566, "step": 998 }, { "epoch": 0.12122315253003277, "grad_norm": 1.8839552402496338, "learning_rate": 1.7793882815378946e-05, "loss": 0.4104011058807373, "step": 999 }, { "epoch": 0.12134449702705982, "grad_norm": 1.7265866994857788, "learning_rate": 1.779142611472792e-05, "loss": 0.3227764964103699, "step": 1000 }, { "epoch": 0.12146584152408688, "grad_norm": 2.007580518722534, "learning_rate": 1.7788969414076895e-05, "loss": 0.5954450368881226, "step": 1001 }, { "epoch": 0.12158718602111394, "grad_norm": 1.7668275833129883, "learning_rate": 1.778651271342587e-05, "loss": 0.2731704115867615, "step": 1002 }, { "epoch": 0.121708530518141, "grad_norm": 2.556995153427124, "learning_rate": 1.7784056012774844e-05, "loss": 0.4359130859375, "step": 1003 }, { "epoch": 0.12182987501516807, "grad_norm": 2.3546860218048096, "learning_rate": 1.7781599312123818e-05, "loss": 0.7123581171035767, "step": 1004 }, { "epoch": 0.12195121951219512, "grad_norm": 1.7314367294311523, "learning_rate": 1.7779142611472792e-05, "loss": 0.3100089132785797, "step": 1005 }, { "epoch": 0.12207256400922219, "grad_norm": 1.5493261814117432, "learning_rate": 1.7776685910821766e-05, "loss": 0.49244967103004456, "step": 1006 }, { "epoch": 0.12219390850624924, "grad_norm": 1.078848123550415, "learning_rate": 1.777422921017074e-05, "loss": 0.2843085825443268, "step": 1007 }, { "epoch": 0.1223152530032763, "grad_norm": 0.9978542327880859, "learning_rate": 1.7771772509519715e-05, "loss": 0.24063430726528168, "step": 1008 }, { "epoch": 0.12243659750030336, "grad_norm": 1.8010896444320679, "learning_rate": 1.776931580886869e-05, "loss": 0.4071890115737915, "step": 1009 }, { "epoch": 0.12255794199733042, "grad_norm": 1.3634896278381348, "learning_rate": 1.7766859108217663e-05, "loss": 0.38111934065818787, "step": 1010 }, { "epoch": 0.12267928649435748, "grad_norm": 1.713880181312561, "learning_rate": 1.7764402407566638e-05, "loss": 0.47287237644195557, "step": 1011 }, { "epoch": 0.12280063099138454, "grad_norm": 0.9091008901596069, "learning_rate": 1.7761945706915612e-05, "loss": 0.29161202907562256, "step": 1012 }, { "epoch": 0.1229219754884116, "grad_norm": 1.7493350505828857, "learning_rate": 1.7759489006264586e-05, "loss": 0.1336471140384674, "step": 1013 }, { "epoch": 0.12304331998543866, "grad_norm": 2.463874578475952, "learning_rate": 1.775703230561356e-05, "loss": 0.32139432430267334, "step": 1014 }, { "epoch": 0.12316466448246571, "grad_norm": 1.8679782152175903, "learning_rate": 1.7754575604962535e-05, "loss": 0.22383840382099152, "step": 1015 }, { "epoch": 0.12328600897949278, "grad_norm": 1.7886244058609009, "learning_rate": 1.775211890431151e-05, "loss": 0.2850145995616913, "step": 1016 }, { "epoch": 0.12340735347651983, "grad_norm": 1.5212403535842896, "learning_rate": 1.7749662203660487e-05, "loss": 0.19067168235778809, "step": 1017 }, { "epoch": 0.1235286979735469, "grad_norm": 1.0102014541625977, "learning_rate": 1.774720550300946e-05, "loss": 0.20473000407218933, "step": 1018 }, { "epoch": 0.12365004247057396, "grad_norm": 3.234743595123291, "learning_rate": 1.7744748802358435e-05, "loss": 0.4390396475791931, "step": 1019 }, { "epoch": 0.12377138696760102, "grad_norm": 1.182918906211853, "learning_rate": 1.774229210170741e-05, "loss": 0.379639208316803, "step": 1020 }, { "epoch": 0.12389273146462808, "grad_norm": 2.2215142250061035, "learning_rate": 1.7739835401056384e-05, "loss": 0.3299146890640259, "step": 1021 }, { "epoch": 0.12401407596165515, "grad_norm": 1.7570428848266602, "learning_rate": 1.7737378700405358e-05, "loss": 0.5761438608169556, "step": 1022 }, { "epoch": 0.1241354204586822, "grad_norm": 2.0322439670562744, "learning_rate": 1.7734921999754332e-05, "loss": 0.4319761395454407, "step": 1023 }, { "epoch": 0.12425676495570925, "grad_norm": 1.6962997913360596, "learning_rate": 1.7732465299103306e-05, "loss": 0.19291162490844727, "step": 1024 }, { "epoch": 0.12437810945273632, "grad_norm": 2.4372918605804443, "learning_rate": 1.773000859845228e-05, "loss": 0.2651346027851105, "step": 1025 }, { "epoch": 0.12449945394976338, "grad_norm": 2.495164632797241, "learning_rate": 1.7727551897801255e-05, "loss": 0.26666179299354553, "step": 1026 }, { "epoch": 0.12462079844679044, "grad_norm": 2.1924889087677, "learning_rate": 1.772509519715023e-05, "loss": 0.4076659083366394, "step": 1027 }, { "epoch": 0.1247421429438175, "grad_norm": 1.9566283226013184, "learning_rate": 1.7722638496499203e-05, "loss": 0.41398754715919495, "step": 1028 }, { "epoch": 0.12486348744084456, "grad_norm": 0.9982297420501709, "learning_rate": 1.7720181795848178e-05, "loss": 0.07757793366909027, "step": 1029 }, { "epoch": 0.12498483193787162, "grad_norm": 1.4906138181686401, "learning_rate": 1.7717725095197152e-05, "loss": 0.22355546057224274, "step": 1030 }, { "epoch": 0.12510617643489869, "grad_norm": 1.2983722686767578, "learning_rate": 1.7715268394546126e-05, "loss": 0.18219910562038422, "step": 1031 }, { "epoch": 0.12522752093192574, "grad_norm": 2.4365146160125732, "learning_rate": 1.77128116938951e-05, "loss": 0.5142960548400879, "step": 1032 }, { "epoch": 0.1253488654289528, "grad_norm": 1.5570324659347534, "learning_rate": 1.7710354993244075e-05, "loss": 0.7369379997253418, "step": 1033 }, { "epoch": 0.12547020992597985, "grad_norm": 1.2866076231002808, "learning_rate": 1.770789829259305e-05, "loss": 0.35260820388793945, "step": 1034 }, { "epoch": 0.12559155442300693, "grad_norm": 1.987706184387207, "learning_rate": 1.7705441591942023e-05, "loss": 0.22281603515148163, "step": 1035 }, { "epoch": 0.12571289892003398, "grad_norm": 1.536669373512268, "learning_rate": 1.7702984891290997e-05, "loss": 0.44109636545181274, "step": 1036 }, { "epoch": 0.12583424341706104, "grad_norm": 2.0726189613342285, "learning_rate": 1.770052819063997e-05, "loss": 0.4802488088607788, "step": 1037 }, { "epoch": 0.1259555879140881, "grad_norm": 1.770462155342102, "learning_rate": 1.7698071489988946e-05, "loss": 0.21857525408267975, "step": 1038 }, { "epoch": 0.12607693241111514, "grad_norm": 1.78280508518219, "learning_rate": 1.769561478933792e-05, "loss": 0.29267618060112, "step": 1039 }, { "epoch": 0.12619827690814223, "grad_norm": 2.35788631439209, "learning_rate": 1.7693158088686894e-05, "loss": 0.3705042898654938, "step": 1040 }, { "epoch": 0.12631962140516928, "grad_norm": 1.8159054517745972, "learning_rate": 1.769070138803587e-05, "loss": 0.8092231154441833, "step": 1041 }, { "epoch": 0.12644096590219633, "grad_norm": 1.4524773359298706, "learning_rate": 1.7688244687384843e-05, "loss": 0.2000163197517395, "step": 1042 }, { "epoch": 0.1265623103992234, "grad_norm": 1.3470537662506104, "learning_rate": 1.7685787986733817e-05, "loss": 0.11520944535732269, "step": 1043 }, { "epoch": 0.12668365489625044, "grad_norm": 1.9596749544143677, "learning_rate": 1.768333128608279e-05, "loss": 0.22145286202430725, "step": 1044 }, { "epoch": 0.12680499939327752, "grad_norm": 1.0872288942337036, "learning_rate": 1.7680874585431766e-05, "loss": 0.24465565383434296, "step": 1045 }, { "epoch": 0.12692634389030458, "grad_norm": 1.756108283996582, "learning_rate": 1.767841788478074e-05, "loss": 0.2880557179450989, "step": 1046 }, { "epoch": 0.12704768838733163, "grad_norm": 1.3416721820831299, "learning_rate": 1.7675961184129714e-05, "loss": 0.22841346263885498, "step": 1047 }, { "epoch": 0.12716903288435868, "grad_norm": 1.3795925378799438, "learning_rate": 1.767350448347869e-05, "loss": 0.18589796125888824, "step": 1048 }, { "epoch": 0.12729037738138577, "grad_norm": 1.842610239982605, "learning_rate": 1.7671047782827663e-05, "loss": 0.25392380356788635, "step": 1049 }, { "epoch": 0.12741172187841282, "grad_norm": 0.906227171421051, "learning_rate": 1.7668591082176637e-05, "loss": 0.019568203017115593, "step": 1050 }, { "epoch": 0.12753306637543987, "grad_norm": 3.2031447887420654, "learning_rate": 1.766613438152561e-05, "loss": 0.4728550314903259, "step": 1051 }, { "epoch": 0.12765441087246693, "grad_norm": 1.2640734910964966, "learning_rate": 1.7663677680874586e-05, "loss": 0.0975283533334732, "step": 1052 }, { "epoch": 0.12777575536949398, "grad_norm": 2.3426437377929688, "learning_rate": 1.766122098022356e-05, "loss": 0.4550413191318512, "step": 1053 }, { "epoch": 0.12789709986652106, "grad_norm": 1.7087280750274658, "learning_rate": 1.7658764279572534e-05, "loss": 0.48773813247680664, "step": 1054 }, { "epoch": 0.12801844436354812, "grad_norm": 2.2121965885162354, "learning_rate": 1.765630757892151e-05, "loss": 0.27385658025741577, "step": 1055 }, { "epoch": 0.12813978886057517, "grad_norm": 2.641083002090454, "learning_rate": 1.7653850878270483e-05, "loss": 0.33635473251342773, "step": 1056 }, { "epoch": 0.12826113335760223, "grad_norm": 1.84109628200531, "learning_rate": 1.765139417761946e-05, "loss": 0.09408122301101685, "step": 1057 }, { "epoch": 0.12838247785462928, "grad_norm": 1.1630797386169434, "learning_rate": 1.7648937476968434e-05, "loss": 0.17066405713558197, "step": 1058 }, { "epoch": 0.12850382235165636, "grad_norm": 0.3085211217403412, "learning_rate": 1.764648077631741e-05, "loss": 0.004453294910490513, "step": 1059 }, { "epoch": 0.12862516684868341, "grad_norm": 3.949612617492676, "learning_rate": 1.7644024075666383e-05, "loss": 0.30024462938308716, "step": 1060 }, { "epoch": 0.12874651134571047, "grad_norm": 2.1858408451080322, "learning_rate": 1.7641567375015357e-05, "loss": 0.26950281858444214, "step": 1061 }, { "epoch": 0.12886785584273752, "grad_norm": 2.139246702194214, "learning_rate": 1.763911067436433e-05, "loss": 0.15661706030368805, "step": 1062 }, { "epoch": 0.1289892003397646, "grad_norm": 2.098543167114258, "learning_rate": 1.7636653973713306e-05, "loss": 0.2845885455608368, "step": 1063 }, { "epoch": 0.12911054483679166, "grad_norm": 2.557609796524048, "learning_rate": 1.763419727306228e-05, "loss": 0.37870073318481445, "step": 1064 }, { "epoch": 0.1292318893338187, "grad_norm": 2.1333935260772705, "learning_rate": 1.7631740572411254e-05, "loss": 0.1683444082736969, "step": 1065 }, { "epoch": 0.12935323383084577, "grad_norm": 2.926968574523926, "learning_rate": 1.762928387176023e-05, "loss": 0.6188098192214966, "step": 1066 }, { "epoch": 0.12947457832787282, "grad_norm": 2.2622830867767334, "learning_rate": 1.7626827171109203e-05, "loss": 0.30848801136016846, "step": 1067 }, { "epoch": 0.1295959228248999, "grad_norm": 2.319904327392578, "learning_rate": 1.7624370470458177e-05, "loss": 0.5221332311630249, "step": 1068 }, { "epoch": 0.12971726732192695, "grad_norm": 1.7250056266784668, "learning_rate": 1.762191376980715e-05, "loss": 0.23689205944538116, "step": 1069 }, { "epoch": 0.129838611818954, "grad_norm": 1.9344574213027954, "learning_rate": 1.7619457069156126e-05, "loss": 0.30666399002075195, "step": 1070 }, { "epoch": 0.12995995631598106, "grad_norm": 1.857367992401123, "learning_rate": 1.76170003685051e-05, "loss": 0.6061972975730896, "step": 1071 }, { "epoch": 0.13008130081300814, "grad_norm": 2.382561445236206, "learning_rate": 1.7614543667854074e-05, "loss": 0.7764930725097656, "step": 1072 }, { "epoch": 0.1302026453100352, "grad_norm": 1.8192449808120728, "learning_rate": 1.761208696720305e-05, "loss": 0.1905331015586853, "step": 1073 }, { "epoch": 0.13032398980706225, "grad_norm": 0.7959886789321899, "learning_rate": 1.7609630266552023e-05, "loss": 0.036777134984731674, "step": 1074 }, { "epoch": 0.1304453343040893, "grad_norm": 2.9312210083007812, "learning_rate": 1.7607173565900997e-05, "loss": 0.5323508977890015, "step": 1075 }, { "epoch": 0.13056667880111636, "grad_norm": 1.7050628662109375, "learning_rate": 1.760471686524997e-05, "loss": 0.3697940707206726, "step": 1076 }, { "epoch": 0.13068802329814344, "grad_norm": 2.098642587661743, "learning_rate": 1.7602260164598945e-05, "loss": 0.45317330956459045, "step": 1077 }, { "epoch": 0.1308093677951705, "grad_norm": 1.8114123344421387, "learning_rate": 1.759980346394792e-05, "loss": 0.46396714448928833, "step": 1078 }, { "epoch": 0.13093071229219755, "grad_norm": 2.046638011932373, "learning_rate": 1.7597346763296894e-05, "loss": 0.31400540471076965, "step": 1079 }, { "epoch": 0.1310520567892246, "grad_norm": 2.056553602218628, "learning_rate": 1.7594890062645868e-05, "loss": 0.21949997544288635, "step": 1080 }, { "epoch": 0.13117340128625166, "grad_norm": 1.797045111656189, "learning_rate": 1.7592433361994842e-05, "loss": 0.6279269456863403, "step": 1081 }, { "epoch": 0.13129474578327874, "grad_norm": 2.286411762237549, "learning_rate": 1.7589976661343817e-05, "loss": 0.3866305649280548, "step": 1082 }, { "epoch": 0.1314160902803058, "grad_norm": 1.5553948879241943, "learning_rate": 1.758751996069279e-05, "loss": 0.13357111811637878, "step": 1083 }, { "epoch": 0.13153743477733285, "grad_norm": 1.6972168684005737, "learning_rate": 1.7585063260041765e-05, "loss": 0.1964944750070572, "step": 1084 }, { "epoch": 0.1316587792743599, "grad_norm": 1.53740394115448, "learning_rate": 1.758260655939074e-05, "loss": 0.20683187246322632, "step": 1085 }, { "epoch": 0.13178012377138698, "grad_norm": 1.4312472343444824, "learning_rate": 1.7580149858739714e-05, "loss": 0.17711327970027924, "step": 1086 }, { "epoch": 0.13190146826841403, "grad_norm": 1.7033421993255615, "learning_rate": 1.7577693158088688e-05, "loss": 0.14665207266807556, "step": 1087 }, { "epoch": 0.1320228127654411, "grad_norm": 1.8516016006469727, "learning_rate": 1.7575236457437662e-05, "loss": 0.22185489535331726, "step": 1088 }, { "epoch": 0.13214415726246814, "grad_norm": 2.1618032455444336, "learning_rate": 1.7572779756786636e-05, "loss": 0.4268803298473358, "step": 1089 }, { "epoch": 0.1322655017594952, "grad_norm": 1.8190878629684448, "learning_rate": 1.757032305613561e-05, "loss": 0.31076788902282715, "step": 1090 }, { "epoch": 0.13238684625652228, "grad_norm": 1.728493571281433, "learning_rate": 1.7567866355484585e-05, "loss": 0.42295384407043457, "step": 1091 }, { "epoch": 0.13250819075354933, "grad_norm": 1.6901476383209229, "learning_rate": 1.756540965483356e-05, "loss": 0.2597338557243347, "step": 1092 }, { "epoch": 0.13262953525057639, "grad_norm": 1.8863033056259155, "learning_rate": 1.7562952954182534e-05, "loss": 0.5735151767730713, "step": 1093 }, { "epoch": 0.13275087974760344, "grad_norm": 1.6283295154571533, "learning_rate": 1.7560496253531508e-05, "loss": 0.22838622331619263, "step": 1094 }, { "epoch": 0.1328722242446305, "grad_norm": 2.0172297954559326, "learning_rate": 1.7558039552880482e-05, "loss": 0.45103025436401367, "step": 1095 }, { "epoch": 0.13299356874165758, "grad_norm": 0.24614541232585907, "learning_rate": 1.755558285222946e-05, "loss": 0.002931026741862297, "step": 1096 }, { "epoch": 0.13311491323868463, "grad_norm": 1.4380757808685303, "learning_rate": 1.7553126151578434e-05, "loss": 0.3003803491592407, "step": 1097 }, { "epoch": 0.13323625773571168, "grad_norm": 2.0845935344696045, "learning_rate": 1.7550669450927408e-05, "loss": 0.5532746315002441, "step": 1098 }, { "epoch": 0.13335760223273874, "grad_norm": 1.5353543758392334, "learning_rate": 1.7548212750276382e-05, "loss": 0.19028273224830627, "step": 1099 }, { "epoch": 0.13347894672976582, "grad_norm": 0.20537756383419037, "learning_rate": 1.7545756049625357e-05, "loss": 0.002893675584346056, "step": 1100 }, { "epoch": 0.13360029122679287, "grad_norm": 3.135423183441162, "learning_rate": 1.754329934897433e-05, "loss": 0.3149774670600891, "step": 1101 }, { "epoch": 0.13372163572381993, "grad_norm": 2.225597620010376, "learning_rate": 1.7540842648323305e-05, "loss": 0.28401559591293335, "step": 1102 }, { "epoch": 0.13384298022084698, "grad_norm": 1.264791488647461, "learning_rate": 1.753838594767228e-05, "loss": 0.32886746525764465, "step": 1103 }, { "epoch": 0.13396432471787403, "grad_norm": 0.3391306698322296, "learning_rate": 1.7535929247021254e-05, "loss": 0.071807861328125, "step": 1104 }, { "epoch": 0.13408566921490112, "grad_norm": 1.0494962930679321, "learning_rate": 1.7533472546370228e-05, "loss": 0.21048381924629211, "step": 1105 }, { "epoch": 0.13420701371192817, "grad_norm": 1.717574954032898, "learning_rate": 1.7531015845719202e-05, "loss": 0.4358229637145996, "step": 1106 }, { "epoch": 0.13432835820895522, "grad_norm": 1.9669817686080933, "learning_rate": 1.7528559145068177e-05, "loss": 0.6443597674369812, "step": 1107 }, { "epoch": 0.13444970270598228, "grad_norm": 2.6505227088928223, "learning_rate": 1.752610244441715e-05, "loss": 0.5362628102302551, "step": 1108 }, { "epoch": 0.13457104720300933, "grad_norm": 2.0963194370269775, "learning_rate": 1.7523645743766125e-05, "loss": 0.3740871250629425, "step": 1109 }, { "epoch": 0.1346923917000364, "grad_norm": 1.8487685918807983, "learning_rate": 1.75211890431151e-05, "loss": 0.41866418719291687, "step": 1110 }, { "epoch": 0.13481373619706347, "grad_norm": 2.146871328353882, "learning_rate": 1.7518732342464074e-05, "loss": 0.561809778213501, "step": 1111 }, { "epoch": 0.13493508069409052, "grad_norm": 1.8240047693252563, "learning_rate": 1.7516275641813048e-05, "loss": 0.5480957627296448, "step": 1112 }, { "epoch": 0.13505642519111757, "grad_norm": 1.8582134246826172, "learning_rate": 1.7513818941162022e-05, "loss": 0.3318016529083252, "step": 1113 }, { "epoch": 0.13517776968814466, "grad_norm": 2.6997921466827393, "learning_rate": 1.7511362240510996e-05, "loss": 0.2811315357685089, "step": 1114 }, { "epoch": 0.1352991141851717, "grad_norm": 2.0669479370117188, "learning_rate": 1.750890553985997e-05, "loss": 0.39928320050239563, "step": 1115 }, { "epoch": 0.13542045868219876, "grad_norm": 1.7311495542526245, "learning_rate": 1.7506448839208945e-05, "loss": 0.3384130895137787, "step": 1116 }, { "epoch": 0.13554180317922582, "grad_norm": 1.5945545434951782, "learning_rate": 1.750399213855792e-05, "loss": 0.23259413242340088, "step": 1117 }, { "epoch": 0.13566314767625287, "grad_norm": 1.2761726379394531, "learning_rate": 1.7501535437906893e-05, "loss": 0.2338969111442566, "step": 1118 }, { "epoch": 0.13578449217327995, "grad_norm": 1.9124693870544434, "learning_rate": 1.7499078737255864e-05, "loss": 0.34637853503227234, "step": 1119 }, { "epoch": 0.135905836670307, "grad_norm": 2.1211183071136475, "learning_rate": 1.749662203660484e-05, "loss": 0.3605934679508209, "step": 1120 }, { "epoch": 0.13602718116733406, "grad_norm": 1.2206485271453857, "learning_rate": 1.7494165335953813e-05, "loss": 0.35341042280197144, "step": 1121 }, { "epoch": 0.13614852566436111, "grad_norm": 1.5597529411315918, "learning_rate": 1.749170863530279e-05, "loss": 0.2815924286842346, "step": 1122 }, { "epoch": 0.13626987016138817, "grad_norm": 2.247127056121826, "learning_rate": 1.7489251934651765e-05, "loss": 0.3463256359100342, "step": 1123 }, { "epoch": 0.13639121465841525, "grad_norm": 2.1039061546325684, "learning_rate": 1.748679523400074e-05, "loss": 0.1709476262331009, "step": 1124 }, { "epoch": 0.1365125591554423, "grad_norm": 2.266066074371338, "learning_rate": 1.7484338533349713e-05, "loss": 0.46501120924949646, "step": 1125 }, { "epoch": 0.13663390365246936, "grad_norm": 1.4224648475646973, "learning_rate": 1.7481881832698687e-05, "loss": 0.11234971880912781, "step": 1126 }, { "epoch": 0.1367552481494964, "grad_norm": 2.30653977394104, "learning_rate": 1.747942513204766e-05, "loss": 0.36751458048820496, "step": 1127 }, { "epoch": 0.1368765926465235, "grad_norm": 1.368033766746521, "learning_rate": 1.7476968431396636e-05, "loss": 0.26016345620155334, "step": 1128 }, { "epoch": 0.13699793714355055, "grad_norm": 1.8139694929122925, "learning_rate": 1.747451173074561e-05, "loss": 0.12573404610157013, "step": 1129 }, { "epoch": 0.1371192816405776, "grad_norm": 1.769921898841858, "learning_rate": 1.7472055030094584e-05, "loss": 0.26439419388771057, "step": 1130 }, { "epoch": 0.13724062613760465, "grad_norm": 1.402004599571228, "learning_rate": 1.746959832944356e-05, "loss": 0.18855473399162292, "step": 1131 }, { "epoch": 0.1373619706346317, "grad_norm": 2.369947671890259, "learning_rate": 1.7467141628792533e-05, "loss": 0.5614909529685974, "step": 1132 }, { "epoch": 0.1374833151316588, "grad_norm": 1.4672306776046753, "learning_rate": 1.7464684928141507e-05, "loss": 0.23816195130348206, "step": 1133 }, { "epoch": 0.13760465962868584, "grad_norm": 1.4055780172348022, "learning_rate": 1.746222822749048e-05, "loss": 0.38255172967910767, "step": 1134 }, { "epoch": 0.1377260041257129, "grad_norm": 2.0331690311431885, "learning_rate": 1.7459771526839456e-05, "loss": 0.44152066111564636, "step": 1135 }, { "epoch": 0.13784734862273995, "grad_norm": 1.058262825012207, "learning_rate": 1.745731482618843e-05, "loss": 0.10875988751649857, "step": 1136 }, { "epoch": 0.137968693119767, "grad_norm": 1.6434763669967651, "learning_rate": 1.7454858125537404e-05, "loss": 0.2978910505771637, "step": 1137 }, { "epoch": 0.1380900376167941, "grad_norm": 2.271061897277832, "learning_rate": 1.745240142488638e-05, "loss": 0.516408383846283, "step": 1138 }, { "epoch": 0.13821138211382114, "grad_norm": 2.4035356044769287, "learning_rate": 1.7449944724235353e-05, "loss": 0.379251629114151, "step": 1139 }, { "epoch": 0.1383327266108482, "grad_norm": 1.6674612760543823, "learning_rate": 1.7447488023584327e-05, "loss": 0.44433078169822693, "step": 1140 }, { "epoch": 0.13845407110787525, "grad_norm": 1.2432188987731934, "learning_rate": 1.74450313229333e-05, "loss": 0.4149407744407654, "step": 1141 }, { "epoch": 0.13857541560490233, "grad_norm": 1.6661003828048706, "learning_rate": 1.7442574622282276e-05, "loss": 0.42093947529792786, "step": 1142 }, { "epoch": 0.13869676010192938, "grad_norm": 2.2240869998931885, "learning_rate": 1.744011792163125e-05, "loss": 0.2196013480424881, "step": 1143 }, { "epoch": 0.13881810459895644, "grad_norm": 1.9101980924606323, "learning_rate": 1.7437661220980224e-05, "loss": 0.3927350342273712, "step": 1144 }, { "epoch": 0.1389394490959835, "grad_norm": 2.2990522384643555, "learning_rate": 1.74352045203292e-05, "loss": 0.35650429129600525, "step": 1145 }, { "epoch": 0.13906079359301055, "grad_norm": 1.582953691482544, "learning_rate": 1.7432747819678173e-05, "loss": 0.2683204412460327, "step": 1146 }, { "epoch": 0.13918213809003763, "grad_norm": 1.6646018028259277, "learning_rate": 1.7430291119027147e-05, "loss": 0.2388467639684677, "step": 1147 }, { "epoch": 0.13930348258706468, "grad_norm": 1.9748457670211792, "learning_rate": 1.742783441837612e-05, "loss": 0.2437652200460434, "step": 1148 }, { "epoch": 0.13942482708409173, "grad_norm": 2.2980165481567383, "learning_rate": 1.7425377717725095e-05, "loss": 0.19980880618095398, "step": 1149 }, { "epoch": 0.1395461715811188, "grad_norm": 1.4055594205856323, "learning_rate": 1.742292101707407e-05, "loss": 0.5625031590461731, "step": 1150 }, { "epoch": 0.13966751607814584, "grad_norm": 1.8812947273254395, "learning_rate": 1.7420464316423044e-05, "loss": 0.4220775067806244, "step": 1151 }, { "epoch": 0.13978886057517292, "grad_norm": 1.6144556999206543, "learning_rate": 1.7418007615772018e-05, "loss": 0.3916918635368347, "step": 1152 }, { "epoch": 0.13991020507219998, "grad_norm": 1.558756947517395, "learning_rate": 1.7415550915120992e-05, "loss": 0.2850634455680847, "step": 1153 }, { "epoch": 0.14003154956922703, "grad_norm": 1.508731484413147, "learning_rate": 1.7413094214469967e-05, "loss": 0.23739512264728546, "step": 1154 }, { "epoch": 0.14015289406625409, "grad_norm": 1.8177855014801025, "learning_rate": 1.741063751381894e-05, "loss": 0.5787197947502136, "step": 1155 }, { "epoch": 0.14027423856328117, "grad_norm": 1.2913404703140259, "learning_rate": 1.7408180813167915e-05, "loss": 0.14389440417289734, "step": 1156 }, { "epoch": 0.14039558306030822, "grad_norm": 1.9017482995986938, "learning_rate": 1.740572411251689e-05, "loss": 0.592059850692749, "step": 1157 }, { "epoch": 0.14051692755733527, "grad_norm": 2.6217381954193115, "learning_rate": 1.7403267411865864e-05, "loss": 0.28019648790359497, "step": 1158 }, { "epoch": 0.14063827205436233, "grad_norm": 1.557847499847412, "learning_rate": 1.7400810711214838e-05, "loss": 0.41275694966316223, "step": 1159 }, { "epoch": 0.14075961655138938, "grad_norm": 1.5144991874694824, "learning_rate": 1.7398354010563812e-05, "loss": 0.6181284785270691, "step": 1160 }, { "epoch": 0.14088096104841646, "grad_norm": 1.5369459390640259, "learning_rate": 1.7395897309912786e-05, "loss": 0.3667132258415222, "step": 1161 }, { "epoch": 0.14100230554544352, "grad_norm": 3.4152791500091553, "learning_rate": 1.7393440609261764e-05, "loss": 0.354489803314209, "step": 1162 }, { "epoch": 0.14112365004247057, "grad_norm": 3.8756494522094727, "learning_rate": 1.739098390861074e-05, "loss": 0.6173999905586243, "step": 1163 }, { "epoch": 0.14124499453949763, "grad_norm": 2.1881439685821533, "learning_rate": 1.7388527207959713e-05, "loss": 0.24914789199829102, "step": 1164 }, { "epoch": 0.1413663390365247, "grad_norm": 2.234002113342285, "learning_rate": 1.7386070507308687e-05, "loss": 0.5625263452529907, "step": 1165 }, { "epoch": 0.14148768353355176, "grad_norm": 1.6645379066467285, "learning_rate": 1.738361380665766e-05, "loss": 0.24430695176124573, "step": 1166 }, { "epoch": 0.14160902803057882, "grad_norm": 1.9651813507080078, "learning_rate": 1.7381157106006635e-05, "loss": 0.5734595060348511, "step": 1167 }, { "epoch": 0.14173037252760587, "grad_norm": 1.7164967060089111, "learning_rate": 1.737870040535561e-05, "loss": 0.3440057635307312, "step": 1168 }, { "epoch": 0.14185171702463292, "grad_norm": 0.0067315357737243176, "learning_rate": 1.7376243704704584e-05, "loss": 7.191597978817299e-05, "step": 1169 }, { "epoch": 0.14197306152166, "grad_norm": 1.702622413635254, "learning_rate": 1.7373787004053558e-05, "loss": 0.32997044920921326, "step": 1170 }, { "epoch": 0.14209440601868706, "grad_norm": 1.761008620262146, "learning_rate": 1.7371330303402532e-05, "loss": 0.2606213390827179, "step": 1171 }, { "epoch": 0.1422157505157141, "grad_norm": 1.6407947540283203, "learning_rate": 1.7368873602751507e-05, "loss": 0.21850815415382385, "step": 1172 }, { "epoch": 0.14233709501274117, "grad_norm": 1.0973106622695923, "learning_rate": 1.736641690210048e-05, "loss": 0.11652334034442902, "step": 1173 }, { "epoch": 0.14245843950976822, "grad_norm": 1.4407575130462646, "learning_rate": 1.7363960201449455e-05, "loss": 0.2936984896659851, "step": 1174 }, { "epoch": 0.1425797840067953, "grad_norm": 2.2742743492126465, "learning_rate": 1.736150350079843e-05, "loss": 0.26105767488479614, "step": 1175 }, { "epoch": 0.14270112850382236, "grad_norm": 1.7479246854782104, "learning_rate": 1.7359046800147404e-05, "loss": 0.18206825852394104, "step": 1176 }, { "epoch": 0.1428224730008494, "grad_norm": 1.737959623336792, "learning_rate": 1.7356590099496378e-05, "loss": 0.1964130699634552, "step": 1177 }, { "epoch": 0.14294381749787646, "grad_norm": 1.4681860208511353, "learning_rate": 1.7354133398845352e-05, "loss": 0.2748938202857971, "step": 1178 }, { "epoch": 0.14306516199490354, "grad_norm": 1.577621579170227, "learning_rate": 1.7351676698194327e-05, "loss": 0.28804469108581543, "step": 1179 }, { "epoch": 0.1431865064919306, "grad_norm": 2.1407663822174072, "learning_rate": 1.73492199975433e-05, "loss": 0.15757066011428833, "step": 1180 }, { "epoch": 0.14330785098895765, "grad_norm": 2.3729565143585205, "learning_rate": 1.7346763296892275e-05, "loss": 0.7062378525733948, "step": 1181 }, { "epoch": 0.1434291954859847, "grad_norm": 1.1742050647735596, "learning_rate": 1.734430659624125e-05, "loss": 0.18161582946777344, "step": 1182 }, { "epoch": 0.14355053998301176, "grad_norm": 1.5152652263641357, "learning_rate": 1.7341849895590224e-05, "loss": 0.3839699923992157, "step": 1183 }, { "epoch": 0.14367188448003884, "grad_norm": 1.7843862771987915, "learning_rate": 1.7339393194939198e-05, "loss": 0.23148319125175476, "step": 1184 }, { "epoch": 0.1437932289770659, "grad_norm": 1.5558139085769653, "learning_rate": 1.7336936494288172e-05, "loss": 0.562111496925354, "step": 1185 }, { "epoch": 0.14391457347409295, "grad_norm": 2.2693772315979004, "learning_rate": 1.7334479793637146e-05, "loss": 0.5566685199737549, "step": 1186 }, { "epoch": 0.14403591797112, "grad_norm": 2.0409531593322754, "learning_rate": 1.733202309298612e-05, "loss": 0.22142720222473145, "step": 1187 }, { "epoch": 0.14415726246814706, "grad_norm": 1.4018200635910034, "learning_rate": 1.7329566392335095e-05, "loss": 0.3808784782886505, "step": 1188 }, { "epoch": 0.14427860696517414, "grad_norm": 1.65623939037323, "learning_rate": 1.732710969168407e-05, "loss": 0.5155322551727295, "step": 1189 }, { "epoch": 0.1443999514622012, "grad_norm": 2.4095706939697266, "learning_rate": 1.7324652991033043e-05, "loss": 0.8260776996612549, "step": 1190 }, { "epoch": 0.14452129595922825, "grad_norm": 2.0686557292938232, "learning_rate": 1.7322196290382018e-05, "loss": 0.3561612069606781, "step": 1191 }, { "epoch": 0.1446426404562553, "grad_norm": 2.2921102046966553, "learning_rate": 1.7319739589730992e-05, "loss": 0.5535135865211487, "step": 1192 }, { "epoch": 0.14476398495328238, "grad_norm": 1.5281484127044678, "learning_rate": 1.7317282889079966e-05, "loss": 0.17620521783828735, "step": 1193 }, { "epoch": 0.14488532945030944, "grad_norm": 1.8255594968795776, "learning_rate": 1.731482618842894e-05, "loss": 0.6243975162506104, "step": 1194 }, { "epoch": 0.1450066739473365, "grad_norm": 1.8629471063613892, "learning_rate": 1.7312369487777915e-05, "loss": 0.3417014479637146, "step": 1195 }, { "epoch": 0.14512801844436354, "grad_norm": 2.4901225566864014, "learning_rate": 1.730991278712689e-05, "loss": 0.3027805685997009, "step": 1196 }, { "epoch": 0.1452493629413906, "grad_norm": 2.5729613304138184, "learning_rate": 1.7307456086475863e-05, "loss": 0.297546923160553, "step": 1197 }, { "epoch": 0.14537070743841768, "grad_norm": 1.570296287536621, "learning_rate": 1.7304999385824837e-05, "loss": 0.0903053879737854, "step": 1198 }, { "epoch": 0.14549205193544473, "grad_norm": 1.8403199911117554, "learning_rate": 1.730254268517381e-05, "loss": 0.3443407118320465, "step": 1199 }, { "epoch": 0.1456133964324718, "grad_norm": 1.7548613548278809, "learning_rate": 1.7300085984522786e-05, "loss": 0.29256072640419006, "step": 1200 }, { "epoch": 0.14573474092949884, "grad_norm": 1.686950445175171, "learning_rate": 1.729762928387176e-05, "loss": 0.19930797815322876, "step": 1201 }, { "epoch": 0.1458560854265259, "grad_norm": 1.3716206550598145, "learning_rate": 1.7295172583220738e-05, "loss": 0.3277512192726135, "step": 1202 }, { "epoch": 0.14597742992355298, "grad_norm": 1.6324442625045776, "learning_rate": 1.7292715882569712e-05, "loss": 0.17892125248908997, "step": 1203 }, { "epoch": 0.14609877442058003, "grad_norm": 1.52149498462677, "learning_rate": 1.7290259181918686e-05, "loss": 0.1897353082895279, "step": 1204 }, { "epoch": 0.14622011891760708, "grad_norm": 2.012589931488037, "learning_rate": 1.728780248126766e-05, "loss": 0.2621530294418335, "step": 1205 }, { "epoch": 0.14634146341463414, "grad_norm": 1.8895965814590454, "learning_rate": 1.7285345780616635e-05, "loss": 0.29131007194519043, "step": 1206 }, { "epoch": 0.14646280791166122, "grad_norm": 1.6223446130752563, "learning_rate": 1.728288907996561e-05, "loss": 0.3021329641342163, "step": 1207 }, { "epoch": 0.14658415240868827, "grad_norm": 1.5214295387268066, "learning_rate": 1.7280432379314583e-05, "loss": 0.16306689381599426, "step": 1208 }, { "epoch": 0.14670549690571533, "grad_norm": 2.3011186122894287, "learning_rate": 1.7277975678663558e-05, "loss": 0.38378268480300903, "step": 1209 }, { "epoch": 0.14682684140274238, "grad_norm": 1.7881890535354614, "learning_rate": 1.7275518978012532e-05, "loss": 0.12493912875652313, "step": 1210 }, { "epoch": 0.14694818589976943, "grad_norm": 1.4234734773635864, "learning_rate": 1.7273062277361506e-05, "loss": 0.3630540668964386, "step": 1211 }, { "epoch": 0.14706953039679652, "grad_norm": 1.7614541053771973, "learning_rate": 1.727060557671048e-05, "loss": 0.05657818168401718, "step": 1212 }, { "epoch": 0.14719087489382357, "grad_norm": 2.1472115516662598, "learning_rate": 1.7268148876059455e-05, "loss": 0.2433232069015503, "step": 1213 }, { "epoch": 0.14731221939085062, "grad_norm": 0.8982884287834167, "learning_rate": 1.726569217540843e-05, "loss": 0.01902024820446968, "step": 1214 }, { "epoch": 0.14743356388787768, "grad_norm": 2.860464572906494, "learning_rate": 1.7263235474757403e-05, "loss": 0.370156466960907, "step": 1215 }, { "epoch": 0.14755490838490473, "grad_norm": 1.7890870571136475, "learning_rate": 1.7260778774106377e-05, "loss": 0.26941537857055664, "step": 1216 }, { "epoch": 0.1476762528819318, "grad_norm": 1.9163086414337158, "learning_rate": 1.7258322073455352e-05, "loss": 0.15845495462417603, "step": 1217 }, { "epoch": 0.14779759737895887, "grad_norm": 1.890729308128357, "learning_rate": 1.7255865372804326e-05, "loss": 0.47432729601860046, "step": 1218 }, { "epoch": 0.14791894187598592, "grad_norm": 1.7181979417800903, "learning_rate": 1.72534086721533e-05, "loss": 0.3108631372451782, "step": 1219 }, { "epoch": 0.14804028637301297, "grad_norm": 2.174555778503418, "learning_rate": 1.7250951971502274e-05, "loss": 0.5981994867324829, "step": 1220 }, { "epoch": 0.14816163087004006, "grad_norm": 1.4610273838043213, "learning_rate": 1.724849527085125e-05, "loss": 0.29758310317993164, "step": 1221 }, { "epoch": 0.1482829753670671, "grad_norm": 2.681323528289795, "learning_rate": 1.7246038570200223e-05, "loss": 0.9386166930198669, "step": 1222 }, { "epoch": 0.14840431986409416, "grad_norm": 2.1302742958068848, "learning_rate": 1.7243581869549197e-05, "loss": 0.17157283425331116, "step": 1223 }, { "epoch": 0.14852566436112122, "grad_norm": 1.9001370668411255, "learning_rate": 1.724112516889817e-05, "loss": 0.47526293992996216, "step": 1224 }, { "epoch": 0.14864700885814827, "grad_norm": 0.009420192800462246, "learning_rate": 1.7238668468247146e-05, "loss": 8.176633855327964e-05, "step": 1225 }, { "epoch": 0.14876835335517535, "grad_norm": 2.4234273433685303, "learning_rate": 1.723621176759612e-05, "loss": 0.25924772024154663, "step": 1226 }, { "epoch": 0.1488896978522024, "grad_norm": 2.9018149375915527, "learning_rate": 1.7233755066945094e-05, "loss": 0.6235582828521729, "step": 1227 }, { "epoch": 0.14901104234922946, "grad_norm": 1.1629736423492432, "learning_rate": 1.723129836629407e-05, "loss": 0.4921530485153198, "step": 1228 }, { "epoch": 0.14913238684625651, "grad_norm": 5.108669281005859, "learning_rate": 1.7228841665643043e-05, "loss": 0.7264307737350464, "step": 1229 }, { "epoch": 0.14925373134328357, "grad_norm": 1.8065292835235596, "learning_rate": 1.7226384964992017e-05, "loss": 0.7250082492828369, "step": 1230 }, { "epoch": 0.14937507584031065, "grad_norm": 1.2931733131408691, "learning_rate": 1.722392826434099e-05, "loss": 0.20766153931617737, "step": 1231 }, { "epoch": 0.1494964203373377, "grad_norm": 2.364239454269409, "learning_rate": 1.7221471563689966e-05, "loss": 0.45660078525543213, "step": 1232 }, { "epoch": 0.14961776483436476, "grad_norm": 2.4389283657073975, "learning_rate": 1.721901486303894e-05, "loss": 0.4532015323638916, "step": 1233 }, { "epoch": 0.1497391093313918, "grad_norm": 2.2398204803466797, "learning_rate": 1.7216558162387914e-05, "loss": 0.6295405626296997, "step": 1234 }, { "epoch": 0.1498604538284189, "grad_norm": 1.3575704097747803, "learning_rate": 1.721410146173689e-05, "loss": 0.40071597695350647, "step": 1235 }, { "epoch": 0.14998179832544595, "grad_norm": 1.4906830787658691, "learning_rate": 1.7211644761085863e-05, "loss": 0.11917506903409958, "step": 1236 }, { "epoch": 0.150103142822473, "grad_norm": 0.0011105046141892672, "learning_rate": 1.7209188060434837e-05, "loss": 3.206374458386563e-05, "step": 1237 }, { "epoch": 0.15022448731950006, "grad_norm": 1.8720066547393799, "learning_rate": 1.720673135978381e-05, "loss": 0.5851035714149475, "step": 1238 }, { "epoch": 0.1503458318165271, "grad_norm": 2.0771634578704834, "learning_rate": 1.7204274659132785e-05, "loss": 0.19338861107826233, "step": 1239 }, { "epoch": 0.1504671763135542, "grad_norm": 2.8861210346221924, "learning_rate": 1.720181795848176e-05, "loss": 0.2231196016073227, "step": 1240 }, { "epoch": 0.15058852081058124, "grad_norm": 1.8362433910369873, "learning_rate": 1.7199361257830737e-05, "loss": 0.15223735570907593, "step": 1241 }, { "epoch": 0.1507098653076083, "grad_norm": 1.6717724800109863, "learning_rate": 1.719690455717971e-05, "loss": 0.11967384070158005, "step": 1242 }, { "epoch": 0.15083120980463535, "grad_norm": 1.8411227464675903, "learning_rate": 1.7194447856528686e-05, "loss": 0.29932233691215515, "step": 1243 }, { "epoch": 0.1509525543016624, "grad_norm": 1.8385282754898071, "learning_rate": 1.719199115587766e-05, "loss": 0.29845061898231506, "step": 1244 }, { "epoch": 0.1510738987986895, "grad_norm": 1.8935575485229492, "learning_rate": 1.7189534455226634e-05, "loss": 0.3484433889389038, "step": 1245 }, { "epoch": 0.15119524329571654, "grad_norm": 2.388352870941162, "learning_rate": 1.718707775457561e-05, "loss": 0.7048746943473816, "step": 1246 }, { "epoch": 0.1513165877927436, "grad_norm": 1.184291124343872, "learning_rate": 1.7184621053924583e-05, "loss": 0.11675606667995453, "step": 1247 }, { "epoch": 0.15143793228977065, "grad_norm": 2.7689146995544434, "learning_rate": 1.7182164353273557e-05, "loss": 0.3825250267982483, "step": 1248 }, { "epoch": 0.15155927678679773, "grad_norm": 1.7167044878005981, "learning_rate": 1.717970765262253e-05, "loss": 0.38567736744880676, "step": 1249 }, { "epoch": 0.15168062128382478, "grad_norm": 1.61948561668396, "learning_rate": 1.7177250951971506e-05, "loss": 0.07746545970439911, "step": 1250 }, { "epoch": 0.15180196578085184, "grad_norm": 1.5445133447647095, "learning_rate": 1.717479425132048e-05, "loss": 0.22907724976539612, "step": 1251 }, { "epoch": 0.1519233102778789, "grad_norm": 2.118232250213623, "learning_rate": 1.7172337550669454e-05, "loss": 0.1413237750530243, "step": 1252 }, { "epoch": 0.15204465477490595, "grad_norm": 1.539642333984375, "learning_rate": 1.716988085001843e-05, "loss": 0.4614269435405731, "step": 1253 }, { "epoch": 0.15216599927193303, "grad_norm": 2.36910343170166, "learning_rate": 1.7167424149367403e-05, "loss": 0.3816324472427368, "step": 1254 }, { "epoch": 0.15228734376896008, "grad_norm": 2.8110387325286865, "learning_rate": 1.7164967448716374e-05, "loss": 0.6992867588996887, "step": 1255 }, { "epoch": 0.15240868826598714, "grad_norm": 2.969259023666382, "learning_rate": 1.7162510748065348e-05, "loss": 0.6973639726638794, "step": 1256 }, { "epoch": 0.1525300327630142, "grad_norm": 1.3217960596084595, "learning_rate": 1.7160054047414322e-05, "loss": 0.1124032661318779, "step": 1257 }, { "epoch": 0.15265137726004124, "grad_norm": 2.619513750076294, "learning_rate": 1.7157597346763296e-05, "loss": 0.37696775794029236, "step": 1258 }, { "epoch": 0.15277272175706832, "grad_norm": 2.0856645107269287, "learning_rate": 1.715514064611227e-05, "loss": 0.15504798293113708, "step": 1259 }, { "epoch": 0.15289406625409538, "grad_norm": 2.5539822578430176, "learning_rate": 1.7152683945461245e-05, "loss": 0.3083515763282776, "step": 1260 }, { "epoch": 0.15301541075112243, "grad_norm": 1.6461360454559326, "learning_rate": 1.715022724481022e-05, "loss": 0.6977939009666443, "step": 1261 }, { "epoch": 0.1531367552481495, "grad_norm": 2.4875383377075195, "learning_rate": 1.7147770544159193e-05, "loss": 0.47936803102493286, "step": 1262 }, { "epoch": 0.15325809974517657, "grad_norm": 3.297154426574707, "learning_rate": 1.7145313843508168e-05, "loss": 0.3359706699848175, "step": 1263 }, { "epoch": 0.15337944424220362, "grad_norm": 2.131556272506714, "learning_rate": 1.7142857142857142e-05, "loss": 0.3174992799758911, "step": 1264 }, { "epoch": 0.15350078873923068, "grad_norm": 1.9300246238708496, "learning_rate": 1.7140400442206116e-05, "loss": 0.4541569948196411, "step": 1265 }, { "epoch": 0.15362213323625773, "grad_norm": 3.374305009841919, "learning_rate": 1.713794374155509e-05, "loss": 0.4640466570854187, "step": 1266 }, { "epoch": 0.15374347773328478, "grad_norm": 2.009493112564087, "learning_rate": 1.7135487040904068e-05, "loss": 0.19491064548492432, "step": 1267 }, { "epoch": 0.15386482223031187, "grad_norm": 1.8354039192199707, "learning_rate": 1.7133030340253042e-05, "loss": 0.3190333843231201, "step": 1268 }, { "epoch": 0.15398616672733892, "grad_norm": 1.7007555961608887, "learning_rate": 1.7130573639602017e-05, "loss": 0.27993080019950867, "step": 1269 }, { "epoch": 0.15410751122436597, "grad_norm": 2.211060047149658, "learning_rate": 1.712811693895099e-05, "loss": 0.17426905035972595, "step": 1270 }, { "epoch": 0.15422885572139303, "grad_norm": 1.33144211769104, "learning_rate": 1.7125660238299965e-05, "loss": 0.24775874614715576, "step": 1271 }, { "epoch": 0.1543502002184201, "grad_norm": 2.5085713863372803, "learning_rate": 1.712320353764894e-05, "loss": 0.5349833965301514, "step": 1272 }, { "epoch": 0.15447154471544716, "grad_norm": 1.8141382932662964, "learning_rate": 1.7120746836997914e-05, "loss": 0.5107168555259705, "step": 1273 }, { "epoch": 0.15459288921247422, "grad_norm": 2.1935603618621826, "learning_rate": 1.7118290136346888e-05, "loss": 0.24863873422145844, "step": 1274 }, { "epoch": 0.15471423370950127, "grad_norm": 2.279149293899536, "learning_rate": 1.7115833435695862e-05, "loss": 0.4017123579978943, "step": 1275 }, { "epoch": 0.15483557820652832, "grad_norm": 1.9747353792190552, "learning_rate": 1.7113376735044836e-05, "loss": 0.4218288064002991, "step": 1276 }, { "epoch": 0.1549569227035554, "grad_norm": 2.6752212047576904, "learning_rate": 1.711092003439381e-05, "loss": 0.39060577750205994, "step": 1277 }, { "epoch": 0.15507826720058246, "grad_norm": 2.636204481124878, "learning_rate": 1.7108463333742785e-05, "loss": 0.4002552628517151, "step": 1278 }, { "epoch": 0.1551996116976095, "grad_norm": 0.532202959060669, "learning_rate": 1.710600663309176e-05, "loss": 0.03157857060432434, "step": 1279 }, { "epoch": 0.15532095619463657, "grad_norm": 1.8398561477661133, "learning_rate": 1.7103549932440733e-05, "loss": 0.27052685618400574, "step": 1280 }, { "epoch": 0.15544230069166362, "grad_norm": 1.626185417175293, "learning_rate": 1.7101093231789708e-05, "loss": 0.09882887452840805, "step": 1281 }, { "epoch": 0.1555636451886907, "grad_norm": 1.96735680103302, "learning_rate": 1.7098636531138682e-05, "loss": 0.4001654088497162, "step": 1282 }, { "epoch": 0.15568498968571776, "grad_norm": 2.2064571380615234, "learning_rate": 1.7096179830487656e-05, "loss": 0.26003214716911316, "step": 1283 }, { "epoch": 0.1558063341827448, "grad_norm": 1.3411259651184082, "learning_rate": 1.709372312983663e-05, "loss": 0.16639862954616547, "step": 1284 }, { "epoch": 0.15592767867977186, "grad_norm": 1.34841787815094, "learning_rate": 1.7091266429185605e-05, "loss": 0.06426498293876648, "step": 1285 }, { "epoch": 0.15604902317679895, "grad_norm": 1.5554633140563965, "learning_rate": 1.708880972853458e-05, "loss": 0.15110211074352264, "step": 1286 }, { "epoch": 0.156170367673826, "grad_norm": 1.845657229423523, "learning_rate": 1.7086353027883553e-05, "loss": 0.776990532875061, "step": 1287 }, { "epoch": 0.15629171217085305, "grad_norm": 1.739893913269043, "learning_rate": 1.7083896327232527e-05, "loss": 0.3289273679256439, "step": 1288 }, { "epoch": 0.1564130566678801, "grad_norm": 1.6401827335357666, "learning_rate": 1.70814396265815e-05, "loss": 0.07742001116275787, "step": 1289 }, { "epoch": 0.15653440116490716, "grad_norm": 2.0880889892578125, "learning_rate": 1.7078982925930476e-05, "loss": 0.43219226598739624, "step": 1290 }, { "epoch": 0.15665574566193424, "grad_norm": 2.825942277908325, "learning_rate": 1.707652622527945e-05, "loss": 0.3840549886226654, "step": 1291 }, { "epoch": 0.1567770901589613, "grad_norm": 2.3352129459381104, "learning_rate": 1.7074069524628424e-05, "loss": 0.44269004464149475, "step": 1292 }, { "epoch": 0.15689843465598835, "grad_norm": 3.0530810356140137, "learning_rate": 1.70716128239774e-05, "loss": 0.4579879641532898, "step": 1293 }, { "epoch": 0.1570197791530154, "grad_norm": 2.729116439819336, "learning_rate": 1.7069156123326373e-05, "loss": 0.29994532465934753, "step": 1294 }, { "epoch": 0.15714112365004246, "grad_norm": 1.3052394390106201, "learning_rate": 1.7066699422675347e-05, "loss": 0.07982388138771057, "step": 1295 }, { "epoch": 0.15726246814706954, "grad_norm": 2.357917547225952, "learning_rate": 1.706424272202432e-05, "loss": 0.2928987145423889, "step": 1296 }, { "epoch": 0.1573838126440966, "grad_norm": 1.4563827514648438, "learning_rate": 1.7061786021373296e-05, "loss": 0.2420179843902588, "step": 1297 }, { "epoch": 0.15750515714112365, "grad_norm": 3.0957210063934326, "learning_rate": 1.705932932072227e-05, "loss": 0.1874426305294037, "step": 1298 }, { "epoch": 0.1576265016381507, "grad_norm": 2.0317540168762207, "learning_rate": 1.7056872620071244e-05, "loss": 0.2113209068775177, "step": 1299 }, { "epoch": 0.15774784613517778, "grad_norm": 1.844716191291809, "learning_rate": 1.705441591942022e-05, "loss": 0.20830923318862915, "step": 1300 }, { "epoch": 0.15786919063220484, "grad_norm": 0.09604529291391373, "learning_rate": 1.7051959218769193e-05, "loss": 0.002145261038094759, "step": 1301 }, { "epoch": 0.1579905351292319, "grad_norm": 1.3927608728408813, "learning_rate": 1.7049502518118167e-05, "loss": 0.4583245515823364, "step": 1302 }, { "epoch": 0.15811187962625894, "grad_norm": 2.9382057189941406, "learning_rate": 1.704704581746714e-05, "loss": 0.6795557141304016, "step": 1303 }, { "epoch": 0.158233224123286, "grad_norm": 3.182617425918579, "learning_rate": 1.7044589116816116e-05, "loss": 0.7760910391807556, "step": 1304 }, { "epoch": 0.15835456862031308, "grad_norm": 2.0352823734283447, "learning_rate": 1.704213241616509e-05, "loss": 0.24128247797489166, "step": 1305 }, { "epoch": 0.15847591311734013, "grad_norm": 2.315619468688965, "learning_rate": 1.7039675715514064e-05, "loss": 0.18327398598194122, "step": 1306 }, { "epoch": 0.1585972576143672, "grad_norm": 2.19699764251709, "learning_rate": 1.7037219014863042e-05, "loss": 0.5556395053863525, "step": 1307 }, { "epoch": 0.15871860211139424, "grad_norm": 1.1444036960601807, "learning_rate": 1.7034762314212016e-05, "loss": 0.12133655697107315, "step": 1308 }, { "epoch": 0.1588399466084213, "grad_norm": 2.0349247455596924, "learning_rate": 1.703230561356099e-05, "loss": 0.3753592073917389, "step": 1309 }, { "epoch": 0.15896129110544838, "grad_norm": 1.413674235343933, "learning_rate": 1.7029848912909964e-05, "loss": 0.14800333976745605, "step": 1310 }, { "epoch": 0.15908263560247543, "grad_norm": 1.274200677871704, "learning_rate": 1.702739221225894e-05, "loss": 0.20592835545539856, "step": 1311 }, { "epoch": 0.15920398009950248, "grad_norm": 2.0405519008636475, "learning_rate": 1.7024935511607913e-05, "loss": 0.605412483215332, "step": 1312 }, { "epoch": 0.15932532459652954, "grad_norm": 2.199052333831787, "learning_rate": 1.7022478810956887e-05, "loss": 0.581245481967926, "step": 1313 }, { "epoch": 0.15944666909355662, "grad_norm": 3.273561954498291, "learning_rate": 1.702002211030586e-05, "loss": 0.19570647180080414, "step": 1314 }, { "epoch": 0.15956801359058367, "grad_norm": 2.3701846599578857, "learning_rate": 1.7017565409654836e-05, "loss": 0.32590925693511963, "step": 1315 }, { "epoch": 0.15968935808761073, "grad_norm": 0.4969005882740021, "learning_rate": 1.701510870900381e-05, "loss": 0.01700139045715332, "step": 1316 }, { "epoch": 0.15981070258463778, "grad_norm": 1.716216802597046, "learning_rate": 1.7012652008352784e-05, "loss": 0.34420692920684814, "step": 1317 }, { "epoch": 0.15993204708166484, "grad_norm": 2.2408816814422607, "learning_rate": 1.701019530770176e-05, "loss": 0.20139765739440918, "step": 1318 }, { "epoch": 0.16005339157869192, "grad_norm": 1.6093449592590332, "learning_rate": 1.7007738607050733e-05, "loss": 0.22592651844024658, "step": 1319 }, { "epoch": 0.16017473607571897, "grad_norm": 1.2304242849349976, "learning_rate": 1.7005281906399707e-05, "loss": 0.06464403867721558, "step": 1320 }, { "epoch": 0.16029608057274602, "grad_norm": 2.2083680629730225, "learning_rate": 1.700282520574868e-05, "loss": 0.5366463661193848, "step": 1321 }, { "epoch": 0.16041742506977308, "grad_norm": 1.9002628326416016, "learning_rate": 1.7000368505097656e-05, "loss": 0.21244825422763824, "step": 1322 }, { "epoch": 0.16053876956680013, "grad_norm": 1.1220264434814453, "learning_rate": 1.699791180444663e-05, "loss": 0.09071967005729675, "step": 1323 }, { "epoch": 0.16066011406382721, "grad_norm": 2.344264030456543, "learning_rate": 1.6995455103795604e-05, "loss": 0.4256601929664612, "step": 1324 }, { "epoch": 0.16078145856085427, "grad_norm": 1.312510371208191, "learning_rate": 1.699299840314458e-05, "loss": 0.07779626548290253, "step": 1325 }, { "epoch": 0.16090280305788132, "grad_norm": 2.834338665008545, "learning_rate": 1.6990541702493553e-05, "loss": 0.46628639101982117, "step": 1326 }, { "epoch": 0.16102414755490838, "grad_norm": 1.238459825515747, "learning_rate": 1.6988085001842527e-05, "loss": 0.11977294832468033, "step": 1327 }, { "epoch": 0.16114549205193546, "grad_norm": 1.8803859949111938, "learning_rate": 1.69856283011915e-05, "loss": 0.25932106375694275, "step": 1328 }, { "epoch": 0.1612668365489625, "grad_norm": 2.950925350189209, "learning_rate": 1.6983171600540475e-05, "loss": 0.32873010635375977, "step": 1329 }, { "epoch": 0.16138818104598956, "grad_norm": 1.580187439918518, "learning_rate": 1.698071489988945e-05, "loss": 0.32839733362197876, "step": 1330 }, { "epoch": 0.16150952554301662, "grad_norm": 2.0951292514801025, "learning_rate": 1.6978258199238424e-05, "loss": 0.2957846522331238, "step": 1331 }, { "epoch": 0.16163087004004367, "grad_norm": 2.181708812713623, "learning_rate": 1.6975801498587398e-05, "loss": 0.2901095151901245, "step": 1332 }, { "epoch": 0.16175221453707075, "grad_norm": 2.0835371017456055, "learning_rate": 1.6973344797936372e-05, "loss": 0.39948153495788574, "step": 1333 }, { "epoch": 0.1618735590340978, "grad_norm": 1.7480794191360474, "learning_rate": 1.6970888097285347e-05, "loss": 0.10842632502317429, "step": 1334 }, { "epoch": 0.16199490353112486, "grad_norm": 1.6553689241409302, "learning_rate": 1.696843139663432e-05, "loss": 0.5176296234130859, "step": 1335 }, { "epoch": 0.16211624802815192, "grad_norm": 2.356034994125366, "learning_rate": 1.6965974695983295e-05, "loss": 0.4009917378425598, "step": 1336 }, { "epoch": 0.16223759252517897, "grad_norm": 1.4407398700714111, "learning_rate": 1.696351799533227e-05, "loss": 0.28117045760154724, "step": 1337 }, { "epoch": 0.16235893702220605, "grad_norm": 1.3323804140090942, "learning_rate": 1.6961061294681244e-05, "loss": 0.24473021924495697, "step": 1338 }, { "epoch": 0.1624802815192331, "grad_norm": 1.888528823852539, "learning_rate": 1.6958604594030218e-05, "loss": 0.33315813541412354, "step": 1339 }, { "epoch": 0.16260162601626016, "grad_norm": 1.6776105165481567, "learning_rate": 1.6956147893379192e-05, "loss": 0.1390353888273239, "step": 1340 }, { "epoch": 0.1627229705132872, "grad_norm": 2.3369064331054688, "learning_rate": 1.6953691192728166e-05, "loss": 0.37260985374450684, "step": 1341 }, { "epoch": 0.1628443150103143, "grad_norm": 1.9706916809082031, "learning_rate": 1.695123449207714e-05, "loss": 0.14245374500751495, "step": 1342 }, { "epoch": 0.16296565950734135, "grad_norm": 2.193972110748291, "learning_rate": 1.6948777791426115e-05, "loss": 0.4535263776779175, "step": 1343 }, { "epoch": 0.1630870040043684, "grad_norm": 2.0166101455688477, "learning_rate": 1.694632109077509e-05, "loss": 0.18072649836540222, "step": 1344 }, { "epoch": 0.16320834850139546, "grad_norm": 0.9884637594223022, "learning_rate": 1.6943864390124064e-05, "loss": 0.14514899253845215, "step": 1345 }, { "epoch": 0.1633296929984225, "grad_norm": 2.3087267875671387, "learning_rate": 1.694140768947304e-05, "loss": 0.31159549951553345, "step": 1346 }, { "epoch": 0.1634510374954496, "grad_norm": 3.271432876586914, "learning_rate": 1.6938950988822015e-05, "loss": 0.27185720205307007, "step": 1347 }, { "epoch": 0.16357238199247665, "grad_norm": 2.102259635925293, "learning_rate": 1.693649428817099e-05, "loss": 0.5381771326065063, "step": 1348 }, { "epoch": 0.1636937264895037, "grad_norm": 1.1917797327041626, "learning_rate": 1.6934037587519964e-05, "loss": 0.10865627229213715, "step": 1349 }, { "epoch": 0.16381507098653075, "grad_norm": 1.9590660333633423, "learning_rate": 1.6931580886868938e-05, "loss": 0.3822226822376251, "step": 1350 }, { "epoch": 0.1639364154835578, "grad_norm": 1.6567803621292114, "learning_rate": 1.6929124186217912e-05, "loss": 0.12453743070363998, "step": 1351 }, { "epoch": 0.1640577599805849, "grad_norm": 1.4354405403137207, "learning_rate": 1.6926667485566887e-05, "loss": 0.2581062912940979, "step": 1352 }, { "epoch": 0.16417910447761194, "grad_norm": 1.3010510206222534, "learning_rate": 1.692421078491586e-05, "loss": 0.5185683965682983, "step": 1353 }, { "epoch": 0.164300448974639, "grad_norm": 1.7877172231674194, "learning_rate": 1.6921754084264835e-05, "loss": 0.21436668932437897, "step": 1354 }, { "epoch": 0.16442179347166605, "grad_norm": 1.7531827688217163, "learning_rate": 1.691929738361381e-05, "loss": 0.12685886025428772, "step": 1355 }, { "epoch": 0.16454313796869313, "grad_norm": 1.4889940023422241, "learning_rate": 1.6916840682962784e-05, "loss": 0.41197526454925537, "step": 1356 }, { "epoch": 0.16466448246572019, "grad_norm": 1.6222858428955078, "learning_rate": 1.6914383982311758e-05, "loss": 0.2630276083946228, "step": 1357 }, { "epoch": 0.16478582696274724, "grad_norm": 2.612492799758911, "learning_rate": 1.6911927281660732e-05, "loss": 0.2662280201911926, "step": 1358 }, { "epoch": 0.1649071714597743, "grad_norm": 1.8409432172775269, "learning_rate": 1.6909470581009707e-05, "loss": 0.3193710446357727, "step": 1359 }, { "epoch": 0.16502851595680135, "grad_norm": 1.8577767610549927, "learning_rate": 1.690701388035868e-05, "loss": 0.40329962968826294, "step": 1360 }, { "epoch": 0.16514986045382843, "grad_norm": 1.5214166641235352, "learning_rate": 1.6904557179707655e-05, "loss": 0.43802523612976074, "step": 1361 }, { "epoch": 0.16527120495085548, "grad_norm": 1.752384901046753, "learning_rate": 1.690210047905663e-05, "loss": 0.15972882509231567, "step": 1362 }, { "epoch": 0.16539254944788254, "grad_norm": 2.470399856567383, "learning_rate": 1.6899643778405604e-05, "loss": 0.5078000426292419, "step": 1363 }, { "epoch": 0.1655138939449096, "grad_norm": 1.7605093717575073, "learning_rate": 1.6897187077754578e-05, "loss": 0.24891872704029083, "step": 1364 }, { "epoch": 0.16563523844193667, "grad_norm": 1.6690492630004883, "learning_rate": 1.6894730377103552e-05, "loss": 0.2679961621761322, "step": 1365 }, { "epoch": 0.16575658293896373, "grad_norm": 2.569883108139038, "learning_rate": 1.6892273676452526e-05, "loss": 0.432903528213501, "step": 1366 }, { "epoch": 0.16587792743599078, "grad_norm": 1.740451693534851, "learning_rate": 1.68898169758015e-05, "loss": 0.12391990423202515, "step": 1367 }, { "epoch": 0.16599927193301783, "grad_norm": 2.0230066776275635, "learning_rate": 1.6887360275150475e-05, "loss": 0.2665267288684845, "step": 1368 }, { "epoch": 0.1661206164300449, "grad_norm": 1.4242761135101318, "learning_rate": 1.688490357449945e-05, "loss": 0.09299017488956451, "step": 1369 }, { "epoch": 0.16624196092707197, "grad_norm": 1.7172342538833618, "learning_rate": 1.6882446873848423e-05, "loss": 0.20614555478096008, "step": 1370 }, { "epoch": 0.16636330542409902, "grad_norm": 1.4239894151687622, "learning_rate": 1.6879990173197398e-05, "loss": 0.34842580556869507, "step": 1371 }, { "epoch": 0.16648464992112608, "grad_norm": 1.8273541927337646, "learning_rate": 1.6877533472546372e-05, "loss": 0.27392613887786865, "step": 1372 }, { "epoch": 0.16660599441815313, "grad_norm": 1.901670217514038, "learning_rate": 1.6875076771895346e-05, "loss": 0.21127557754516602, "step": 1373 }, { "epoch": 0.16672733891518018, "grad_norm": 8.480103492736816, "learning_rate": 1.687262007124432e-05, "loss": 0.15148305892944336, "step": 1374 }, { "epoch": 0.16684868341220727, "grad_norm": 1.8833881616592407, "learning_rate": 1.6870163370593295e-05, "loss": 0.2864915132522583, "step": 1375 }, { "epoch": 0.16697002790923432, "grad_norm": 1.975264549255371, "learning_rate": 1.686770666994227e-05, "loss": 0.36668485403060913, "step": 1376 }, { "epoch": 0.16709137240626137, "grad_norm": 3.24489164352417, "learning_rate": 1.6865249969291243e-05, "loss": 0.45897114276885986, "step": 1377 }, { "epoch": 0.16721271690328843, "grad_norm": 3.0113956928253174, "learning_rate": 1.6862793268640217e-05, "loss": 0.3996666967868805, "step": 1378 }, { "epoch": 0.1673340614003155, "grad_norm": 1.9160172939300537, "learning_rate": 1.6860336567989192e-05, "loss": 0.08457375317811966, "step": 1379 }, { "epoch": 0.16745540589734256, "grad_norm": 2.8236207962036133, "learning_rate": 1.6857879867338166e-05, "loss": 0.36632949113845825, "step": 1380 }, { "epoch": 0.16757675039436962, "grad_norm": 1.984706163406372, "learning_rate": 1.685542316668714e-05, "loss": 0.3010556995868683, "step": 1381 }, { "epoch": 0.16769809489139667, "grad_norm": 1.6785802841186523, "learning_rate": 1.6852966466036114e-05, "loss": 0.4251388907432556, "step": 1382 }, { "epoch": 0.16781943938842372, "grad_norm": 4.960895538330078, "learning_rate": 1.685050976538509e-05, "loss": 0.43394631147384644, "step": 1383 }, { "epoch": 0.1679407838854508, "grad_norm": 1.7378970384597778, "learning_rate": 1.6848053064734063e-05, "loss": 0.29635417461395264, "step": 1384 }, { "epoch": 0.16806212838247786, "grad_norm": 2.0712788105010986, "learning_rate": 1.6845596364083037e-05, "loss": 0.6476526856422424, "step": 1385 }, { "epoch": 0.1681834728795049, "grad_norm": 2.259003162384033, "learning_rate": 1.6843139663432015e-05, "loss": 0.5198768377304077, "step": 1386 }, { "epoch": 0.16830481737653197, "grad_norm": 1.3213210105895996, "learning_rate": 1.684068296278099e-05, "loss": 0.45285072922706604, "step": 1387 }, { "epoch": 0.16842616187355902, "grad_norm": 2.139267683029175, "learning_rate": 1.6838226262129963e-05, "loss": 0.5512986779212952, "step": 1388 }, { "epoch": 0.1685475063705861, "grad_norm": 1.4775429964065552, "learning_rate": 1.6835769561478938e-05, "loss": 0.25231534242630005, "step": 1389 }, { "epoch": 0.16866885086761316, "grad_norm": 1.5690864324569702, "learning_rate": 1.683331286082791e-05, "loss": 0.18181580305099487, "step": 1390 }, { "epoch": 0.1687901953646402, "grad_norm": 2.166705846786499, "learning_rate": 1.6830856160176883e-05, "loss": 0.5808331966400146, "step": 1391 }, { "epoch": 0.16891153986166726, "grad_norm": 2.2479324340820312, "learning_rate": 1.6828399459525857e-05, "loss": 0.3114401400089264, "step": 1392 }, { "epoch": 0.16903288435869435, "grad_norm": 2.7515830993652344, "learning_rate": 1.682594275887483e-05, "loss": 0.7299220561981201, "step": 1393 }, { "epoch": 0.1691542288557214, "grad_norm": 0.346658855676651, "learning_rate": 1.6823486058223806e-05, "loss": 0.007224785629659891, "step": 1394 }, { "epoch": 0.16927557335274845, "grad_norm": 2.2689716815948486, "learning_rate": 1.682102935757278e-05, "loss": 0.3097282946109772, "step": 1395 }, { "epoch": 0.1693969178497755, "grad_norm": 2.001441717147827, "learning_rate": 1.6818572656921754e-05, "loss": 0.1686830371618271, "step": 1396 }, { "epoch": 0.16951826234680256, "grad_norm": 1.140173316001892, "learning_rate": 1.681611595627073e-05, "loss": 0.06480616331100464, "step": 1397 }, { "epoch": 0.16963960684382964, "grad_norm": 2.302633762359619, "learning_rate": 1.6813659255619703e-05, "loss": 0.09449177980422974, "step": 1398 }, { "epoch": 0.1697609513408567, "grad_norm": 1.742457389831543, "learning_rate": 1.6811202554968677e-05, "loss": 0.6511344909667969, "step": 1399 }, { "epoch": 0.16988229583788375, "grad_norm": 2.5629754066467285, "learning_rate": 1.680874585431765e-05, "loss": 0.5311500430107117, "step": 1400 }, { "epoch": 0.1700036403349108, "grad_norm": 2.018834352493286, "learning_rate": 1.6806289153666625e-05, "loss": 0.34516584873199463, "step": 1401 }, { "epoch": 0.17012498483193786, "grad_norm": 2.187851905822754, "learning_rate": 1.68038324530156e-05, "loss": 0.3023394048213959, "step": 1402 }, { "epoch": 0.17024632932896494, "grad_norm": 2.0745787620544434, "learning_rate": 1.6801375752364574e-05, "loss": 0.56155925989151, "step": 1403 }, { "epoch": 0.170367673825992, "grad_norm": 1.7877262830734253, "learning_rate": 1.6798919051713548e-05, "loss": 0.5152330994606018, "step": 1404 }, { "epoch": 0.17048901832301905, "grad_norm": 2.3125364780426025, "learning_rate": 1.6796462351062522e-05, "loss": 0.2775477170944214, "step": 1405 }, { "epoch": 0.1706103628200461, "grad_norm": 2.6091928482055664, "learning_rate": 1.6794005650411497e-05, "loss": 0.2951275706291199, "step": 1406 }, { "epoch": 0.17073170731707318, "grad_norm": 2.670105218887329, "learning_rate": 1.679154894976047e-05, "loss": 0.39512842893600464, "step": 1407 }, { "epoch": 0.17085305181410024, "grad_norm": 2.1665756702423096, "learning_rate": 1.6789092249109445e-05, "loss": 0.7851711511611938, "step": 1408 }, { "epoch": 0.1709743963111273, "grad_norm": 1.8065381050109863, "learning_rate": 1.678663554845842e-05, "loss": 0.15969766676425934, "step": 1409 }, { "epoch": 0.17109574080815435, "grad_norm": 1.518323540687561, "learning_rate": 1.6784178847807394e-05, "loss": 0.20590633153915405, "step": 1410 }, { "epoch": 0.1712170853051814, "grad_norm": 2.523805618286133, "learning_rate": 1.6781722147156368e-05, "loss": 0.31899237632751465, "step": 1411 }, { "epoch": 0.17133842980220848, "grad_norm": 2.5726451873779297, "learning_rate": 1.6779265446505346e-05, "loss": 0.3131209909915924, "step": 1412 }, { "epoch": 0.17145977429923553, "grad_norm": 1.8876440525054932, "learning_rate": 1.677680874585432e-05, "loss": 0.4819314479827881, "step": 1413 }, { "epoch": 0.1715811187962626, "grad_norm": 1.4496047496795654, "learning_rate": 1.6774352045203294e-05, "loss": 0.21618512272834778, "step": 1414 }, { "epoch": 0.17170246329328964, "grad_norm": 1.612060785293579, "learning_rate": 1.677189534455227e-05, "loss": 0.1677655726671219, "step": 1415 }, { "epoch": 0.1718238077903167, "grad_norm": 1.2587897777557373, "learning_rate": 1.6769438643901243e-05, "loss": 0.09883075207471848, "step": 1416 }, { "epoch": 0.17194515228734378, "grad_norm": 2.5266168117523193, "learning_rate": 1.6766981943250217e-05, "loss": 0.5532509088516235, "step": 1417 }, { "epoch": 0.17206649678437083, "grad_norm": 3.3645172119140625, "learning_rate": 1.676452524259919e-05, "loss": 0.14025680720806122, "step": 1418 }, { "epoch": 0.17218784128139789, "grad_norm": 2.097749710083008, "learning_rate": 1.6762068541948165e-05, "loss": 0.37450772523880005, "step": 1419 }, { "epoch": 0.17230918577842494, "grad_norm": 2.6865334510803223, "learning_rate": 1.675961184129714e-05, "loss": 0.46862709522247314, "step": 1420 }, { "epoch": 0.17243053027545202, "grad_norm": 0.005616781767457724, "learning_rate": 1.6757155140646114e-05, "loss": 0.00010598314111120999, "step": 1421 }, { "epoch": 0.17255187477247907, "grad_norm": 1.725932240486145, "learning_rate": 1.6754698439995088e-05, "loss": 0.3152719736099243, "step": 1422 }, { "epoch": 0.17267321926950613, "grad_norm": 2.0537219047546387, "learning_rate": 1.6752241739344062e-05, "loss": 0.2770642042160034, "step": 1423 }, { "epoch": 0.17279456376653318, "grad_norm": 2.266733407974243, "learning_rate": 1.6749785038693037e-05, "loss": 0.20772089064121246, "step": 1424 }, { "epoch": 0.17291590826356024, "grad_norm": 1.9213402271270752, "learning_rate": 1.674732833804201e-05, "loss": 0.378388375043869, "step": 1425 }, { "epoch": 0.17303725276058732, "grad_norm": 0.855719268321991, "learning_rate": 1.6744871637390985e-05, "loss": 0.03948398679494858, "step": 1426 }, { "epoch": 0.17315859725761437, "grad_norm": 2.2846179008483887, "learning_rate": 1.674241493673996e-05, "loss": 0.29622775316238403, "step": 1427 }, { "epoch": 0.17327994175464143, "grad_norm": 1.4371376037597656, "learning_rate": 1.6739958236088934e-05, "loss": 0.29555070400238037, "step": 1428 }, { "epoch": 0.17340128625166848, "grad_norm": 1.5206128358840942, "learning_rate": 1.6737501535437908e-05, "loss": 0.1654072254896164, "step": 1429 }, { "epoch": 0.17352263074869553, "grad_norm": 1.9108872413635254, "learning_rate": 1.6735044834786882e-05, "loss": 0.3061807453632355, "step": 1430 }, { "epoch": 0.17364397524572261, "grad_norm": 2.351694345474243, "learning_rate": 1.6732588134135857e-05, "loss": 0.5521951913833618, "step": 1431 }, { "epoch": 0.17376531974274967, "grad_norm": 1.2257763147354126, "learning_rate": 1.673013143348483e-05, "loss": 0.050480540841817856, "step": 1432 }, { "epoch": 0.17388666423977672, "grad_norm": 1.6109448671340942, "learning_rate": 1.6727674732833805e-05, "loss": 0.7171104550361633, "step": 1433 }, { "epoch": 0.17400800873680378, "grad_norm": 2.370811700820923, "learning_rate": 1.672521803218278e-05, "loss": 0.19278818368911743, "step": 1434 }, { "epoch": 0.17412935323383086, "grad_norm": 1.9877387285232544, "learning_rate": 1.6722761331531754e-05, "loss": 0.6486696004867554, "step": 1435 }, { "epoch": 0.1742506977308579, "grad_norm": 1.7820128202438354, "learning_rate": 1.6720304630880728e-05, "loss": 0.2971023619174957, "step": 1436 }, { "epoch": 0.17437204222788497, "grad_norm": 2.684583902359009, "learning_rate": 1.6717847930229702e-05, "loss": 0.5994515419006348, "step": 1437 }, { "epoch": 0.17449338672491202, "grad_norm": 3.353449583053589, "learning_rate": 1.6715391229578676e-05, "loss": 0.36244866251945496, "step": 1438 }, { "epoch": 0.17461473122193907, "grad_norm": 2.1446571350097656, "learning_rate": 1.671293452892765e-05, "loss": 0.31015217304229736, "step": 1439 }, { "epoch": 0.17473607571896616, "grad_norm": 1.5882110595703125, "learning_rate": 1.6710477828276625e-05, "loss": 0.2135632038116455, "step": 1440 }, { "epoch": 0.1748574202159932, "grad_norm": 1.8885900974273682, "learning_rate": 1.67080211276256e-05, "loss": 0.6606277227401733, "step": 1441 }, { "epoch": 0.17497876471302026, "grad_norm": 1.9658722877502441, "learning_rate": 1.6705564426974573e-05, "loss": 0.6350474953651428, "step": 1442 }, { "epoch": 0.17510010921004732, "grad_norm": 1.3200682401657104, "learning_rate": 1.6703107726323548e-05, "loss": 0.07505234330892563, "step": 1443 }, { "epoch": 0.17522145370707437, "grad_norm": 1.8777532577514648, "learning_rate": 1.6700651025672522e-05, "loss": 0.20652814209461212, "step": 1444 }, { "epoch": 0.17534279820410145, "grad_norm": 2.564035177230835, "learning_rate": 1.6698194325021496e-05, "loss": 0.2737087309360504, "step": 1445 }, { "epoch": 0.1754641427011285, "grad_norm": 1.2406517267227173, "learning_rate": 1.669573762437047e-05, "loss": 0.15805581212043762, "step": 1446 }, { "epoch": 0.17558548719815556, "grad_norm": 1.9846456050872803, "learning_rate": 1.6693280923719445e-05, "loss": 0.2611340284347534, "step": 1447 }, { "epoch": 0.1757068316951826, "grad_norm": 2.8493361473083496, "learning_rate": 1.669082422306842e-05, "loss": 0.4207281470298767, "step": 1448 }, { "epoch": 0.1758281761922097, "grad_norm": 1.8156849145889282, "learning_rate": 1.6688367522417393e-05, "loss": 0.25690674781799316, "step": 1449 }, { "epoch": 0.17594952068923675, "grad_norm": 2.1182401180267334, "learning_rate": 1.6685910821766367e-05, "loss": 0.2869364023208618, "step": 1450 }, { "epoch": 0.1760708651862638, "grad_norm": 1.5937093496322632, "learning_rate": 1.668345412111534e-05, "loss": 0.12525154650211334, "step": 1451 }, { "epoch": 0.17619220968329086, "grad_norm": 1.9949283599853516, "learning_rate": 1.668099742046432e-05, "loss": 0.2950144112110138, "step": 1452 }, { "epoch": 0.1763135541803179, "grad_norm": 2.5047764778137207, "learning_rate": 1.6678540719813294e-05, "loss": 0.4218530058860779, "step": 1453 }, { "epoch": 0.176434898677345, "grad_norm": 1.803932547569275, "learning_rate": 1.6676084019162268e-05, "loss": 0.3246077299118042, "step": 1454 }, { "epoch": 0.17655624317437205, "grad_norm": 2.5857975482940674, "learning_rate": 1.6673627318511242e-05, "loss": 0.31732994318008423, "step": 1455 }, { "epoch": 0.1766775876713991, "grad_norm": 2.1230576038360596, "learning_rate": 1.6671170617860216e-05, "loss": 0.31983518600463867, "step": 1456 }, { "epoch": 0.17679893216842615, "grad_norm": 2.242692470550537, "learning_rate": 1.666871391720919e-05, "loss": 0.2791813910007477, "step": 1457 }, { "epoch": 0.17692027666545324, "grad_norm": 2.2482571601867676, "learning_rate": 1.6666257216558165e-05, "loss": 0.3358612060546875, "step": 1458 }, { "epoch": 0.1770416211624803, "grad_norm": 1.851077675819397, "learning_rate": 1.666380051590714e-05, "loss": 0.22470499575138092, "step": 1459 }, { "epoch": 0.17716296565950734, "grad_norm": 1.292380690574646, "learning_rate": 1.6661343815256113e-05, "loss": 0.07124683260917664, "step": 1460 }, { "epoch": 0.1772843101565344, "grad_norm": 2.1859281063079834, "learning_rate": 1.6658887114605088e-05, "loss": 0.25534307956695557, "step": 1461 }, { "epoch": 0.17740565465356145, "grad_norm": 1.6746493577957153, "learning_rate": 1.6656430413954062e-05, "loss": 0.5188888907432556, "step": 1462 }, { "epoch": 0.17752699915058853, "grad_norm": 1.1798118352890015, "learning_rate": 1.6653973713303036e-05, "loss": 0.13194964826107025, "step": 1463 }, { "epoch": 0.1776483436476156, "grad_norm": 1.6883291006088257, "learning_rate": 1.665151701265201e-05, "loss": 0.3973052501678467, "step": 1464 }, { "epoch": 0.17776968814464264, "grad_norm": 3.1461122035980225, "learning_rate": 1.6649060312000985e-05, "loss": 0.49332958459854126, "step": 1465 }, { "epoch": 0.1778910326416697, "grad_norm": 2.2000954151153564, "learning_rate": 1.664660361134996e-05, "loss": 0.41994062066078186, "step": 1466 }, { "epoch": 0.17801237713869675, "grad_norm": 4.530543804168701, "learning_rate": 1.6644146910698933e-05, "loss": 0.3007562458515167, "step": 1467 }, { "epoch": 0.17813372163572383, "grad_norm": 2.0147387981414795, "learning_rate": 1.6641690210047907e-05, "loss": 0.6290687918663025, "step": 1468 }, { "epoch": 0.17825506613275088, "grad_norm": 2.1812171936035156, "learning_rate": 1.6639233509396882e-05, "loss": 0.27655136585235596, "step": 1469 }, { "epoch": 0.17837641062977794, "grad_norm": 1.9961246252059937, "learning_rate": 1.6636776808745856e-05, "loss": 0.34929129481315613, "step": 1470 }, { "epoch": 0.178497755126805, "grad_norm": 1.2723180055618286, "learning_rate": 1.663432010809483e-05, "loss": 0.13786189258098602, "step": 1471 }, { "epoch": 0.17861909962383207, "grad_norm": 2.7387282848358154, "learning_rate": 1.6631863407443804e-05, "loss": 0.5443019866943359, "step": 1472 }, { "epoch": 0.17874044412085913, "grad_norm": 2.3264379501342773, "learning_rate": 1.662940670679278e-05, "loss": 0.40921467542648315, "step": 1473 }, { "epoch": 0.17886178861788618, "grad_norm": 3.4638664722442627, "learning_rate": 1.6626950006141753e-05, "loss": 0.8635279536247253, "step": 1474 }, { "epoch": 0.17898313311491323, "grad_norm": 2.1667139530181885, "learning_rate": 1.6624493305490727e-05, "loss": 0.27864134311676025, "step": 1475 }, { "epoch": 0.1791044776119403, "grad_norm": 3.282740592956543, "learning_rate": 1.66220366048397e-05, "loss": 0.3811657428741455, "step": 1476 }, { "epoch": 0.17922582210896737, "grad_norm": 1.52030348777771, "learning_rate": 1.6619579904188676e-05, "loss": 0.12177782505750656, "step": 1477 }, { "epoch": 0.17934716660599442, "grad_norm": 1.402393102645874, "learning_rate": 1.661712320353765e-05, "loss": 0.1254013478755951, "step": 1478 }, { "epoch": 0.17946851110302148, "grad_norm": 2.2220330238342285, "learning_rate": 1.6614666502886624e-05, "loss": 0.3898024559020996, "step": 1479 }, { "epoch": 0.17958985560004853, "grad_norm": 1.6085693836212158, "learning_rate": 1.66122098022356e-05, "loss": 0.2051776647567749, "step": 1480 }, { "epoch": 0.17971120009707559, "grad_norm": 3.699589252471924, "learning_rate": 1.6609753101584573e-05, "loss": 0.5865758657455444, "step": 1481 }, { "epoch": 0.17983254459410267, "grad_norm": 1.851828694343567, "learning_rate": 1.6607296400933547e-05, "loss": 0.14690032601356506, "step": 1482 }, { "epoch": 0.17995388909112972, "grad_norm": 2.143031358718872, "learning_rate": 1.660483970028252e-05, "loss": 0.3911144733428955, "step": 1483 }, { "epoch": 0.18007523358815677, "grad_norm": 2.165050745010376, "learning_rate": 1.6602382999631496e-05, "loss": 0.46151846647262573, "step": 1484 }, { "epoch": 0.18019657808518383, "grad_norm": 2.1484158039093018, "learning_rate": 1.659992629898047e-05, "loss": 0.185227170586586, "step": 1485 }, { "epoch": 0.1803179225822109, "grad_norm": 1.8021937608718872, "learning_rate": 1.6597469598329444e-05, "loss": 0.21386900544166565, "step": 1486 }, { "epoch": 0.18043926707923796, "grad_norm": 1.8592053651809692, "learning_rate": 1.659501289767842e-05, "loss": 0.19364283978939056, "step": 1487 }, { "epoch": 0.18056061157626502, "grad_norm": 2.145983934402466, "learning_rate": 1.6592556197027393e-05, "loss": 0.29494696855545044, "step": 1488 }, { "epoch": 0.18068195607329207, "grad_norm": 2.1097612380981445, "learning_rate": 1.6590099496376367e-05, "loss": 0.29082202911376953, "step": 1489 }, { "epoch": 0.18080330057031913, "grad_norm": 1.9546377658843994, "learning_rate": 1.658764279572534e-05, "loss": 0.21720995008945465, "step": 1490 }, { "epoch": 0.1809246450673462, "grad_norm": 1.3910820484161377, "learning_rate": 1.658518609507432e-05, "loss": 0.23794345557689667, "step": 1491 }, { "epoch": 0.18104598956437326, "grad_norm": 1.2930516004562378, "learning_rate": 1.6582729394423293e-05, "loss": 0.07657837867736816, "step": 1492 }, { "epoch": 0.18116733406140031, "grad_norm": 2.9213075637817383, "learning_rate": 1.6580272693772267e-05, "loss": 0.5059197545051575, "step": 1493 }, { "epoch": 0.18128867855842737, "grad_norm": 1.7775009870529175, "learning_rate": 1.657781599312124e-05, "loss": 0.18545974791049957, "step": 1494 }, { "epoch": 0.18141002305545442, "grad_norm": 2.2526183128356934, "learning_rate": 1.6575359292470216e-05, "loss": 0.5661383867263794, "step": 1495 }, { "epoch": 0.1815313675524815, "grad_norm": 2.2152130603790283, "learning_rate": 1.657290259181919e-05, "loss": 0.34863507747650146, "step": 1496 }, { "epoch": 0.18165271204950856, "grad_norm": 1.5461905002593994, "learning_rate": 1.6570445891168164e-05, "loss": 0.2251451462507248, "step": 1497 }, { "epoch": 0.1817740565465356, "grad_norm": 1.752030849456787, "learning_rate": 1.656798919051714e-05, "loss": 0.21238146722316742, "step": 1498 }, { "epoch": 0.18189540104356267, "grad_norm": 2.57700252532959, "learning_rate": 1.6565532489866113e-05, "loss": 0.41275328397750854, "step": 1499 }, { "epoch": 0.18201674554058975, "grad_norm": 1.9533751010894775, "learning_rate": 1.6563075789215087e-05, "loss": 0.6596766710281372, "step": 1500 }, { "epoch": 0.1821380900376168, "grad_norm": 3.1995153427124023, "learning_rate": 1.656061908856406e-05, "loss": 0.3358881175518036, "step": 1501 }, { "epoch": 0.18225943453464385, "grad_norm": 2.530433177947998, "learning_rate": 1.6558162387913036e-05, "loss": 0.468052476644516, "step": 1502 }, { "epoch": 0.1823807790316709, "grad_norm": 1.9043426513671875, "learning_rate": 1.655570568726201e-05, "loss": 0.46296077966690063, "step": 1503 }, { "epoch": 0.18250212352869796, "grad_norm": 1.7315770387649536, "learning_rate": 1.6553248986610984e-05, "loss": 0.35026147961616516, "step": 1504 }, { "epoch": 0.18262346802572504, "grad_norm": 2.0941426753997803, "learning_rate": 1.655079228595996e-05, "loss": 0.159994974732399, "step": 1505 }, { "epoch": 0.1827448125227521, "grad_norm": 2.1495745182037354, "learning_rate": 1.6548335585308933e-05, "loss": 0.19524291157722473, "step": 1506 }, { "epoch": 0.18286615701977915, "grad_norm": 2.2648754119873047, "learning_rate": 1.6545878884657907e-05, "loss": 0.48489266633987427, "step": 1507 }, { "epoch": 0.1829875015168062, "grad_norm": 1.5805639028549194, "learning_rate": 1.654342218400688e-05, "loss": 0.31412607431411743, "step": 1508 }, { "epoch": 0.18310884601383326, "grad_norm": 2.305727243423462, "learning_rate": 1.6540965483355855e-05, "loss": 0.2490062415599823, "step": 1509 }, { "epoch": 0.18323019051086034, "grad_norm": 1.5892924070358276, "learning_rate": 1.653850878270483e-05, "loss": 0.23507654666900635, "step": 1510 }, { "epoch": 0.1833515350078874, "grad_norm": 1.475388765335083, "learning_rate": 1.6536052082053804e-05, "loss": 0.4014114737510681, "step": 1511 }, { "epoch": 0.18347287950491445, "grad_norm": 2.111905336380005, "learning_rate": 1.6533595381402778e-05, "loss": 0.21480782330036163, "step": 1512 }, { "epoch": 0.1835942240019415, "grad_norm": 2.4657700061798096, "learning_rate": 1.6531138680751752e-05, "loss": 0.3230441212654114, "step": 1513 }, { "epoch": 0.18371556849896858, "grad_norm": 2.013392210006714, "learning_rate": 1.6528681980100727e-05, "loss": 0.12354740500450134, "step": 1514 }, { "epoch": 0.18383691299599564, "grad_norm": 1.8254839181900024, "learning_rate": 1.65262252794497e-05, "loss": 0.22884692251682281, "step": 1515 }, { "epoch": 0.1839582574930227, "grad_norm": 1.2895177602767944, "learning_rate": 1.6523768578798675e-05, "loss": 0.14382505416870117, "step": 1516 }, { "epoch": 0.18407960199004975, "grad_norm": 1.4898990392684937, "learning_rate": 1.652131187814765e-05, "loss": 0.07804793864488602, "step": 1517 }, { "epoch": 0.1842009464870768, "grad_norm": 1.7102720737457275, "learning_rate": 1.6518855177496624e-05, "loss": 0.15108470618724823, "step": 1518 }, { "epoch": 0.18432229098410388, "grad_norm": 1.8784161806106567, "learning_rate": 1.6516398476845598e-05, "loss": 0.5007836818695068, "step": 1519 }, { "epoch": 0.18444363548113094, "grad_norm": 2.171732187271118, "learning_rate": 1.6513941776194572e-05, "loss": 0.27269411087036133, "step": 1520 }, { "epoch": 0.184564979978158, "grad_norm": 2.179102897644043, "learning_rate": 1.6511485075543547e-05, "loss": 0.3284929394721985, "step": 1521 }, { "epoch": 0.18468632447518504, "grad_norm": 1.841059684753418, "learning_rate": 1.650902837489252e-05, "loss": 0.25381767749786377, "step": 1522 }, { "epoch": 0.1848076689722121, "grad_norm": 2.1668596267700195, "learning_rate": 1.6506571674241495e-05, "loss": 0.2602318525314331, "step": 1523 }, { "epoch": 0.18492901346923918, "grad_norm": 2.501694917678833, "learning_rate": 1.650411497359047e-05, "loss": 0.3330945372581482, "step": 1524 }, { "epoch": 0.18505035796626623, "grad_norm": 2.025729179382324, "learning_rate": 1.6501658272939444e-05, "loss": 0.28070488572120667, "step": 1525 }, { "epoch": 0.1851717024632933, "grad_norm": 2.1759018898010254, "learning_rate": 1.6499201572288418e-05, "loss": 0.2297292947769165, "step": 1526 }, { "epoch": 0.18529304696032034, "grad_norm": 2.583195447921753, "learning_rate": 1.6496744871637392e-05, "loss": 0.25529593229293823, "step": 1527 }, { "epoch": 0.18541439145734742, "grad_norm": 1.7422884702682495, "learning_rate": 1.6494288170986366e-05, "loss": 0.17456205189228058, "step": 1528 }, { "epoch": 0.18553573595437448, "grad_norm": 2.843953847885132, "learning_rate": 1.649183147033534e-05, "loss": 0.5891610383987427, "step": 1529 }, { "epoch": 0.18565708045140153, "grad_norm": 1.9662164449691772, "learning_rate": 1.6489374769684315e-05, "loss": 0.23069551587104797, "step": 1530 }, { "epoch": 0.18577842494842858, "grad_norm": 2.4861812591552734, "learning_rate": 1.648691806903329e-05, "loss": 0.8122949600219727, "step": 1531 }, { "epoch": 0.18589976944545564, "grad_norm": 1.1293015480041504, "learning_rate": 1.6484461368382263e-05, "loss": 0.018577666953206062, "step": 1532 }, { "epoch": 0.18602111394248272, "grad_norm": 2.1808528900146484, "learning_rate": 1.6482004667731238e-05, "loss": 0.20170214772224426, "step": 1533 }, { "epoch": 0.18614245843950977, "grad_norm": 1.7788127660751343, "learning_rate": 1.6479547967080212e-05, "loss": 0.1955050826072693, "step": 1534 }, { "epoch": 0.18626380293653683, "grad_norm": 1.782490611076355, "learning_rate": 1.6477091266429186e-05, "loss": 0.11952733993530273, "step": 1535 }, { "epoch": 0.18638514743356388, "grad_norm": 1.8649412393569946, "learning_rate": 1.647463456577816e-05, "loss": 0.09318369626998901, "step": 1536 }, { "epoch": 0.18650649193059093, "grad_norm": 2.223051071166992, "learning_rate": 1.6472177865127135e-05, "loss": 0.41792887449264526, "step": 1537 }, { "epoch": 0.18662783642761802, "grad_norm": 2.1368985176086426, "learning_rate": 1.646972116447611e-05, "loss": 0.2785337567329407, "step": 1538 }, { "epoch": 0.18674918092464507, "grad_norm": 2.03661847114563, "learning_rate": 1.6467264463825083e-05, "loss": 0.23641645908355713, "step": 1539 }, { "epoch": 0.18687052542167212, "grad_norm": 1.1259260177612305, "learning_rate": 1.6464807763174057e-05, "loss": 0.10324345529079437, "step": 1540 }, { "epoch": 0.18699186991869918, "grad_norm": 2.2833993434906006, "learning_rate": 1.646235106252303e-05, "loss": 0.2568722367286682, "step": 1541 }, { "epoch": 0.18711321441572626, "grad_norm": 2.33410906791687, "learning_rate": 1.6459894361872006e-05, "loss": 0.4274582266807556, "step": 1542 }, { "epoch": 0.1872345589127533, "grad_norm": 1.0029762983322144, "learning_rate": 1.645743766122098e-05, "loss": 0.15004250407218933, "step": 1543 }, { "epoch": 0.18735590340978037, "grad_norm": 1.938217043876648, "learning_rate": 1.6454980960569954e-05, "loss": 0.42935827374458313, "step": 1544 }, { "epoch": 0.18747724790680742, "grad_norm": 2.3287079334259033, "learning_rate": 1.645252425991893e-05, "loss": 0.5231063365936279, "step": 1545 }, { "epoch": 0.18759859240383447, "grad_norm": 1.9757171869277954, "learning_rate": 1.6450067559267903e-05, "loss": 0.132722407579422, "step": 1546 }, { "epoch": 0.18771993690086156, "grad_norm": 2.0718235969543457, "learning_rate": 1.6447610858616877e-05, "loss": 0.6672481894493103, "step": 1547 }, { "epoch": 0.1878412813978886, "grad_norm": 1.5003156661987305, "learning_rate": 1.644515415796585e-05, "loss": 0.2142794132232666, "step": 1548 }, { "epoch": 0.18796262589491566, "grad_norm": 0.9966719746589661, "learning_rate": 1.6442697457314826e-05, "loss": 0.11649642139673233, "step": 1549 }, { "epoch": 0.18808397039194272, "grad_norm": 1.5972671508789062, "learning_rate": 1.64402407566638e-05, "loss": 0.239975705742836, "step": 1550 }, { "epoch": 0.18820531488896977, "grad_norm": 2.4622395038604736, "learning_rate": 1.6437784056012774e-05, "loss": 0.4359244704246521, "step": 1551 }, { "epoch": 0.18832665938599685, "grad_norm": 1.6812524795532227, "learning_rate": 1.643532735536175e-05, "loss": 0.19567492604255676, "step": 1552 }, { "epoch": 0.1884480038830239, "grad_norm": 1.736446499824524, "learning_rate": 1.6432870654710723e-05, "loss": 0.37146762013435364, "step": 1553 }, { "epoch": 0.18856934838005096, "grad_norm": 2.5409858226776123, "learning_rate": 1.6430413954059697e-05, "loss": 0.544783353805542, "step": 1554 }, { "epoch": 0.18869069287707801, "grad_norm": 1.9650341272354126, "learning_rate": 1.642795725340867e-05, "loss": 0.805822491645813, "step": 1555 }, { "epoch": 0.1888120373741051, "grad_norm": 1.6074107885360718, "learning_rate": 1.6425500552757646e-05, "loss": 0.5202041864395142, "step": 1556 }, { "epoch": 0.18893338187113215, "grad_norm": 2.805347442626953, "learning_rate": 1.6423043852106623e-05, "loss": 0.37637004256248474, "step": 1557 }, { "epoch": 0.1890547263681592, "grad_norm": 0.6213400959968567, "learning_rate": 1.6420587151455597e-05, "loss": 0.0326407290995121, "step": 1558 }, { "epoch": 0.18917607086518626, "grad_norm": 2.420454978942871, "learning_rate": 1.6418130450804572e-05, "loss": 0.2922397255897522, "step": 1559 }, { "epoch": 0.1892974153622133, "grad_norm": 1.9075942039489746, "learning_rate": 1.6415673750153546e-05, "loss": 0.32393327355384827, "step": 1560 }, { "epoch": 0.1894187598592404, "grad_norm": 0.8147814273834229, "learning_rate": 1.641321704950252e-05, "loss": 0.10852165520191193, "step": 1561 }, { "epoch": 0.18954010435626745, "grad_norm": 0.00906144455075264, "learning_rate": 1.6410760348851494e-05, "loss": 0.00013929870328865945, "step": 1562 }, { "epoch": 0.1896614488532945, "grad_norm": 1.9047940969467163, "learning_rate": 1.640830364820047e-05, "loss": 0.13412030041217804, "step": 1563 }, { "epoch": 0.18978279335032155, "grad_norm": 2.203005313873291, "learning_rate": 1.6405846947549443e-05, "loss": 0.4438808560371399, "step": 1564 }, { "epoch": 0.18990413784734864, "grad_norm": 0.6975694298744202, "learning_rate": 1.6403390246898417e-05, "loss": 0.03993745520710945, "step": 1565 }, { "epoch": 0.1900254823443757, "grad_norm": 2.6445183753967285, "learning_rate": 1.640093354624739e-05, "loss": 0.23849809169769287, "step": 1566 }, { "epoch": 0.19014682684140274, "grad_norm": 2.594602346420288, "learning_rate": 1.6398476845596366e-05, "loss": 0.3863987326622009, "step": 1567 }, { "epoch": 0.1902681713384298, "grad_norm": 10.270295143127441, "learning_rate": 1.639602014494534e-05, "loss": 0.41021063923835754, "step": 1568 }, { "epoch": 0.19038951583545685, "grad_norm": 1.9823209047317505, "learning_rate": 1.6393563444294314e-05, "loss": 0.42316073179244995, "step": 1569 }, { "epoch": 0.19051086033248393, "grad_norm": 2.612694263458252, "learning_rate": 1.639110674364329e-05, "loss": 0.9028637409210205, "step": 1570 }, { "epoch": 0.190632204829511, "grad_norm": 2.160759687423706, "learning_rate": 1.6388650042992263e-05, "loss": 0.38729843497276306, "step": 1571 }, { "epoch": 0.19075354932653804, "grad_norm": 1.6438068151474, "learning_rate": 1.6386193342341237e-05, "loss": 0.13886958360671997, "step": 1572 }, { "epoch": 0.1908748938235651, "grad_norm": 2.996663808822632, "learning_rate": 1.638373664169021e-05, "loss": 0.33843570947647095, "step": 1573 }, { "epoch": 0.19099623832059215, "grad_norm": 3.0241034030914307, "learning_rate": 1.6381279941039186e-05, "loss": 0.2785295248031616, "step": 1574 }, { "epoch": 0.19111758281761923, "grad_norm": 2.1101088523864746, "learning_rate": 1.637882324038816e-05, "loss": 0.6817824244499207, "step": 1575 }, { "epoch": 0.19123892731464628, "grad_norm": 1.186375617980957, "learning_rate": 1.6376366539737134e-05, "loss": 0.05503537505865097, "step": 1576 }, { "epoch": 0.19136027181167334, "grad_norm": 1.5986549854278564, "learning_rate": 1.637390983908611e-05, "loss": 0.15630364418029785, "step": 1577 }, { "epoch": 0.1914816163087004, "grad_norm": 2.139268398284912, "learning_rate": 1.6371453138435083e-05, "loss": 0.4205041527748108, "step": 1578 }, { "epoch": 0.19160296080572747, "grad_norm": 1.2108969688415527, "learning_rate": 1.6368996437784057e-05, "loss": 0.10240291804075241, "step": 1579 }, { "epoch": 0.19172430530275453, "grad_norm": 2.612095594406128, "learning_rate": 1.636653973713303e-05, "loss": 0.5215551853179932, "step": 1580 }, { "epoch": 0.19184564979978158, "grad_norm": 1.5930920839309692, "learning_rate": 1.6364083036482005e-05, "loss": 0.25414586067199707, "step": 1581 }, { "epoch": 0.19196699429680864, "grad_norm": 1.0892622470855713, "learning_rate": 1.636162633583098e-05, "loss": 0.20532195270061493, "step": 1582 }, { "epoch": 0.1920883387938357, "grad_norm": 2.093474864959717, "learning_rate": 1.6359169635179954e-05, "loss": 0.7632452249526978, "step": 1583 }, { "epoch": 0.19220968329086277, "grad_norm": 1.594140648841858, "learning_rate": 1.6356712934528928e-05, "loss": 0.13568240404129028, "step": 1584 }, { "epoch": 0.19233102778788982, "grad_norm": 2.5025835037231445, "learning_rate": 1.6354256233877902e-05, "loss": 0.23390145599842072, "step": 1585 }, { "epoch": 0.19245237228491688, "grad_norm": 0.01225997507572174, "learning_rate": 1.6351799533226877e-05, "loss": 0.00020544853759929538, "step": 1586 }, { "epoch": 0.19257371678194393, "grad_norm": 1.9831618070602417, "learning_rate": 1.634934283257585e-05, "loss": 0.1494239866733551, "step": 1587 }, { "epoch": 0.19269506127897099, "grad_norm": 1.9081530570983887, "learning_rate": 1.6346886131924825e-05, "loss": 0.17009824514389038, "step": 1588 }, { "epoch": 0.19281640577599807, "grad_norm": 2.5004465579986572, "learning_rate": 1.63444294312738e-05, "loss": 0.4157010018825531, "step": 1589 }, { "epoch": 0.19293775027302512, "grad_norm": 2.543424367904663, "learning_rate": 1.6341972730622774e-05, "loss": 0.32920312881469727, "step": 1590 }, { "epoch": 0.19305909477005218, "grad_norm": 2.6315767765045166, "learning_rate": 1.6339516029971748e-05, "loss": 0.888789713382721, "step": 1591 }, { "epoch": 0.19318043926707923, "grad_norm": 2.5114357471466064, "learning_rate": 1.6337059329320722e-05, "loss": 0.3034912645816803, "step": 1592 }, { "epoch": 0.1933017837641063, "grad_norm": 1.3469399213790894, "learning_rate": 1.6334602628669697e-05, "loss": 0.2188844382762909, "step": 1593 }, { "epoch": 0.19342312826113336, "grad_norm": 2.7407827377319336, "learning_rate": 1.633214592801867e-05, "loss": 0.27693620324134827, "step": 1594 }, { "epoch": 0.19354447275816042, "grad_norm": 2.3420448303222656, "learning_rate": 1.6329689227367645e-05, "loss": 0.7372701168060303, "step": 1595 }, { "epoch": 0.19366581725518747, "grad_norm": 2.446305990219116, "learning_rate": 1.632723252671662e-05, "loss": 0.34233394265174866, "step": 1596 }, { "epoch": 0.19378716175221453, "grad_norm": 1.8076667785644531, "learning_rate": 1.6324775826065597e-05, "loss": 0.14907054603099823, "step": 1597 }, { "epoch": 0.1939085062492416, "grad_norm": 2.0409836769104004, "learning_rate": 1.632231912541457e-05, "loss": 0.27943429350852966, "step": 1598 }, { "epoch": 0.19402985074626866, "grad_norm": 2.7173757553100586, "learning_rate": 1.6319862424763545e-05, "loss": 0.46326854825019836, "step": 1599 }, { "epoch": 0.19415119524329572, "grad_norm": 2.4713032245635986, "learning_rate": 1.631740572411252e-05, "loss": 0.14638936519622803, "step": 1600 }, { "epoch": 0.19427253974032277, "grad_norm": 0.9708374738693237, "learning_rate": 1.6314949023461494e-05, "loss": 0.05027943104505539, "step": 1601 }, { "epoch": 0.19439388423734982, "grad_norm": 3.3602957725524902, "learning_rate": 1.6312492322810468e-05, "loss": 0.3926747441291809, "step": 1602 }, { "epoch": 0.1945152287343769, "grad_norm": 2.024567127227783, "learning_rate": 1.6310035622159442e-05, "loss": 0.2768881916999817, "step": 1603 }, { "epoch": 0.19463657323140396, "grad_norm": 2.508981227874756, "learning_rate": 1.6307578921508417e-05, "loss": 0.299752414226532, "step": 1604 }, { "epoch": 0.194757917728431, "grad_norm": 2.3269424438476562, "learning_rate": 1.630512222085739e-05, "loss": 0.44652560353279114, "step": 1605 }, { "epoch": 0.19487926222545807, "grad_norm": 2.056466579437256, "learning_rate": 1.6302665520206365e-05, "loss": 0.6093921065330505, "step": 1606 }, { "epoch": 0.19500060672248515, "grad_norm": 3.3587870597839355, "learning_rate": 1.630020881955534e-05, "loss": 0.3951265215873718, "step": 1607 }, { "epoch": 0.1951219512195122, "grad_norm": 2.3945207595825195, "learning_rate": 1.6297752118904314e-05, "loss": 0.19893671572208405, "step": 1608 }, { "epoch": 0.19524329571653926, "grad_norm": 1.694069743156433, "learning_rate": 1.6295295418253288e-05, "loss": 0.10053470730781555, "step": 1609 }, { "epoch": 0.1953646402135663, "grad_norm": 3.7009809017181396, "learning_rate": 1.6292838717602262e-05, "loss": 0.7382639050483704, "step": 1610 }, { "epoch": 0.19548598471059336, "grad_norm": 2.0691487789154053, "learning_rate": 1.6290382016951237e-05, "loss": 0.5720087289810181, "step": 1611 }, { "epoch": 0.19560732920762045, "grad_norm": 1.4953621625900269, "learning_rate": 1.628792531630021e-05, "loss": 0.22495737671852112, "step": 1612 }, { "epoch": 0.1957286737046475, "grad_norm": 2.3584330081939697, "learning_rate": 1.6285468615649185e-05, "loss": 0.4193107783794403, "step": 1613 }, { "epoch": 0.19585001820167455, "grad_norm": 2.2631139755249023, "learning_rate": 1.628301191499816e-05, "loss": 0.21283216774463654, "step": 1614 }, { "epoch": 0.1959713626987016, "grad_norm": 1.281013011932373, "learning_rate": 1.6280555214347134e-05, "loss": 0.11674115061759949, "step": 1615 }, { "epoch": 0.19609270719572866, "grad_norm": 1.323325753211975, "learning_rate": 1.6278098513696108e-05, "loss": 0.12784036993980408, "step": 1616 }, { "epoch": 0.19621405169275574, "grad_norm": 2.992032289505005, "learning_rate": 1.6275641813045082e-05, "loss": 0.6973645687103271, "step": 1617 }, { "epoch": 0.1963353961897828, "grad_norm": 1.3869400024414062, "learning_rate": 1.6273185112394056e-05, "loss": 0.21278777718544006, "step": 1618 }, { "epoch": 0.19645674068680985, "grad_norm": 2.2656288146972656, "learning_rate": 1.627072841174303e-05, "loss": 0.3129113018512726, "step": 1619 }, { "epoch": 0.1965780851838369, "grad_norm": 1.4828702211380005, "learning_rate": 1.6268271711092005e-05, "loss": 0.13820579648017883, "step": 1620 }, { "epoch": 0.19669942968086399, "grad_norm": 2.30009388923645, "learning_rate": 1.626581501044098e-05, "loss": 0.22714544832706451, "step": 1621 }, { "epoch": 0.19682077417789104, "grad_norm": 1.719403624534607, "learning_rate": 1.6263358309789953e-05, "loss": 0.4299517869949341, "step": 1622 }, { "epoch": 0.1969421186749181, "grad_norm": 1.3880891799926758, "learning_rate": 1.6260901609138928e-05, "loss": 0.12024924159049988, "step": 1623 }, { "epoch": 0.19706346317194515, "grad_norm": 0.38428664207458496, "learning_rate": 1.6258444908487902e-05, "loss": 0.014788172207772732, "step": 1624 }, { "epoch": 0.1971848076689722, "grad_norm": 3.133810520172119, "learning_rate": 1.6255988207836876e-05, "loss": 0.4186369776725769, "step": 1625 }, { "epoch": 0.19730615216599928, "grad_norm": 2.946176528930664, "learning_rate": 1.625353150718585e-05, "loss": 0.4143875539302826, "step": 1626 }, { "epoch": 0.19742749666302634, "grad_norm": 1.9746789932250977, "learning_rate": 1.6251074806534825e-05, "loss": 0.33198633790016174, "step": 1627 }, { "epoch": 0.1975488411600534, "grad_norm": 2.5999584197998047, "learning_rate": 1.62486181058838e-05, "loss": 0.19740217924118042, "step": 1628 }, { "epoch": 0.19767018565708044, "grad_norm": 1.9985610246658325, "learning_rate": 1.6246161405232773e-05, "loss": 0.31167468428611755, "step": 1629 }, { "epoch": 0.1977915301541075, "grad_norm": 4.352280616760254, "learning_rate": 1.6243704704581747e-05, "loss": 0.5498289465904236, "step": 1630 }, { "epoch": 0.19791287465113458, "grad_norm": 2.2715535163879395, "learning_rate": 1.6241248003930722e-05, "loss": 0.4732452929019928, "step": 1631 }, { "epoch": 0.19803421914816163, "grad_norm": 1.4650019407272339, "learning_rate": 1.6238791303279696e-05, "loss": 0.06958284229040146, "step": 1632 }, { "epoch": 0.1981555636451887, "grad_norm": 2.1653013229370117, "learning_rate": 1.623633460262867e-05, "loss": 0.20110665261745453, "step": 1633 }, { "epoch": 0.19827690814221574, "grad_norm": 2.947077989578247, "learning_rate": 1.6233877901977644e-05, "loss": 0.44844576716423035, "step": 1634 }, { "epoch": 0.19839825263924282, "grad_norm": 2.272237539291382, "learning_rate": 1.623142120132662e-05, "loss": 0.36181971430778503, "step": 1635 }, { "epoch": 0.19851959713626988, "grad_norm": 1.9349480867385864, "learning_rate": 1.6228964500675596e-05, "loss": 0.07440108060836792, "step": 1636 }, { "epoch": 0.19864094163329693, "grad_norm": 1.5741839408874512, "learning_rate": 1.622650780002457e-05, "loss": 0.3192439377307892, "step": 1637 }, { "epoch": 0.19876228613032398, "grad_norm": 2.3907079696655273, "learning_rate": 1.6224051099373545e-05, "loss": 0.2281782180070877, "step": 1638 }, { "epoch": 0.19888363062735104, "grad_norm": 3.0020861625671387, "learning_rate": 1.622159439872252e-05, "loss": 0.3084147274494171, "step": 1639 }, { "epoch": 0.19900497512437812, "grad_norm": 1.3268686532974243, "learning_rate": 1.6219137698071493e-05, "loss": 0.1354789137840271, "step": 1640 }, { "epoch": 0.19912631962140517, "grad_norm": 2.8789944648742676, "learning_rate": 1.6216680997420468e-05, "loss": 0.3687528967857361, "step": 1641 }, { "epoch": 0.19924766411843223, "grad_norm": 2.311739921569824, "learning_rate": 1.6214224296769442e-05, "loss": 1.1628113985061646, "step": 1642 }, { "epoch": 0.19936900861545928, "grad_norm": 1.3445240259170532, "learning_rate": 1.6211767596118416e-05, "loss": 0.04311354085803032, "step": 1643 }, { "epoch": 0.19949035311248633, "grad_norm": 1.4748131036758423, "learning_rate": 1.620931089546739e-05, "loss": 0.36780551075935364, "step": 1644 }, { "epoch": 0.19961169760951342, "grad_norm": 1.5068604946136475, "learning_rate": 1.6206854194816365e-05, "loss": 0.3677091896533966, "step": 1645 }, { "epoch": 0.19973304210654047, "grad_norm": 2.6158766746520996, "learning_rate": 1.620439749416534e-05, "loss": 0.4385922849178314, "step": 1646 }, { "epoch": 0.19985438660356752, "grad_norm": 1.6956822872161865, "learning_rate": 1.6201940793514313e-05, "loss": 0.11273642629384995, "step": 1647 }, { "epoch": 0.19997573110059458, "grad_norm": 1.862756609916687, "learning_rate": 1.6199484092863287e-05, "loss": 0.25619611144065857, "step": 1648 }, { "epoch": 0.20009707559762166, "grad_norm": 1.0184905529022217, "learning_rate": 1.6197027392212262e-05, "loss": 0.07765750586986542, "step": 1649 }, { "epoch": 0.2002184200946487, "grad_norm": 1.6538012027740479, "learning_rate": 1.6194570691561236e-05, "loss": 0.06520011276006699, "step": 1650 }, { "epoch": 0.20033976459167577, "grad_norm": 2.2978315353393555, "learning_rate": 1.619211399091021e-05, "loss": 0.313754141330719, "step": 1651 }, { "epoch": 0.20046110908870282, "grad_norm": 1.6874979734420776, "learning_rate": 1.6189657290259185e-05, "loss": 0.19735673069953918, "step": 1652 }, { "epoch": 0.20058245358572988, "grad_norm": 2.3955869674682617, "learning_rate": 1.618720058960816e-05, "loss": 0.6544023752212524, "step": 1653 }, { "epoch": 0.20070379808275696, "grad_norm": 2.770576238632202, "learning_rate": 1.6184743888957133e-05, "loss": 0.30553755164146423, "step": 1654 }, { "epoch": 0.200825142579784, "grad_norm": 2.0315816402435303, "learning_rate": 1.6182287188306107e-05, "loss": 0.35758891701698303, "step": 1655 }, { "epoch": 0.20094648707681106, "grad_norm": 1.5492041110992432, "learning_rate": 1.617983048765508e-05, "loss": 0.37275615334510803, "step": 1656 }, { "epoch": 0.20106783157383812, "grad_norm": 1.8539915084838867, "learning_rate": 1.6177373787004056e-05, "loss": 0.4826367199420929, "step": 1657 }, { "epoch": 0.2011891760708652, "grad_norm": 1.8999037742614746, "learning_rate": 1.617491708635303e-05, "loss": 0.4474004805088043, "step": 1658 }, { "epoch": 0.20131052056789225, "grad_norm": 1.7485865354537964, "learning_rate": 1.6172460385702004e-05, "loss": 0.6810899376869202, "step": 1659 }, { "epoch": 0.2014318650649193, "grad_norm": 2.3828468322753906, "learning_rate": 1.617000368505098e-05, "loss": 0.5326903462409973, "step": 1660 }, { "epoch": 0.20155320956194636, "grad_norm": 2.6605119705200195, "learning_rate": 1.6167546984399953e-05, "loss": 0.1593848317861557, "step": 1661 }, { "epoch": 0.20167455405897342, "grad_norm": 2.227161169052124, "learning_rate": 1.6165090283748927e-05, "loss": 0.38251793384552, "step": 1662 }, { "epoch": 0.2017958985560005, "grad_norm": 1.8754427433013916, "learning_rate": 1.61626335830979e-05, "loss": 0.19771966338157654, "step": 1663 }, { "epoch": 0.20191724305302755, "grad_norm": 2.2835958003997803, "learning_rate": 1.6160176882446876e-05, "loss": 0.17081639170646667, "step": 1664 }, { "epoch": 0.2020385875500546, "grad_norm": 1.891542911529541, "learning_rate": 1.615772018179585e-05, "loss": 0.725801944732666, "step": 1665 }, { "epoch": 0.20215993204708166, "grad_norm": 2.1305201053619385, "learning_rate": 1.6155263481144824e-05, "loss": 0.2174772024154663, "step": 1666 }, { "epoch": 0.2022812765441087, "grad_norm": 2.1975913047790527, "learning_rate": 1.61528067804938e-05, "loss": 0.36247697472572327, "step": 1667 }, { "epoch": 0.2024026210411358, "grad_norm": 2.2730934619903564, "learning_rate": 1.6150350079842773e-05, "loss": 0.35575994849205017, "step": 1668 }, { "epoch": 0.20252396553816285, "grad_norm": 2.4796621799468994, "learning_rate": 1.6147893379191747e-05, "loss": 0.6208682060241699, "step": 1669 }, { "epoch": 0.2026453100351899, "grad_norm": 2.4022440910339355, "learning_rate": 1.614543667854072e-05, "loss": 0.5125530362129211, "step": 1670 }, { "epoch": 0.20276665453221696, "grad_norm": 2.6297521591186523, "learning_rate": 1.6142979977889695e-05, "loss": 0.5200785398483276, "step": 1671 }, { "epoch": 0.20288799902924404, "grad_norm": 2.7327070236206055, "learning_rate": 1.614052327723867e-05, "loss": 0.757875919342041, "step": 1672 }, { "epoch": 0.2030093435262711, "grad_norm": 1.6281644105911255, "learning_rate": 1.6138066576587644e-05, "loss": 0.17772671580314636, "step": 1673 }, { "epoch": 0.20313068802329814, "grad_norm": 1.9709150791168213, "learning_rate": 1.6135609875936618e-05, "loss": 0.21530991792678833, "step": 1674 }, { "epoch": 0.2032520325203252, "grad_norm": 2.064605951309204, "learning_rate": 1.6133153175285592e-05, "loss": 0.24332857131958008, "step": 1675 }, { "epoch": 0.20337337701735225, "grad_norm": 2.1962037086486816, "learning_rate": 1.6130696474634567e-05, "loss": 0.34543243050575256, "step": 1676 }, { "epoch": 0.20349472151437933, "grad_norm": 2.6600778102874756, "learning_rate": 1.612823977398354e-05, "loss": 0.47594380378723145, "step": 1677 }, { "epoch": 0.2036160660114064, "grad_norm": 1.7026101350784302, "learning_rate": 1.6125783073332515e-05, "loss": 0.19039933383464813, "step": 1678 }, { "epoch": 0.20373741050843344, "grad_norm": 1.6388001441955566, "learning_rate": 1.612332637268149e-05, "loss": 0.1765032410621643, "step": 1679 }, { "epoch": 0.2038587550054605, "grad_norm": 1.057235598564148, "learning_rate": 1.6120869672030464e-05, "loss": 0.09241899847984314, "step": 1680 }, { "epoch": 0.20398009950248755, "grad_norm": 1.5978493690490723, "learning_rate": 1.6118412971379438e-05, "loss": 0.22115862369537354, "step": 1681 }, { "epoch": 0.20410144399951463, "grad_norm": 2.1803946495056152, "learning_rate": 1.6115956270728412e-05, "loss": 0.5432751178741455, "step": 1682 }, { "epoch": 0.20422278849654169, "grad_norm": 1.2694318294525146, "learning_rate": 1.6113499570077387e-05, "loss": 0.07295191287994385, "step": 1683 }, { "epoch": 0.20434413299356874, "grad_norm": 2.1707067489624023, "learning_rate": 1.611104286942636e-05, "loss": 0.3300400376319885, "step": 1684 }, { "epoch": 0.2044654774905958, "grad_norm": 1.5089091062545776, "learning_rate": 1.6108586168775335e-05, "loss": 0.20541003346443176, "step": 1685 }, { "epoch": 0.20458682198762287, "grad_norm": 2.2536306381225586, "learning_rate": 1.610612946812431e-05, "loss": 0.35043635964393616, "step": 1686 }, { "epoch": 0.20470816648464993, "grad_norm": 1.3641126155853271, "learning_rate": 1.6103672767473284e-05, "loss": 0.5306731462478638, "step": 1687 }, { "epoch": 0.20482951098167698, "grad_norm": 1.6115317344665527, "learning_rate": 1.6101216066822258e-05, "loss": 0.12020076811313629, "step": 1688 }, { "epoch": 0.20495085547870404, "grad_norm": 2.0570430755615234, "learning_rate": 1.6098759366171232e-05, "loss": 0.45422253012657166, "step": 1689 }, { "epoch": 0.2050721999757311, "grad_norm": 0.8935880661010742, "learning_rate": 1.6096302665520206e-05, "loss": 0.2039462774991989, "step": 1690 }, { "epoch": 0.20519354447275817, "grad_norm": 1.405432105064392, "learning_rate": 1.609384596486918e-05, "loss": 0.21527734398841858, "step": 1691 }, { "epoch": 0.20531488896978523, "grad_norm": 1.7508655786514282, "learning_rate": 1.6091389264218155e-05, "loss": 0.15633103251457214, "step": 1692 }, { "epoch": 0.20543623346681228, "grad_norm": 3.1882712841033936, "learning_rate": 1.608893256356713e-05, "loss": 0.12639623880386353, "step": 1693 }, { "epoch": 0.20555757796383933, "grad_norm": 2.8699142932891846, "learning_rate": 1.6086475862916103e-05, "loss": 0.6497660875320435, "step": 1694 }, { "epoch": 0.2056789224608664, "grad_norm": 2.3647992610931396, "learning_rate": 1.6084019162265078e-05, "loss": 0.49607181549072266, "step": 1695 }, { "epoch": 0.20580026695789347, "grad_norm": 2.0349488258361816, "learning_rate": 1.6081562461614052e-05, "loss": 0.4656597375869751, "step": 1696 }, { "epoch": 0.20592161145492052, "grad_norm": 2.6206047534942627, "learning_rate": 1.6079105760963026e-05, "loss": 0.7463130354881287, "step": 1697 }, { "epoch": 0.20604295595194758, "grad_norm": 2.1627695560455322, "learning_rate": 1.6076649060312e-05, "loss": 0.4141017198562622, "step": 1698 }, { "epoch": 0.20616430044897463, "grad_norm": 1.7568947076797485, "learning_rate": 1.6074192359660975e-05, "loss": 0.27343836426734924, "step": 1699 }, { "epoch": 0.2062856449460017, "grad_norm": 2.324117660522461, "learning_rate": 1.607173565900995e-05, "loss": 0.48490825295448303, "step": 1700 }, { "epoch": 0.20640698944302877, "grad_norm": 2.1941676139831543, "learning_rate": 1.6069278958358923e-05, "loss": 0.5859739184379578, "step": 1701 }, { "epoch": 0.20652833394005582, "grad_norm": 1.4805810451507568, "learning_rate": 1.60668222577079e-05, "loss": 0.09182153642177582, "step": 1702 }, { "epoch": 0.20664967843708287, "grad_norm": 2.0386710166931152, "learning_rate": 1.6064365557056875e-05, "loss": 0.45521387457847595, "step": 1703 }, { "epoch": 0.20677102293410993, "grad_norm": 1.5653104782104492, "learning_rate": 1.606190885640585e-05, "loss": 0.13128796219825745, "step": 1704 }, { "epoch": 0.206892367431137, "grad_norm": 0.8828320503234863, "learning_rate": 1.6059452155754824e-05, "loss": 0.3042456805706024, "step": 1705 }, { "epoch": 0.20701371192816406, "grad_norm": 1.8564342260360718, "learning_rate": 1.6056995455103798e-05, "loss": 0.679114580154419, "step": 1706 }, { "epoch": 0.20713505642519112, "grad_norm": 2.100465774536133, "learning_rate": 1.6054538754452772e-05, "loss": 0.4968925714492798, "step": 1707 }, { "epoch": 0.20725640092221817, "grad_norm": 1.758471131324768, "learning_rate": 1.6052082053801746e-05, "loss": 0.3478274643421173, "step": 1708 }, { "epoch": 0.20737774541924522, "grad_norm": 2.1922128200531006, "learning_rate": 1.604962535315072e-05, "loss": 0.23997293412685394, "step": 1709 }, { "epoch": 0.2074990899162723, "grad_norm": 2.635798692703247, "learning_rate": 1.6047168652499695e-05, "loss": 0.4193202257156372, "step": 1710 }, { "epoch": 0.20762043441329936, "grad_norm": 1.5727275609970093, "learning_rate": 1.604471195184867e-05, "loss": 0.286593496799469, "step": 1711 }, { "epoch": 0.2077417789103264, "grad_norm": 2.323625326156616, "learning_rate": 1.6042255251197643e-05, "loss": 0.7401584982872009, "step": 1712 }, { "epoch": 0.20786312340735347, "grad_norm": 1.8029251098632812, "learning_rate": 1.6039798550546618e-05, "loss": 0.07832077145576477, "step": 1713 }, { "epoch": 0.20798446790438055, "grad_norm": 2.1487743854522705, "learning_rate": 1.6037341849895592e-05, "loss": 0.3596492111682892, "step": 1714 }, { "epoch": 0.2081058124014076, "grad_norm": 2.0673341751098633, "learning_rate": 1.6034885149244566e-05, "loss": 0.21680273115634918, "step": 1715 }, { "epoch": 0.20822715689843466, "grad_norm": 1.9851670265197754, "learning_rate": 1.603242844859354e-05, "loss": 0.32054102420806885, "step": 1716 }, { "epoch": 0.2083485013954617, "grad_norm": 1.3611160516738892, "learning_rate": 1.6029971747942515e-05, "loss": 0.16891829669475555, "step": 1717 }, { "epoch": 0.20846984589248876, "grad_norm": 2.0195910930633545, "learning_rate": 1.602751504729149e-05, "loss": 0.1865098774433136, "step": 1718 }, { "epoch": 0.20859119038951585, "grad_norm": 1.8068580627441406, "learning_rate": 1.6025058346640463e-05, "loss": 0.1894182711839676, "step": 1719 }, { "epoch": 0.2087125348865429, "grad_norm": 1.6768163442611694, "learning_rate": 1.6022601645989437e-05, "loss": 0.11060914397239685, "step": 1720 }, { "epoch": 0.20883387938356995, "grad_norm": 1.8469024896621704, "learning_rate": 1.6020144945338412e-05, "loss": 0.34541481733322144, "step": 1721 }, { "epoch": 0.208955223880597, "grad_norm": 1.9112247228622437, "learning_rate": 1.6017688244687386e-05, "loss": 0.7080034017562866, "step": 1722 }, { "epoch": 0.20907656837762406, "grad_norm": 0.9716092944145203, "learning_rate": 1.601523154403636e-05, "loss": 0.15459425747394562, "step": 1723 }, { "epoch": 0.20919791287465114, "grad_norm": 1.692309856414795, "learning_rate": 1.6012774843385334e-05, "loss": 0.17918935418128967, "step": 1724 }, { "epoch": 0.2093192573716782, "grad_norm": 2.3839099407196045, "learning_rate": 1.601031814273431e-05, "loss": 0.6370502710342407, "step": 1725 }, { "epoch": 0.20944060186870525, "grad_norm": 2.299150228500366, "learning_rate": 1.6007861442083283e-05, "loss": 0.4926031231880188, "step": 1726 }, { "epoch": 0.2095619463657323, "grad_norm": 1.5755623579025269, "learning_rate": 1.6005404741432257e-05, "loss": 0.3240659236907959, "step": 1727 }, { "epoch": 0.20968329086275939, "grad_norm": 1.6985849142074585, "learning_rate": 1.600294804078123e-05, "loss": 0.17380595207214355, "step": 1728 }, { "epoch": 0.20980463535978644, "grad_norm": 1.0348140001296997, "learning_rate": 1.6000491340130206e-05, "loss": 0.11599317193031311, "step": 1729 }, { "epoch": 0.2099259798568135, "grad_norm": 2.2621145248413086, "learning_rate": 1.599803463947918e-05, "loss": 0.32561737298965454, "step": 1730 }, { "epoch": 0.21004732435384055, "grad_norm": 2.3321175575256348, "learning_rate": 1.5995577938828154e-05, "loss": 0.16530698537826538, "step": 1731 }, { "epoch": 0.2101686688508676, "grad_norm": 1.777334213256836, "learning_rate": 1.599312123817713e-05, "loss": 0.5165007710456848, "step": 1732 }, { "epoch": 0.21029001334789468, "grad_norm": 2.2404754161834717, "learning_rate": 1.5990664537526103e-05, "loss": 0.3408328592777252, "step": 1733 }, { "epoch": 0.21041135784492174, "grad_norm": 3.0858652591705322, "learning_rate": 1.5988207836875077e-05, "loss": 0.6301594972610474, "step": 1734 }, { "epoch": 0.2105327023419488, "grad_norm": 1.9224309921264648, "learning_rate": 1.598575113622405e-05, "loss": 0.14940844476222992, "step": 1735 }, { "epoch": 0.21065404683897584, "grad_norm": 2.7337467670440674, "learning_rate": 1.5983294435573026e-05, "loss": 0.4004538059234619, "step": 1736 }, { "epoch": 0.2107753913360029, "grad_norm": 1.8744240999221802, "learning_rate": 1.5980837734922e-05, "loss": 0.19295921921730042, "step": 1737 }, { "epoch": 0.21089673583302998, "grad_norm": 3.7155728340148926, "learning_rate": 1.5978381034270974e-05, "loss": 0.5623353719711304, "step": 1738 }, { "epoch": 0.21101808033005703, "grad_norm": 2.182851791381836, "learning_rate": 1.597592433361995e-05, "loss": 0.4261482357978821, "step": 1739 }, { "epoch": 0.2111394248270841, "grad_norm": 1.2220971584320068, "learning_rate": 1.5973467632968923e-05, "loss": 0.05080566555261612, "step": 1740 }, { "epoch": 0.21126076932411114, "grad_norm": 2.0607261657714844, "learning_rate": 1.5971010932317897e-05, "loss": 0.5656209588050842, "step": 1741 }, { "epoch": 0.21138211382113822, "grad_norm": 2.1184959411621094, "learning_rate": 1.5968554231666875e-05, "loss": 0.19552554190158844, "step": 1742 }, { "epoch": 0.21150345831816528, "grad_norm": 1.757657766342163, "learning_rate": 1.596609753101585e-05, "loss": 0.3370252251625061, "step": 1743 }, { "epoch": 0.21162480281519233, "grad_norm": 2.8892951011657715, "learning_rate": 1.5963640830364823e-05, "loss": 0.12219370156526566, "step": 1744 }, { "epoch": 0.21174614731221938, "grad_norm": 1.5726659297943115, "learning_rate": 1.5961184129713797e-05, "loss": 0.29163357615470886, "step": 1745 }, { "epoch": 0.21186749180924644, "grad_norm": 2.813798666000366, "learning_rate": 1.595872742906277e-05, "loss": 0.29331523180007935, "step": 1746 }, { "epoch": 0.21198883630627352, "grad_norm": 2.365394353866577, "learning_rate": 1.5956270728411746e-05, "loss": 0.4060492515563965, "step": 1747 }, { "epoch": 0.21211018080330057, "grad_norm": 3.7761080265045166, "learning_rate": 1.595381402776072e-05, "loss": 0.39178287982940674, "step": 1748 }, { "epoch": 0.21223152530032763, "grad_norm": 1.9571113586425781, "learning_rate": 1.5951357327109694e-05, "loss": 0.10142619907855988, "step": 1749 }, { "epoch": 0.21235286979735468, "grad_norm": 2.1784629821777344, "learning_rate": 1.594890062645867e-05, "loss": 0.6431654691696167, "step": 1750 }, { "epoch": 0.21247421429438176, "grad_norm": 0.5362848043441772, "learning_rate": 1.5946443925807643e-05, "loss": 0.017946042120456696, "step": 1751 }, { "epoch": 0.21259555879140882, "grad_norm": 1.8809864521026611, "learning_rate": 1.5943987225156617e-05, "loss": 0.1440238058567047, "step": 1752 }, { "epoch": 0.21271690328843587, "grad_norm": 2.783003807067871, "learning_rate": 1.594153052450559e-05, "loss": 0.41364654898643494, "step": 1753 }, { "epoch": 0.21283824778546293, "grad_norm": 1.64762282371521, "learning_rate": 1.5939073823854566e-05, "loss": 0.7017732858657837, "step": 1754 }, { "epoch": 0.21295959228248998, "grad_norm": 2.1315536499023438, "learning_rate": 1.593661712320354e-05, "loss": 0.453188419342041, "step": 1755 }, { "epoch": 0.21308093677951706, "grad_norm": 1.585434079170227, "learning_rate": 1.5934160422552514e-05, "loss": 0.08176189661026001, "step": 1756 }, { "epoch": 0.21320228127654411, "grad_norm": 2.495664358139038, "learning_rate": 1.593170372190149e-05, "loss": 0.3820353150367737, "step": 1757 }, { "epoch": 0.21332362577357117, "grad_norm": 1.1123815774917603, "learning_rate": 1.5929247021250463e-05, "loss": 0.033398568630218506, "step": 1758 }, { "epoch": 0.21344497027059822, "grad_norm": 2.053670883178711, "learning_rate": 1.5926790320599437e-05, "loss": 0.2600085437297821, "step": 1759 }, { "epoch": 0.21356631476762528, "grad_norm": 5.9268903732299805, "learning_rate": 1.592433361994841e-05, "loss": 0.4954833984375, "step": 1760 }, { "epoch": 0.21368765926465236, "grad_norm": 1.9209569692611694, "learning_rate": 1.5921876919297385e-05, "loss": 0.5450060963630676, "step": 1761 }, { "epoch": 0.2138090037616794, "grad_norm": 1.479677438735962, "learning_rate": 1.591942021864636e-05, "loss": 0.24173252284526825, "step": 1762 }, { "epoch": 0.21393034825870647, "grad_norm": 3.3846049308776855, "learning_rate": 1.5916963517995334e-05, "loss": 0.47177764773368835, "step": 1763 }, { "epoch": 0.21405169275573352, "grad_norm": 1.9015347957611084, "learning_rate": 1.5914506817344308e-05, "loss": 0.3082675039768219, "step": 1764 }, { "epoch": 0.2141730372527606, "grad_norm": 2.5283203125, "learning_rate": 1.5912050116693282e-05, "loss": 0.16115519404411316, "step": 1765 }, { "epoch": 0.21429438174978765, "grad_norm": 2.1408560276031494, "learning_rate": 1.5909593416042257e-05, "loss": 0.11530830711126328, "step": 1766 }, { "epoch": 0.2144157262468147, "grad_norm": 1.0230469703674316, "learning_rate": 1.590713671539123e-05, "loss": 0.03306128829717636, "step": 1767 }, { "epoch": 0.21453707074384176, "grad_norm": 2.4217331409454346, "learning_rate": 1.5904680014740205e-05, "loss": 0.3269408047199249, "step": 1768 }, { "epoch": 0.21465841524086882, "grad_norm": 0.4477101266384125, "learning_rate": 1.590222331408918e-05, "loss": 0.015322737395763397, "step": 1769 }, { "epoch": 0.2147797597378959, "grad_norm": 2.700812578201294, "learning_rate": 1.5899766613438154e-05, "loss": 0.38714274764060974, "step": 1770 }, { "epoch": 0.21490110423492295, "grad_norm": 0.9410980343818665, "learning_rate": 1.5897309912787128e-05, "loss": 0.017772674560546875, "step": 1771 }, { "epoch": 0.21502244873195, "grad_norm": 1.5146427154541016, "learning_rate": 1.5894853212136102e-05, "loss": 0.08214616775512695, "step": 1772 }, { "epoch": 0.21514379322897706, "grad_norm": 2.555539846420288, "learning_rate": 1.5892396511485077e-05, "loss": 0.41456887125968933, "step": 1773 }, { "epoch": 0.2152651377260041, "grad_norm": 2.356757164001465, "learning_rate": 1.588993981083405e-05, "loss": 0.0947510302066803, "step": 1774 }, { "epoch": 0.2153864822230312, "grad_norm": 2.3131558895111084, "learning_rate": 1.5887483110183025e-05, "loss": 0.2958258390426636, "step": 1775 }, { "epoch": 0.21550782672005825, "grad_norm": 2.141347646713257, "learning_rate": 1.5885026409532e-05, "loss": 0.25594037771224976, "step": 1776 }, { "epoch": 0.2156291712170853, "grad_norm": 0.9824745655059814, "learning_rate": 1.5882569708880974e-05, "loss": 0.04805383086204529, "step": 1777 }, { "epoch": 0.21575051571411236, "grad_norm": 2.266543388366699, "learning_rate": 1.5880113008229948e-05, "loss": 0.3093523681163788, "step": 1778 }, { "epoch": 0.21587186021113944, "grad_norm": 1.2219218015670776, "learning_rate": 1.5877656307578922e-05, "loss": 0.06167016550898552, "step": 1779 }, { "epoch": 0.2159932047081665, "grad_norm": 2.130420207977295, "learning_rate": 1.5875199606927896e-05, "loss": 0.0919126644730568, "step": 1780 }, { "epoch": 0.21611454920519355, "grad_norm": 3.2336394786834717, "learning_rate": 1.5872742906276874e-05, "loss": 0.597897469997406, "step": 1781 }, { "epoch": 0.2162358937022206, "grad_norm": 1.6644809246063232, "learning_rate": 1.5870286205625848e-05, "loss": 0.2237292230129242, "step": 1782 }, { "epoch": 0.21635723819924765, "grad_norm": 2.426450729370117, "learning_rate": 1.5867829504974822e-05, "loss": 0.513863742351532, "step": 1783 }, { "epoch": 0.21647858269627474, "grad_norm": 2.2129006385803223, "learning_rate": 1.5865372804323797e-05, "loss": 0.29766586422920227, "step": 1784 }, { "epoch": 0.2165999271933018, "grad_norm": 2.1841890811920166, "learning_rate": 1.586291610367277e-05, "loss": 0.597987174987793, "step": 1785 }, { "epoch": 0.21672127169032884, "grad_norm": 2.9279837608337402, "learning_rate": 1.5860459403021745e-05, "loss": 0.5400558710098267, "step": 1786 }, { "epoch": 0.2168426161873559, "grad_norm": 1.2487083673477173, "learning_rate": 1.585800270237072e-05, "loss": 0.09377280622720718, "step": 1787 }, { "epoch": 0.21696396068438295, "grad_norm": 3.491300106048584, "learning_rate": 1.5855546001719694e-05, "loss": 0.5781911015510559, "step": 1788 }, { "epoch": 0.21708530518141003, "grad_norm": 1.6507911682128906, "learning_rate": 1.5853089301068668e-05, "loss": 0.20534668862819672, "step": 1789 }, { "epoch": 0.21720664967843709, "grad_norm": 2.0704219341278076, "learning_rate": 1.5850632600417642e-05, "loss": 0.18910321593284607, "step": 1790 }, { "epoch": 0.21732799417546414, "grad_norm": 1.6216387748718262, "learning_rate": 1.5848175899766617e-05, "loss": 0.3603314459323883, "step": 1791 }, { "epoch": 0.2174493386724912, "grad_norm": 2.28153395652771, "learning_rate": 1.584571919911559e-05, "loss": 0.12273404747247696, "step": 1792 }, { "epoch": 0.21757068316951828, "grad_norm": 1.5948500633239746, "learning_rate": 1.5843262498464565e-05, "loss": 0.13908806443214417, "step": 1793 }, { "epoch": 0.21769202766654533, "grad_norm": 1.8439335823059082, "learning_rate": 1.584080579781354e-05, "loss": 0.24253743886947632, "step": 1794 }, { "epoch": 0.21781337216357238, "grad_norm": 2.1192080974578857, "learning_rate": 1.5838349097162514e-05, "loss": 0.27136489748954773, "step": 1795 }, { "epoch": 0.21793471666059944, "grad_norm": 2.2431752681732178, "learning_rate": 1.5835892396511488e-05, "loss": 0.33842700719833374, "step": 1796 }, { "epoch": 0.2180560611576265, "grad_norm": 2.34513783454895, "learning_rate": 1.5833435695860462e-05, "loss": 0.3854665458202362, "step": 1797 }, { "epoch": 0.21817740565465357, "grad_norm": 4.530099868774414, "learning_rate": 1.5830978995209433e-05, "loss": 0.6803076267242432, "step": 1798 }, { "epoch": 0.21829875015168063, "grad_norm": 3.580667495727539, "learning_rate": 1.5828522294558407e-05, "loss": 0.351321280002594, "step": 1799 }, { "epoch": 0.21842009464870768, "grad_norm": 1.670913815498352, "learning_rate": 1.582606559390738e-05, "loss": 0.11861065775156021, "step": 1800 }, { "epoch": 0.21854143914573473, "grad_norm": 1.8805264234542847, "learning_rate": 1.5823608893256356e-05, "loss": 0.19307005405426025, "step": 1801 }, { "epoch": 0.2186627836427618, "grad_norm": 2.305316209793091, "learning_rate": 1.582115219260533e-05, "loss": 0.20600402355194092, "step": 1802 }, { "epoch": 0.21878412813978887, "grad_norm": 1.742647647857666, "learning_rate": 1.5818695491954304e-05, "loss": 0.12964148819446564, "step": 1803 }, { "epoch": 0.21890547263681592, "grad_norm": 2.6212007999420166, "learning_rate": 1.581623879130328e-05, "loss": 0.5329718589782715, "step": 1804 }, { "epoch": 0.21902681713384298, "grad_norm": 2.0527093410491943, "learning_rate": 1.5813782090652253e-05, "loss": 0.27452802658081055, "step": 1805 }, { "epoch": 0.21914816163087003, "grad_norm": 1.9854778051376343, "learning_rate": 1.5811325390001227e-05, "loss": 0.25577837228775024, "step": 1806 }, { "epoch": 0.2192695061278971, "grad_norm": 1.33931565284729, "learning_rate": 1.5808868689350205e-05, "loss": 0.17164123058319092, "step": 1807 }, { "epoch": 0.21939085062492417, "grad_norm": 3.311396360397339, "learning_rate": 1.580641198869918e-05, "loss": 0.4014303684234619, "step": 1808 }, { "epoch": 0.21951219512195122, "grad_norm": 3.0539963245391846, "learning_rate": 1.5803955288048153e-05, "loss": 0.5720174908638, "step": 1809 }, { "epoch": 0.21963353961897827, "grad_norm": 2.067350387573242, "learning_rate": 1.5801498587397127e-05, "loss": 0.16131293773651123, "step": 1810 }, { "epoch": 0.21975488411600533, "grad_norm": 1.9542495012283325, "learning_rate": 1.5799041886746102e-05, "loss": 0.39418622851371765, "step": 1811 }, { "epoch": 0.2198762286130324, "grad_norm": 2.895956039428711, "learning_rate": 1.5796585186095076e-05, "loss": 0.2677726447582245, "step": 1812 }, { "epoch": 0.21999757311005946, "grad_norm": 2.118053674697876, "learning_rate": 1.579412848544405e-05, "loss": 0.4295734465122223, "step": 1813 }, { "epoch": 0.22011891760708652, "grad_norm": 2.0122063159942627, "learning_rate": 1.5791671784793025e-05, "loss": 0.44783979654312134, "step": 1814 }, { "epoch": 0.22024026210411357, "grad_norm": 2.1328415870666504, "learning_rate": 1.5789215084142e-05, "loss": 0.41867291927337646, "step": 1815 }, { "epoch": 0.22036160660114062, "grad_norm": 1.779789686203003, "learning_rate": 1.5786758383490973e-05, "loss": 0.32512974739074707, "step": 1816 }, { "epoch": 0.2204829510981677, "grad_norm": 2.9491400718688965, "learning_rate": 1.5784301682839947e-05, "loss": 0.4705115258693695, "step": 1817 }, { "epoch": 0.22060429559519476, "grad_norm": 1.8154832124710083, "learning_rate": 1.578184498218892e-05, "loss": 0.10341040045022964, "step": 1818 }, { "epoch": 0.22072564009222181, "grad_norm": 1.6541939973831177, "learning_rate": 1.5779388281537896e-05, "loss": 0.22720670700073242, "step": 1819 }, { "epoch": 0.22084698458924887, "grad_norm": 1.5187536478042603, "learning_rate": 1.577693158088687e-05, "loss": 0.44843828678131104, "step": 1820 }, { "epoch": 0.22096832908627595, "grad_norm": 2.146512269973755, "learning_rate": 1.5774474880235844e-05, "loss": 0.11195553839206696, "step": 1821 }, { "epoch": 0.221089673583303, "grad_norm": 1.5938338041305542, "learning_rate": 1.577201817958482e-05, "loss": 0.061287395656108856, "step": 1822 }, { "epoch": 0.22121101808033006, "grad_norm": 2.171705722808838, "learning_rate": 1.5769561478933793e-05, "loss": 0.29137569665908813, "step": 1823 }, { "epoch": 0.2213323625773571, "grad_norm": 2.4083402156829834, "learning_rate": 1.5767104778282767e-05, "loss": 0.28137272596359253, "step": 1824 }, { "epoch": 0.22145370707438417, "grad_norm": 2.548114776611328, "learning_rate": 1.576464807763174e-05, "loss": 0.5311647653579712, "step": 1825 }, { "epoch": 0.22157505157141125, "grad_norm": 2.3285162448883057, "learning_rate": 1.5762191376980716e-05, "loss": 0.6317347884178162, "step": 1826 }, { "epoch": 0.2216963960684383, "grad_norm": 2.0388638973236084, "learning_rate": 1.575973467632969e-05, "loss": 0.27131187915802, "step": 1827 }, { "epoch": 0.22181774056546535, "grad_norm": 2.087533712387085, "learning_rate": 1.5757277975678664e-05, "loss": 0.28966864943504333, "step": 1828 }, { "epoch": 0.2219390850624924, "grad_norm": 2.0379552841186523, "learning_rate": 1.575482127502764e-05, "loss": 0.15862126648426056, "step": 1829 }, { "epoch": 0.22206042955951946, "grad_norm": 2.1554763317108154, "learning_rate": 1.5752364574376613e-05, "loss": 0.4588105380535126, "step": 1830 }, { "epoch": 0.22218177405654654, "grad_norm": 1.7737832069396973, "learning_rate": 1.5749907873725587e-05, "loss": 0.08383680880069733, "step": 1831 }, { "epoch": 0.2223031185535736, "grad_norm": 0.10381510853767395, "learning_rate": 1.574745117307456e-05, "loss": 0.0010423610219731927, "step": 1832 }, { "epoch": 0.22242446305060065, "grad_norm": 1.4465632438659668, "learning_rate": 1.5744994472423535e-05, "loss": 0.2208431214094162, "step": 1833 }, { "epoch": 0.2225458075476277, "grad_norm": 2.1390795707702637, "learning_rate": 1.574253777177251e-05, "loss": 0.40946102142333984, "step": 1834 }, { "epoch": 0.2226671520446548, "grad_norm": 2.1270852088928223, "learning_rate": 1.5740081071121484e-05, "loss": 0.26926371455192566, "step": 1835 }, { "epoch": 0.22278849654168184, "grad_norm": 2.0879156589508057, "learning_rate": 1.5737624370470458e-05, "loss": 0.38591527938842773, "step": 1836 }, { "epoch": 0.2229098410387089, "grad_norm": 2.220813512802124, "learning_rate": 1.5735167669819432e-05, "loss": 0.35801708698272705, "step": 1837 }, { "epoch": 0.22303118553573595, "grad_norm": 2.3260087966918945, "learning_rate": 1.5732710969168407e-05, "loss": 0.2995595335960388, "step": 1838 }, { "epoch": 0.223152530032763, "grad_norm": 1.2784228324890137, "learning_rate": 1.573025426851738e-05, "loss": 0.28344660997390747, "step": 1839 }, { "epoch": 0.22327387452979008, "grad_norm": 2.584073305130005, "learning_rate": 1.5727797567866355e-05, "loss": 0.26841938495635986, "step": 1840 }, { "epoch": 0.22339521902681714, "grad_norm": 3.000821828842163, "learning_rate": 1.572534086721533e-05, "loss": 0.29360127449035645, "step": 1841 }, { "epoch": 0.2235165635238442, "grad_norm": 1.3792856931686401, "learning_rate": 1.5722884166564304e-05, "loss": 0.21720454096794128, "step": 1842 }, { "epoch": 0.22363790802087125, "grad_norm": 1.5156898498535156, "learning_rate": 1.5720427465913278e-05, "loss": 0.07712770253419876, "step": 1843 }, { "epoch": 0.22375925251789833, "grad_norm": 1.8725303411483765, "learning_rate": 1.5717970765262252e-05, "loss": 0.3022371232509613, "step": 1844 }, { "epoch": 0.22388059701492538, "grad_norm": 2.1809723377227783, "learning_rate": 1.5715514064611227e-05, "loss": 0.672953188419342, "step": 1845 }, { "epoch": 0.22400194151195243, "grad_norm": 2.7487642765045166, "learning_rate": 1.57130573639602e-05, "loss": 0.38799959421157837, "step": 1846 }, { "epoch": 0.2241232860089795, "grad_norm": 1.0793215036392212, "learning_rate": 1.571060066330918e-05, "loss": 0.14667223393917084, "step": 1847 }, { "epoch": 0.22424463050600654, "grad_norm": 2.1512701511383057, "learning_rate": 1.5708143962658153e-05, "loss": 0.3110212981700897, "step": 1848 }, { "epoch": 0.22436597500303362, "grad_norm": 0.9467477202415466, "learning_rate": 1.5705687262007127e-05, "loss": 0.03516809642314911, "step": 1849 }, { "epoch": 0.22448731950006068, "grad_norm": 2.599250078201294, "learning_rate": 1.57032305613561e-05, "loss": 0.11458954960107803, "step": 1850 }, { "epoch": 0.22460866399708773, "grad_norm": 1.5941566228866577, "learning_rate": 1.5700773860705075e-05, "loss": 0.18593533337116241, "step": 1851 }, { "epoch": 0.22473000849411479, "grad_norm": 1.8600636720657349, "learning_rate": 1.569831716005405e-05, "loss": 0.12170698493719101, "step": 1852 }, { "epoch": 0.22485135299114184, "grad_norm": 2.5060980319976807, "learning_rate": 1.5695860459403024e-05, "loss": 0.3910202085971832, "step": 1853 }, { "epoch": 0.22497269748816892, "grad_norm": 4.181054592132568, "learning_rate": 1.5693403758751998e-05, "loss": 0.35858675837516785, "step": 1854 }, { "epoch": 0.22509404198519598, "grad_norm": 2.437246084213257, "learning_rate": 1.5690947058100972e-05, "loss": 0.5846027731895447, "step": 1855 }, { "epoch": 0.22521538648222303, "grad_norm": 2.284369468688965, "learning_rate": 1.5688490357449947e-05, "loss": 0.30392757058143616, "step": 1856 }, { "epoch": 0.22533673097925008, "grad_norm": 1.6010268926620483, "learning_rate": 1.568603365679892e-05, "loss": 0.1779608577489853, "step": 1857 }, { "epoch": 0.22545807547627716, "grad_norm": 1.704008936882019, "learning_rate": 1.5683576956147895e-05, "loss": 0.37899062037467957, "step": 1858 }, { "epoch": 0.22557941997330422, "grad_norm": 2.50712251663208, "learning_rate": 1.568112025549687e-05, "loss": 0.5361481308937073, "step": 1859 }, { "epoch": 0.22570076447033127, "grad_norm": 1.7986700534820557, "learning_rate": 1.5678663554845844e-05, "loss": 0.14462506771087646, "step": 1860 }, { "epoch": 0.22582210896735833, "grad_norm": 2.1984047889709473, "learning_rate": 1.5676206854194818e-05, "loss": 0.1491418480873108, "step": 1861 }, { "epoch": 0.22594345346438538, "grad_norm": 2.797156572341919, "learning_rate": 1.5673750153543792e-05, "loss": 0.196271151304245, "step": 1862 }, { "epoch": 0.22606479796141246, "grad_norm": 2.319661855697632, "learning_rate": 1.5671293452892767e-05, "loss": 0.15388545393943787, "step": 1863 }, { "epoch": 0.22618614245843952, "grad_norm": 2.6694042682647705, "learning_rate": 1.566883675224174e-05, "loss": 0.29360467195510864, "step": 1864 }, { "epoch": 0.22630748695546657, "grad_norm": 1.1639213562011719, "learning_rate": 1.5666380051590715e-05, "loss": 0.05175938084721565, "step": 1865 }, { "epoch": 0.22642883145249362, "grad_norm": 2.2814810276031494, "learning_rate": 1.566392335093969e-05, "loss": 0.599335789680481, "step": 1866 }, { "epoch": 0.22655017594952068, "grad_norm": 2.186396837234497, "learning_rate": 1.5661466650288664e-05, "loss": 0.45210182666778564, "step": 1867 }, { "epoch": 0.22667152044654776, "grad_norm": 2.9524850845336914, "learning_rate": 1.5659009949637638e-05, "loss": 0.4272529184818268, "step": 1868 }, { "epoch": 0.2267928649435748, "grad_norm": 1.982879400253296, "learning_rate": 1.5656553248986612e-05, "loss": 0.42066940665245056, "step": 1869 }, { "epoch": 0.22691420944060187, "grad_norm": 1.641210913658142, "learning_rate": 1.5654096548335586e-05, "loss": 0.06164119392633438, "step": 1870 }, { "epoch": 0.22703555393762892, "grad_norm": 2.4468185901641846, "learning_rate": 1.565163984768456e-05, "loss": 0.23071660101413727, "step": 1871 }, { "epoch": 0.227156898434656, "grad_norm": 1.4559755325317383, "learning_rate": 1.5649183147033535e-05, "loss": 0.11746642738580704, "step": 1872 }, { "epoch": 0.22727824293168306, "grad_norm": 2.723864793777466, "learning_rate": 1.564672644638251e-05, "loss": 0.639769434928894, "step": 1873 }, { "epoch": 0.2273995874287101, "grad_norm": 3.133424997329712, "learning_rate": 1.5644269745731483e-05, "loss": 0.20287105441093445, "step": 1874 }, { "epoch": 0.22752093192573716, "grad_norm": 2.657029151916504, "learning_rate": 1.5641813045080458e-05, "loss": 0.3544206917285919, "step": 1875 }, { "epoch": 0.22764227642276422, "grad_norm": 1.662294864654541, "learning_rate": 1.5639356344429432e-05, "loss": 0.08332932740449905, "step": 1876 }, { "epoch": 0.2277636209197913, "grad_norm": 2.7968616485595703, "learning_rate": 1.5636899643778406e-05, "loss": 0.3955608904361725, "step": 1877 }, { "epoch": 0.22788496541681835, "grad_norm": 1.5010136365890503, "learning_rate": 1.563444294312738e-05, "loss": 0.13125336170196533, "step": 1878 }, { "epoch": 0.2280063099138454, "grad_norm": 1.6493403911590576, "learning_rate": 1.5631986242476355e-05, "loss": 0.2688809335231781, "step": 1879 }, { "epoch": 0.22812765441087246, "grad_norm": 3.2882649898529053, "learning_rate": 1.562952954182533e-05, "loss": 0.47903305292129517, "step": 1880 }, { "epoch": 0.22824899890789951, "grad_norm": 2.847830295562744, "learning_rate": 1.5627072841174303e-05, "loss": 0.2155795842409134, "step": 1881 }, { "epoch": 0.2283703434049266, "grad_norm": 1.9242929220199585, "learning_rate": 1.5624616140523277e-05, "loss": 0.35135090351104736, "step": 1882 }, { "epoch": 0.22849168790195365, "grad_norm": 3.96661114692688, "learning_rate": 1.5622159439872252e-05, "loss": 0.11496228724718094, "step": 1883 }, { "epoch": 0.2286130323989807, "grad_norm": 3.5542078018188477, "learning_rate": 1.5619702739221226e-05, "loss": 0.2090737372636795, "step": 1884 }, { "epoch": 0.22873437689600776, "grad_norm": 2.155834436416626, "learning_rate": 1.56172460385702e-05, "loss": 0.1563049852848053, "step": 1885 }, { "epoch": 0.22885572139303484, "grad_norm": 2.6029043197631836, "learning_rate": 1.5614789337919174e-05, "loss": 0.1502380073070526, "step": 1886 }, { "epoch": 0.2289770658900619, "grad_norm": 5.526102542877197, "learning_rate": 1.5612332637268152e-05, "loss": 0.5732027888298035, "step": 1887 }, { "epoch": 0.22909841038708895, "grad_norm": 2.530214548110962, "learning_rate": 1.5609875936617126e-05, "loss": 0.4119799733161926, "step": 1888 }, { "epoch": 0.229219754884116, "grad_norm": 1.4703720808029175, "learning_rate": 1.56074192359661e-05, "loss": 0.14894983172416687, "step": 1889 }, { "epoch": 0.22934109938114305, "grad_norm": 1.975054383277893, "learning_rate": 1.5604962535315075e-05, "loss": 0.20313455164432526, "step": 1890 }, { "epoch": 0.22946244387817014, "grad_norm": 1.664771318435669, "learning_rate": 1.560250583466405e-05, "loss": 0.23257260024547577, "step": 1891 }, { "epoch": 0.2295837883751972, "grad_norm": 2.094118118286133, "learning_rate": 1.5600049134013023e-05, "loss": 0.22651207447052002, "step": 1892 }, { "epoch": 0.22970513287222424, "grad_norm": 1.761396884918213, "learning_rate": 1.5597592433361998e-05, "loss": 0.2868579030036926, "step": 1893 }, { "epoch": 0.2298264773692513, "grad_norm": 2.3177692890167236, "learning_rate": 1.5595135732710972e-05, "loss": 0.1699366718530655, "step": 1894 }, { "epoch": 0.22994782186627835, "grad_norm": 2.4176840782165527, "learning_rate": 1.5592679032059946e-05, "loss": 0.35694047808647156, "step": 1895 }, { "epoch": 0.23006916636330543, "grad_norm": 2.576108455657959, "learning_rate": 1.559022233140892e-05, "loss": 0.213277667760849, "step": 1896 }, { "epoch": 0.2301905108603325, "grad_norm": 1.9640555381774902, "learning_rate": 1.5587765630757895e-05, "loss": 0.18581807613372803, "step": 1897 }, { "epoch": 0.23031185535735954, "grad_norm": 2.2211499214172363, "learning_rate": 1.558530893010687e-05, "loss": 0.3798431158065796, "step": 1898 }, { "epoch": 0.2304331998543866, "grad_norm": 4.352818012237549, "learning_rate": 1.5582852229455843e-05, "loss": 0.49065476655960083, "step": 1899 }, { "epoch": 0.23055454435141368, "grad_norm": 1.9557774066925049, "learning_rate": 1.5580395528804817e-05, "loss": 0.3938944935798645, "step": 1900 }, { "epoch": 0.23067588884844073, "grad_norm": 1.8748207092285156, "learning_rate": 1.5577938828153792e-05, "loss": 0.5157500505447388, "step": 1901 }, { "epoch": 0.23079723334546778, "grad_norm": 1.7990689277648926, "learning_rate": 1.5575482127502766e-05, "loss": 0.2987160384654999, "step": 1902 }, { "epoch": 0.23091857784249484, "grad_norm": 1.7470693588256836, "learning_rate": 1.557302542685174e-05, "loss": 0.16994519531726837, "step": 1903 }, { "epoch": 0.2310399223395219, "grad_norm": 2.477071523666382, "learning_rate": 1.5570568726200715e-05, "loss": 0.48906996846199036, "step": 1904 }, { "epoch": 0.23116126683654897, "grad_norm": 3.2109107971191406, "learning_rate": 1.556811202554969e-05, "loss": 0.5909616947174072, "step": 1905 }, { "epoch": 0.23128261133357603, "grad_norm": 1.8098355531692505, "learning_rate": 1.5565655324898663e-05, "loss": 0.4754788279533386, "step": 1906 }, { "epoch": 0.23140395583060308, "grad_norm": 0.7618553638458252, "learning_rate": 1.5563198624247637e-05, "loss": 0.0402400940656662, "step": 1907 }, { "epoch": 0.23152530032763013, "grad_norm": 2.028736114501953, "learning_rate": 1.556074192359661e-05, "loss": 0.2924274802207947, "step": 1908 }, { "epoch": 0.2316466448246572, "grad_norm": 2.1117236614227295, "learning_rate": 1.5558285222945586e-05, "loss": 0.26550453901290894, "step": 1909 }, { "epoch": 0.23176798932168427, "grad_norm": 2.282334566116333, "learning_rate": 1.555582852229456e-05, "loss": 0.27888938784599304, "step": 1910 }, { "epoch": 0.23188933381871132, "grad_norm": 2.4370946884155273, "learning_rate": 1.5553371821643534e-05, "loss": 0.4627356231212616, "step": 1911 }, { "epoch": 0.23201067831573838, "grad_norm": 0.9429489970207214, "learning_rate": 1.555091512099251e-05, "loss": 0.032071638852357864, "step": 1912 }, { "epoch": 0.23213202281276543, "grad_norm": 1.9986335039138794, "learning_rate": 1.5548458420341483e-05, "loss": 0.30108609795570374, "step": 1913 }, { "epoch": 0.2322533673097925, "grad_norm": 1.6694214344024658, "learning_rate": 1.5546001719690457e-05, "loss": 0.24282769858837128, "step": 1914 }, { "epoch": 0.23237471180681957, "grad_norm": 1.9311928749084473, "learning_rate": 1.554354501903943e-05, "loss": 0.24997027218341827, "step": 1915 }, { "epoch": 0.23249605630384662, "grad_norm": 1.8494151830673218, "learning_rate": 1.5541088318388406e-05, "loss": 0.3138563632965088, "step": 1916 }, { "epoch": 0.23261740080087367, "grad_norm": 2.9844398498535156, "learning_rate": 1.553863161773738e-05, "loss": 0.3842235803604126, "step": 1917 }, { "epoch": 0.23273874529790073, "grad_norm": 1.881724238395691, "learning_rate": 1.5536174917086354e-05, "loss": 0.12849420309066772, "step": 1918 }, { "epoch": 0.2328600897949278, "grad_norm": 2.011195421218872, "learning_rate": 1.553371821643533e-05, "loss": 0.26487109065055847, "step": 1919 }, { "epoch": 0.23298143429195486, "grad_norm": 2.0115201473236084, "learning_rate": 1.5531261515784303e-05, "loss": 0.3657824397087097, "step": 1920 }, { "epoch": 0.23310277878898192, "grad_norm": 2.71881365776062, "learning_rate": 1.5528804815133277e-05, "loss": 0.4061950147151947, "step": 1921 }, { "epoch": 0.23322412328600897, "grad_norm": 1.1871726512908936, "learning_rate": 1.552634811448225e-05, "loss": 0.21809077262878418, "step": 1922 }, { "epoch": 0.23334546778303603, "grad_norm": 1.5245609283447266, "learning_rate": 1.5523891413831225e-05, "loss": 0.1552489697933197, "step": 1923 }, { "epoch": 0.2334668122800631, "grad_norm": 1.666669487953186, "learning_rate": 1.55214347131802e-05, "loss": 0.10551527887582779, "step": 1924 }, { "epoch": 0.23358815677709016, "grad_norm": 1.1993343830108643, "learning_rate": 1.5518978012529174e-05, "loss": 0.06786657869815826, "step": 1925 }, { "epoch": 0.23370950127411722, "grad_norm": 2.8402671813964844, "learning_rate": 1.551652131187815e-05, "loss": 0.2608323097229004, "step": 1926 }, { "epoch": 0.23383084577114427, "grad_norm": 1.042061448097229, "learning_rate": 1.5514064611227126e-05, "loss": 0.03161429241299629, "step": 1927 }, { "epoch": 0.23395219026817135, "grad_norm": 1.6204326152801514, "learning_rate": 1.55116079105761e-05, "loss": 0.28808191418647766, "step": 1928 }, { "epoch": 0.2340735347651984, "grad_norm": 1.9779497385025024, "learning_rate": 1.5509151209925074e-05, "loss": 0.30647483468055725, "step": 1929 }, { "epoch": 0.23419487926222546, "grad_norm": 1.22590970993042, "learning_rate": 1.550669450927405e-05, "loss": 0.07887299358844757, "step": 1930 }, { "epoch": 0.2343162237592525, "grad_norm": 1.897480845451355, "learning_rate": 1.5504237808623023e-05, "loss": 0.45204317569732666, "step": 1931 }, { "epoch": 0.23443756825627957, "grad_norm": 2.028043031692505, "learning_rate": 1.5501781107971997e-05, "loss": 0.16453605890274048, "step": 1932 }, { "epoch": 0.23455891275330665, "grad_norm": 2.369666576385498, "learning_rate": 1.5499324407320968e-05, "loss": 0.3857429325580597, "step": 1933 }, { "epoch": 0.2346802572503337, "grad_norm": 0.9054241180419922, "learning_rate": 1.5496867706669942e-05, "loss": 0.06322108954191208, "step": 1934 }, { "epoch": 0.23480160174736076, "grad_norm": 1.2953579425811768, "learning_rate": 1.5494411006018917e-05, "loss": 0.09342847019433975, "step": 1935 }, { "epoch": 0.2349229462443878, "grad_norm": 3.134705066680908, "learning_rate": 1.549195430536789e-05, "loss": 0.49385154247283936, "step": 1936 }, { "epoch": 0.23504429074141486, "grad_norm": 1.4176846742630005, "learning_rate": 1.5489497604716865e-05, "loss": 0.1273263841867447, "step": 1937 }, { "epoch": 0.23516563523844194, "grad_norm": 0.8674216866493225, "learning_rate": 1.548704090406584e-05, "loss": 0.033561233431100845, "step": 1938 }, { "epoch": 0.235286979735469, "grad_norm": 2.134188413619995, "learning_rate": 1.5484584203414814e-05, "loss": 0.5493992567062378, "step": 1939 }, { "epoch": 0.23540832423249605, "grad_norm": 1.9525834321975708, "learning_rate": 1.5482127502763788e-05, "loss": 0.171519473195076, "step": 1940 }, { "epoch": 0.2355296687295231, "grad_norm": 1.428519368171692, "learning_rate": 1.5479670802112762e-05, "loss": 0.33162039518356323, "step": 1941 }, { "epoch": 0.2356510132265502, "grad_norm": 2.767943859100342, "learning_rate": 1.5477214101461736e-05, "loss": 0.3828122913837433, "step": 1942 }, { "epoch": 0.23577235772357724, "grad_norm": 2.6053030490875244, "learning_rate": 1.547475740081071e-05, "loss": 0.24867229163646698, "step": 1943 }, { "epoch": 0.2358937022206043, "grad_norm": 2.204803228378296, "learning_rate": 1.5472300700159685e-05, "loss": 0.40587592124938965, "step": 1944 }, { "epoch": 0.23601504671763135, "grad_norm": 2.1212689876556396, "learning_rate": 1.546984399950866e-05, "loss": 0.2930845320224762, "step": 1945 }, { "epoch": 0.2361363912146584, "grad_norm": 1.3296728134155273, "learning_rate": 1.5467387298857633e-05, "loss": 0.12678320705890656, "step": 1946 }, { "epoch": 0.23625773571168548, "grad_norm": 2.986150026321411, "learning_rate": 1.5464930598206608e-05, "loss": 0.577646017074585, "step": 1947 }, { "epoch": 0.23637908020871254, "grad_norm": 1.8417999744415283, "learning_rate": 1.5462473897555582e-05, "loss": 0.1973411589860916, "step": 1948 }, { "epoch": 0.2365004247057396, "grad_norm": 2.9808263778686523, "learning_rate": 1.5460017196904556e-05, "loss": 0.3182407319545746, "step": 1949 }, { "epoch": 0.23662176920276665, "grad_norm": 1.6840883493423462, "learning_rate": 1.545756049625353e-05, "loss": 0.22510495781898499, "step": 1950 }, { "epoch": 0.23674311369979373, "grad_norm": 3.430837392807007, "learning_rate": 1.5455103795602505e-05, "loss": 0.2617456018924713, "step": 1951 }, { "epoch": 0.23686445819682078, "grad_norm": 2.058379650115967, "learning_rate": 1.5452647094951482e-05, "loss": 0.2380506843328476, "step": 1952 }, { "epoch": 0.23698580269384784, "grad_norm": 0.942528247833252, "learning_rate": 1.5450190394300457e-05, "loss": 0.14793919026851654, "step": 1953 }, { "epoch": 0.2371071471908749, "grad_norm": 2.2082934379577637, "learning_rate": 1.544773369364943e-05, "loss": 0.1505918800830841, "step": 1954 }, { "epoch": 0.23722849168790194, "grad_norm": 3.3055496215820312, "learning_rate": 1.5445276992998405e-05, "loss": 0.209852397441864, "step": 1955 }, { "epoch": 0.23734983618492903, "grad_norm": 2.0881266593933105, "learning_rate": 1.544282029234738e-05, "loss": 0.2559841275215149, "step": 1956 }, { "epoch": 0.23747118068195608, "grad_norm": 1.9247335195541382, "learning_rate": 1.5440363591696354e-05, "loss": 0.2522317171096802, "step": 1957 }, { "epoch": 0.23759252517898313, "grad_norm": 2.6329305171966553, "learning_rate": 1.5437906891045328e-05, "loss": 0.36928677558898926, "step": 1958 }, { "epoch": 0.2377138696760102, "grad_norm": 1.8593052625656128, "learning_rate": 1.5435450190394302e-05, "loss": 0.15176129341125488, "step": 1959 }, { "epoch": 0.23783521417303724, "grad_norm": 1.2760701179504395, "learning_rate": 1.5432993489743276e-05, "loss": 0.09813748300075531, "step": 1960 }, { "epoch": 0.23795655867006432, "grad_norm": 3.074573516845703, "learning_rate": 1.543053678909225e-05, "loss": 0.241357684135437, "step": 1961 }, { "epoch": 0.23807790316709138, "grad_norm": 2.4001972675323486, "learning_rate": 1.5428080088441225e-05, "loss": 0.33054935932159424, "step": 1962 }, { "epoch": 0.23819924766411843, "grad_norm": 2.2785770893096924, "learning_rate": 1.54256233877902e-05, "loss": 0.35797953605651855, "step": 1963 }, { "epoch": 0.23832059216114548, "grad_norm": 2.6213173866271973, "learning_rate": 1.5423166687139173e-05, "loss": 0.5411311984062195, "step": 1964 }, { "epoch": 0.23844193665817257, "grad_norm": 1.7875020503997803, "learning_rate": 1.5420709986488148e-05, "loss": 0.35356011986732483, "step": 1965 }, { "epoch": 0.23856328115519962, "grad_norm": 1.8374285697937012, "learning_rate": 1.5418253285837122e-05, "loss": 0.10449785739183426, "step": 1966 }, { "epoch": 0.23868462565222667, "grad_norm": 0.0015914601972326636, "learning_rate": 1.5415796585186096e-05, "loss": 4.027661634609103e-05, "step": 1967 }, { "epoch": 0.23880597014925373, "grad_norm": 2.4419233798980713, "learning_rate": 1.541333988453507e-05, "loss": 0.36124101281166077, "step": 1968 }, { "epoch": 0.23892731464628078, "grad_norm": 1.531554937362671, "learning_rate": 1.5410883183884045e-05, "loss": 0.1830349862575531, "step": 1969 }, { "epoch": 0.23904865914330786, "grad_norm": 2.351914644241333, "learning_rate": 1.540842648323302e-05, "loss": 0.315641313791275, "step": 1970 }, { "epoch": 0.23917000364033492, "grad_norm": 1.7109310626983643, "learning_rate": 1.5405969782581993e-05, "loss": 0.3653964698314667, "step": 1971 }, { "epoch": 0.23929134813736197, "grad_norm": 2.335442066192627, "learning_rate": 1.5403513081930967e-05, "loss": 0.3463093638420105, "step": 1972 }, { "epoch": 0.23941269263438902, "grad_norm": 1.2720097303390503, "learning_rate": 1.5401056381279942e-05, "loss": 0.2935695946216583, "step": 1973 }, { "epoch": 0.23953403713141608, "grad_norm": 2.660950183868408, "learning_rate": 1.5398599680628916e-05, "loss": 0.4685511291027069, "step": 1974 }, { "epoch": 0.23965538162844316, "grad_norm": 1.722851276397705, "learning_rate": 1.539614297997789e-05, "loss": 0.13476943969726562, "step": 1975 }, { "epoch": 0.2397767261254702, "grad_norm": 1.1071078777313232, "learning_rate": 1.5393686279326864e-05, "loss": 0.010801886208355427, "step": 1976 }, { "epoch": 0.23989807062249727, "grad_norm": 3.19441819190979, "learning_rate": 1.539122957867584e-05, "loss": 0.4961863160133362, "step": 1977 }, { "epoch": 0.24001941511952432, "grad_norm": 1.6807912588119507, "learning_rate": 1.5388772878024813e-05, "loss": 0.49341505765914917, "step": 1978 }, { "epoch": 0.2401407596165514, "grad_norm": 1.4445300102233887, "learning_rate": 1.5386316177373787e-05, "loss": 0.497768759727478, "step": 1979 }, { "epoch": 0.24026210411357846, "grad_norm": 2.9250829219818115, "learning_rate": 1.538385947672276e-05, "loss": 0.5877270698547363, "step": 1980 }, { "epoch": 0.2403834486106055, "grad_norm": 1.8087142705917358, "learning_rate": 1.5381402776071736e-05, "loss": 0.3921996057033539, "step": 1981 }, { "epoch": 0.24050479310763256, "grad_norm": 2.8521523475646973, "learning_rate": 1.537894607542071e-05, "loss": 0.268110066652298, "step": 1982 }, { "epoch": 0.24062613760465962, "grad_norm": 3.089811086654663, "learning_rate": 1.5376489374769684e-05, "loss": 0.6380707621574402, "step": 1983 }, { "epoch": 0.2407474821016867, "grad_norm": 1.431771159172058, "learning_rate": 1.537403267411866e-05, "loss": 0.035007551312446594, "step": 1984 }, { "epoch": 0.24086882659871375, "grad_norm": 2.0163891315460205, "learning_rate": 1.5371575973467633e-05, "loss": 0.41505101323127747, "step": 1985 }, { "epoch": 0.2409901710957408, "grad_norm": 2.951434850692749, "learning_rate": 1.5369119272816607e-05, "loss": 0.3437270522117615, "step": 1986 }, { "epoch": 0.24111151559276786, "grad_norm": 2.0333163738250732, "learning_rate": 1.536666257216558e-05, "loss": 0.4240697920322418, "step": 1987 }, { "epoch": 0.24123286008979491, "grad_norm": 2.527355670928955, "learning_rate": 1.5364205871514556e-05, "loss": 0.4679162800312042, "step": 1988 }, { "epoch": 0.241354204586822, "grad_norm": 1.9801870584487915, "learning_rate": 1.536174917086353e-05, "loss": 0.5906752347946167, "step": 1989 }, { "epoch": 0.24147554908384905, "grad_norm": 3.036198377609253, "learning_rate": 1.5359292470212504e-05, "loss": 0.35773366689682007, "step": 1990 }, { "epoch": 0.2415968935808761, "grad_norm": 4.780759334564209, "learning_rate": 1.535683576956148e-05, "loss": 0.15195998549461365, "step": 1991 }, { "epoch": 0.24171823807790316, "grad_norm": 1.809084415435791, "learning_rate": 1.5354379068910456e-05, "loss": 0.4638228714466095, "step": 1992 }, { "epoch": 0.24183958257493024, "grad_norm": 3.004784345626831, "learning_rate": 1.535192236825943e-05, "loss": 0.3469436764717102, "step": 1993 }, { "epoch": 0.2419609270719573, "grad_norm": 1.9272234439849854, "learning_rate": 1.5349465667608405e-05, "loss": 0.316535085439682, "step": 1994 }, { "epoch": 0.24208227156898435, "grad_norm": 1.9831242561340332, "learning_rate": 1.534700896695738e-05, "loss": 0.21885432302951813, "step": 1995 }, { "epoch": 0.2422036160660114, "grad_norm": 1.7284822463989258, "learning_rate": 1.5344552266306353e-05, "loss": 0.2139614224433899, "step": 1996 }, { "epoch": 0.24232496056303846, "grad_norm": 4.944525241851807, "learning_rate": 1.5342095565655327e-05, "loss": 0.4055936932563782, "step": 1997 }, { "epoch": 0.24244630506006554, "grad_norm": 4.0585618019104, "learning_rate": 1.53396388650043e-05, "loss": 0.6400541663169861, "step": 1998 }, { "epoch": 0.2425676495570926, "grad_norm": 3.2324647903442383, "learning_rate": 1.5337182164353276e-05, "loss": 0.29065656661987305, "step": 1999 }, { "epoch": 0.24268899405411964, "grad_norm": 2.2216804027557373, "learning_rate": 1.533472546370225e-05, "loss": 0.3676172196865082, "step": 2000 }, { "epoch": 0.2428103385511467, "grad_norm": 2.722198247909546, "learning_rate": 1.5332268763051224e-05, "loss": 0.3258315920829773, "step": 2001 }, { "epoch": 0.24293168304817375, "grad_norm": 2.4563684463500977, "learning_rate": 1.53298120624002e-05, "loss": 0.3917081654071808, "step": 2002 }, { "epoch": 0.24305302754520083, "grad_norm": 2.719510078430176, "learning_rate": 1.5327355361749173e-05, "loss": 0.37439966201782227, "step": 2003 }, { "epoch": 0.2431743720422279, "grad_norm": 1.5176633596420288, "learning_rate": 1.5324898661098147e-05, "loss": 0.28591275215148926, "step": 2004 }, { "epoch": 0.24329571653925494, "grad_norm": 1.9752401113510132, "learning_rate": 1.532244196044712e-05, "loss": 0.4649636745452881, "step": 2005 }, { "epoch": 0.243417061036282, "grad_norm": 1.897263765335083, "learning_rate": 1.5319985259796096e-05, "loss": 0.33651724457740784, "step": 2006 }, { "epoch": 0.24353840553330908, "grad_norm": 2.2642662525177, "learning_rate": 1.531752855914507e-05, "loss": 0.19593358039855957, "step": 2007 }, { "epoch": 0.24365975003033613, "grad_norm": 1.0743077993392944, "learning_rate": 1.5315071858494044e-05, "loss": 0.07533052563667297, "step": 2008 }, { "epoch": 0.24378109452736318, "grad_norm": 1.6566965579986572, "learning_rate": 1.531261515784302e-05, "loss": 0.5573430061340332, "step": 2009 }, { "epoch": 0.24390243902439024, "grad_norm": 1.9225425720214844, "learning_rate": 1.5310158457191993e-05, "loss": 0.3474563956260681, "step": 2010 }, { "epoch": 0.2440237835214173, "grad_norm": 1.7132405042648315, "learning_rate": 1.5307701756540967e-05, "loss": 0.09912965446710587, "step": 2011 }, { "epoch": 0.24414512801844437, "grad_norm": 1.9798846244812012, "learning_rate": 1.530524505588994e-05, "loss": 0.1815606951713562, "step": 2012 }, { "epoch": 0.24426647251547143, "grad_norm": 1.9553190469741821, "learning_rate": 1.5302788355238915e-05, "loss": 0.08286681771278381, "step": 2013 }, { "epoch": 0.24438781701249848, "grad_norm": 3.6544125080108643, "learning_rate": 1.530033165458789e-05, "loss": 0.746819257736206, "step": 2014 }, { "epoch": 0.24450916150952554, "grad_norm": 2.1808061599731445, "learning_rate": 1.5297874953936864e-05, "loss": 0.6136137843132019, "step": 2015 }, { "epoch": 0.2446305060065526, "grad_norm": 1.8095557689666748, "learning_rate": 1.5295418253285838e-05, "loss": 0.3068550229072571, "step": 2016 }, { "epoch": 0.24475185050357967, "grad_norm": 0.7586051225662231, "learning_rate": 1.5292961552634812e-05, "loss": 0.004989935085177422, "step": 2017 }, { "epoch": 0.24487319500060672, "grad_norm": 1.9498528242111206, "learning_rate": 1.5290504851983787e-05, "loss": 0.19964951276779175, "step": 2018 }, { "epoch": 0.24499453949763378, "grad_norm": 2.4533638954162598, "learning_rate": 1.528804815133276e-05, "loss": 0.2574433982372284, "step": 2019 }, { "epoch": 0.24511588399466083, "grad_norm": 1.9604499340057373, "learning_rate": 1.5285591450681735e-05, "loss": 0.14973944425582886, "step": 2020 }, { "epoch": 0.24523722849168791, "grad_norm": 2.5714564323425293, "learning_rate": 1.528313475003071e-05, "loss": 0.33707791566848755, "step": 2021 }, { "epoch": 0.24535857298871497, "grad_norm": 1.7424302101135254, "learning_rate": 1.5280678049379684e-05, "loss": 0.11994636058807373, "step": 2022 }, { "epoch": 0.24547991748574202, "grad_norm": 1.7286163568496704, "learning_rate": 1.5278221348728658e-05, "loss": 0.20991051197052002, "step": 2023 }, { "epoch": 0.24560126198276908, "grad_norm": 2.8101806640625, "learning_rate": 1.5275764648077632e-05, "loss": 0.5377737283706665, "step": 2024 }, { "epoch": 0.24572260647979613, "grad_norm": 2.4450466632843018, "learning_rate": 1.5273307947426607e-05, "loss": 0.2972002625465393, "step": 2025 }, { "epoch": 0.2458439509768232, "grad_norm": 2.2055904865264893, "learning_rate": 1.527085124677558e-05, "loss": 0.4898790121078491, "step": 2026 }, { "epoch": 0.24596529547385027, "grad_norm": 1.6781805753707886, "learning_rate": 1.5268394546124555e-05, "loss": 0.227199986577034, "step": 2027 }, { "epoch": 0.24608663997087732, "grad_norm": 1.2776950597763062, "learning_rate": 1.526593784547353e-05, "loss": 0.05490413308143616, "step": 2028 }, { "epoch": 0.24620798446790437, "grad_norm": 1.8420385122299194, "learning_rate": 1.5263481144822504e-05, "loss": 0.5011781454086304, "step": 2029 }, { "epoch": 0.24632932896493143, "grad_norm": 1.7983120679855347, "learning_rate": 1.5261024444171478e-05, "loss": 0.30248773097991943, "step": 2030 }, { "epoch": 0.2464506734619585, "grad_norm": 1.9787551164627075, "learning_rate": 1.5258567743520454e-05, "loss": 0.27914515137672424, "step": 2031 }, { "epoch": 0.24657201795898556, "grad_norm": 2.571229934692383, "learning_rate": 1.5256111042869428e-05, "loss": 0.26840049028396606, "step": 2032 }, { "epoch": 0.24669336245601262, "grad_norm": 2.642086982727051, "learning_rate": 1.5253654342218402e-05, "loss": 0.6005504727363586, "step": 2033 }, { "epoch": 0.24681470695303967, "grad_norm": 2.7929725646972656, "learning_rate": 1.5251197641567377e-05, "loss": 0.20075125992298126, "step": 2034 }, { "epoch": 0.24693605145006675, "grad_norm": 1.746235966682434, "learning_rate": 1.524874094091635e-05, "loss": 0.21847037971019745, "step": 2035 }, { "epoch": 0.2470573959470938, "grad_norm": 2.4493091106414795, "learning_rate": 1.5246284240265325e-05, "loss": 0.22630271315574646, "step": 2036 }, { "epoch": 0.24717874044412086, "grad_norm": 3.2244346141815186, "learning_rate": 1.52438275396143e-05, "loss": 0.3294290006160736, "step": 2037 }, { "epoch": 0.2473000849411479, "grad_norm": 2.499692916870117, "learning_rate": 1.5241370838963274e-05, "loss": 0.0979139655828476, "step": 2038 }, { "epoch": 0.24742142943817497, "grad_norm": 1.6712913513183594, "learning_rate": 1.5238914138312248e-05, "loss": 0.03487418591976166, "step": 2039 }, { "epoch": 0.24754277393520205, "grad_norm": 2.223564863204956, "learning_rate": 1.5236457437661222e-05, "loss": 0.22165976464748383, "step": 2040 }, { "epoch": 0.2476641184322291, "grad_norm": 2.2923712730407715, "learning_rate": 1.5234000737010198e-05, "loss": 0.48853620886802673, "step": 2041 }, { "epoch": 0.24778546292925616, "grad_norm": 1.7251356840133667, "learning_rate": 1.5231544036359172e-05, "loss": 0.36525624990463257, "step": 2042 }, { "epoch": 0.2479068074262832, "grad_norm": 0.03325257450342178, "learning_rate": 1.5229087335708147e-05, "loss": 0.0005018762894906104, "step": 2043 }, { "epoch": 0.2480281519233103, "grad_norm": 2.6671509742736816, "learning_rate": 1.522663063505712e-05, "loss": 0.2062726467847824, "step": 2044 }, { "epoch": 0.24814949642033735, "grad_norm": 1.7973912954330444, "learning_rate": 1.5224173934406095e-05, "loss": 0.08589420467615128, "step": 2045 }, { "epoch": 0.2482708409173644, "grad_norm": 0.27702754735946655, "learning_rate": 1.522171723375507e-05, "loss": 0.010546392761170864, "step": 2046 }, { "epoch": 0.24839218541439145, "grad_norm": 1.957955241203308, "learning_rate": 1.5219260533104044e-05, "loss": 0.13728350400924683, "step": 2047 }, { "epoch": 0.2485135299114185, "grad_norm": 0.853355348110199, "learning_rate": 1.5216803832453018e-05, "loss": 0.037357147783041, "step": 2048 }, { "epoch": 0.2486348744084456, "grad_norm": 2.572986364364624, "learning_rate": 1.5214347131801992e-05, "loss": 0.4272196888923645, "step": 2049 }, { "epoch": 0.24875621890547264, "grad_norm": 2.455547571182251, "learning_rate": 1.5211890431150966e-05, "loss": 0.1457350105047226, "step": 2050 }, { "epoch": 0.2488775634024997, "grad_norm": 3.401978015899658, "learning_rate": 1.520943373049994e-05, "loss": 0.33558374643325806, "step": 2051 }, { "epoch": 0.24899890789952675, "grad_norm": 3.3851821422576904, "learning_rate": 1.5206977029848915e-05, "loss": 0.2722301483154297, "step": 2052 }, { "epoch": 0.2491202523965538, "grad_norm": 1.254441261291504, "learning_rate": 1.5204520329197889e-05, "loss": 0.10501188039779663, "step": 2053 }, { "epoch": 0.24924159689358089, "grad_norm": 2.4674789905548096, "learning_rate": 1.5202063628546863e-05, "loss": 0.4321037232875824, "step": 2054 }, { "epoch": 0.24936294139060794, "grad_norm": 2.63444185256958, "learning_rate": 1.5199606927895838e-05, "loss": 0.8152517080307007, "step": 2055 }, { "epoch": 0.249484285887635, "grad_norm": 2.0867371559143066, "learning_rate": 1.5197150227244812e-05, "loss": 0.27917084097862244, "step": 2056 }, { "epoch": 0.24960563038466205, "grad_norm": 1.4733816385269165, "learning_rate": 1.5194693526593786e-05, "loss": 0.08613657206296921, "step": 2057 }, { "epoch": 0.24972697488168913, "grad_norm": 2.284787893295288, "learning_rate": 1.519223682594276e-05, "loss": 0.20989270508289337, "step": 2058 }, { "epoch": 0.24984831937871618, "grad_norm": 2.9490373134613037, "learning_rate": 1.5189780125291735e-05, "loss": 0.30377861857414246, "step": 2059 }, { "epoch": 0.24996966387574324, "grad_norm": 1.8254625797271729, "learning_rate": 1.5187323424640709e-05, "loss": 0.13897734880447388, "step": 2060 }, { "epoch": 0.2500910083727703, "grad_norm": 2.1339690685272217, "learning_rate": 1.5184866723989685e-05, "loss": 0.1870119720697403, "step": 2061 }, { "epoch": 0.25021235286979737, "grad_norm": 2.154693365097046, "learning_rate": 1.518241002333866e-05, "loss": 0.32715970277786255, "step": 2062 }, { "epoch": 0.2503336973668244, "grad_norm": 2.3708536624908447, "learning_rate": 1.5179953322687633e-05, "loss": 0.5419004559516907, "step": 2063 }, { "epoch": 0.2504550418638515, "grad_norm": 2.2447872161865234, "learning_rate": 1.5177496622036608e-05, "loss": 0.3017771244049072, "step": 2064 }, { "epoch": 0.25057638636087853, "grad_norm": 2.2402448654174805, "learning_rate": 1.5175039921385582e-05, "loss": 0.31448161602020264, "step": 2065 }, { "epoch": 0.2506977308579056, "grad_norm": 2.5074141025543213, "learning_rate": 1.5172583220734556e-05, "loss": 0.6035457253456116, "step": 2066 }, { "epoch": 0.25081907535493264, "grad_norm": 2.2186553478240967, "learning_rate": 1.517012652008353e-05, "loss": 0.586137056350708, "step": 2067 }, { "epoch": 0.2509404198519597, "grad_norm": 2.2771787643432617, "learning_rate": 1.5167669819432505e-05, "loss": 0.24772994220256805, "step": 2068 }, { "epoch": 0.25106176434898675, "grad_norm": 1.10565984249115, "learning_rate": 1.5165213118781477e-05, "loss": 0.04752576723694801, "step": 2069 }, { "epoch": 0.25118310884601386, "grad_norm": 1.4915709495544434, "learning_rate": 1.5162756418130452e-05, "loss": 0.2911011576652527, "step": 2070 }, { "epoch": 0.2513044533430409, "grad_norm": 3.409348249435425, "learning_rate": 1.5160299717479426e-05, "loss": 0.5246961712837219, "step": 2071 }, { "epoch": 0.25142579784006797, "grad_norm": 2.192699432373047, "learning_rate": 1.51578430168284e-05, "loss": 0.60477614402771, "step": 2072 }, { "epoch": 0.251547142337095, "grad_norm": 2.0602874755859375, "learning_rate": 1.5155386316177374e-05, "loss": 0.26899605989456177, "step": 2073 }, { "epoch": 0.2516684868341221, "grad_norm": 4.965397834777832, "learning_rate": 1.5152929615526349e-05, "loss": 0.27953648567199707, "step": 2074 }, { "epoch": 0.2517898313311491, "grad_norm": 1.779467225074768, "learning_rate": 1.5150472914875323e-05, "loss": 0.2358415126800537, "step": 2075 }, { "epoch": 0.2519111758281762, "grad_norm": 1.53863525390625, "learning_rate": 1.5148016214224297e-05, "loss": 0.0970386490225792, "step": 2076 }, { "epoch": 0.25203252032520324, "grad_norm": 2.409815788269043, "learning_rate": 1.5145559513573271e-05, "loss": 0.29230624437332153, "step": 2077 }, { "epoch": 0.2521538648222303, "grad_norm": 2.354555130004883, "learning_rate": 1.5143102812922246e-05, "loss": 0.139778271317482, "step": 2078 }, { "epoch": 0.2522752093192574, "grad_norm": 2.3420677185058594, "learning_rate": 1.514064611227122e-05, "loss": 0.31483131647109985, "step": 2079 }, { "epoch": 0.25239655381628445, "grad_norm": 1.7981594800949097, "learning_rate": 1.5138189411620194e-05, "loss": 0.18265822529792786, "step": 2080 }, { "epoch": 0.2525178983133115, "grad_norm": 1.9754524230957031, "learning_rate": 1.5135732710969168e-05, "loss": 0.21006661653518677, "step": 2081 }, { "epoch": 0.25263924281033856, "grad_norm": 2.199647903442383, "learning_rate": 1.5133276010318143e-05, "loss": 0.3640359938144684, "step": 2082 }, { "epoch": 0.2527605873073656, "grad_norm": 2.174180030822754, "learning_rate": 1.5130819309667117e-05, "loss": 0.2530081868171692, "step": 2083 }, { "epoch": 0.25288193180439267, "grad_norm": 2.420414686203003, "learning_rate": 1.5128362609016091e-05, "loss": 0.2343895435333252, "step": 2084 }, { "epoch": 0.2530032763014197, "grad_norm": 2.9052069187164307, "learning_rate": 1.5125905908365065e-05, "loss": 0.2803361713886261, "step": 2085 }, { "epoch": 0.2531246207984468, "grad_norm": 2.1363558769226074, "learning_rate": 1.512344920771404e-05, "loss": 0.2120419144630432, "step": 2086 }, { "epoch": 0.25324596529547383, "grad_norm": 3.7581350803375244, "learning_rate": 1.5120992507063016e-05, "loss": 0.5633907318115234, "step": 2087 }, { "epoch": 0.2533673097925009, "grad_norm": 2.090036392211914, "learning_rate": 1.511853580641199e-05, "loss": 0.6909904479980469, "step": 2088 }, { "epoch": 0.253488654289528, "grad_norm": 1.761902093887329, "learning_rate": 1.5116079105760964e-05, "loss": 0.30952948331832886, "step": 2089 }, { "epoch": 0.25360999878655505, "grad_norm": 2.110933780670166, "learning_rate": 1.5113622405109938e-05, "loss": 0.286069393157959, "step": 2090 }, { "epoch": 0.2537313432835821, "grad_norm": 1.956461787223816, "learning_rate": 1.5111165704458913e-05, "loss": 0.13381141424179077, "step": 2091 }, { "epoch": 0.25385268778060915, "grad_norm": 1.5321189165115356, "learning_rate": 1.5108709003807887e-05, "loss": 0.10883570462465286, "step": 2092 }, { "epoch": 0.2539740322776362, "grad_norm": 1.6738094091415405, "learning_rate": 1.5106252303156861e-05, "loss": 0.3568721115589142, "step": 2093 }, { "epoch": 0.25409537677466326, "grad_norm": 3.5300426483154297, "learning_rate": 1.5103795602505835e-05, "loss": 0.15414369106292725, "step": 2094 }, { "epoch": 0.2542167212716903, "grad_norm": 2.9318861961364746, "learning_rate": 1.510133890185481e-05, "loss": 0.2508835196495056, "step": 2095 }, { "epoch": 0.25433806576871737, "grad_norm": 1.9397698640823364, "learning_rate": 1.5098882201203784e-05, "loss": 0.43463265895843506, "step": 2096 }, { "epoch": 0.2544594102657444, "grad_norm": 3.07175612449646, "learning_rate": 1.5096425500552758e-05, "loss": 0.4037576913833618, "step": 2097 }, { "epoch": 0.25458075476277153, "grad_norm": 2.9463930130004883, "learning_rate": 1.5093968799901732e-05, "loss": 0.40242353081703186, "step": 2098 }, { "epoch": 0.2547020992597986, "grad_norm": 3.273380994796753, "learning_rate": 1.5091512099250707e-05, "loss": 0.4325547218322754, "step": 2099 }, { "epoch": 0.25482344375682564, "grad_norm": 3.8387303352355957, "learning_rate": 1.5089055398599681e-05, "loss": 0.19408497214317322, "step": 2100 }, { "epoch": 0.2549447882538527, "grad_norm": 1.8059979677200317, "learning_rate": 1.5086598697948655e-05, "loss": 0.3167189955711365, "step": 2101 }, { "epoch": 0.25506613275087975, "grad_norm": 2.231421947479248, "learning_rate": 1.508414199729763e-05, "loss": 0.3446089029312134, "step": 2102 }, { "epoch": 0.2551874772479068, "grad_norm": 2.24876070022583, "learning_rate": 1.5081685296646604e-05, "loss": 0.27822139859199524, "step": 2103 }, { "epoch": 0.25530882174493386, "grad_norm": 1.2263654470443726, "learning_rate": 1.5079228595995578e-05, "loss": 0.1035546362400055, "step": 2104 }, { "epoch": 0.2554301662419609, "grad_norm": 2.2528395652770996, "learning_rate": 1.5076771895344552e-05, "loss": 0.2142188996076584, "step": 2105 }, { "epoch": 0.25555151073898796, "grad_norm": 2.0527801513671875, "learning_rate": 1.5074315194693527e-05, "loss": 0.17007258534431458, "step": 2106 }, { "epoch": 0.2556728552360151, "grad_norm": 1.6478774547576904, "learning_rate": 1.5071858494042502e-05, "loss": 0.10429070889949799, "step": 2107 }, { "epoch": 0.2557941997330421, "grad_norm": 1.3817076683044434, "learning_rate": 1.5069401793391477e-05, "loss": 0.08755984902381897, "step": 2108 }, { "epoch": 0.2559155442300692, "grad_norm": 3.489661931991577, "learning_rate": 1.5066945092740451e-05, "loss": 0.6258473992347717, "step": 2109 }, { "epoch": 0.25603688872709623, "grad_norm": 2.4805729389190674, "learning_rate": 1.5064488392089425e-05, "loss": 0.38748401403427124, "step": 2110 }, { "epoch": 0.2561582332241233, "grad_norm": 2.485771656036377, "learning_rate": 1.50620316914384e-05, "loss": 0.6375660300254822, "step": 2111 }, { "epoch": 0.25627957772115034, "grad_norm": 2.3500325679779053, "learning_rate": 1.5059574990787374e-05, "loss": 0.43053320050239563, "step": 2112 }, { "epoch": 0.2564009222181774, "grad_norm": 0.9563068747520447, "learning_rate": 1.5057118290136348e-05, "loss": 0.068636953830719, "step": 2113 }, { "epoch": 0.25652226671520445, "grad_norm": 1.7979881763458252, "learning_rate": 1.5054661589485322e-05, "loss": 0.2505972683429718, "step": 2114 }, { "epoch": 0.2566436112122315, "grad_norm": 1.7445679903030396, "learning_rate": 1.5052204888834297e-05, "loss": 0.15084236860275269, "step": 2115 }, { "epoch": 0.25676495570925856, "grad_norm": 1.9328049421310425, "learning_rate": 1.504974818818327e-05, "loss": 0.293321430683136, "step": 2116 }, { "epoch": 0.25688630020628567, "grad_norm": 0.001952366204932332, "learning_rate": 1.5047291487532245e-05, "loss": 3.136243685730733e-05, "step": 2117 }, { "epoch": 0.2570076447033127, "grad_norm": 1.4848837852478027, "learning_rate": 1.504483478688122e-05, "loss": 0.31598547101020813, "step": 2118 }, { "epoch": 0.2571289892003398, "grad_norm": 1.692036509513855, "learning_rate": 1.5042378086230194e-05, "loss": 0.3037584722042084, "step": 2119 }, { "epoch": 0.25725033369736683, "grad_norm": 2.918186664581299, "learning_rate": 1.5039921385579168e-05, "loss": 0.25492578744888306, "step": 2120 }, { "epoch": 0.2573716781943939, "grad_norm": 1.0130181312561035, "learning_rate": 1.5037464684928142e-05, "loss": 0.03894425556063652, "step": 2121 }, { "epoch": 0.25749302269142094, "grad_norm": 1.9439772367477417, "learning_rate": 1.5035007984277116e-05, "loss": 0.4366956651210785, "step": 2122 }, { "epoch": 0.257614367188448, "grad_norm": 2.389458656311035, "learning_rate": 1.503255128362609e-05, "loss": 0.2752874195575714, "step": 2123 }, { "epoch": 0.25773571168547504, "grad_norm": 3.298961639404297, "learning_rate": 1.5030094582975065e-05, "loss": 0.21089746057987213, "step": 2124 }, { "epoch": 0.2578570561825021, "grad_norm": 1.609940767288208, "learning_rate": 1.5027637882324039e-05, "loss": 0.4226279854774475, "step": 2125 }, { "epoch": 0.2579784006795292, "grad_norm": 2.3028876781463623, "learning_rate": 1.5025181181673013e-05, "loss": 0.31807318329811096, "step": 2126 }, { "epoch": 0.25809974517655626, "grad_norm": 1.385694146156311, "learning_rate": 1.502272448102199e-05, "loss": 0.12783929705619812, "step": 2127 }, { "epoch": 0.2582210896735833, "grad_norm": 2.2158091068267822, "learning_rate": 1.5020267780370964e-05, "loss": 0.3851372003555298, "step": 2128 }, { "epoch": 0.25834243417061037, "grad_norm": 1.4377362728118896, "learning_rate": 1.5017811079719938e-05, "loss": 0.11848616600036621, "step": 2129 }, { "epoch": 0.2584637786676374, "grad_norm": 3.7586557865142822, "learning_rate": 1.5015354379068912e-05, "loss": 0.4604339599609375, "step": 2130 }, { "epoch": 0.2585851231646645, "grad_norm": 3.421212673187256, "learning_rate": 1.5012897678417886e-05, "loss": 0.4293072819709778, "step": 2131 }, { "epoch": 0.25870646766169153, "grad_norm": 1.0327222347259521, "learning_rate": 1.501044097776686e-05, "loss": 0.03275943547487259, "step": 2132 }, { "epoch": 0.2588278121587186, "grad_norm": 2.4123880863189697, "learning_rate": 1.5007984277115835e-05, "loss": 0.4415239691734314, "step": 2133 }, { "epoch": 0.25894915665574564, "grad_norm": 3.552459955215454, "learning_rate": 1.5005527576464809e-05, "loss": 0.25388628244400024, "step": 2134 }, { "epoch": 0.25907050115277275, "grad_norm": 2.9123752117156982, "learning_rate": 1.5003070875813783e-05, "loss": 0.4704461395740509, "step": 2135 }, { "epoch": 0.2591918456497998, "grad_norm": 2.8575401306152344, "learning_rate": 1.5000614175162758e-05, "loss": 0.2883625626564026, "step": 2136 }, { "epoch": 0.25931319014682686, "grad_norm": 2.4062740802764893, "learning_rate": 1.4998157474511732e-05, "loss": 0.6726546883583069, "step": 2137 }, { "epoch": 0.2594345346438539, "grad_norm": 1.805238962173462, "learning_rate": 1.4995700773860706e-05, "loss": 0.1653829663991928, "step": 2138 }, { "epoch": 0.25955587914088096, "grad_norm": 1.1520496606826782, "learning_rate": 1.499324407320968e-05, "loss": 0.14368143677711487, "step": 2139 }, { "epoch": 0.259677223637908, "grad_norm": 1.4968868494033813, "learning_rate": 1.4990787372558655e-05, "loss": 0.24565939605236053, "step": 2140 }, { "epoch": 0.25979856813493507, "grad_norm": 1.9920177459716797, "learning_rate": 1.4988330671907629e-05, "loss": 0.2082982361316681, "step": 2141 }, { "epoch": 0.2599199126319621, "grad_norm": 2.7933566570281982, "learning_rate": 1.4985873971256603e-05, "loss": 0.9215075969696045, "step": 2142 }, { "epoch": 0.2600412571289892, "grad_norm": 1.636723518371582, "learning_rate": 1.4983417270605577e-05, "loss": 0.08533577620983124, "step": 2143 }, { "epoch": 0.2601626016260163, "grad_norm": 3.587369441986084, "learning_rate": 1.4980960569954552e-05, "loss": 0.6961395144462585, "step": 2144 }, { "epoch": 0.26028394612304334, "grad_norm": 2.541285991668701, "learning_rate": 1.4978503869303526e-05, "loss": 0.0697111263871193, "step": 2145 }, { "epoch": 0.2604052906200704, "grad_norm": 1.8340191841125488, "learning_rate": 1.49760471686525e-05, "loss": 0.14525513350963593, "step": 2146 }, { "epoch": 0.26052663511709745, "grad_norm": 2.301511526107788, "learning_rate": 1.4973590468001476e-05, "loss": 0.44270119071006775, "step": 2147 }, { "epoch": 0.2606479796141245, "grad_norm": 2.5070643424987793, "learning_rate": 1.497113376735045e-05, "loss": 0.47159114480018616, "step": 2148 }, { "epoch": 0.26076932411115156, "grad_norm": 2.2494490146636963, "learning_rate": 1.4968677066699425e-05, "loss": 0.22947777807712555, "step": 2149 }, { "epoch": 0.2608906686081786, "grad_norm": 2.050786018371582, "learning_rate": 1.4966220366048399e-05, "loss": 0.49438372254371643, "step": 2150 }, { "epoch": 0.26101201310520566, "grad_norm": 2.4287447929382324, "learning_rate": 1.4963763665397373e-05, "loss": 0.20165540277957916, "step": 2151 }, { "epoch": 0.2611333576022327, "grad_norm": 2.0431525707244873, "learning_rate": 1.4961306964746347e-05, "loss": 0.2393646389245987, "step": 2152 }, { "epoch": 0.2612547020992598, "grad_norm": 0.9281308650970459, "learning_rate": 1.4958850264095322e-05, "loss": 0.020468752831220627, "step": 2153 }, { "epoch": 0.2613760465962869, "grad_norm": 2.2387876510620117, "learning_rate": 1.4956393563444296e-05, "loss": 0.27769941091537476, "step": 2154 }, { "epoch": 0.26149739109331394, "grad_norm": 2.1103501319885254, "learning_rate": 1.495393686279327e-05, "loss": 0.4620177745819092, "step": 2155 }, { "epoch": 0.261618735590341, "grad_norm": 2.1693103313446045, "learning_rate": 1.4951480162142245e-05, "loss": 0.488872230052948, "step": 2156 }, { "epoch": 0.26174008008736804, "grad_norm": 2.1997673511505127, "learning_rate": 1.4949023461491219e-05, "loss": 0.2967035472393036, "step": 2157 }, { "epoch": 0.2618614245843951, "grad_norm": 1.9877324104309082, "learning_rate": 1.4946566760840193e-05, "loss": 0.39621827006340027, "step": 2158 }, { "epoch": 0.26198276908142215, "grad_norm": 2.1811094284057617, "learning_rate": 1.4944110060189167e-05, "loss": 0.4217057228088379, "step": 2159 }, { "epoch": 0.2621041135784492, "grad_norm": 1.8478000164031982, "learning_rate": 1.4941653359538142e-05, "loss": 0.25536009669303894, "step": 2160 }, { "epoch": 0.26222545807547626, "grad_norm": 3.0081064701080322, "learning_rate": 1.4939196658887116e-05, "loss": 0.44147688150405884, "step": 2161 }, { "epoch": 0.2623468025725033, "grad_norm": 3.5933215618133545, "learning_rate": 1.493673995823609e-05, "loss": 0.29955124855041504, "step": 2162 }, { "epoch": 0.2624681470695304, "grad_norm": 2.178781270980835, "learning_rate": 1.4934283257585064e-05, "loss": 0.31088438630104065, "step": 2163 }, { "epoch": 0.2625894915665575, "grad_norm": 0.008322947658598423, "learning_rate": 1.4931826556934039e-05, "loss": 0.0001154024648712948, "step": 2164 }, { "epoch": 0.26271083606358453, "grad_norm": 2.786419153213501, "learning_rate": 1.4929369856283013e-05, "loss": 0.355583518743515, "step": 2165 }, { "epoch": 0.2628321805606116, "grad_norm": 2.188692808151245, "learning_rate": 1.4926913155631989e-05, "loss": 0.05528976395726204, "step": 2166 }, { "epoch": 0.26295352505763864, "grad_norm": 1.79579496383667, "learning_rate": 1.4924456454980963e-05, "loss": 0.27237698435783386, "step": 2167 }, { "epoch": 0.2630748695546657, "grad_norm": 1.7321343421936035, "learning_rate": 1.4921999754329937e-05, "loss": 0.12968100607395172, "step": 2168 }, { "epoch": 0.26319621405169275, "grad_norm": 3.122284173965454, "learning_rate": 1.4919543053678912e-05, "loss": 0.17153069376945496, "step": 2169 }, { "epoch": 0.2633175585487198, "grad_norm": 1.6060526371002197, "learning_rate": 1.4917086353027886e-05, "loss": 0.09631694853305817, "step": 2170 }, { "epoch": 0.26343890304574685, "grad_norm": 2.729292154312134, "learning_rate": 1.491462965237686e-05, "loss": 0.16976892948150635, "step": 2171 }, { "epoch": 0.26356024754277396, "grad_norm": 1.546196699142456, "learning_rate": 1.4912172951725834e-05, "loss": 0.11652785539627075, "step": 2172 }, { "epoch": 0.263681592039801, "grad_norm": 1.605636715888977, "learning_rate": 1.4909716251074809e-05, "loss": 0.17108720541000366, "step": 2173 }, { "epoch": 0.26380293653682807, "grad_norm": 1.3509597778320312, "learning_rate": 1.4907259550423783e-05, "loss": 0.11237197369337082, "step": 2174 }, { "epoch": 0.2639242810338551, "grad_norm": 1.7617028951644897, "learning_rate": 1.4904802849772757e-05, "loss": 0.3738328516483307, "step": 2175 }, { "epoch": 0.2640456255308822, "grad_norm": 2.5859122276306152, "learning_rate": 1.4902346149121731e-05, "loss": 0.14145395159721375, "step": 2176 }, { "epoch": 0.26416697002790923, "grad_norm": 2.6692991256713867, "learning_rate": 1.4899889448470706e-05, "loss": 0.2799850404262543, "step": 2177 }, { "epoch": 0.2642883145249363, "grad_norm": 2.333256244659424, "learning_rate": 1.489743274781968e-05, "loss": 0.76203852891922, "step": 2178 }, { "epoch": 0.26440965902196334, "grad_norm": 0.8633162379264832, "learning_rate": 1.4894976047168654e-05, "loss": 0.0584387369453907, "step": 2179 }, { "epoch": 0.2645310035189904, "grad_norm": 3.16007137298584, "learning_rate": 1.4892519346517628e-05, "loss": 0.3512621223926544, "step": 2180 }, { "epoch": 0.26465234801601745, "grad_norm": 1.7493587732315063, "learning_rate": 1.4890062645866603e-05, "loss": 0.5163399577140808, "step": 2181 }, { "epoch": 0.26477369251304456, "grad_norm": 2.6557040214538574, "learning_rate": 1.4887605945215577e-05, "loss": 0.26694709062576294, "step": 2182 }, { "epoch": 0.2648950370100716, "grad_norm": 2.3680715560913086, "learning_rate": 1.4885149244564551e-05, "loss": 0.23716874420642853, "step": 2183 }, { "epoch": 0.26501638150709866, "grad_norm": 3.0584466457366943, "learning_rate": 1.4882692543913525e-05, "loss": 0.5385470986366272, "step": 2184 }, { "epoch": 0.2651377260041257, "grad_norm": 1.8691861629486084, "learning_rate": 1.48802358432625e-05, "loss": 0.09475544095039368, "step": 2185 }, { "epoch": 0.26525907050115277, "grad_norm": 2.55315899848938, "learning_rate": 1.4877779142611476e-05, "loss": 0.21837680041790009, "step": 2186 }, { "epoch": 0.2653804149981798, "grad_norm": 2.546182632446289, "learning_rate": 1.487532244196045e-05, "loss": 0.37429964542388916, "step": 2187 }, { "epoch": 0.2655017594952069, "grad_norm": 2.265850305557251, "learning_rate": 1.4872865741309424e-05, "loss": 0.4278375506401062, "step": 2188 }, { "epoch": 0.26562310399223393, "grad_norm": 2.3295276165008545, "learning_rate": 1.4870409040658398e-05, "loss": 0.3769855499267578, "step": 2189 }, { "epoch": 0.265744448489261, "grad_norm": 1.795820951461792, "learning_rate": 1.4867952340007373e-05, "loss": 0.2219768464565277, "step": 2190 }, { "epoch": 0.2658657929862881, "grad_norm": 2.0393049716949463, "learning_rate": 1.4865495639356347e-05, "loss": 0.2658393085002899, "step": 2191 }, { "epoch": 0.26598713748331515, "grad_norm": 2.258559465408325, "learning_rate": 1.4863038938705321e-05, "loss": 0.25479304790496826, "step": 2192 }, { "epoch": 0.2661084819803422, "grad_norm": 2.1604347229003906, "learning_rate": 1.4860582238054295e-05, "loss": 0.39344412088394165, "step": 2193 }, { "epoch": 0.26622982647736926, "grad_norm": 2.4166760444641113, "learning_rate": 1.485812553740327e-05, "loss": 0.17573045194149017, "step": 2194 }, { "epoch": 0.2663511709743963, "grad_norm": 1.7303146123886108, "learning_rate": 1.4855668836752244e-05, "loss": 0.13295504450798035, "step": 2195 }, { "epoch": 0.26647251547142337, "grad_norm": 2.6552679538726807, "learning_rate": 1.4853212136101218e-05, "loss": 0.3497859537601471, "step": 2196 }, { "epoch": 0.2665938599684504, "grad_norm": 4.82560396194458, "learning_rate": 1.4850755435450192e-05, "loss": 0.34919238090515137, "step": 2197 }, { "epoch": 0.2667152044654775, "grad_norm": 1.834449052810669, "learning_rate": 1.4848298734799167e-05, "loss": 0.1825961172580719, "step": 2198 }, { "epoch": 0.2668365489625045, "grad_norm": 2.188383102416992, "learning_rate": 1.4845842034148141e-05, "loss": 0.4335808753967285, "step": 2199 }, { "epoch": 0.26695789345953164, "grad_norm": 1.5220776796340942, "learning_rate": 1.4843385333497115e-05, "loss": 0.15386682748794556, "step": 2200 }, { "epoch": 0.2670792379565587, "grad_norm": 1.5477015972137451, "learning_rate": 1.484092863284609e-05, "loss": 0.12548573315143585, "step": 2201 }, { "epoch": 0.26720058245358574, "grad_norm": 2.084110975265503, "learning_rate": 1.4838471932195064e-05, "loss": 0.5170855522155762, "step": 2202 }, { "epoch": 0.2673219269506128, "grad_norm": 1.241639256477356, "learning_rate": 1.4836015231544038e-05, "loss": 0.1204286739230156, "step": 2203 }, { "epoch": 0.26744327144763985, "grad_norm": 2.8557798862457275, "learning_rate": 1.4833558530893012e-05, "loss": 0.5340369939804077, "step": 2204 }, { "epoch": 0.2675646159446669, "grad_norm": 2.8909528255462646, "learning_rate": 1.4831101830241985e-05, "loss": 0.512728750705719, "step": 2205 }, { "epoch": 0.26768596044169396, "grad_norm": 2.8460562229156494, "learning_rate": 1.4828645129590959e-05, "loss": 0.5244192481040955, "step": 2206 }, { "epoch": 0.267807304938721, "grad_norm": 1.083155870437622, "learning_rate": 1.4826188428939933e-05, "loss": 0.08616457134485245, "step": 2207 }, { "epoch": 0.26792864943574807, "grad_norm": 2.8009111881256104, "learning_rate": 1.4823731728288908e-05, "loss": 0.4594787657260895, "step": 2208 }, { "epoch": 0.2680499939327751, "grad_norm": 2.523655652999878, "learning_rate": 1.4821275027637882e-05, "loss": 0.17828914523124695, "step": 2209 }, { "epoch": 0.26817133842980223, "grad_norm": 3.436047315597534, "learning_rate": 1.4818818326986856e-05, "loss": 0.576928973197937, "step": 2210 }, { "epoch": 0.2682926829268293, "grad_norm": 1.741632342338562, "learning_rate": 1.481636162633583e-05, "loss": 0.08921710401773453, "step": 2211 }, { "epoch": 0.26841402742385634, "grad_norm": 0.03540222719311714, "learning_rate": 1.4813904925684806e-05, "loss": 0.0003073185798712075, "step": 2212 }, { "epoch": 0.2685353719208834, "grad_norm": 2.1982312202453613, "learning_rate": 1.481144822503378e-05, "loss": 0.2795671820640564, "step": 2213 }, { "epoch": 0.26865671641791045, "grad_norm": 2.006270170211792, "learning_rate": 1.4808991524382755e-05, "loss": 0.341614305973053, "step": 2214 }, { "epoch": 0.2687780609149375, "grad_norm": 2.3411900997161865, "learning_rate": 1.4806534823731729e-05, "loss": 0.23282599449157715, "step": 2215 }, { "epoch": 0.26889940541196455, "grad_norm": 2.9898903369903564, "learning_rate": 1.4804078123080703e-05, "loss": 0.32836875319480896, "step": 2216 }, { "epoch": 0.2690207499089916, "grad_norm": 1.7510194778442383, "learning_rate": 1.4801621422429678e-05, "loss": 0.1715484857559204, "step": 2217 }, { "epoch": 0.26914209440601866, "grad_norm": 1.937170147895813, "learning_rate": 1.4799164721778652e-05, "loss": 0.2645847201347351, "step": 2218 }, { "epoch": 0.26926343890304577, "grad_norm": 3.1905677318573, "learning_rate": 1.4796708021127626e-05, "loss": 0.2771417200565338, "step": 2219 }, { "epoch": 0.2693847834000728, "grad_norm": 2.6751186847686768, "learning_rate": 1.47942513204766e-05, "loss": 0.22138531506061554, "step": 2220 }, { "epoch": 0.2695061278970999, "grad_norm": 1.834376573562622, "learning_rate": 1.4791794619825575e-05, "loss": 0.2948099374771118, "step": 2221 }, { "epoch": 0.26962747239412693, "grad_norm": 2.072049379348755, "learning_rate": 1.4789337919174549e-05, "loss": 0.2355620414018631, "step": 2222 }, { "epoch": 0.269748816891154, "grad_norm": 2.5111448764801025, "learning_rate": 1.4786881218523523e-05, "loss": 0.27840733528137207, "step": 2223 }, { "epoch": 0.26987016138818104, "grad_norm": 1.1506294012069702, "learning_rate": 1.4784424517872497e-05, "loss": 0.011712668463587761, "step": 2224 }, { "epoch": 0.2699915058852081, "grad_norm": 2.4833226203918457, "learning_rate": 1.4781967817221472e-05, "loss": 0.33597809076309204, "step": 2225 }, { "epoch": 0.27011285038223515, "grad_norm": 2.3968794345855713, "learning_rate": 1.4779511116570446e-05, "loss": 0.10020019114017487, "step": 2226 }, { "epoch": 0.2702341948792622, "grad_norm": 3.9725828170776367, "learning_rate": 1.477705441591942e-05, "loss": 0.5326024293899536, "step": 2227 }, { "epoch": 0.2703555393762893, "grad_norm": 2.1287968158721924, "learning_rate": 1.4774597715268394e-05, "loss": 0.2615710198879242, "step": 2228 }, { "epoch": 0.27047688387331636, "grad_norm": 2.4575917720794678, "learning_rate": 1.4772141014617369e-05, "loss": 0.6502370834350586, "step": 2229 }, { "epoch": 0.2705982283703434, "grad_norm": 3.2591922283172607, "learning_rate": 1.4769684313966343e-05, "loss": 0.3786463737487793, "step": 2230 }, { "epoch": 0.2707195728673705, "grad_norm": 2.3155269622802734, "learning_rate": 1.4767227613315317e-05, "loss": 0.35610049962997437, "step": 2231 }, { "epoch": 0.2708409173643975, "grad_norm": 2.6793532371520996, "learning_rate": 1.4764770912664293e-05, "loss": 0.378531277179718, "step": 2232 }, { "epoch": 0.2709622618614246, "grad_norm": 1.7378513813018799, "learning_rate": 1.4762314212013267e-05, "loss": 0.14084599912166595, "step": 2233 }, { "epoch": 0.27108360635845163, "grad_norm": 2.425596237182617, "learning_rate": 1.4759857511362242e-05, "loss": 0.3366726040840149, "step": 2234 }, { "epoch": 0.2712049508554787, "grad_norm": 1.7024770975112915, "learning_rate": 1.4757400810711216e-05, "loss": 0.08021130412817001, "step": 2235 }, { "epoch": 0.27132629535250574, "grad_norm": 2.109647035598755, "learning_rate": 1.475494411006019e-05, "loss": 0.4019131660461426, "step": 2236 }, { "epoch": 0.27144763984953285, "grad_norm": 1.5787715911865234, "learning_rate": 1.4752487409409165e-05, "loss": 0.07558346539735794, "step": 2237 }, { "epoch": 0.2715689843465599, "grad_norm": 1.6632368564605713, "learning_rate": 1.4750030708758139e-05, "loss": 0.2343292236328125, "step": 2238 }, { "epoch": 0.27169032884358696, "grad_norm": 2.395317316055298, "learning_rate": 1.4747574008107113e-05, "loss": 0.5279711484909058, "step": 2239 }, { "epoch": 0.271811673340614, "grad_norm": 1.4840434789657593, "learning_rate": 1.4745117307456087e-05, "loss": 0.09839776903390884, "step": 2240 }, { "epoch": 0.27193301783764107, "grad_norm": 2.1698062419891357, "learning_rate": 1.4742660606805062e-05, "loss": 0.36876392364501953, "step": 2241 }, { "epoch": 0.2720543623346681, "grad_norm": 3.7944204807281494, "learning_rate": 1.4740203906154036e-05, "loss": 0.2219565361738205, "step": 2242 }, { "epoch": 0.2721757068316952, "grad_norm": 2.875039577484131, "learning_rate": 1.473774720550301e-05, "loss": 0.2375985085964203, "step": 2243 }, { "epoch": 0.27229705132872223, "grad_norm": 1.9923136234283447, "learning_rate": 1.4735290504851984e-05, "loss": 0.333662211894989, "step": 2244 }, { "epoch": 0.2724183958257493, "grad_norm": 1.5942877531051636, "learning_rate": 1.4732833804200959e-05, "loss": 0.24374574422836304, "step": 2245 }, { "epoch": 0.27253974032277634, "grad_norm": 2.685206651687622, "learning_rate": 1.4730377103549933e-05, "loss": 0.2717750072479248, "step": 2246 }, { "epoch": 0.27266108481980345, "grad_norm": 2.2949953079223633, "learning_rate": 1.4727920402898907e-05, "loss": 0.6460723876953125, "step": 2247 }, { "epoch": 0.2727824293168305, "grad_norm": 2.3787600994110107, "learning_rate": 1.4725463702247881e-05, "loss": 0.269961416721344, "step": 2248 }, { "epoch": 0.27290377381385755, "grad_norm": 2.749967336654663, "learning_rate": 1.4723007001596856e-05, "loss": 0.25449487566947937, "step": 2249 }, { "epoch": 0.2730251183108846, "grad_norm": 2.1262457370758057, "learning_rate": 1.472055030094583e-05, "loss": 0.2170713096857071, "step": 2250 }, { "epoch": 0.27314646280791166, "grad_norm": 3.3156375885009766, "learning_rate": 1.4718093600294804e-05, "loss": 0.3634417951107025, "step": 2251 }, { "epoch": 0.2732678073049387, "grad_norm": 2.9557299613952637, "learning_rate": 1.471563689964378e-05, "loss": 0.33523958921432495, "step": 2252 }, { "epoch": 0.27338915180196577, "grad_norm": 1.2465989589691162, "learning_rate": 1.4713180198992754e-05, "loss": 0.10973364859819412, "step": 2253 }, { "epoch": 0.2735104962989928, "grad_norm": 2.5796959400177, "learning_rate": 1.4710723498341729e-05, "loss": 0.17034733295440674, "step": 2254 }, { "epoch": 0.2736318407960199, "grad_norm": 2.6189379692077637, "learning_rate": 1.4708266797690703e-05, "loss": 0.3938886225223541, "step": 2255 }, { "epoch": 0.273753185293047, "grad_norm": 1.9602938890457153, "learning_rate": 1.4705810097039677e-05, "loss": 0.06322696805000305, "step": 2256 }, { "epoch": 0.27387452979007404, "grad_norm": 2.8924646377563477, "learning_rate": 1.4703353396388651e-05, "loss": 0.20917552709579468, "step": 2257 }, { "epoch": 0.2739958742871011, "grad_norm": 3.3789329528808594, "learning_rate": 1.4700896695737626e-05, "loss": 0.5144970417022705, "step": 2258 }, { "epoch": 0.27411721878412815, "grad_norm": 1.824726939201355, "learning_rate": 1.46984399950866e-05, "loss": 0.06435342133045197, "step": 2259 }, { "epoch": 0.2742385632811552, "grad_norm": 2.550523519515991, "learning_rate": 1.4695983294435574e-05, "loss": 0.6688740849494934, "step": 2260 }, { "epoch": 0.27435990777818225, "grad_norm": 3.7656705379486084, "learning_rate": 1.4693526593784548e-05, "loss": 0.3615667223930359, "step": 2261 }, { "epoch": 0.2744812522752093, "grad_norm": 2.5979697704315186, "learning_rate": 1.4691069893133523e-05, "loss": 0.24014034867286682, "step": 2262 }, { "epoch": 0.27460259677223636, "grad_norm": 2.1243038177490234, "learning_rate": 1.4688613192482497e-05, "loss": 0.28634655475616455, "step": 2263 }, { "epoch": 0.2747239412692634, "grad_norm": 1.4423234462738037, "learning_rate": 1.4686156491831471e-05, "loss": 0.05492861941456795, "step": 2264 }, { "epoch": 0.2748452857662905, "grad_norm": 2.3820173740386963, "learning_rate": 1.4683699791180445e-05, "loss": 0.33802103996276855, "step": 2265 }, { "epoch": 0.2749666302633176, "grad_norm": 1.9587498903274536, "learning_rate": 1.468124309052942e-05, "loss": 0.20275847613811493, "step": 2266 }, { "epoch": 0.27508797476034463, "grad_norm": 1.5850027799606323, "learning_rate": 1.4678786389878394e-05, "loss": 0.174808070063591, "step": 2267 }, { "epoch": 0.2752093192573717, "grad_norm": 2.649083375930786, "learning_rate": 1.4676329689227368e-05, "loss": 0.23923219740390778, "step": 2268 }, { "epoch": 0.27533066375439874, "grad_norm": 4.099024772644043, "learning_rate": 1.4673872988576342e-05, "loss": 0.4453088045120239, "step": 2269 }, { "epoch": 0.2754520082514258, "grad_norm": 3.006272315979004, "learning_rate": 1.4671416287925317e-05, "loss": 0.4450489282608032, "step": 2270 }, { "epoch": 0.27557335274845285, "grad_norm": 2.6077582836151123, "learning_rate": 1.4668959587274291e-05, "loss": 0.1065952479839325, "step": 2271 }, { "epoch": 0.2756946972454799, "grad_norm": 2.509742021560669, "learning_rate": 1.4666502886623267e-05, "loss": 0.08338365703821182, "step": 2272 }, { "epoch": 0.27581604174250696, "grad_norm": 2.725430965423584, "learning_rate": 1.4664046185972241e-05, "loss": 0.254682719707489, "step": 2273 }, { "epoch": 0.275937386239534, "grad_norm": 2.45782470703125, "learning_rate": 1.4661589485321215e-05, "loss": 0.2728583514690399, "step": 2274 }, { "epoch": 0.2760587307365611, "grad_norm": 2.486548900604248, "learning_rate": 1.465913278467019e-05, "loss": 0.16687417030334473, "step": 2275 }, { "epoch": 0.2761800752335882, "grad_norm": 2.0167322158813477, "learning_rate": 1.4656676084019164e-05, "loss": 0.2224939465522766, "step": 2276 }, { "epoch": 0.2763014197306152, "grad_norm": 4.493525505065918, "learning_rate": 1.4654219383368138e-05, "loss": 0.19643045961856842, "step": 2277 }, { "epoch": 0.2764227642276423, "grad_norm": 2.704180955886841, "learning_rate": 1.4651762682717112e-05, "loss": 0.6486481428146362, "step": 2278 }, { "epoch": 0.27654410872466934, "grad_norm": 2.1249678134918213, "learning_rate": 1.4649305982066087e-05, "loss": 0.11037565767765045, "step": 2279 }, { "epoch": 0.2766654532216964, "grad_norm": 6.904737949371338, "learning_rate": 1.4646849281415061e-05, "loss": 0.2306111752986908, "step": 2280 }, { "epoch": 0.27678679771872344, "grad_norm": 1.558171033859253, "learning_rate": 1.4644392580764035e-05, "loss": 0.14664669334888458, "step": 2281 }, { "epoch": 0.2769081422157505, "grad_norm": 3.1155104637145996, "learning_rate": 1.464193588011301e-05, "loss": 0.48484039306640625, "step": 2282 }, { "epoch": 0.27702948671277755, "grad_norm": 1.796221137046814, "learning_rate": 1.4639479179461984e-05, "loss": 0.1460006982088089, "step": 2283 }, { "epoch": 0.27715083120980466, "grad_norm": 2.843824863433838, "learning_rate": 1.4637022478810958e-05, "loss": 0.2773866653442383, "step": 2284 }, { "epoch": 0.2772721757068317, "grad_norm": 2.0969021320343018, "learning_rate": 1.4634565778159932e-05, "loss": 0.1800985485315323, "step": 2285 }, { "epoch": 0.27739352020385877, "grad_norm": 2.214677095413208, "learning_rate": 1.4632109077508907e-05, "loss": 0.14064058661460876, "step": 2286 }, { "epoch": 0.2775148647008858, "grad_norm": 2.574021816253662, "learning_rate": 1.462965237685788e-05, "loss": 0.363839715719223, "step": 2287 }, { "epoch": 0.2776362091979129, "grad_norm": 2.657700538635254, "learning_rate": 1.4627195676206855e-05, "loss": 0.26481837034225464, "step": 2288 }, { "epoch": 0.27775755369493993, "grad_norm": 1.2845975160598755, "learning_rate": 1.462473897555583e-05, "loss": 0.06213737279176712, "step": 2289 }, { "epoch": 0.277878898191967, "grad_norm": 1.7377910614013672, "learning_rate": 1.4622282274904804e-05, "loss": 0.25193387269973755, "step": 2290 }, { "epoch": 0.27800024268899404, "grad_norm": 1.1945483684539795, "learning_rate": 1.4619825574253778e-05, "loss": 0.05514277145266533, "step": 2291 }, { "epoch": 0.2781215871860211, "grad_norm": 1.3370293378829956, "learning_rate": 1.4617368873602754e-05, "loss": 0.26924511790275574, "step": 2292 }, { "epoch": 0.2782429316830482, "grad_norm": 3.4798552989959717, "learning_rate": 1.4614912172951728e-05, "loss": 0.491172194480896, "step": 2293 }, { "epoch": 0.27836427618007525, "grad_norm": 2.978219509124756, "learning_rate": 1.4612455472300702e-05, "loss": 0.12911397218704224, "step": 2294 }, { "epoch": 0.2784856206771023, "grad_norm": 3.0245933532714844, "learning_rate": 1.4609998771649677e-05, "loss": 0.25463297963142395, "step": 2295 }, { "epoch": 0.27860696517412936, "grad_norm": 0.0010776594281196594, "learning_rate": 1.460754207099865e-05, "loss": 2.679898898350075e-05, "step": 2296 }, { "epoch": 0.2787283096711564, "grad_norm": 2.9437077045440674, "learning_rate": 1.4605085370347625e-05, "loss": 0.15635134279727936, "step": 2297 }, { "epoch": 0.27884965416818347, "grad_norm": 2.673121452331543, "learning_rate": 1.46026286696966e-05, "loss": 0.13004031777381897, "step": 2298 }, { "epoch": 0.2789709986652105, "grad_norm": 2.7737526893615723, "learning_rate": 1.4600171969045574e-05, "loss": 0.5386815667152405, "step": 2299 }, { "epoch": 0.2790923431622376, "grad_norm": 3.677978038787842, "learning_rate": 1.4597715268394548e-05, "loss": 0.24360714852809906, "step": 2300 }, { "epoch": 0.27921368765926463, "grad_norm": 2.476017475128174, "learning_rate": 1.4595258567743522e-05, "loss": 0.3138554096221924, "step": 2301 }, { "epoch": 0.2793350321562917, "grad_norm": 3.1135735511779785, "learning_rate": 1.4592801867092496e-05, "loss": 0.3930318355560303, "step": 2302 }, { "epoch": 0.2794563766533188, "grad_norm": 1.6521217823028564, "learning_rate": 1.459034516644147e-05, "loss": 0.3495832681655884, "step": 2303 }, { "epoch": 0.27957772115034585, "grad_norm": 2.9899768829345703, "learning_rate": 1.4587888465790445e-05, "loss": 0.35416388511657715, "step": 2304 }, { "epoch": 0.2796990656473729, "grad_norm": 2.8638105392456055, "learning_rate": 1.458543176513942e-05, "loss": 0.5694817304611206, "step": 2305 }, { "epoch": 0.27982041014439996, "grad_norm": 2.0999724864959717, "learning_rate": 1.4582975064488393e-05, "loss": 0.12378177046775818, "step": 2306 }, { "epoch": 0.279941754641427, "grad_norm": 0.044359240680933, "learning_rate": 1.4580518363837368e-05, "loss": 0.0005623744218610227, "step": 2307 }, { "epoch": 0.28006309913845406, "grad_norm": 3.165572166442871, "learning_rate": 1.4578061663186342e-05, "loss": 0.16220404207706451, "step": 2308 }, { "epoch": 0.2801844436354811, "grad_norm": 2.750955820083618, "learning_rate": 1.4575604962535316e-05, "loss": 0.2112129181623459, "step": 2309 }, { "epoch": 0.28030578813250817, "grad_norm": 2.1458773612976074, "learning_rate": 1.457314826188429e-05, "loss": 0.11941893398761749, "step": 2310 }, { "epoch": 0.2804271326295352, "grad_norm": 1.2971408367156982, "learning_rate": 1.4570691561233266e-05, "loss": 0.1420069932937622, "step": 2311 }, { "epoch": 0.28054847712656233, "grad_norm": 2.0736007690429688, "learning_rate": 1.456823486058224e-05, "loss": 0.19403484463691711, "step": 2312 }, { "epoch": 0.2806698216235894, "grad_norm": 1.8388627767562866, "learning_rate": 1.4565778159931215e-05, "loss": 0.46471458673477173, "step": 2313 }, { "epoch": 0.28079116612061644, "grad_norm": 2.6409003734588623, "learning_rate": 1.456332145928019e-05, "loss": 0.25268906354904175, "step": 2314 }, { "epoch": 0.2809125106176435, "grad_norm": 1.8578016757965088, "learning_rate": 1.4560864758629163e-05, "loss": 0.31751006841659546, "step": 2315 }, { "epoch": 0.28103385511467055, "grad_norm": 2.8499512672424316, "learning_rate": 1.4558408057978138e-05, "loss": 0.22498056292533875, "step": 2316 }, { "epoch": 0.2811551996116976, "grad_norm": 1.2381291389465332, "learning_rate": 1.4555951357327112e-05, "loss": 0.2600299119949341, "step": 2317 }, { "epoch": 0.28127654410872466, "grad_norm": 2.155503034591675, "learning_rate": 1.4553494656676086e-05, "loss": 0.20116908848285675, "step": 2318 }, { "epoch": 0.2813978886057517, "grad_norm": 0.005920345429331064, "learning_rate": 1.455103795602506e-05, "loss": 0.00013091710570733994, "step": 2319 }, { "epoch": 0.28151923310277877, "grad_norm": 2.8475534915924072, "learning_rate": 1.4548581255374035e-05, "loss": 0.2616243362426758, "step": 2320 }, { "epoch": 0.2816405775998059, "grad_norm": 2.4341745376586914, "learning_rate": 1.4546124554723009e-05, "loss": 0.42221981287002563, "step": 2321 }, { "epoch": 0.28176192209683293, "grad_norm": 2.084240198135376, "learning_rate": 1.4543667854071983e-05, "loss": 0.11653836071491241, "step": 2322 }, { "epoch": 0.28188326659386, "grad_norm": 1.3616430759429932, "learning_rate": 1.4541211153420957e-05, "loss": 0.03960473835468292, "step": 2323 }, { "epoch": 0.28200461109088704, "grad_norm": 2.470994710922241, "learning_rate": 1.4538754452769932e-05, "loss": 0.1367553472518921, "step": 2324 }, { "epoch": 0.2821259555879141, "grad_norm": 1.8862669467926025, "learning_rate": 1.4536297752118906e-05, "loss": 0.46348825097084045, "step": 2325 }, { "epoch": 0.28224730008494114, "grad_norm": 2.269005537033081, "learning_rate": 1.453384105146788e-05, "loss": 0.698644757270813, "step": 2326 }, { "epoch": 0.2823686445819682, "grad_norm": 2.1463327407836914, "learning_rate": 1.4531384350816855e-05, "loss": 0.3938106894493103, "step": 2327 }, { "epoch": 0.28248998907899525, "grad_norm": 4.106855869293213, "learning_rate": 1.4528927650165829e-05, "loss": 0.2572036683559418, "step": 2328 }, { "epoch": 0.2826113335760223, "grad_norm": 3.1966500282287598, "learning_rate": 1.4526470949514803e-05, "loss": 0.5454995632171631, "step": 2329 }, { "epoch": 0.2827326780730494, "grad_norm": 2.915327548980713, "learning_rate": 1.4524014248863777e-05, "loss": 0.2935107946395874, "step": 2330 }, { "epoch": 0.28285402257007647, "grad_norm": 2.243208885192871, "learning_rate": 1.4521557548212753e-05, "loss": 0.18108990788459778, "step": 2331 }, { "epoch": 0.2829753670671035, "grad_norm": 2.35526704788208, "learning_rate": 1.4519100847561728e-05, "loss": 0.40194523334503174, "step": 2332 }, { "epoch": 0.2830967115641306, "grad_norm": 3.389552593231201, "learning_rate": 1.4516644146910702e-05, "loss": 0.12860919535160065, "step": 2333 }, { "epoch": 0.28321805606115763, "grad_norm": 2.883084774017334, "learning_rate": 1.4514187446259676e-05, "loss": 0.768912672996521, "step": 2334 }, { "epoch": 0.2833394005581847, "grad_norm": 2.719496250152588, "learning_rate": 1.451173074560865e-05, "loss": 0.28342047333717346, "step": 2335 }, { "epoch": 0.28346074505521174, "grad_norm": 1.3616071939468384, "learning_rate": 1.4509274044957625e-05, "loss": 0.037327129393815994, "step": 2336 }, { "epoch": 0.2835820895522388, "grad_norm": 2.3787996768951416, "learning_rate": 1.4506817344306599e-05, "loss": 0.27518394589424133, "step": 2337 }, { "epoch": 0.28370343404926585, "grad_norm": 1.4686760902404785, "learning_rate": 1.4504360643655573e-05, "loss": 0.030330002307891846, "step": 2338 }, { "epoch": 0.2838247785462929, "grad_norm": 2.7788968086242676, "learning_rate": 1.4501903943004547e-05, "loss": 0.29226791858673096, "step": 2339 }, { "epoch": 0.28394612304332, "grad_norm": 3.7886874675750732, "learning_rate": 1.449944724235352e-05, "loss": 0.434556245803833, "step": 2340 }, { "epoch": 0.28406746754034706, "grad_norm": 2.317943572998047, "learning_rate": 1.4496990541702494e-05, "loss": 0.22898314893245697, "step": 2341 }, { "epoch": 0.2841888120373741, "grad_norm": 1.6351468563079834, "learning_rate": 1.4494533841051468e-05, "loss": 0.36461710929870605, "step": 2342 }, { "epoch": 0.28431015653440117, "grad_norm": 2.5408875942230225, "learning_rate": 1.4492077140400443e-05, "loss": 0.21778815984725952, "step": 2343 }, { "epoch": 0.2844315010314282, "grad_norm": 1.5979942083358765, "learning_rate": 1.4489620439749417e-05, "loss": 0.19117160141468048, "step": 2344 }, { "epoch": 0.2845528455284553, "grad_norm": 2.846651315689087, "learning_rate": 1.4487163739098391e-05, "loss": 0.3508620262145996, "step": 2345 }, { "epoch": 0.28467419002548233, "grad_norm": 0.7627307176589966, "learning_rate": 1.4484707038447365e-05, "loss": 0.01792456954717636, "step": 2346 }, { "epoch": 0.2847955345225094, "grad_norm": 2.633852481842041, "learning_rate": 1.448225033779634e-05, "loss": 0.16636960208415985, "step": 2347 }, { "epoch": 0.28491687901953644, "grad_norm": 2.0512325763702393, "learning_rate": 1.4479793637145314e-05, "loss": 0.33269551396369934, "step": 2348 }, { "epoch": 0.28503822351656355, "grad_norm": 1.6394566297531128, "learning_rate": 1.4477336936494288e-05, "loss": 0.225246861577034, "step": 2349 }, { "epoch": 0.2851595680135906, "grad_norm": 2.5464892387390137, "learning_rate": 1.4474880235843262e-05, "loss": 0.19317497313022614, "step": 2350 }, { "epoch": 0.28528091251061766, "grad_norm": 1.4693812131881714, "learning_rate": 1.4472423535192237e-05, "loss": 0.09994740784168243, "step": 2351 }, { "epoch": 0.2854022570076447, "grad_norm": 1.9715471267700195, "learning_rate": 1.4469966834541211e-05, "loss": 0.2948615849018097, "step": 2352 }, { "epoch": 0.28552360150467176, "grad_norm": 1.835134506225586, "learning_rate": 1.4467510133890185e-05, "loss": 0.35654348134994507, "step": 2353 }, { "epoch": 0.2856449460016988, "grad_norm": 2.886162519454956, "learning_rate": 1.446505343323916e-05, "loss": 0.445361465215683, "step": 2354 }, { "epoch": 0.2857662904987259, "grad_norm": 6.913744926452637, "learning_rate": 1.4462596732588134e-05, "loss": 0.2596283555030823, "step": 2355 }, { "epoch": 0.2858876349957529, "grad_norm": 3.093846082687378, "learning_rate": 1.4460140031937108e-05, "loss": 0.33309924602508545, "step": 2356 }, { "epoch": 0.28600897949278, "grad_norm": 2.4137792587280273, "learning_rate": 1.4457683331286084e-05, "loss": 0.3461116850376129, "step": 2357 }, { "epoch": 0.2861303239898071, "grad_norm": 1.9789154529571533, "learning_rate": 1.4455226630635058e-05, "loss": 0.47336727380752563, "step": 2358 }, { "epoch": 0.28625166848683414, "grad_norm": 2.6370766162872314, "learning_rate": 1.4452769929984032e-05, "loss": 0.5336825251579285, "step": 2359 }, { "epoch": 0.2863730129838612, "grad_norm": 2.2987093925476074, "learning_rate": 1.4450313229333007e-05, "loss": 0.16392672061920166, "step": 2360 }, { "epoch": 0.28649435748088825, "grad_norm": 1.725595235824585, "learning_rate": 1.4447856528681981e-05, "loss": 0.10611050575971603, "step": 2361 }, { "epoch": 0.2866157019779153, "grad_norm": 1.7771233320236206, "learning_rate": 1.4445399828030955e-05, "loss": 0.17881464958190918, "step": 2362 }, { "epoch": 0.28673704647494236, "grad_norm": 1.7786450386047363, "learning_rate": 1.444294312737993e-05, "loss": 0.11273477971553802, "step": 2363 }, { "epoch": 0.2868583909719694, "grad_norm": 3.6905012130737305, "learning_rate": 1.4440486426728904e-05, "loss": 0.3938071131706238, "step": 2364 }, { "epoch": 0.28697973546899647, "grad_norm": 1.7522951364517212, "learning_rate": 1.4438029726077878e-05, "loss": 0.062087565660476685, "step": 2365 }, { "epoch": 0.2871010799660235, "grad_norm": 3.22233510017395, "learning_rate": 1.4435573025426852e-05, "loss": 0.30694952607154846, "step": 2366 }, { "epoch": 0.2872224244630506, "grad_norm": 1.6954015493392944, "learning_rate": 1.4433116324775827e-05, "loss": 0.1827646791934967, "step": 2367 }, { "epoch": 0.2873437689600777, "grad_norm": 2.5658228397369385, "learning_rate": 1.44306596241248e-05, "loss": 0.7508528232574463, "step": 2368 }, { "epoch": 0.28746511345710474, "grad_norm": 2.071087121963501, "learning_rate": 1.4428202923473775e-05, "loss": 0.19768717885017395, "step": 2369 }, { "epoch": 0.2875864579541318, "grad_norm": 3.3089733123779297, "learning_rate": 1.442574622282275e-05, "loss": 0.37479057908058167, "step": 2370 }, { "epoch": 0.28770780245115884, "grad_norm": 2.419912576675415, "learning_rate": 1.4423289522171724e-05, "loss": 0.14660024642944336, "step": 2371 }, { "epoch": 0.2878291469481859, "grad_norm": 2.0513293743133545, "learning_rate": 1.4420832821520698e-05, "loss": 0.301160991191864, "step": 2372 }, { "epoch": 0.28795049144521295, "grad_norm": 1.7172777652740479, "learning_rate": 1.4418376120869672e-05, "loss": 0.08773735910654068, "step": 2373 }, { "epoch": 0.28807183594224, "grad_norm": 3.0460121631622314, "learning_rate": 1.4415919420218646e-05, "loss": 0.3944966793060303, "step": 2374 }, { "epoch": 0.28819318043926706, "grad_norm": 2.4737274646759033, "learning_rate": 1.441346271956762e-05, "loss": 0.14740464091300964, "step": 2375 }, { "epoch": 0.2883145249362941, "grad_norm": 2.25986385345459, "learning_rate": 1.4411006018916595e-05, "loss": 0.16227565705776215, "step": 2376 }, { "epoch": 0.2884358694333212, "grad_norm": 3.9188077449798584, "learning_rate": 1.440854931826557e-05, "loss": 0.174227774143219, "step": 2377 }, { "epoch": 0.2885572139303483, "grad_norm": 2.5465247631073, "learning_rate": 1.4406092617614545e-05, "loss": 0.07011450082063675, "step": 2378 }, { "epoch": 0.28867855842737533, "grad_norm": 2.8433055877685547, "learning_rate": 1.440363591696352e-05, "loss": 0.41096341609954834, "step": 2379 }, { "epoch": 0.2887999029244024, "grad_norm": 2.99009108543396, "learning_rate": 1.4401179216312494e-05, "loss": 0.13420191407203674, "step": 2380 }, { "epoch": 0.28892124742142944, "grad_norm": 4.198729991912842, "learning_rate": 1.4398722515661468e-05, "loss": 0.17982061207294464, "step": 2381 }, { "epoch": 0.2890425919184565, "grad_norm": 2.9838645458221436, "learning_rate": 1.4396265815010442e-05, "loss": 0.4002196490764618, "step": 2382 }, { "epoch": 0.28916393641548355, "grad_norm": 2.1307244300842285, "learning_rate": 1.4393809114359416e-05, "loss": 0.34108394384384155, "step": 2383 }, { "epoch": 0.2892852809125106, "grad_norm": 3.062396287918091, "learning_rate": 1.439135241370839e-05, "loss": 0.19610823690891266, "step": 2384 }, { "epoch": 0.28940662540953765, "grad_norm": 3.283600091934204, "learning_rate": 1.4388895713057365e-05, "loss": 0.291292667388916, "step": 2385 }, { "epoch": 0.28952796990656476, "grad_norm": 1.4206572771072388, "learning_rate": 1.4386439012406339e-05, "loss": 0.10331039875745773, "step": 2386 }, { "epoch": 0.2896493144035918, "grad_norm": 2.4822564125061035, "learning_rate": 1.4383982311755313e-05, "loss": 0.7069910168647766, "step": 2387 }, { "epoch": 0.28977065890061887, "grad_norm": 2.143632173538208, "learning_rate": 1.4381525611104288e-05, "loss": 0.11336402595043182, "step": 2388 }, { "epoch": 0.2898920033976459, "grad_norm": 3.152005910873413, "learning_rate": 1.4379068910453262e-05, "loss": 0.1477632224559784, "step": 2389 }, { "epoch": 0.290013347894673, "grad_norm": 2.4455134868621826, "learning_rate": 1.4376612209802236e-05, "loss": 0.3816097378730774, "step": 2390 }, { "epoch": 0.29013469239170003, "grad_norm": 3.482804775238037, "learning_rate": 1.437415550915121e-05, "loss": 0.7042055726051331, "step": 2391 }, { "epoch": 0.2902560368887271, "grad_norm": 2.1608364582061768, "learning_rate": 1.4371698808500185e-05, "loss": 0.13173907995224, "step": 2392 }, { "epoch": 0.29037738138575414, "grad_norm": 2.268423557281494, "learning_rate": 1.4369242107849159e-05, "loss": 0.2680203914642334, "step": 2393 }, { "epoch": 0.2904987258827812, "grad_norm": 13.093183517456055, "learning_rate": 1.4366785407198133e-05, "loss": 0.3524789810180664, "step": 2394 }, { "epoch": 0.29062007037980825, "grad_norm": 1.6701669692993164, "learning_rate": 1.4364328706547107e-05, "loss": 0.23719149827957153, "step": 2395 }, { "epoch": 0.29074141487683536, "grad_norm": 2.475621461868286, "learning_rate": 1.4361872005896082e-05, "loss": 0.24349063634872437, "step": 2396 }, { "epoch": 0.2908627593738624, "grad_norm": 1.499640703201294, "learning_rate": 1.4359415305245058e-05, "loss": 0.02156628668308258, "step": 2397 }, { "epoch": 0.29098410387088947, "grad_norm": 0.6849834322929382, "learning_rate": 1.4356958604594032e-05, "loss": 0.039487432688474655, "step": 2398 }, { "epoch": 0.2911054483679165, "grad_norm": 1.7018049955368042, "learning_rate": 1.4354501903943006e-05, "loss": 0.5675016641616821, "step": 2399 }, { "epoch": 0.2912267928649436, "grad_norm": 2.959660530090332, "learning_rate": 1.435204520329198e-05, "loss": 0.5447296500205994, "step": 2400 }, { "epoch": 0.2913481373619706, "grad_norm": 3.4573299884796143, "learning_rate": 1.4349588502640955e-05, "loss": 0.47932490706443787, "step": 2401 }, { "epoch": 0.2914694818589977, "grad_norm": 4.334619045257568, "learning_rate": 1.4347131801989929e-05, "loss": 0.2604002356529236, "step": 2402 }, { "epoch": 0.29159082635602473, "grad_norm": 1.2521309852600098, "learning_rate": 1.4344675101338903e-05, "loss": 0.03895732760429382, "step": 2403 }, { "epoch": 0.2917121708530518, "grad_norm": 2.6345105171203613, "learning_rate": 1.4342218400687877e-05, "loss": 0.18209978938102722, "step": 2404 }, { "epoch": 0.2918335153500789, "grad_norm": 6.25616455078125, "learning_rate": 1.4339761700036852e-05, "loss": 0.26063674688339233, "step": 2405 }, { "epoch": 0.29195485984710595, "grad_norm": 2.0457608699798584, "learning_rate": 1.4337304999385826e-05, "loss": 0.17444831132888794, "step": 2406 }, { "epoch": 0.292076204344133, "grad_norm": 1.9592281579971313, "learning_rate": 1.43348482987348e-05, "loss": 0.40388017892837524, "step": 2407 }, { "epoch": 0.29219754884116006, "grad_norm": 1.884320855140686, "learning_rate": 1.4332391598083775e-05, "loss": 0.39805707335472107, "step": 2408 }, { "epoch": 0.2923188933381871, "grad_norm": 2.758399248123169, "learning_rate": 1.4329934897432749e-05, "loss": 0.23270559310913086, "step": 2409 }, { "epoch": 0.29244023783521417, "grad_norm": 1.7793257236480713, "learning_rate": 1.4327478196781723e-05, "loss": 0.22666704654693604, "step": 2410 }, { "epoch": 0.2925615823322412, "grad_norm": 2.531182289123535, "learning_rate": 1.4325021496130697e-05, "loss": 0.24797149002552032, "step": 2411 }, { "epoch": 0.2926829268292683, "grad_norm": 0.8958587646484375, "learning_rate": 1.4322564795479672e-05, "loss": 0.03709115460515022, "step": 2412 }, { "epoch": 0.29280427132629533, "grad_norm": 1.8994909524917603, "learning_rate": 1.4320108094828646e-05, "loss": 0.3536483943462372, "step": 2413 }, { "epoch": 0.29292561582332244, "grad_norm": 1.8767240047454834, "learning_rate": 1.431765139417762e-05, "loss": 0.4431970417499542, "step": 2414 }, { "epoch": 0.2930469603203495, "grad_norm": 2.977033853530884, "learning_rate": 1.4315194693526594e-05, "loss": 0.6355453133583069, "step": 2415 }, { "epoch": 0.29316830481737655, "grad_norm": 1.2712355852127075, "learning_rate": 1.4312737992875569e-05, "loss": 0.3493037819862366, "step": 2416 }, { "epoch": 0.2932896493144036, "grad_norm": 2.980384588241577, "learning_rate": 1.4310281292224545e-05, "loss": 0.7393550276756287, "step": 2417 }, { "epoch": 0.29341099381143065, "grad_norm": 2.503192186355591, "learning_rate": 1.4307824591573519e-05, "loss": 0.43294692039489746, "step": 2418 }, { "epoch": 0.2935323383084577, "grad_norm": 1.8902491331100464, "learning_rate": 1.4305367890922493e-05, "loss": 0.31721875071525574, "step": 2419 }, { "epoch": 0.29365368280548476, "grad_norm": 1.9838643074035645, "learning_rate": 1.4302911190271467e-05, "loss": 0.270327091217041, "step": 2420 }, { "epoch": 0.2937750273025118, "grad_norm": 1.4788776636123657, "learning_rate": 1.4300454489620442e-05, "loss": 0.1401694118976593, "step": 2421 }, { "epoch": 0.29389637179953887, "grad_norm": 1.731963038444519, "learning_rate": 1.4297997788969416e-05, "loss": 0.1834172010421753, "step": 2422 }, { "epoch": 0.2940177162965659, "grad_norm": 1.6887357234954834, "learning_rate": 1.429554108831839e-05, "loss": 0.1098705381155014, "step": 2423 }, { "epoch": 0.29413906079359303, "grad_norm": 3.4765584468841553, "learning_rate": 1.4293084387667364e-05, "loss": 0.34448036551475525, "step": 2424 }, { "epoch": 0.2942604052906201, "grad_norm": 2.472482204437256, "learning_rate": 1.4290627687016339e-05, "loss": 0.6094886064529419, "step": 2425 }, { "epoch": 0.29438174978764714, "grad_norm": 2.6117072105407715, "learning_rate": 1.4288170986365313e-05, "loss": 0.3450254797935486, "step": 2426 }, { "epoch": 0.2945030942846742, "grad_norm": 2.193211317062378, "learning_rate": 1.4285714285714287e-05, "loss": 0.6642693281173706, "step": 2427 }, { "epoch": 0.29462443878170125, "grad_norm": 1.7561453580856323, "learning_rate": 1.4283257585063261e-05, "loss": 0.3937324583530426, "step": 2428 }, { "epoch": 0.2947457832787283, "grad_norm": 1.7868642807006836, "learning_rate": 1.4280800884412236e-05, "loss": 0.12268880009651184, "step": 2429 }, { "epoch": 0.29486712777575536, "grad_norm": 1.5349853038787842, "learning_rate": 1.427834418376121e-05, "loss": 0.09088528156280518, "step": 2430 }, { "epoch": 0.2949884722727824, "grad_norm": 2.605236291885376, "learning_rate": 1.4275887483110184e-05, "loss": 0.25133824348449707, "step": 2431 }, { "epoch": 0.29510981676980946, "grad_norm": 1.354604959487915, "learning_rate": 1.4273430782459158e-05, "loss": 0.043326713144779205, "step": 2432 }, { "epoch": 0.2952311612668366, "grad_norm": 2.319394111633301, "learning_rate": 1.4270974081808133e-05, "loss": 0.27239248156547546, "step": 2433 }, { "epoch": 0.2953525057638636, "grad_norm": 0.021819638088345528, "learning_rate": 1.4268517381157107e-05, "loss": 0.0002714463334996253, "step": 2434 }, { "epoch": 0.2954738502608907, "grad_norm": 1.6144486665725708, "learning_rate": 1.4266060680506081e-05, "loss": 0.12072954326868057, "step": 2435 }, { "epoch": 0.29559519475791773, "grad_norm": 3.3453211784362793, "learning_rate": 1.4263603979855055e-05, "loss": 0.24734504520893097, "step": 2436 }, { "epoch": 0.2957165392549448, "grad_norm": 0.917447566986084, "learning_rate": 1.4261147279204031e-05, "loss": 0.012377920560538769, "step": 2437 }, { "epoch": 0.29583788375197184, "grad_norm": 1.9250411987304688, "learning_rate": 1.4258690578553006e-05, "loss": 0.23009344935417175, "step": 2438 }, { "epoch": 0.2959592282489989, "grad_norm": 1.2952210903167725, "learning_rate": 1.425623387790198e-05, "loss": 0.136221244931221, "step": 2439 }, { "epoch": 0.29608057274602595, "grad_norm": 2.7569329738616943, "learning_rate": 1.4253777177250954e-05, "loss": 0.3644600510597229, "step": 2440 }, { "epoch": 0.296201917243053, "grad_norm": 2.0895564556121826, "learning_rate": 1.4251320476599928e-05, "loss": 0.11592680215835571, "step": 2441 }, { "epoch": 0.2963232617400801, "grad_norm": 2.5663390159606934, "learning_rate": 1.4248863775948903e-05, "loss": 0.3135583996772766, "step": 2442 }, { "epoch": 0.29644460623710717, "grad_norm": 2.4366345405578613, "learning_rate": 1.4246407075297877e-05, "loss": 0.5698183178901672, "step": 2443 }, { "epoch": 0.2965659507341342, "grad_norm": 3.226321220397949, "learning_rate": 1.4243950374646851e-05, "loss": 0.4014982283115387, "step": 2444 }, { "epoch": 0.2966872952311613, "grad_norm": 2.4463469982147217, "learning_rate": 1.4241493673995825e-05, "loss": 0.5382885932922363, "step": 2445 }, { "epoch": 0.29680863972818833, "grad_norm": 2.7886500358581543, "learning_rate": 1.42390369733448e-05, "loss": 0.1627078503370285, "step": 2446 }, { "epoch": 0.2969299842252154, "grad_norm": 2.2656824588775635, "learning_rate": 1.4236580272693774e-05, "loss": 0.32474666833877563, "step": 2447 }, { "epoch": 0.29705132872224244, "grad_norm": 1.7347723245620728, "learning_rate": 1.4234123572042748e-05, "loss": 0.1043761670589447, "step": 2448 }, { "epoch": 0.2971726732192695, "grad_norm": 1.877159833908081, "learning_rate": 1.4231666871391722e-05, "loss": 0.3229933977127075, "step": 2449 }, { "epoch": 0.29729401771629654, "grad_norm": 1.9499645233154297, "learning_rate": 1.4229210170740697e-05, "loss": 0.4199894666671753, "step": 2450 }, { "epoch": 0.29741536221332365, "grad_norm": 1.3216056823730469, "learning_rate": 1.4226753470089671e-05, "loss": 0.048775821924209595, "step": 2451 }, { "epoch": 0.2975367067103507, "grad_norm": 2.8225107192993164, "learning_rate": 1.4224296769438645e-05, "loss": 0.23263612389564514, "step": 2452 }, { "epoch": 0.29765805120737776, "grad_norm": 2.0464394092559814, "learning_rate": 1.422184006878762e-05, "loss": 0.3901311755180359, "step": 2453 }, { "epoch": 0.2977793957044048, "grad_norm": 2.7550644874572754, "learning_rate": 1.4219383368136594e-05, "loss": 0.42294594645500183, "step": 2454 }, { "epoch": 0.29790074020143187, "grad_norm": 2.5105109214782715, "learning_rate": 1.4216926667485568e-05, "loss": 0.5021324157714844, "step": 2455 }, { "epoch": 0.2980220846984589, "grad_norm": 1.6055526733398438, "learning_rate": 1.4214469966834544e-05, "loss": 0.20961758494377136, "step": 2456 }, { "epoch": 0.298143429195486, "grad_norm": 2.4210283756256104, "learning_rate": 1.4212013266183518e-05, "loss": 0.25656208395957947, "step": 2457 }, { "epoch": 0.29826477369251303, "grad_norm": 2.8255374431610107, "learning_rate": 1.4209556565532493e-05, "loss": 0.302267849445343, "step": 2458 }, { "epoch": 0.2983861181895401, "grad_norm": 3.174891471862793, "learning_rate": 1.4207099864881467e-05, "loss": 0.2618406414985657, "step": 2459 }, { "epoch": 0.29850746268656714, "grad_norm": 4.338184833526611, "learning_rate": 1.4204643164230441e-05, "loss": 0.5188778638839722, "step": 2460 }, { "epoch": 0.29862880718359425, "grad_norm": 3.044785261154175, "learning_rate": 1.4202186463579415e-05, "loss": 0.2174786925315857, "step": 2461 }, { "epoch": 0.2987501516806213, "grad_norm": 3.8430070877075195, "learning_rate": 1.419972976292839e-05, "loss": 0.6191221475601196, "step": 2462 }, { "epoch": 0.29887149617764835, "grad_norm": 2.5778520107269287, "learning_rate": 1.4197273062277364e-05, "loss": 0.25278812646865845, "step": 2463 }, { "epoch": 0.2989928406746754, "grad_norm": 2.2133378982543945, "learning_rate": 1.4194816361626338e-05, "loss": 0.30241888761520386, "step": 2464 }, { "epoch": 0.29911418517170246, "grad_norm": 2.2290940284729004, "learning_rate": 1.4192359660975312e-05, "loss": 0.26590943336486816, "step": 2465 }, { "epoch": 0.2992355296687295, "grad_norm": 3.7877848148345947, "learning_rate": 1.4189902960324287e-05, "loss": 0.3344946801662445, "step": 2466 }, { "epoch": 0.29935687416575657, "grad_norm": 3.0269033908843994, "learning_rate": 1.418744625967326e-05, "loss": 0.4494978189468384, "step": 2467 }, { "epoch": 0.2994782186627836, "grad_norm": 1.7358289957046509, "learning_rate": 1.4184989559022235e-05, "loss": 0.36531975865364075, "step": 2468 }, { "epoch": 0.2995995631598107, "grad_norm": 2.7170467376708984, "learning_rate": 1.418253285837121e-05, "loss": 0.12027442455291748, "step": 2469 }, { "epoch": 0.2997209076568378, "grad_norm": 2.8832855224609375, "learning_rate": 1.4180076157720184e-05, "loss": 0.5412907600402832, "step": 2470 }, { "epoch": 0.29984225215386484, "grad_norm": 2.934298515319824, "learning_rate": 1.4177619457069158e-05, "loss": 0.4732085168361664, "step": 2471 }, { "epoch": 0.2999635966508919, "grad_norm": 1.8136005401611328, "learning_rate": 1.4175162756418132e-05, "loss": 0.1498020738363266, "step": 2472 }, { "epoch": 0.30008494114791895, "grad_norm": 2.257565975189209, "learning_rate": 1.4172706055767106e-05, "loss": 0.13816264271736145, "step": 2473 }, { "epoch": 0.300206285644946, "grad_norm": 1.8980768918991089, "learning_rate": 1.417024935511608e-05, "loss": 0.21113350987434387, "step": 2474 }, { "epoch": 0.30032763014197306, "grad_norm": 1.987173080444336, "learning_rate": 1.4167792654465055e-05, "loss": 0.1198820099234581, "step": 2475 }, { "epoch": 0.3004489746390001, "grad_norm": 2.3875110149383545, "learning_rate": 1.4165335953814027e-05, "loss": 0.3360016942024231, "step": 2476 }, { "epoch": 0.30057031913602716, "grad_norm": 2.5721499919891357, "learning_rate": 1.4162879253163002e-05, "loss": 0.39307811856269836, "step": 2477 }, { "epoch": 0.3006916636330542, "grad_norm": 1.5788015127182007, "learning_rate": 1.4160422552511976e-05, "loss": 0.18997590243816376, "step": 2478 }, { "epoch": 0.3008130081300813, "grad_norm": 1.8559566736221313, "learning_rate": 1.415796585186095e-05, "loss": 0.39030617475509644, "step": 2479 }, { "epoch": 0.3009343526271084, "grad_norm": 2.378830909729004, "learning_rate": 1.4155509151209925e-05, "loss": 0.22130689024925232, "step": 2480 }, { "epoch": 0.30105569712413544, "grad_norm": 2.7740321159362793, "learning_rate": 1.4153052450558899e-05, "loss": 0.21331149339675903, "step": 2481 }, { "epoch": 0.3011770416211625, "grad_norm": 1.0219964981079102, "learning_rate": 1.4150595749907875e-05, "loss": 0.014357716776430607, "step": 2482 }, { "epoch": 0.30129838611818954, "grad_norm": 2.1695570945739746, "learning_rate": 1.4148139049256849e-05, "loss": 0.3297964930534363, "step": 2483 }, { "epoch": 0.3014197306152166, "grad_norm": 2.3358898162841797, "learning_rate": 1.4145682348605823e-05, "loss": 0.5272507667541504, "step": 2484 }, { "epoch": 0.30154107511224365, "grad_norm": 2.6879382133483887, "learning_rate": 1.4143225647954797e-05, "loss": 0.16552339494228363, "step": 2485 }, { "epoch": 0.3016624196092707, "grad_norm": 0.8002064824104309, "learning_rate": 1.4140768947303772e-05, "loss": 0.024760831147432327, "step": 2486 }, { "epoch": 0.30178376410629776, "grad_norm": 2.4009058475494385, "learning_rate": 1.4138312246652746e-05, "loss": 0.28254228830337524, "step": 2487 }, { "epoch": 0.3019051086033248, "grad_norm": 2.0572311878204346, "learning_rate": 1.413585554600172e-05, "loss": 0.11042163521051407, "step": 2488 }, { "epoch": 0.3020264531003519, "grad_norm": 2.5954744815826416, "learning_rate": 1.4133398845350695e-05, "loss": 0.3538762331008911, "step": 2489 }, { "epoch": 0.302147797597379, "grad_norm": 1.7822202444076538, "learning_rate": 1.4130942144699669e-05, "loss": 0.16841650009155273, "step": 2490 }, { "epoch": 0.30226914209440603, "grad_norm": 1.5831629037857056, "learning_rate": 1.4128485444048643e-05, "loss": 0.1426149308681488, "step": 2491 }, { "epoch": 0.3023904865914331, "grad_norm": 1.7833654880523682, "learning_rate": 1.4126028743397617e-05, "loss": 0.06926427036523819, "step": 2492 }, { "epoch": 0.30251183108846014, "grad_norm": 1.857125997543335, "learning_rate": 1.4123572042746592e-05, "loss": 0.38097965717315674, "step": 2493 }, { "epoch": 0.3026331755854872, "grad_norm": 2.433147430419922, "learning_rate": 1.4121115342095566e-05, "loss": 0.4175128936767578, "step": 2494 }, { "epoch": 0.30275452008251424, "grad_norm": 1.8602956533432007, "learning_rate": 1.411865864144454e-05, "loss": 0.16847746074199677, "step": 2495 }, { "epoch": 0.3028758645795413, "grad_norm": 2.69030499458313, "learning_rate": 1.4116201940793514e-05, "loss": 0.2057284712791443, "step": 2496 }, { "epoch": 0.30299720907656835, "grad_norm": 1.6462435722351074, "learning_rate": 1.4113745240142489e-05, "loss": 0.1570907086133957, "step": 2497 }, { "epoch": 0.30311855357359546, "grad_norm": 1.7094794511795044, "learning_rate": 1.4111288539491463e-05, "loss": 0.3432033956050873, "step": 2498 }, { "epoch": 0.3032398980706225, "grad_norm": 1.9925639629364014, "learning_rate": 1.4108831838840437e-05, "loss": 0.3235139846801758, "step": 2499 }, { "epoch": 0.30336124256764957, "grad_norm": 1.7073578834533691, "learning_rate": 1.4106375138189411e-05, "loss": 0.2412710338830948, "step": 2500 }, { "epoch": 0.3034825870646766, "grad_norm": 3.1426279544830322, "learning_rate": 1.4103918437538386e-05, "loss": 0.6673815846443176, "step": 2501 }, { "epoch": 0.3036039315617037, "grad_norm": 1.851920485496521, "learning_rate": 1.4101461736887362e-05, "loss": 0.14570853114128113, "step": 2502 }, { "epoch": 0.30372527605873073, "grad_norm": 2.583951234817505, "learning_rate": 1.4099005036236336e-05, "loss": 0.24237391352653503, "step": 2503 }, { "epoch": 0.3038466205557578, "grad_norm": 1.823894739151001, "learning_rate": 1.409654833558531e-05, "loss": 0.3425629734992981, "step": 2504 }, { "epoch": 0.30396796505278484, "grad_norm": 2.8365423679351807, "learning_rate": 1.4094091634934284e-05, "loss": 0.26538074016571045, "step": 2505 }, { "epoch": 0.3040893095498119, "grad_norm": 2.6532278060913086, "learning_rate": 1.4091634934283259e-05, "loss": 0.3051794767379761, "step": 2506 }, { "epoch": 0.304210654046839, "grad_norm": 1.5138508081436157, "learning_rate": 1.4089178233632233e-05, "loss": 0.05153714492917061, "step": 2507 }, { "epoch": 0.30433199854386606, "grad_norm": 2.6453068256378174, "learning_rate": 1.4086721532981207e-05, "loss": 0.5268983244895935, "step": 2508 }, { "epoch": 0.3044533430408931, "grad_norm": 2.8347370624542236, "learning_rate": 1.4084264832330181e-05, "loss": 0.624761164188385, "step": 2509 }, { "epoch": 0.30457468753792016, "grad_norm": 0.3800169825553894, "learning_rate": 1.4081808131679156e-05, "loss": 0.00921088457107544, "step": 2510 }, { "epoch": 0.3046960320349472, "grad_norm": 1.8986505270004272, "learning_rate": 1.407935143102813e-05, "loss": 0.13484662771224976, "step": 2511 }, { "epoch": 0.30481737653197427, "grad_norm": 1.785301685333252, "learning_rate": 1.4076894730377104e-05, "loss": 0.27752214670181274, "step": 2512 }, { "epoch": 0.3049387210290013, "grad_norm": 2.454313039779663, "learning_rate": 1.4074438029726078e-05, "loss": 0.3176259398460388, "step": 2513 }, { "epoch": 0.3050600655260284, "grad_norm": 2.9403469562530518, "learning_rate": 1.4071981329075053e-05, "loss": 0.29386773705482483, "step": 2514 }, { "epoch": 0.30518141002305543, "grad_norm": 1.5787403583526611, "learning_rate": 1.4069524628424027e-05, "loss": 0.5370906591415405, "step": 2515 }, { "epoch": 0.3053027545200825, "grad_norm": 1.9172804355621338, "learning_rate": 1.4067067927773001e-05, "loss": 0.23191463947296143, "step": 2516 }, { "epoch": 0.3054240990171096, "grad_norm": 1.754380464553833, "learning_rate": 1.4064611227121975e-05, "loss": 0.30815523862838745, "step": 2517 }, { "epoch": 0.30554544351413665, "grad_norm": 3.231311082839966, "learning_rate": 1.406215452647095e-05, "loss": 0.4425506591796875, "step": 2518 }, { "epoch": 0.3056667880111637, "grad_norm": 2.873631477355957, "learning_rate": 1.4059697825819924e-05, "loss": 0.11558450758457184, "step": 2519 }, { "epoch": 0.30578813250819076, "grad_norm": 2.3214457035064697, "learning_rate": 1.4057241125168898e-05, "loss": 0.22642116248607635, "step": 2520 }, { "epoch": 0.3059094770052178, "grad_norm": 2.071108341217041, "learning_rate": 1.4054784424517872e-05, "loss": 0.39317962527275085, "step": 2521 }, { "epoch": 0.30603082150224487, "grad_norm": 0.2151782512664795, "learning_rate": 1.4052327723866848e-05, "loss": 0.0017753503052517772, "step": 2522 }, { "epoch": 0.3061521659992719, "grad_norm": 2.5965371131896973, "learning_rate": 1.4049871023215823e-05, "loss": 0.246237114071846, "step": 2523 }, { "epoch": 0.306273510496299, "grad_norm": 2.4071786403656006, "learning_rate": 1.4047414322564797e-05, "loss": 0.4483923316001892, "step": 2524 }, { "epoch": 0.306394854993326, "grad_norm": 2.2549428939819336, "learning_rate": 1.4044957621913771e-05, "loss": 0.13364174962043762, "step": 2525 }, { "epoch": 0.30651619949035314, "grad_norm": 4.5541300773620605, "learning_rate": 1.4042500921262745e-05, "loss": 0.4089297950267792, "step": 2526 }, { "epoch": 0.3066375439873802, "grad_norm": 2.6574063301086426, "learning_rate": 1.404004422061172e-05, "loss": 0.20073917508125305, "step": 2527 }, { "epoch": 0.30675888848440724, "grad_norm": 2.037043333053589, "learning_rate": 1.4037587519960694e-05, "loss": 0.21796780824661255, "step": 2528 }, { "epoch": 0.3068802329814343, "grad_norm": 2.436596632003784, "learning_rate": 1.4035130819309668e-05, "loss": 0.4355173408985138, "step": 2529 }, { "epoch": 0.30700157747846135, "grad_norm": 2.3366832733154297, "learning_rate": 1.4032674118658642e-05, "loss": 0.13320335745811462, "step": 2530 }, { "epoch": 0.3071229219754884, "grad_norm": 2.294221878051758, "learning_rate": 1.4030217418007617e-05, "loss": 0.3165469467639923, "step": 2531 }, { "epoch": 0.30724426647251546, "grad_norm": 5.119211196899414, "learning_rate": 1.4027760717356591e-05, "loss": 0.9067642688751221, "step": 2532 }, { "epoch": 0.3073656109695425, "grad_norm": 2.33833909034729, "learning_rate": 1.4025304016705565e-05, "loss": 0.284976065158844, "step": 2533 }, { "epoch": 0.30748695546656957, "grad_norm": 3.3833560943603516, "learning_rate": 1.402284731605454e-05, "loss": 0.16957956552505493, "step": 2534 }, { "epoch": 0.3076082999635967, "grad_norm": 1.6084033250808716, "learning_rate": 1.4020390615403514e-05, "loss": 0.49287718534469604, "step": 2535 }, { "epoch": 0.30772964446062373, "grad_norm": 3.138307809829712, "learning_rate": 1.4017933914752488e-05, "loss": 0.3367580473423004, "step": 2536 }, { "epoch": 0.3078509889576508, "grad_norm": 2.3280084133148193, "learning_rate": 1.4015477214101462e-05, "loss": 0.5034416913986206, "step": 2537 }, { "epoch": 0.30797233345467784, "grad_norm": 2.856696605682373, "learning_rate": 1.4013020513450437e-05, "loss": 0.1836393177509308, "step": 2538 }, { "epoch": 0.3080936779517049, "grad_norm": 2.4697892665863037, "learning_rate": 1.401056381279941e-05, "loss": 0.16828244924545288, "step": 2539 }, { "epoch": 0.30821502244873195, "grad_norm": 2.539705753326416, "learning_rate": 1.4008107112148385e-05, "loss": 0.4240834712982178, "step": 2540 }, { "epoch": 0.308336366945759, "grad_norm": 2.0786688327789307, "learning_rate": 1.400565041149736e-05, "loss": 0.10091252624988556, "step": 2541 }, { "epoch": 0.30845771144278605, "grad_norm": 3.49723219871521, "learning_rate": 1.4003193710846335e-05, "loss": 0.9220690727233887, "step": 2542 }, { "epoch": 0.3085790559398131, "grad_norm": 3.2246007919311523, "learning_rate": 1.400073701019531e-05, "loss": 0.3404640257358551, "step": 2543 }, { "epoch": 0.3087004004368402, "grad_norm": 2.174621105194092, "learning_rate": 1.3998280309544284e-05, "loss": 0.14235185086727142, "step": 2544 }, { "epoch": 0.30882174493386727, "grad_norm": 2.7580313682556152, "learning_rate": 1.3995823608893258e-05, "loss": 0.38407284021377563, "step": 2545 }, { "epoch": 0.3089430894308943, "grad_norm": 2.5175609588623047, "learning_rate": 1.3993366908242232e-05, "loss": 0.405870258808136, "step": 2546 }, { "epoch": 0.3090644339279214, "grad_norm": 2.1974775791168213, "learning_rate": 1.3990910207591207e-05, "loss": 0.31769827008247375, "step": 2547 }, { "epoch": 0.30918577842494843, "grad_norm": 2.1293437480926514, "learning_rate": 1.398845350694018e-05, "loss": 0.21596483886241913, "step": 2548 }, { "epoch": 0.3093071229219755, "grad_norm": 2.270765542984009, "learning_rate": 1.3985996806289155e-05, "loss": 0.4722200036048889, "step": 2549 }, { "epoch": 0.30942846741900254, "grad_norm": 2.49306583404541, "learning_rate": 1.398354010563813e-05, "loss": 0.26866334676742554, "step": 2550 }, { "epoch": 0.3095498119160296, "grad_norm": 2.2344727516174316, "learning_rate": 1.3981083404987104e-05, "loss": 0.489822119474411, "step": 2551 }, { "epoch": 0.30967115641305665, "grad_norm": 1.7446762323379517, "learning_rate": 1.3978626704336078e-05, "loss": 0.1108304038643837, "step": 2552 }, { "epoch": 0.3097925009100837, "grad_norm": 2.0523681640625, "learning_rate": 1.3976170003685052e-05, "loss": 0.04370498284697533, "step": 2553 }, { "epoch": 0.3099138454071108, "grad_norm": 1.4232988357543945, "learning_rate": 1.3973713303034026e-05, "loss": 0.10781297832727432, "step": 2554 }, { "epoch": 0.31003518990413786, "grad_norm": 3.349212884902954, "learning_rate": 1.3971256602383e-05, "loss": 0.06405492126941681, "step": 2555 }, { "epoch": 0.3101565344011649, "grad_norm": 1.9357292652130127, "learning_rate": 1.3968799901731975e-05, "loss": 0.3964538276195526, "step": 2556 }, { "epoch": 0.31027787889819197, "grad_norm": 2.4218809604644775, "learning_rate": 1.396634320108095e-05, "loss": 0.24902822077274323, "step": 2557 }, { "epoch": 0.310399223395219, "grad_norm": 4.099609375, "learning_rate": 1.3963886500429923e-05, "loss": 0.4399346709251404, "step": 2558 }, { "epoch": 0.3105205678922461, "grad_norm": 2.6035866737365723, "learning_rate": 1.3961429799778898e-05, "loss": 0.15043525397777557, "step": 2559 }, { "epoch": 0.31064191238927313, "grad_norm": 2.88090443611145, "learning_rate": 1.3958973099127872e-05, "loss": 0.15296348929405212, "step": 2560 }, { "epoch": 0.3107632568863002, "grad_norm": 3.220996141433716, "learning_rate": 1.3956516398476846e-05, "loss": 0.27642595767974854, "step": 2561 }, { "epoch": 0.31088460138332724, "grad_norm": 4.000929355621338, "learning_rate": 1.3954059697825822e-05, "loss": 0.29429298639297485, "step": 2562 }, { "epoch": 0.31100594588035435, "grad_norm": 2.1541647911071777, "learning_rate": 1.3951602997174796e-05, "loss": 0.4215291738510132, "step": 2563 }, { "epoch": 0.3111272903773814, "grad_norm": 2.1848764419555664, "learning_rate": 1.394914629652377e-05, "loss": 0.4602106809616089, "step": 2564 }, { "epoch": 0.31124863487440846, "grad_norm": 2.27817702293396, "learning_rate": 1.3946689595872745e-05, "loss": 0.49429088830947876, "step": 2565 }, { "epoch": 0.3113699793714355, "grad_norm": 2.1268224716186523, "learning_rate": 1.394423289522172e-05, "loss": 0.16737808287143707, "step": 2566 }, { "epoch": 0.31149132386846257, "grad_norm": 2.6273324489593506, "learning_rate": 1.3941776194570693e-05, "loss": 0.6507529616355896, "step": 2567 }, { "epoch": 0.3116126683654896, "grad_norm": 2.917470693588257, "learning_rate": 1.3939319493919668e-05, "loss": 0.6891695261001587, "step": 2568 }, { "epoch": 0.3117340128625167, "grad_norm": 5.002813339233398, "learning_rate": 1.3936862793268642e-05, "loss": 0.16554062068462372, "step": 2569 }, { "epoch": 0.31185535735954373, "grad_norm": 0.9612758159637451, "learning_rate": 1.3934406092617616e-05, "loss": 0.08712995052337646, "step": 2570 }, { "epoch": 0.3119767018565708, "grad_norm": 2.228410243988037, "learning_rate": 1.393194939196659e-05, "loss": 0.5029768347740173, "step": 2571 }, { "epoch": 0.3120980463535979, "grad_norm": 1.271191120147705, "learning_rate": 1.3929492691315565e-05, "loss": 0.05700064077973366, "step": 2572 }, { "epoch": 0.31221939085062494, "grad_norm": 3.567169189453125, "learning_rate": 1.3927035990664539e-05, "loss": 0.17123110592365265, "step": 2573 }, { "epoch": 0.312340735347652, "grad_norm": 2.542236566543579, "learning_rate": 1.3924579290013513e-05, "loss": 0.332086443901062, "step": 2574 }, { "epoch": 0.31246207984467905, "grad_norm": 1.2240946292877197, "learning_rate": 1.3922122589362488e-05, "loss": 0.04972168803215027, "step": 2575 }, { "epoch": 0.3125834243417061, "grad_norm": 2.9079947471618652, "learning_rate": 1.3919665888711462e-05, "loss": 0.6036537885665894, "step": 2576 }, { "epoch": 0.31270476883873316, "grad_norm": 2.383118152618408, "learning_rate": 1.3917209188060436e-05, "loss": 0.25742924213409424, "step": 2577 }, { "epoch": 0.3128261133357602, "grad_norm": 1.5192009210586548, "learning_rate": 1.391475248740941e-05, "loss": 0.14922620356082916, "step": 2578 }, { "epoch": 0.31294745783278727, "grad_norm": 1.5510634183883667, "learning_rate": 1.3912295786758385e-05, "loss": 0.46871283650398254, "step": 2579 }, { "epoch": 0.3130688023298143, "grad_norm": 2.901395559310913, "learning_rate": 1.3909839086107359e-05, "loss": 0.3961530923843384, "step": 2580 }, { "epoch": 0.3131901468268414, "grad_norm": 2.473994731903076, "learning_rate": 1.3907382385456335e-05, "loss": 0.06779327243566513, "step": 2581 }, { "epoch": 0.3133114913238685, "grad_norm": 2.99898362159729, "learning_rate": 1.3904925684805309e-05, "loss": 0.24095484614372253, "step": 2582 }, { "epoch": 0.31343283582089554, "grad_norm": 1.9804432392120361, "learning_rate": 1.3902468984154283e-05, "loss": 0.14461076259613037, "step": 2583 }, { "epoch": 0.3135541803179226, "grad_norm": 3.6258225440979004, "learning_rate": 1.3900012283503258e-05, "loss": 0.39698484539985657, "step": 2584 }, { "epoch": 0.31367552481494965, "grad_norm": 2.218045234680176, "learning_rate": 1.3897555582852232e-05, "loss": 0.38136905431747437, "step": 2585 }, { "epoch": 0.3137968693119767, "grad_norm": 3.269705057144165, "learning_rate": 1.3895098882201206e-05, "loss": 0.6193602085113525, "step": 2586 }, { "epoch": 0.31391821380900375, "grad_norm": 3.008082628250122, "learning_rate": 1.389264218155018e-05, "loss": 0.506243109703064, "step": 2587 }, { "epoch": 0.3140395583060308, "grad_norm": 3.4504647254943848, "learning_rate": 1.3890185480899155e-05, "loss": 0.4054690897464752, "step": 2588 }, { "epoch": 0.31416090280305786, "grad_norm": 2.2036619186401367, "learning_rate": 1.3887728780248129e-05, "loss": 0.21069829165935516, "step": 2589 }, { "epoch": 0.3142822473000849, "grad_norm": 2.6821749210357666, "learning_rate": 1.3885272079597103e-05, "loss": 0.06172182410955429, "step": 2590 }, { "epoch": 0.314403591797112, "grad_norm": 4.993224143981934, "learning_rate": 1.3882815378946077e-05, "loss": 0.4049087166786194, "step": 2591 }, { "epoch": 0.3145249362941391, "grad_norm": 2.4747045040130615, "learning_rate": 1.3880358678295052e-05, "loss": 0.20540593564510345, "step": 2592 }, { "epoch": 0.31464628079116613, "grad_norm": 1.2817527055740356, "learning_rate": 1.3877901977644026e-05, "loss": 0.07196521759033203, "step": 2593 }, { "epoch": 0.3147676252881932, "grad_norm": 2.2558658123016357, "learning_rate": 1.3875445276993e-05, "loss": 0.06774863600730896, "step": 2594 }, { "epoch": 0.31488896978522024, "grad_norm": 2.3449127674102783, "learning_rate": 1.3872988576341974e-05, "loss": 0.16595718264579773, "step": 2595 }, { "epoch": 0.3150103142822473, "grad_norm": 1.9539029598236084, "learning_rate": 1.3870531875690949e-05, "loss": 0.1529058814048767, "step": 2596 }, { "epoch": 0.31513165877927435, "grad_norm": 4.238483905792236, "learning_rate": 1.3868075175039923e-05, "loss": 0.563798189163208, "step": 2597 }, { "epoch": 0.3152530032763014, "grad_norm": 2.3801801204681396, "learning_rate": 1.3865618474388897e-05, "loss": 0.3669548034667969, "step": 2598 }, { "epoch": 0.31537434777332846, "grad_norm": 2.9926679134368896, "learning_rate": 1.3863161773737871e-05, "loss": 0.26389652490615845, "step": 2599 }, { "epoch": 0.31549569227035557, "grad_norm": 2.5304622650146484, "learning_rate": 1.3860705073086846e-05, "loss": 0.3482251465320587, "step": 2600 }, { "epoch": 0.3156170367673826, "grad_norm": 2.2760822772979736, "learning_rate": 1.3858248372435822e-05, "loss": 0.25967150926589966, "step": 2601 }, { "epoch": 0.3157383812644097, "grad_norm": 1.1612865924835205, "learning_rate": 1.3855791671784796e-05, "loss": 0.09870055317878723, "step": 2602 }, { "epoch": 0.3158597257614367, "grad_norm": 2.1818089485168457, "learning_rate": 1.385333497113377e-05, "loss": 0.24336664378643036, "step": 2603 }, { "epoch": 0.3159810702584638, "grad_norm": 3.514836072921753, "learning_rate": 1.3850878270482744e-05, "loss": 0.5613154172897339, "step": 2604 }, { "epoch": 0.31610241475549083, "grad_norm": 3.2239785194396973, "learning_rate": 1.3848421569831719e-05, "loss": 0.5051560997962952, "step": 2605 }, { "epoch": 0.3162237592525179, "grad_norm": 2.4050111770629883, "learning_rate": 1.3845964869180693e-05, "loss": 0.23731665313243866, "step": 2606 }, { "epoch": 0.31634510374954494, "grad_norm": 2.546924591064453, "learning_rate": 1.3843508168529667e-05, "loss": 0.1069282665848732, "step": 2607 }, { "epoch": 0.316466448246572, "grad_norm": 2.6186447143554688, "learning_rate": 1.3841051467878641e-05, "loss": 0.14223074913024902, "step": 2608 }, { "epoch": 0.31658779274359905, "grad_norm": 2.2534494400024414, "learning_rate": 1.3838594767227616e-05, "loss": 0.200982928276062, "step": 2609 }, { "epoch": 0.31670913724062616, "grad_norm": 3.0203487873077393, "learning_rate": 1.383613806657659e-05, "loss": 0.4329514503479004, "step": 2610 }, { "epoch": 0.3168304817376532, "grad_norm": 2.4250941276550293, "learning_rate": 1.3833681365925564e-05, "loss": 0.5131547451019287, "step": 2611 }, { "epoch": 0.31695182623468027, "grad_norm": 2.658128261566162, "learning_rate": 1.3831224665274537e-05, "loss": 0.4151817262172699, "step": 2612 }, { "epoch": 0.3170731707317073, "grad_norm": 3.0296595096588135, "learning_rate": 1.3828767964623511e-05, "loss": 0.36928361654281616, "step": 2613 }, { "epoch": 0.3171945152287344, "grad_norm": 2.8335986137390137, "learning_rate": 1.3826311263972485e-05, "loss": 0.29700344800949097, "step": 2614 }, { "epoch": 0.31731585972576143, "grad_norm": 2.5099239349365234, "learning_rate": 1.382385456332146e-05, "loss": 0.35675522685050964, "step": 2615 }, { "epoch": 0.3174372042227885, "grad_norm": 2.2245917320251465, "learning_rate": 1.3821397862670434e-05, "loss": 0.268341600894928, "step": 2616 }, { "epoch": 0.31755854871981554, "grad_norm": 2.405837297439575, "learning_rate": 1.3818941162019408e-05, "loss": 0.37409159541130066, "step": 2617 }, { "epoch": 0.3176798932168426, "grad_norm": 2.6217498779296875, "learning_rate": 1.3816484461368382e-05, "loss": 0.3058534264564514, "step": 2618 }, { "epoch": 0.3178012377138697, "grad_norm": 2.3653597831726074, "learning_rate": 1.3814027760717357e-05, "loss": 0.4587075114250183, "step": 2619 }, { "epoch": 0.31792258221089675, "grad_norm": 1.733944296836853, "learning_rate": 1.381157106006633e-05, "loss": 0.1447191834449768, "step": 2620 }, { "epoch": 0.3180439267079238, "grad_norm": 1.9843920469284058, "learning_rate": 1.3809114359415305e-05, "loss": 0.22765451669692993, "step": 2621 }, { "epoch": 0.31816527120495086, "grad_norm": 2.412477970123291, "learning_rate": 1.380665765876428e-05, "loss": 0.49215835332870483, "step": 2622 }, { "epoch": 0.3182866157019779, "grad_norm": 3.0536649227142334, "learning_rate": 1.3804200958113254e-05, "loss": 0.22170889377593994, "step": 2623 }, { "epoch": 0.31840796019900497, "grad_norm": 2.3984286785125732, "learning_rate": 1.3801744257462228e-05, "loss": 0.26721903681755066, "step": 2624 }, { "epoch": 0.318529304696032, "grad_norm": 2.1126484870910645, "learning_rate": 1.3799287556811202e-05, "loss": 0.39440837502479553, "step": 2625 }, { "epoch": 0.3186506491930591, "grad_norm": 1.7480491399765015, "learning_rate": 1.3796830856160176e-05, "loss": 0.07162956148386002, "step": 2626 }, { "epoch": 0.31877199369008613, "grad_norm": 3.1320767402648926, "learning_rate": 1.3794374155509152e-05, "loss": 0.4290386140346527, "step": 2627 }, { "epoch": 0.31889333818711324, "grad_norm": 3.318819522857666, "learning_rate": 1.3791917454858127e-05, "loss": 0.40751951932907104, "step": 2628 }, { "epoch": 0.3190146826841403, "grad_norm": 3.5489182472229004, "learning_rate": 1.37894607542071e-05, "loss": 0.22148944437503815, "step": 2629 }, { "epoch": 0.31913602718116735, "grad_norm": 2.1631932258605957, "learning_rate": 1.3787004053556075e-05, "loss": 0.43457284569740295, "step": 2630 }, { "epoch": 0.3192573716781944, "grad_norm": 3.955735445022583, "learning_rate": 1.378454735290505e-05, "loss": 0.43126925826072693, "step": 2631 }, { "epoch": 0.31937871617522146, "grad_norm": 0.3669489324092865, "learning_rate": 1.3782090652254024e-05, "loss": 0.0014458309160545468, "step": 2632 }, { "epoch": 0.3195000606722485, "grad_norm": 2.2121047973632812, "learning_rate": 1.3779633951602998e-05, "loss": 0.35696786642074585, "step": 2633 }, { "epoch": 0.31962140516927556, "grad_norm": 2.8321733474731445, "learning_rate": 1.3777177250951972e-05, "loss": 0.5759726166725159, "step": 2634 }, { "epoch": 0.3197427496663026, "grad_norm": 2.503988027572632, "learning_rate": 1.3774720550300946e-05, "loss": 0.24126414954662323, "step": 2635 }, { "epoch": 0.31986409416332967, "grad_norm": 3.8036837577819824, "learning_rate": 1.377226384964992e-05, "loss": 0.14890190958976746, "step": 2636 }, { "epoch": 0.3199854386603568, "grad_norm": 2.332839012145996, "learning_rate": 1.3769807148998895e-05, "loss": 0.4150230288505554, "step": 2637 }, { "epoch": 0.32010678315738383, "grad_norm": 1.6931169033050537, "learning_rate": 1.376735044834787e-05, "loss": 0.02355354093015194, "step": 2638 }, { "epoch": 0.3202281276544109, "grad_norm": 1.7993088960647583, "learning_rate": 1.3764893747696843e-05, "loss": 0.228868767619133, "step": 2639 }, { "epoch": 0.32034947215143794, "grad_norm": 4.86004114151001, "learning_rate": 1.3762437047045818e-05, "loss": 0.6723122000694275, "step": 2640 }, { "epoch": 0.320470816648465, "grad_norm": 2.648098945617676, "learning_rate": 1.3759980346394792e-05, "loss": 0.3872552514076233, "step": 2641 }, { "epoch": 0.32059216114549205, "grad_norm": 2.9110360145568848, "learning_rate": 1.3757523645743766e-05, "loss": 0.535675585269928, "step": 2642 }, { "epoch": 0.3207135056425191, "grad_norm": 2.2493841648101807, "learning_rate": 1.375506694509274e-05, "loss": 0.12403810024261475, "step": 2643 }, { "epoch": 0.32083485013954616, "grad_norm": 2.314582586288452, "learning_rate": 1.3752610244441715e-05, "loss": 0.17878662049770355, "step": 2644 }, { "epoch": 0.3209561946365732, "grad_norm": 2.2142958641052246, "learning_rate": 1.3750153543790689e-05, "loss": 0.23200297355651855, "step": 2645 }, { "epoch": 0.32107753913360026, "grad_norm": 2.109805107116699, "learning_rate": 1.3747696843139663e-05, "loss": 0.5256378650665283, "step": 2646 }, { "epoch": 0.3211988836306274, "grad_norm": 1.9347625970840454, "learning_rate": 1.374524014248864e-05, "loss": 0.18874908983707428, "step": 2647 }, { "epoch": 0.32132022812765443, "grad_norm": 1.4329655170440674, "learning_rate": 1.3742783441837613e-05, "loss": 0.13318997621536255, "step": 2648 }, { "epoch": 0.3214415726246815, "grad_norm": 1.443900465965271, "learning_rate": 1.3740326741186588e-05, "loss": 0.0861474797129631, "step": 2649 }, { "epoch": 0.32156291712170854, "grad_norm": 3.0313220024108887, "learning_rate": 1.3737870040535562e-05, "loss": 0.4402236342430115, "step": 2650 }, { "epoch": 0.3216842616187356, "grad_norm": 2.4113519191741943, "learning_rate": 1.3735413339884536e-05, "loss": 0.980143129825592, "step": 2651 }, { "epoch": 0.32180560611576264, "grad_norm": 2.563471794128418, "learning_rate": 1.373295663923351e-05, "loss": 0.1953703910112381, "step": 2652 }, { "epoch": 0.3219269506127897, "grad_norm": 0.014330465346574783, "learning_rate": 1.3730499938582485e-05, "loss": 0.0001082075759768486, "step": 2653 }, { "epoch": 0.32204829510981675, "grad_norm": 2.2561421394348145, "learning_rate": 1.3728043237931459e-05, "loss": 0.5709770917892456, "step": 2654 }, { "epoch": 0.3221696396068438, "grad_norm": 2.220679521560669, "learning_rate": 1.3725586537280433e-05, "loss": 0.21078574657440186, "step": 2655 }, { "epoch": 0.3222909841038709, "grad_norm": 3.595139741897583, "learning_rate": 1.3723129836629407e-05, "loss": 0.28871840238571167, "step": 2656 }, { "epoch": 0.32241232860089797, "grad_norm": 2.6884381771087646, "learning_rate": 1.3720673135978382e-05, "loss": 0.312512069940567, "step": 2657 }, { "epoch": 0.322533673097925, "grad_norm": 2.3358237743377686, "learning_rate": 1.3718216435327356e-05, "loss": 0.21305698156356812, "step": 2658 }, { "epoch": 0.3226550175949521, "grad_norm": 2.857361078262329, "learning_rate": 1.371575973467633e-05, "loss": 0.2686937749385834, "step": 2659 }, { "epoch": 0.32277636209197913, "grad_norm": 4.118078708648682, "learning_rate": 1.3713303034025305e-05, "loss": 0.22622153162956238, "step": 2660 }, { "epoch": 0.3228977065890062, "grad_norm": 1.957195520401001, "learning_rate": 1.3710846333374279e-05, "loss": 0.04407578334212303, "step": 2661 }, { "epoch": 0.32301905108603324, "grad_norm": 2.119107484817505, "learning_rate": 1.3708389632723253e-05, "loss": 0.23805676400661469, "step": 2662 }, { "epoch": 0.3231403955830603, "grad_norm": 1.9530389308929443, "learning_rate": 1.3705932932072227e-05, "loss": 0.48880478739738464, "step": 2663 }, { "epoch": 0.32326174008008735, "grad_norm": 1.2080408334732056, "learning_rate": 1.3703476231421202e-05, "loss": 0.11424320936203003, "step": 2664 }, { "epoch": 0.32338308457711445, "grad_norm": 1.7990244626998901, "learning_rate": 1.3701019530770176e-05, "loss": 0.06525573879480362, "step": 2665 }, { "epoch": 0.3235044290741415, "grad_norm": 2.054141044616699, "learning_rate": 1.369856283011915e-05, "loss": 0.365490585565567, "step": 2666 }, { "epoch": 0.32362577357116856, "grad_norm": 3.4649338722229004, "learning_rate": 1.3696106129468126e-05, "loss": 0.6358323097229004, "step": 2667 }, { "epoch": 0.3237471180681956, "grad_norm": 2.41003680229187, "learning_rate": 1.36936494288171e-05, "loss": 0.3890521824359894, "step": 2668 }, { "epoch": 0.32386846256522267, "grad_norm": 3.2424607276916504, "learning_rate": 1.3691192728166075e-05, "loss": 0.27454662322998047, "step": 2669 }, { "epoch": 0.3239898070622497, "grad_norm": 2.258336067199707, "learning_rate": 1.3688736027515049e-05, "loss": 0.1877439320087433, "step": 2670 }, { "epoch": 0.3241111515592768, "grad_norm": 2.6736388206481934, "learning_rate": 1.3686279326864023e-05, "loss": 0.4286113977432251, "step": 2671 }, { "epoch": 0.32423249605630383, "grad_norm": 1.740665078163147, "learning_rate": 1.3683822626212997e-05, "loss": 0.16415956616401672, "step": 2672 }, { "epoch": 0.3243538405533309, "grad_norm": 2.988381862640381, "learning_rate": 1.3681365925561972e-05, "loss": 0.17869721353054047, "step": 2673 }, { "epoch": 0.32447518505035794, "grad_norm": 1.8452136516571045, "learning_rate": 1.3678909224910946e-05, "loss": 0.338632196187973, "step": 2674 }, { "epoch": 0.32459652954738505, "grad_norm": 3.0976476669311523, "learning_rate": 1.367645252425992e-05, "loss": 0.12716446816921234, "step": 2675 }, { "epoch": 0.3247178740444121, "grad_norm": 2.4348232746124268, "learning_rate": 1.3673995823608894e-05, "loss": 0.038799576461315155, "step": 2676 }, { "epoch": 0.32483921854143916, "grad_norm": 6.233269691467285, "learning_rate": 1.3671539122957869e-05, "loss": 0.14202484488487244, "step": 2677 }, { "epoch": 0.3249605630384662, "grad_norm": 2.443070411682129, "learning_rate": 1.3669082422306843e-05, "loss": 0.11883317679166794, "step": 2678 }, { "epoch": 0.32508190753549326, "grad_norm": 1.7617214918136597, "learning_rate": 1.3666625721655817e-05, "loss": 0.16712503135204315, "step": 2679 }, { "epoch": 0.3252032520325203, "grad_norm": 1.9943039417266846, "learning_rate": 1.3664169021004791e-05, "loss": 0.3453546166419983, "step": 2680 }, { "epoch": 0.32532459652954737, "grad_norm": 2.232401132583618, "learning_rate": 1.3661712320353766e-05, "loss": 0.2506612241268158, "step": 2681 }, { "epoch": 0.3254459410265744, "grad_norm": 1.0962969064712524, "learning_rate": 1.365925561970274e-05, "loss": 0.06261173635721207, "step": 2682 }, { "epoch": 0.3255672855236015, "grad_norm": 2.8957111835479736, "learning_rate": 1.3656798919051714e-05, "loss": 0.2506040036678314, "step": 2683 }, { "epoch": 0.3256886300206286, "grad_norm": 3.8329215049743652, "learning_rate": 1.3654342218400688e-05, "loss": 0.2881605923175812, "step": 2684 }, { "epoch": 0.32580997451765564, "grad_norm": 1.4724440574645996, "learning_rate": 1.3651885517749663e-05, "loss": 0.03610328212380409, "step": 2685 }, { "epoch": 0.3259313190146827, "grad_norm": 2.247314929962158, "learning_rate": 1.3649428817098637e-05, "loss": 0.1618456095457077, "step": 2686 }, { "epoch": 0.32605266351170975, "grad_norm": 1.9265762567520142, "learning_rate": 1.3646972116447613e-05, "loss": 0.1461503505706787, "step": 2687 }, { "epoch": 0.3261740080087368, "grad_norm": 2.0526015758514404, "learning_rate": 1.3644515415796587e-05, "loss": 0.08651389181613922, "step": 2688 }, { "epoch": 0.32629535250576386, "grad_norm": 3.633225202560425, "learning_rate": 1.3642058715145561e-05, "loss": 0.20691147446632385, "step": 2689 }, { "epoch": 0.3264166970027909, "grad_norm": 2.8911032676696777, "learning_rate": 1.3639602014494536e-05, "loss": 0.28065308928489685, "step": 2690 }, { "epoch": 0.32653804149981797, "grad_norm": 2.2704734802246094, "learning_rate": 1.363714531384351e-05, "loss": 0.1923559606075287, "step": 2691 }, { "epoch": 0.326659385996845, "grad_norm": 2.778303384780884, "learning_rate": 1.3634688613192484e-05, "loss": 0.35757672786712646, "step": 2692 }, { "epoch": 0.32678073049387213, "grad_norm": 2.3999030590057373, "learning_rate": 1.3632231912541458e-05, "loss": 0.23560933768749237, "step": 2693 }, { "epoch": 0.3269020749908992, "grad_norm": 2.9183757305145264, "learning_rate": 1.3629775211890433e-05, "loss": 0.4588801860809326, "step": 2694 }, { "epoch": 0.32702341948792624, "grad_norm": 2.39132022857666, "learning_rate": 1.3627318511239407e-05, "loss": 0.13901375234127045, "step": 2695 }, { "epoch": 0.3271447639849533, "grad_norm": 1.504375696182251, "learning_rate": 1.3624861810588381e-05, "loss": 0.29446569085121155, "step": 2696 }, { "epoch": 0.32726610848198034, "grad_norm": 2.672706127166748, "learning_rate": 1.3622405109937355e-05, "loss": 0.1738225817680359, "step": 2697 }, { "epoch": 0.3273874529790074, "grad_norm": 2.682893991470337, "learning_rate": 1.361994840928633e-05, "loss": 0.42572760581970215, "step": 2698 }, { "epoch": 0.32750879747603445, "grad_norm": 2.149664878845215, "learning_rate": 1.3617491708635304e-05, "loss": 0.22784820199012756, "step": 2699 }, { "epoch": 0.3276301419730615, "grad_norm": 2.5921425819396973, "learning_rate": 1.3615035007984278e-05, "loss": 0.2059967815876007, "step": 2700 }, { "epoch": 0.32775148647008856, "grad_norm": 2.3024137020111084, "learning_rate": 1.3612578307333253e-05, "loss": 0.3200721740722656, "step": 2701 }, { "epoch": 0.3278728309671156, "grad_norm": 2.3466262817382812, "learning_rate": 1.3610121606682227e-05, "loss": 0.4672166109085083, "step": 2702 }, { "epoch": 0.3279941754641427, "grad_norm": 2.3088572025299072, "learning_rate": 1.3607664906031201e-05, "loss": 0.3052404224872589, "step": 2703 }, { "epoch": 0.3281155199611698, "grad_norm": 2.4651174545288086, "learning_rate": 1.3605208205380175e-05, "loss": 0.2764608860015869, "step": 2704 }, { "epoch": 0.32823686445819683, "grad_norm": 2.1033411026000977, "learning_rate": 1.360275150472915e-05, "loss": 0.15545253455638885, "step": 2705 }, { "epoch": 0.3283582089552239, "grad_norm": 1.5688694715499878, "learning_rate": 1.3600294804078124e-05, "loss": 0.22580206394195557, "step": 2706 }, { "epoch": 0.32847955345225094, "grad_norm": 1.5795338153839111, "learning_rate": 1.35978381034271e-05, "loss": 0.05386420339345932, "step": 2707 }, { "epoch": 0.328600897949278, "grad_norm": 2.356640338897705, "learning_rate": 1.3595381402776074e-05, "loss": 0.5281941294670105, "step": 2708 }, { "epoch": 0.32872224244630505, "grad_norm": 2.8616931438446045, "learning_rate": 1.3592924702125048e-05, "loss": 0.317641943693161, "step": 2709 }, { "epoch": 0.3288435869433321, "grad_norm": 3.2473084926605225, "learning_rate": 1.3590468001474023e-05, "loss": 0.32571110129356384, "step": 2710 }, { "epoch": 0.32896493144035915, "grad_norm": 1.7362678050994873, "learning_rate": 1.3588011300822997e-05, "loss": 0.08650978654623032, "step": 2711 }, { "epoch": 0.32908627593738626, "grad_norm": 2.0047483444213867, "learning_rate": 1.3585554600171971e-05, "loss": 0.2949868142604828, "step": 2712 }, { "epoch": 0.3292076204344133, "grad_norm": 1.2510021924972534, "learning_rate": 1.3583097899520945e-05, "loss": 0.05454785004258156, "step": 2713 }, { "epoch": 0.32932896493144037, "grad_norm": 2.209566831588745, "learning_rate": 1.358064119886992e-05, "loss": 0.23208726942539215, "step": 2714 }, { "epoch": 0.3294503094284674, "grad_norm": 1.2048362493515015, "learning_rate": 1.3578184498218894e-05, "loss": 0.013374043628573418, "step": 2715 }, { "epoch": 0.3295716539254945, "grad_norm": 2.5049784183502197, "learning_rate": 1.3575727797567868e-05, "loss": 0.5582481622695923, "step": 2716 }, { "epoch": 0.32969299842252153, "grad_norm": 3.1326956748962402, "learning_rate": 1.3573271096916842e-05, "loss": 0.2733760476112366, "step": 2717 }, { "epoch": 0.3298143429195486, "grad_norm": 1.2727867364883423, "learning_rate": 1.3570814396265817e-05, "loss": 0.09801354259252548, "step": 2718 }, { "epoch": 0.32993568741657564, "grad_norm": 4.572424411773682, "learning_rate": 1.356835769561479e-05, "loss": 0.5125389695167542, "step": 2719 }, { "epoch": 0.3300570319136027, "grad_norm": 3.2168471813201904, "learning_rate": 1.3565900994963765e-05, "loss": 0.7108581066131592, "step": 2720 }, { "epoch": 0.3301783764106298, "grad_norm": 2.22446608543396, "learning_rate": 1.356344429431274e-05, "loss": 0.38087382912635803, "step": 2721 }, { "epoch": 0.33029972090765686, "grad_norm": 1.1339706182479858, "learning_rate": 1.3560987593661714e-05, "loss": 0.2368617057800293, "step": 2722 }, { "epoch": 0.3304210654046839, "grad_norm": 1.8774468898773193, "learning_rate": 1.3558530893010688e-05, "loss": 0.298879474401474, "step": 2723 }, { "epoch": 0.33054240990171097, "grad_norm": 2.565373420715332, "learning_rate": 1.3556074192359662e-05, "loss": 0.3679310977458954, "step": 2724 }, { "epoch": 0.330663754398738, "grad_norm": 2.55824613571167, "learning_rate": 1.3553617491708636e-05, "loss": 0.40981119871139526, "step": 2725 }, { "epoch": 0.3307850988957651, "grad_norm": 1.0780901908874512, "learning_rate": 1.3551160791057612e-05, "loss": 0.09795771539211273, "step": 2726 }, { "epoch": 0.3309064433927921, "grad_norm": 2.007880210876465, "learning_rate": 1.3548704090406587e-05, "loss": 0.12057384848594666, "step": 2727 }, { "epoch": 0.3310277878898192, "grad_norm": 2.1274709701538086, "learning_rate": 1.3546247389755561e-05, "loss": 0.25686660408973694, "step": 2728 }, { "epoch": 0.33114913238684623, "grad_norm": 4.40798282623291, "learning_rate": 1.3543790689104535e-05, "loss": 0.16807983815670013, "step": 2729 }, { "epoch": 0.33127047688387334, "grad_norm": 2.4525368213653564, "learning_rate": 1.354133398845351e-05, "loss": 0.5694208145141602, "step": 2730 }, { "epoch": 0.3313918213809004, "grad_norm": 1.2971670627593994, "learning_rate": 1.3538877287802484e-05, "loss": 0.044074006378650665, "step": 2731 }, { "epoch": 0.33151316587792745, "grad_norm": 4.044321060180664, "learning_rate": 1.3536420587151458e-05, "loss": 0.32732656598091125, "step": 2732 }, { "epoch": 0.3316345103749545, "grad_norm": 3.670825242996216, "learning_rate": 1.3533963886500432e-05, "loss": 0.9322635531425476, "step": 2733 }, { "epoch": 0.33175585487198156, "grad_norm": 1.3728653192520142, "learning_rate": 1.3531507185849406e-05, "loss": 0.01705666445195675, "step": 2734 }, { "epoch": 0.3318771993690086, "grad_norm": 0.8902427554130554, "learning_rate": 1.352905048519838e-05, "loss": 0.03738236054778099, "step": 2735 }, { "epoch": 0.33199854386603567, "grad_norm": 1.3336197137832642, "learning_rate": 1.3526593784547355e-05, "loss": 0.13155893981456757, "step": 2736 }, { "epoch": 0.3321198883630627, "grad_norm": 1.853929877281189, "learning_rate": 1.352413708389633e-05, "loss": 0.08310995995998383, "step": 2737 }, { "epoch": 0.3322412328600898, "grad_norm": 2.526223659515381, "learning_rate": 1.3521680383245303e-05, "loss": 0.28907132148742676, "step": 2738 }, { "epoch": 0.33236257735711683, "grad_norm": 3.0757598876953125, "learning_rate": 1.3519223682594278e-05, "loss": 0.3350384831428528, "step": 2739 }, { "epoch": 0.33248392185414394, "grad_norm": 2.2847514152526855, "learning_rate": 1.3516766981943252e-05, "loss": 0.19784024357795715, "step": 2740 }, { "epoch": 0.332605266351171, "grad_norm": 4.3813042640686035, "learning_rate": 1.3514310281292226e-05, "loss": 0.29084765911102295, "step": 2741 }, { "epoch": 0.33272661084819805, "grad_norm": 1.7603188753128052, "learning_rate": 1.35118535806412e-05, "loss": 0.261428564786911, "step": 2742 }, { "epoch": 0.3328479553452251, "grad_norm": 1.2502996921539307, "learning_rate": 1.3509396879990175e-05, "loss": 0.07206853479146957, "step": 2743 }, { "epoch": 0.33296929984225215, "grad_norm": 2.522667169570923, "learning_rate": 1.3506940179339149e-05, "loss": 0.3589305877685547, "step": 2744 }, { "epoch": 0.3330906443392792, "grad_norm": 2.1586971282958984, "learning_rate": 1.3504483478688123e-05, "loss": 0.17322710156440735, "step": 2745 }, { "epoch": 0.33321198883630626, "grad_norm": 1.3340508937835693, "learning_rate": 1.35020267780371e-05, "loss": 0.08164606988430023, "step": 2746 }, { "epoch": 0.3333333333333333, "grad_norm": 2.6283721923828125, "learning_rate": 1.349957007738607e-05, "loss": 0.24979859590530396, "step": 2747 }, { "epoch": 0.33345467783036037, "grad_norm": 3.0438013076782227, "learning_rate": 1.3497113376735044e-05, "loss": 0.17684221267700195, "step": 2748 }, { "epoch": 0.3335760223273875, "grad_norm": 2.437587022781372, "learning_rate": 1.3494656676084019e-05, "loss": 0.06924796104431152, "step": 2749 }, { "epoch": 0.33369736682441453, "grad_norm": 3.365072011947632, "learning_rate": 1.3492199975432993e-05, "loss": 0.19792334735393524, "step": 2750 }, { "epoch": 0.3338187113214416, "grad_norm": 3.1566526889801025, "learning_rate": 1.3489743274781967e-05, "loss": 0.30358290672302246, "step": 2751 }, { "epoch": 0.33394005581846864, "grad_norm": 1.8867077827453613, "learning_rate": 1.3487286574130943e-05, "loss": 0.4433608949184418, "step": 2752 }, { "epoch": 0.3340614003154957, "grad_norm": 1.9975477457046509, "learning_rate": 1.3484829873479917e-05, "loss": 0.03996870666742325, "step": 2753 }, { "epoch": 0.33418274481252275, "grad_norm": 2.1786231994628906, "learning_rate": 1.3482373172828892e-05, "loss": 0.4242089092731476, "step": 2754 }, { "epoch": 0.3343040893095498, "grad_norm": 3.0105373859405518, "learning_rate": 1.3479916472177866e-05, "loss": 0.14656175673007965, "step": 2755 }, { "epoch": 0.33442543380657686, "grad_norm": 2.8930420875549316, "learning_rate": 1.347745977152684e-05, "loss": 0.4321075677871704, "step": 2756 }, { "epoch": 0.3345467783036039, "grad_norm": 3.0688719749450684, "learning_rate": 1.3475003070875814e-05, "loss": 0.4075854420661926, "step": 2757 }, { "epoch": 0.334668122800631, "grad_norm": 2.5898208618164062, "learning_rate": 1.3472546370224789e-05, "loss": 0.2931281626224518, "step": 2758 }, { "epoch": 0.33478946729765807, "grad_norm": 3.415714740753174, "learning_rate": 1.3470089669573763e-05, "loss": 0.3117362856864929, "step": 2759 }, { "epoch": 0.3349108117946851, "grad_norm": 2.4045135974884033, "learning_rate": 1.3467632968922737e-05, "loss": 0.6036303043365479, "step": 2760 }, { "epoch": 0.3350321562917122, "grad_norm": 3.6322500705718994, "learning_rate": 1.3465176268271711e-05, "loss": 0.2127571851015091, "step": 2761 }, { "epoch": 0.33515350078873923, "grad_norm": 1.8815101385116577, "learning_rate": 1.3462719567620686e-05, "loss": 0.33295542001724243, "step": 2762 }, { "epoch": 0.3352748452857663, "grad_norm": 3.2687511444091797, "learning_rate": 1.346026286696966e-05, "loss": 0.22823671996593475, "step": 2763 }, { "epoch": 0.33539618978279334, "grad_norm": 2.9881391525268555, "learning_rate": 1.3457806166318634e-05, "loss": 0.20742876827716827, "step": 2764 }, { "epoch": 0.3355175342798204, "grad_norm": 3.587043523788452, "learning_rate": 1.3455349465667608e-05, "loss": 0.26324501633644104, "step": 2765 }, { "epoch": 0.33563887877684745, "grad_norm": 2.8624846935272217, "learning_rate": 1.3452892765016583e-05, "loss": 0.31277331709861755, "step": 2766 }, { "epoch": 0.3357602232738745, "grad_norm": 2.4092533588409424, "learning_rate": 1.3450436064365557e-05, "loss": 0.3587488532066345, "step": 2767 }, { "epoch": 0.3358815677709016, "grad_norm": 2.2200207710266113, "learning_rate": 1.3447979363714531e-05, "loss": 0.3722842335700989, "step": 2768 }, { "epoch": 0.33600291226792867, "grad_norm": 2.3017053604125977, "learning_rate": 1.3445522663063505e-05, "loss": 0.4437088966369629, "step": 2769 }, { "epoch": 0.3361242567649557, "grad_norm": 2.613037586212158, "learning_rate": 1.344306596241248e-05, "loss": 0.44210320711135864, "step": 2770 }, { "epoch": 0.3362456012619828, "grad_norm": 1.9856221675872803, "learning_rate": 1.3440609261761454e-05, "loss": 0.5379482507705688, "step": 2771 }, { "epoch": 0.3363669457590098, "grad_norm": 4.091605186462402, "learning_rate": 1.343815256111043e-05, "loss": 0.26920321583747864, "step": 2772 }, { "epoch": 0.3364882902560369, "grad_norm": 2.7099874019622803, "learning_rate": 1.3435695860459404e-05, "loss": 0.4916417598724365, "step": 2773 }, { "epoch": 0.33660963475306394, "grad_norm": 3.850006580352783, "learning_rate": 1.3433239159808378e-05, "loss": 0.44688522815704346, "step": 2774 }, { "epoch": 0.336730979250091, "grad_norm": 2.5527663230895996, "learning_rate": 1.3430782459157353e-05, "loss": 0.5459522604942322, "step": 2775 }, { "epoch": 0.33685232374711804, "grad_norm": 2.5261878967285156, "learning_rate": 1.3428325758506327e-05, "loss": 0.29146242141723633, "step": 2776 }, { "epoch": 0.33697366824414515, "grad_norm": 1.816943645477295, "learning_rate": 1.3425869057855301e-05, "loss": 0.24747847020626068, "step": 2777 }, { "epoch": 0.3370950127411722, "grad_norm": 3.2097055912017822, "learning_rate": 1.3423412357204275e-05, "loss": 0.2549365758895874, "step": 2778 }, { "epoch": 0.33721635723819926, "grad_norm": 1.550027847290039, "learning_rate": 1.342095565655325e-05, "loss": 0.12433695793151855, "step": 2779 }, { "epoch": 0.3373377017352263, "grad_norm": 1.417708158493042, "learning_rate": 1.3418498955902224e-05, "loss": 0.05040618032217026, "step": 2780 }, { "epoch": 0.33745904623225337, "grad_norm": 2.225236415863037, "learning_rate": 1.3416042255251198e-05, "loss": 0.1814093440771103, "step": 2781 }, { "epoch": 0.3375803907292804, "grad_norm": 1.977687120437622, "learning_rate": 1.3413585554600172e-05, "loss": 0.2293473184108734, "step": 2782 }, { "epoch": 0.3377017352263075, "grad_norm": 1.7540130615234375, "learning_rate": 1.3411128853949147e-05, "loss": 0.18827372789382935, "step": 2783 }, { "epoch": 0.33782307972333453, "grad_norm": 2.545426845550537, "learning_rate": 1.3408672153298121e-05, "loss": 0.3193444609642029, "step": 2784 }, { "epoch": 0.3379444242203616, "grad_norm": 1.901411533355713, "learning_rate": 1.3406215452647095e-05, "loss": 0.048846084624528885, "step": 2785 }, { "epoch": 0.3380657687173887, "grad_norm": 1.747877597808838, "learning_rate": 1.340375875199607e-05, "loss": 0.4019920229911804, "step": 2786 }, { "epoch": 0.33818711321441575, "grad_norm": 2.040862798690796, "learning_rate": 1.3401302051345044e-05, "loss": 0.3238888084888458, "step": 2787 }, { "epoch": 0.3383084577114428, "grad_norm": 1.6558022499084473, "learning_rate": 1.3398845350694018e-05, "loss": 0.1995115578174591, "step": 2788 }, { "epoch": 0.33842980220846985, "grad_norm": 2.0918025970458984, "learning_rate": 1.3396388650042992e-05, "loss": 0.2865673303604126, "step": 2789 }, { "epoch": 0.3385511467054969, "grad_norm": 2.004918098449707, "learning_rate": 1.3393931949391967e-05, "loss": 0.19865933060646057, "step": 2790 }, { "epoch": 0.33867249120252396, "grad_norm": 2.260880947113037, "learning_rate": 1.339147524874094e-05, "loss": 0.516450047492981, "step": 2791 }, { "epoch": 0.338793835699551, "grad_norm": 1.7905795574188232, "learning_rate": 1.3389018548089917e-05, "loss": 0.17964567244052887, "step": 2792 }, { "epoch": 0.33891518019657807, "grad_norm": 1.4596978425979614, "learning_rate": 1.3386561847438891e-05, "loss": 0.2544354200363159, "step": 2793 }, { "epoch": 0.3390365246936051, "grad_norm": 3.439584732055664, "learning_rate": 1.3384105146787865e-05, "loss": 0.2773582637310028, "step": 2794 }, { "epoch": 0.3391578691906322, "grad_norm": 0.0027794241905212402, "learning_rate": 1.338164844613684e-05, "loss": 4.521989467320964e-05, "step": 2795 }, { "epoch": 0.3392792136876593, "grad_norm": 1.4505723714828491, "learning_rate": 1.3379191745485814e-05, "loss": 0.07739366590976715, "step": 2796 }, { "epoch": 0.33940055818468634, "grad_norm": 2.9833128452301025, "learning_rate": 1.3376735044834788e-05, "loss": 0.4213433563709259, "step": 2797 }, { "epoch": 0.3395219026817134, "grad_norm": 2.603236436843872, "learning_rate": 1.3374278344183762e-05, "loss": 0.4406318962574005, "step": 2798 }, { "epoch": 0.33964324717874045, "grad_norm": 1.4903669357299805, "learning_rate": 1.3371821643532737e-05, "loss": 0.07684893161058426, "step": 2799 }, { "epoch": 0.3397645916757675, "grad_norm": 1.9389923810958862, "learning_rate": 1.336936494288171e-05, "loss": 0.2516050338745117, "step": 2800 }, { "epoch": 0.33988593617279456, "grad_norm": 2.0152909755706787, "learning_rate": 1.3366908242230685e-05, "loss": 0.15068350732326508, "step": 2801 }, { "epoch": 0.3400072806698216, "grad_norm": 2.715830087661743, "learning_rate": 1.336445154157966e-05, "loss": 0.21225924789905548, "step": 2802 }, { "epoch": 0.34012862516684866, "grad_norm": 1.9526959657669067, "learning_rate": 1.3361994840928634e-05, "loss": 0.1875372678041458, "step": 2803 }, { "epoch": 0.3402499696638757, "grad_norm": 2.714557409286499, "learning_rate": 1.3359538140277608e-05, "loss": 0.11924070864915848, "step": 2804 }, { "epoch": 0.3403713141609028, "grad_norm": 2.0488007068634033, "learning_rate": 1.3357081439626582e-05, "loss": 0.17785635590553284, "step": 2805 }, { "epoch": 0.3404926586579299, "grad_norm": 3.2751269340515137, "learning_rate": 1.3354624738975556e-05, "loss": 0.3669911026954651, "step": 2806 }, { "epoch": 0.34061400315495693, "grad_norm": 2.7896599769592285, "learning_rate": 1.335216803832453e-05, "loss": 0.5928270816802979, "step": 2807 }, { "epoch": 0.340735347651984, "grad_norm": 2.9548301696777344, "learning_rate": 1.3349711337673505e-05, "loss": 0.1790076196193695, "step": 2808 }, { "epoch": 0.34085669214901104, "grad_norm": 2.8890271186828613, "learning_rate": 1.334725463702248e-05, "loss": 0.24262064695358276, "step": 2809 }, { "epoch": 0.3409780366460381, "grad_norm": 2.664588212966919, "learning_rate": 1.3344797936371453e-05, "loss": 0.2634744346141815, "step": 2810 }, { "epoch": 0.34109938114306515, "grad_norm": 2.912458658218384, "learning_rate": 1.3342341235720428e-05, "loss": 0.16553360223770142, "step": 2811 }, { "epoch": 0.3412207256400922, "grad_norm": 1.9930040836334229, "learning_rate": 1.3339884535069404e-05, "loss": 0.22839638590812683, "step": 2812 }, { "epoch": 0.34134207013711926, "grad_norm": 3.27184796333313, "learning_rate": 1.3337427834418378e-05, "loss": 0.17788903415203094, "step": 2813 }, { "epoch": 0.34146341463414637, "grad_norm": 2.9638190269470215, "learning_rate": 1.3334971133767352e-05, "loss": 0.39199915528297424, "step": 2814 }, { "epoch": 0.3415847591311734, "grad_norm": 1.9852628707885742, "learning_rate": 1.3332514433116326e-05, "loss": 0.09361261874437332, "step": 2815 }, { "epoch": 0.3417061036282005, "grad_norm": 2.0682806968688965, "learning_rate": 1.33300577324653e-05, "loss": 0.6686457991600037, "step": 2816 }, { "epoch": 0.34182744812522753, "grad_norm": 2.4491777420043945, "learning_rate": 1.3327601031814275e-05, "loss": 0.22851845622062683, "step": 2817 }, { "epoch": 0.3419487926222546, "grad_norm": 2.942040205001831, "learning_rate": 1.332514433116325e-05, "loss": 0.33597275614738464, "step": 2818 }, { "epoch": 0.34207013711928164, "grad_norm": 3.0147740840911865, "learning_rate": 1.3322687630512223e-05, "loss": 0.2704648971557617, "step": 2819 }, { "epoch": 0.3421914816163087, "grad_norm": 2.686281442642212, "learning_rate": 1.3320230929861198e-05, "loss": 0.18692731857299805, "step": 2820 }, { "epoch": 0.34231282611333574, "grad_norm": 0.9338500499725342, "learning_rate": 1.3317774229210172e-05, "loss": 0.026601918041706085, "step": 2821 }, { "epoch": 0.3424341706103628, "grad_norm": 2.4016454219818115, "learning_rate": 1.3315317528559146e-05, "loss": 0.15437975525856018, "step": 2822 }, { "epoch": 0.3425555151073899, "grad_norm": 1.8265492916107178, "learning_rate": 1.331286082790812e-05, "loss": 0.5835421681404114, "step": 2823 }, { "epoch": 0.34267685960441696, "grad_norm": 1.672721266746521, "learning_rate": 1.3310404127257095e-05, "loss": 0.20360293984413147, "step": 2824 }, { "epoch": 0.342798204101444, "grad_norm": 2.058891534805298, "learning_rate": 1.3307947426606069e-05, "loss": 0.2055591642856598, "step": 2825 }, { "epoch": 0.34291954859847107, "grad_norm": 1.4964263439178467, "learning_rate": 1.3305490725955043e-05, "loss": 0.05043473467230797, "step": 2826 }, { "epoch": 0.3430408930954981, "grad_norm": 1.9713994264602661, "learning_rate": 1.3303034025304018e-05, "loss": 0.250923216342926, "step": 2827 }, { "epoch": 0.3431622375925252, "grad_norm": 2.310774564743042, "learning_rate": 1.3300577324652992e-05, "loss": 0.35582536458969116, "step": 2828 }, { "epoch": 0.34328358208955223, "grad_norm": 3.157845973968506, "learning_rate": 1.3298120624001966e-05, "loss": 0.4011169672012329, "step": 2829 }, { "epoch": 0.3434049265865793, "grad_norm": 2.0145437717437744, "learning_rate": 1.329566392335094e-05, "loss": 0.13335394859313965, "step": 2830 }, { "epoch": 0.34352627108360634, "grad_norm": 1.5914976596832275, "learning_rate": 1.3293207222699915e-05, "loss": 0.12164802849292755, "step": 2831 }, { "epoch": 0.3436476155806334, "grad_norm": 2.443269729614258, "learning_rate": 1.329075052204889e-05, "loss": 0.29157182574272156, "step": 2832 }, { "epoch": 0.3437689600776605, "grad_norm": 2.2892653942108154, "learning_rate": 1.3288293821397865e-05, "loss": 0.23905837535858154, "step": 2833 }, { "epoch": 0.34389030457468756, "grad_norm": 1.754455804824829, "learning_rate": 1.3285837120746839e-05, "loss": 0.30073070526123047, "step": 2834 }, { "epoch": 0.3440116490717146, "grad_norm": 1.9648005962371826, "learning_rate": 1.3283380420095813e-05, "loss": 0.18791383504867554, "step": 2835 }, { "epoch": 0.34413299356874166, "grad_norm": 2.0399529933929443, "learning_rate": 1.3280923719444788e-05, "loss": 0.21662276983261108, "step": 2836 }, { "epoch": 0.3442543380657687, "grad_norm": 2.2935118675231934, "learning_rate": 1.3278467018793762e-05, "loss": 0.4054362177848816, "step": 2837 }, { "epoch": 0.34437568256279577, "grad_norm": 1.9891663789749146, "learning_rate": 1.3276010318142736e-05, "loss": 0.2039833813905716, "step": 2838 }, { "epoch": 0.3444970270598228, "grad_norm": 2.4605650901794434, "learning_rate": 1.327355361749171e-05, "loss": 0.6762115359306335, "step": 2839 }, { "epoch": 0.3446183715568499, "grad_norm": 2.63932728767395, "learning_rate": 1.3271096916840685e-05, "loss": 0.6073483228683472, "step": 2840 }, { "epoch": 0.34473971605387693, "grad_norm": 3.1658761501312256, "learning_rate": 1.3268640216189659e-05, "loss": 0.31008243560791016, "step": 2841 }, { "epoch": 0.34486106055090404, "grad_norm": 1.9690529108047485, "learning_rate": 1.3266183515538633e-05, "loss": 0.20550884306430817, "step": 2842 }, { "epoch": 0.3449824050479311, "grad_norm": 2.4023427963256836, "learning_rate": 1.3263726814887607e-05, "loss": 0.16229194402694702, "step": 2843 }, { "epoch": 0.34510374954495815, "grad_norm": 2.4014225006103516, "learning_rate": 1.3261270114236582e-05, "loss": 0.5345857739448547, "step": 2844 }, { "epoch": 0.3452250940419852, "grad_norm": 3.2167434692382812, "learning_rate": 1.3258813413585556e-05, "loss": 0.5057387351989746, "step": 2845 }, { "epoch": 0.34534643853901226, "grad_norm": 2.031100034713745, "learning_rate": 1.325635671293453e-05, "loss": 0.2413824498653412, "step": 2846 }, { "epoch": 0.3454677830360393, "grad_norm": 1.7678101062774658, "learning_rate": 1.3253900012283504e-05, "loss": 0.1743219941854477, "step": 2847 }, { "epoch": 0.34558912753306636, "grad_norm": 2.502016067504883, "learning_rate": 1.3251443311632479e-05, "loss": 0.1482171267271042, "step": 2848 }, { "epoch": 0.3457104720300934, "grad_norm": 2.780811071395874, "learning_rate": 1.3248986610981453e-05, "loss": 0.5652299523353577, "step": 2849 }, { "epoch": 0.3458318165271205, "grad_norm": 2.4874913692474365, "learning_rate": 1.3246529910330427e-05, "loss": 0.36608320474624634, "step": 2850 }, { "epoch": 0.3459531610241476, "grad_norm": 2.2988367080688477, "learning_rate": 1.3244073209679401e-05, "loss": 0.33672210574150085, "step": 2851 }, { "epoch": 0.34607450552117464, "grad_norm": 2.0662240982055664, "learning_rate": 1.3241616509028377e-05, "loss": 0.2632628083229065, "step": 2852 }, { "epoch": 0.3461958500182017, "grad_norm": 2.669175624847412, "learning_rate": 1.3239159808377352e-05, "loss": 0.2074550986289978, "step": 2853 }, { "epoch": 0.34631719451522874, "grad_norm": 1.6437294483184814, "learning_rate": 1.3236703107726326e-05, "loss": 0.15819406509399414, "step": 2854 }, { "epoch": 0.3464385390122558, "grad_norm": 3.062838554382324, "learning_rate": 1.32342464070753e-05, "loss": 0.11034675687551498, "step": 2855 }, { "epoch": 0.34655988350928285, "grad_norm": 4.433793067932129, "learning_rate": 1.3231789706424274e-05, "loss": 0.4895234704017639, "step": 2856 }, { "epoch": 0.3466812280063099, "grad_norm": 2.2202999591827393, "learning_rate": 1.3229333005773249e-05, "loss": 0.1974233090877533, "step": 2857 }, { "epoch": 0.34680257250333696, "grad_norm": 2.6762070655822754, "learning_rate": 1.3226876305122223e-05, "loss": 0.3637422025203705, "step": 2858 }, { "epoch": 0.346923917000364, "grad_norm": 3.210794687271118, "learning_rate": 1.3224419604471197e-05, "loss": 0.34235748648643494, "step": 2859 }, { "epoch": 0.34704526149739107, "grad_norm": 2.9384000301361084, "learning_rate": 1.3221962903820171e-05, "loss": 0.5411485433578491, "step": 2860 }, { "epoch": 0.3471666059944182, "grad_norm": 2.98555064201355, "learning_rate": 1.3219506203169146e-05, "loss": 0.23736192286014557, "step": 2861 }, { "epoch": 0.34728795049144523, "grad_norm": 2.6572117805480957, "learning_rate": 1.321704950251812e-05, "loss": 0.4607403874397278, "step": 2862 }, { "epoch": 0.3474092949884723, "grad_norm": 4.189919948577881, "learning_rate": 1.3214592801867094e-05, "loss": 0.19775786995887756, "step": 2863 }, { "epoch": 0.34753063948549934, "grad_norm": 1.5939427614212036, "learning_rate": 1.3212136101216068e-05, "loss": 0.09462389349937439, "step": 2864 }, { "epoch": 0.3476519839825264, "grad_norm": 3.5034804344177246, "learning_rate": 1.3209679400565043e-05, "loss": 0.24761658906936646, "step": 2865 }, { "epoch": 0.34777332847955345, "grad_norm": 3.050703287124634, "learning_rate": 1.3207222699914017e-05, "loss": 0.49068590998649597, "step": 2866 }, { "epoch": 0.3478946729765805, "grad_norm": 1.9454255104064941, "learning_rate": 1.3204765999262991e-05, "loss": 0.1986999660730362, "step": 2867 }, { "epoch": 0.34801601747360755, "grad_norm": 2.60258150100708, "learning_rate": 1.3202309298611965e-05, "loss": 0.35261183977127075, "step": 2868 }, { "epoch": 0.3481373619706346, "grad_norm": 1.4820805788040161, "learning_rate": 1.319985259796094e-05, "loss": 0.09027974307537079, "step": 2869 }, { "epoch": 0.3482587064676617, "grad_norm": 2.0941667556762695, "learning_rate": 1.3197395897309914e-05, "loss": 0.2873879671096802, "step": 2870 }, { "epoch": 0.34838005096468877, "grad_norm": 1.7293140888214111, "learning_rate": 1.319493919665889e-05, "loss": 0.18081168830394745, "step": 2871 }, { "epoch": 0.3485013954617158, "grad_norm": 4.354601860046387, "learning_rate": 1.3192482496007864e-05, "loss": 0.4792991280555725, "step": 2872 }, { "epoch": 0.3486227399587429, "grad_norm": 2.6210784912109375, "learning_rate": 1.3190025795356838e-05, "loss": 0.1830996870994568, "step": 2873 }, { "epoch": 0.34874408445576993, "grad_norm": 4.812962055206299, "learning_rate": 1.3187569094705813e-05, "loss": 0.7624297142028809, "step": 2874 }, { "epoch": 0.348865428952797, "grad_norm": 2.0475847721099854, "learning_rate": 1.3185112394054787e-05, "loss": 0.2510002851486206, "step": 2875 }, { "epoch": 0.34898677344982404, "grad_norm": 4.413797378540039, "learning_rate": 1.3182655693403761e-05, "loss": 0.516799807548523, "step": 2876 }, { "epoch": 0.3491081179468511, "grad_norm": 1.7577311992645264, "learning_rate": 1.3180198992752735e-05, "loss": 0.12230469286441803, "step": 2877 }, { "epoch": 0.34922946244387815, "grad_norm": 1.4777488708496094, "learning_rate": 1.317774229210171e-05, "loss": 0.33226674795150757, "step": 2878 }, { "epoch": 0.34935080694090526, "grad_norm": 3.4159419536590576, "learning_rate": 1.3175285591450684e-05, "loss": 0.49763816595077515, "step": 2879 }, { "epoch": 0.3494721514379323, "grad_norm": 2.3247647285461426, "learning_rate": 1.3172828890799658e-05, "loss": 0.11598958820104599, "step": 2880 }, { "epoch": 0.34959349593495936, "grad_norm": 2.48176908493042, "learning_rate": 1.3170372190148633e-05, "loss": 0.6656193733215332, "step": 2881 }, { "epoch": 0.3497148404319864, "grad_norm": 3.7606394290924072, "learning_rate": 1.3167915489497607e-05, "loss": 0.6240760087966919, "step": 2882 }, { "epoch": 0.34983618492901347, "grad_norm": 2.8003673553466797, "learning_rate": 1.316545878884658e-05, "loss": 0.2076607495546341, "step": 2883 }, { "epoch": 0.3499575294260405, "grad_norm": 2.08559250831604, "learning_rate": 1.3163002088195554e-05, "loss": 0.24259454011917114, "step": 2884 }, { "epoch": 0.3500788739230676, "grad_norm": 0.9130677580833435, "learning_rate": 1.3160545387544528e-05, "loss": 0.03686123341321945, "step": 2885 }, { "epoch": 0.35020021842009463, "grad_norm": 2.195913314819336, "learning_rate": 1.3158088686893502e-05, "loss": 0.34596142172813416, "step": 2886 }, { "epoch": 0.3503215629171217, "grad_norm": 2.618661642074585, "learning_rate": 1.3155631986242476e-05, "loss": 0.3908511996269226, "step": 2887 }, { "epoch": 0.35044290741414874, "grad_norm": 3.42486834526062, "learning_rate": 1.315317528559145e-05, "loss": 0.11605776101350784, "step": 2888 }, { "epoch": 0.35056425191117585, "grad_norm": 2.014707326889038, "learning_rate": 1.3150718584940425e-05, "loss": 0.37118685245513916, "step": 2889 }, { "epoch": 0.3506855964082029, "grad_norm": 1.9236880540847778, "learning_rate": 1.31482618842894e-05, "loss": 0.4760400950908661, "step": 2890 }, { "epoch": 0.35080694090522996, "grad_norm": 2.5255494117736816, "learning_rate": 1.3145805183638373e-05, "loss": 0.2357504814863205, "step": 2891 }, { "epoch": 0.350928285402257, "grad_norm": 1.822582483291626, "learning_rate": 1.3143348482987348e-05, "loss": 0.0703888088464737, "step": 2892 }, { "epoch": 0.35104962989928407, "grad_norm": 2.847137451171875, "learning_rate": 1.3140891782336322e-05, "loss": 0.2367681860923767, "step": 2893 }, { "epoch": 0.3511709743963111, "grad_norm": 3.9951229095458984, "learning_rate": 1.3138435081685296e-05, "loss": 0.1303313970565796, "step": 2894 }, { "epoch": 0.3512923188933382, "grad_norm": 1.6860606670379639, "learning_rate": 1.313597838103427e-05, "loss": 0.18127286434173584, "step": 2895 }, { "epoch": 0.3514136633903652, "grad_norm": 1.8364192247390747, "learning_rate": 1.3133521680383245e-05, "loss": 0.12500934302806854, "step": 2896 }, { "epoch": 0.3515350078873923, "grad_norm": 2.4998505115509033, "learning_rate": 1.313106497973222e-05, "loss": 0.3593692481517792, "step": 2897 }, { "epoch": 0.3516563523844194, "grad_norm": 2.5611114501953125, "learning_rate": 1.3128608279081195e-05, "loss": 0.4094930589199066, "step": 2898 }, { "epoch": 0.35177769688144644, "grad_norm": 3.476863384246826, "learning_rate": 1.312615157843017e-05, "loss": 0.23066210746765137, "step": 2899 }, { "epoch": 0.3518990413784735, "grad_norm": 2.578794479370117, "learning_rate": 1.3123694877779143e-05, "loss": 0.05409723147749901, "step": 2900 }, { "epoch": 0.35202038587550055, "grad_norm": 2.6811587810516357, "learning_rate": 1.3121238177128118e-05, "loss": 0.2996734380722046, "step": 2901 }, { "epoch": 0.3521417303725276, "grad_norm": 2.539841413497925, "learning_rate": 1.3118781476477092e-05, "loss": 0.2994079887866974, "step": 2902 }, { "epoch": 0.35226307486955466, "grad_norm": 2.050487756729126, "learning_rate": 1.3116324775826066e-05, "loss": 0.46317172050476074, "step": 2903 }, { "epoch": 0.3523844193665817, "grad_norm": 2.817878484725952, "learning_rate": 1.311386807517504e-05, "loss": 0.2869478464126587, "step": 2904 }, { "epoch": 0.35250576386360877, "grad_norm": 2.065239191055298, "learning_rate": 1.3111411374524015e-05, "loss": 0.2752199172973633, "step": 2905 }, { "epoch": 0.3526271083606358, "grad_norm": 3.927312135696411, "learning_rate": 1.3108954673872989e-05, "loss": 0.23858079314231873, "step": 2906 }, { "epoch": 0.35274845285766293, "grad_norm": 2.510765552520752, "learning_rate": 1.3106497973221963e-05, "loss": 0.578784704208374, "step": 2907 }, { "epoch": 0.35286979735469, "grad_norm": 2.7408816814422607, "learning_rate": 1.3104041272570938e-05, "loss": 0.4173663854598999, "step": 2908 }, { "epoch": 0.35299114185171704, "grad_norm": 2.8283655643463135, "learning_rate": 1.3101584571919912e-05, "loss": 0.17574214935302734, "step": 2909 }, { "epoch": 0.3531124863487441, "grad_norm": 2.037839412689209, "learning_rate": 1.3099127871268886e-05, "loss": 0.10666762292385101, "step": 2910 }, { "epoch": 0.35323383084577115, "grad_norm": 3.1735360622406006, "learning_rate": 1.309667117061786e-05, "loss": 0.2770954668521881, "step": 2911 }, { "epoch": 0.3533551753427982, "grad_norm": 1.4993237257003784, "learning_rate": 1.3094214469966835e-05, "loss": 0.06719283759593964, "step": 2912 }, { "epoch": 0.35347651983982525, "grad_norm": 1.993152141571045, "learning_rate": 1.3091757769315809e-05, "loss": 0.2014002799987793, "step": 2913 }, { "epoch": 0.3535978643368523, "grad_norm": 2.437696695327759, "learning_rate": 1.3089301068664783e-05, "loss": 0.3466431498527527, "step": 2914 }, { "epoch": 0.35371920883387936, "grad_norm": 2.866640329360962, "learning_rate": 1.3086844368013757e-05, "loss": 0.3083437383174896, "step": 2915 }, { "epoch": 0.35384055333090647, "grad_norm": 2.857501983642578, "learning_rate": 1.3084387667362732e-05, "loss": 0.5835140347480774, "step": 2916 }, { "epoch": 0.3539618978279335, "grad_norm": 2.6366968154907227, "learning_rate": 1.3081930966711708e-05, "loss": 0.3993077576160431, "step": 2917 }, { "epoch": 0.3540832423249606, "grad_norm": 1.1135361194610596, "learning_rate": 1.3079474266060682e-05, "loss": 0.22319062054157257, "step": 2918 }, { "epoch": 0.35420458682198763, "grad_norm": 1.3128582239151, "learning_rate": 1.3077017565409656e-05, "loss": 0.12067007273435593, "step": 2919 }, { "epoch": 0.3543259313190147, "grad_norm": 3.6358463764190674, "learning_rate": 1.307456086475863e-05, "loss": 0.3024868965148926, "step": 2920 }, { "epoch": 0.35444727581604174, "grad_norm": 1.5476981401443481, "learning_rate": 1.3072104164107605e-05, "loss": 0.07749363780021667, "step": 2921 }, { "epoch": 0.3545686203130688, "grad_norm": 1.784947395324707, "learning_rate": 1.3069647463456579e-05, "loss": 0.18837328255176544, "step": 2922 }, { "epoch": 0.35468996481009585, "grad_norm": 2.081301212310791, "learning_rate": 1.3067190762805553e-05, "loss": 0.14046838879585266, "step": 2923 }, { "epoch": 0.3548113093071229, "grad_norm": 1.8582570552825928, "learning_rate": 1.3064734062154527e-05, "loss": 0.17502886056900024, "step": 2924 }, { "epoch": 0.35493265380414996, "grad_norm": 2.0138418674468994, "learning_rate": 1.3062277361503502e-05, "loss": 0.08949162065982819, "step": 2925 }, { "epoch": 0.35505399830117707, "grad_norm": 1.6978049278259277, "learning_rate": 1.3059820660852476e-05, "loss": 0.19336704909801483, "step": 2926 }, { "epoch": 0.3551753427982041, "grad_norm": 4.394315242767334, "learning_rate": 1.305736396020145e-05, "loss": 0.43278732895851135, "step": 2927 }, { "epoch": 0.3552966872952312, "grad_norm": 2.5735270977020264, "learning_rate": 1.3054907259550424e-05, "loss": 0.38995563983917236, "step": 2928 }, { "epoch": 0.3554180317922582, "grad_norm": 3.6233863830566406, "learning_rate": 1.3052450558899399e-05, "loss": 0.14834274351596832, "step": 2929 }, { "epoch": 0.3555393762892853, "grad_norm": 1.9931936264038086, "learning_rate": 1.3049993858248373e-05, "loss": 0.16212575137615204, "step": 2930 }, { "epoch": 0.35566072078631233, "grad_norm": 2.0874016284942627, "learning_rate": 1.3047537157597347e-05, "loss": 0.2454877644777298, "step": 2931 }, { "epoch": 0.3557820652833394, "grad_norm": 1.7809704542160034, "learning_rate": 1.3045080456946321e-05, "loss": 0.2872205078601837, "step": 2932 }, { "epoch": 0.35590340978036644, "grad_norm": 2.3312153816223145, "learning_rate": 1.3042623756295296e-05, "loss": 0.21331094205379486, "step": 2933 }, { "epoch": 0.3560247542773935, "grad_norm": 2.0230391025543213, "learning_rate": 1.304016705564427e-05, "loss": 0.23937641084194183, "step": 2934 }, { "epoch": 0.3561460987744206, "grad_norm": 0.25450608134269714, "learning_rate": 1.3037710354993244e-05, "loss": 0.002068326575681567, "step": 2935 }, { "epoch": 0.35626744327144766, "grad_norm": 2.4915611743927, "learning_rate": 1.3035253654342218e-05, "loss": 0.48472559452056885, "step": 2936 }, { "epoch": 0.3563887877684747, "grad_norm": 2.8751635551452637, "learning_rate": 1.3032796953691194e-05, "loss": 0.3046378791332245, "step": 2937 }, { "epoch": 0.35651013226550177, "grad_norm": 3.4425442218780518, "learning_rate": 1.3030340253040169e-05, "loss": 0.3476297855377197, "step": 2938 }, { "epoch": 0.3566314767625288, "grad_norm": 1.4276381731033325, "learning_rate": 1.3027883552389143e-05, "loss": 0.06332909315824509, "step": 2939 }, { "epoch": 0.3567528212595559, "grad_norm": 2.643845558166504, "learning_rate": 1.3025426851738117e-05, "loss": 0.26920342445373535, "step": 2940 }, { "epoch": 0.35687416575658293, "grad_norm": 1.530829668045044, "learning_rate": 1.3022970151087091e-05, "loss": 0.0930422991514206, "step": 2941 }, { "epoch": 0.35699551025361, "grad_norm": 2.0474140644073486, "learning_rate": 1.3020513450436066e-05, "loss": 0.20393139123916626, "step": 2942 }, { "epoch": 0.35711685475063704, "grad_norm": 2.6223721504211426, "learning_rate": 1.301805674978504e-05, "loss": 0.4995730519294739, "step": 2943 }, { "epoch": 0.35723819924766415, "grad_norm": 6.0837321281433105, "learning_rate": 1.3015600049134014e-05, "loss": 0.44847533106803894, "step": 2944 }, { "epoch": 0.3573595437446912, "grad_norm": 0.8783859014511108, "learning_rate": 1.3013143348482988e-05, "loss": 0.05103648826479912, "step": 2945 }, { "epoch": 0.35748088824171825, "grad_norm": 2.090203046798706, "learning_rate": 1.3010686647831963e-05, "loss": 0.0770326629281044, "step": 2946 }, { "epoch": 0.3576022327387453, "grad_norm": 1.8669445514678955, "learning_rate": 1.3008229947180937e-05, "loss": 0.12285949289798737, "step": 2947 }, { "epoch": 0.35772357723577236, "grad_norm": 1.8406798839569092, "learning_rate": 1.3005773246529911e-05, "loss": 0.11922474205493927, "step": 2948 }, { "epoch": 0.3578449217327994, "grad_norm": 2.202354669570923, "learning_rate": 1.3003316545878885e-05, "loss": 0.15273219347000122, "step": 2949 }, { "epoch": 0.35796626622982647, "grad_norm": 2.0528676509857178, "learning_rate": 1.300085984522786e-05, "loss": 0.2751436233520508, "step": 2950 }, { "epoch": 0.3580876107268535, "grad_norm": 2.0697927474975586, "learning_rate": 1.2998403144576834e-05, "loss": 0.1616508513689041, "step": 2951 }, { "epoch": 0.3582089552238806, "grad_norm": 3.1109538078308105, "learning_rate": 1.2995946443925808e-05, "loss": 0.27921411395072937, "step": 2952 }, { "epoch": 0.35833029972090763, "grad_norm": 3.62821364402771, "learning_rate": 1.2993489743274783e-05, "loss": 0.5124978423118591, "step": 2953 }, { "epoch": 0.35845164421793474, "grad_norm": 3.2712950706481934, "learning_rate": 1.2991033042623757e-05, "loss": 0.4778803586959839, "step": 2954 }, { "epoch": 0.3585729887149618, "grad_norm": 1.9799039363861084, "learning_rate": 1.2988576341972731e-05, "loss": 0.16806745529174805, "step": 2955 }, { "epoch": 0.35869433321198885, "grad_norm": 2.7012410163879395, "learning_rate": 1.2986119641321705e-05, "loss": 0.13724809885025024, "step": 2956 }, { "epoch": 0.3588156777090159, "grad_norm": 2.23858380317688, "learning_rate": 1.2983662940670681e-05, "loss": 0.873268723487854, "step": 2957 }, { "epoch": 0.35893702220604295, "grad_norm": 2.514082670211792, "learning_rate": 1.2981206240019655e-05, "loss": 0.2670159339904785, "step": 2958 }, { "epoch": 0.35905836670307, "grad_norm": 1.5424782037734985, "learning_rate": 1.297874953936863e-05, "loss": 0.11754392087459564, "step": 2959 }, { "epoch": 0.35917971120009706, "grad_norm": 2.0172996520996094, "learning_rate": 1.2976292838717604e-05, "loss": 0.24237750470638275, "step": 2960 }, { "epoch": 0.3593010556971241, "grad_norm": 2.2911813259124756, "learning_rate": 1.2973836138066578e-05, "loss": 0.17581063508987427, "step": 2961 }, { "epoch": 0.35942240019415117, "grad_norm": 2.3171849250793457, "learning_rate": 1.2971379437415553e-05, "loss": 0.23211178183555603, "step": 2962 }, { "epoch": 0.3595437446911783, "grad_norm": 3.187316656112671, "learning_rate": 1.2968922736764527e-05, "loss": 0.35318523645401, "step": 2963 }, { "epoch": 0.35966508918820533, "grad_norm": 2.611950159072876, "learning_rate": 1.2966466036113501e-05, "loss": 0.1411629617214203, "step": 2964 }, { "epoch": 0.3597864336852324, "grad_norm": 2.4547252655029297, "learning_rate": 1.2964009335462475e-05, "loss": 0.36622804403305054, "step": 2965 }, { "epoch": 0.35990777818225944, "grad_norm": 1.1976194381713867, "learning_rate": 1.296155263481145e-05, "loss": 0.27061358094215393, "step": 2966 }, { "epoch": 0.3600291226792865, "grad_norm": 1.6270411014556885, "learning_rate": 1.2959095934160424e-05, "loss": 0.14751648902893066, "step": 2967 }, { "epoch": 0.36015046717631355, "grad_norm": 2.6941215991973877, "learning_rate": 1.2956639233509398e-05, "loss": 0.22208136320114136, "step": 2968 }, { "epoch": 0.3602718116733406, "grad_norm": 0.12202031910419464, "learning_rate": 1.2954182532858372e-05, "loss": 0.0013922062935307622, "step": 2969 }, { "epoch": 0.36039315617036766, "grad_norm": 3.520007848739624, "learning_rate": 1.2951725832207347e-05, "loss": 0.40424954891204834, "step": 2970 }, { "epoch": 0.3605145006673947, "grad_norm": 2.3271749019622803, "learning_rate": 1.2949269131556321e-05, "loss": 0.533889889717102, "step": 2971 }, { "epoch": 0.3606358451644218, "grad_norm": 2.4552700519561768, "learning_rate": 1.2946812430905295e-05, "loss": 0.5242980718612671, "step": 2972 }, { "epoch": 0.3607571896614489, "grad_norm": 1.2487123012542725, "learning_rate": 1.294435573025427e-05, "loss": 0.1968783587217331, "step": 2973 }, { "epoch": 0.3608785341584759, "grad_norm": 2.4111859798431396, "learning_rate": 1.2941899029603244e-05, "loss": 0.20926323533058167, "step": 2974 }, { "epoch": 0.360999878655503, "grad_norm": 2.784630298614502, "learning_rate": 1.2939442328952218e-05, "loss": 0.18628396093845367, "step": 2975 }, { "epoch": 0.36112122315253004, "grad_norm": 2.0924742221832275, "learning_rate": 1.2936985628301192e-05, "loss": 0.36132916808128357, "step": 2976 }, { "epoch": 0.3612425676495571, "grad_norm": 1.6369595527648926, "learning_rate": 1.2934528927650168e-05, "loss": 0.20456495881080627, "step": 2977 }, { "epoch": 0.36136391214658414, "grad_norm": 1.6293615102767944, "learning_rate": 1.2932072226999142e-05, "loss": 0.1595347821712494, "step": 2978 }, { "epoch": 0.3614852566436112, "grad_norm": 2.087491989135742, "learning_rate": 1.2929615526348117e-05, "loss": 0.23917625844478607, "step": 2979 }, { "epoch": 0.36160660114063825, "grad_norm": 2.3202195167541504, "learning_rate": 1.2927158825697091e-05, "loss": 0.19291448593139648, "step": 2980 }, { "epoch": 0.3617279456376653, "grad_norm": 2.6504223346710205, "learning_rate": 1.2924702125046065e-05, "loss": 0.35504570603370667, "step": 2981 }, { "epoch": 0.3618492901346924, "grad_norm": 2.5289902687072754, "learning_rate": 1.292224542439504e-05, "loss": 0.30801528692245483, "step": 2982 }, { "epoch": 0.36197063463171947, "grad_norm": 2.0009565353393555, "learning_rate": 1.2919788723744014e-05, "loss": 0.1884746551513672, "step": 2983 }, { "epoch": 0.3620919791287465, "grad_norm": 3.072634696960449, "learning_rate": 1.2917332023092988e-05, "loss": 0.3839377462863922, "step": 2984 }, { "epoch": 0.3622133236257736, "grad_norm": 2.597742795944214, "learning_rate": 1.2914875322441962e-05, "loss": 0.5811505317687988, "step": 2985 }, { "epoch": 0.36233466812280063, "grad_norm": 2.4145267009735107, "learning_rate": 1.2912418621790936e-05, "loss": 0.443876713514328, "step": 2986 }, { "epoch": 0.3624560126198277, "grad_norm": 1.8467440605163574, "learning_rate": 1.290996192113991e-05, "loss": 0.29026928544044495, "step": 2987 }, { "epoch": 0.36257735711685474, "grad_norm": 2.9077274799346924, "learning_rate": 1.2907505220488885e-05, "loss": 0.35525912046432495, "step": 2988 }, { "epoch": 0.3626987016138818, "grad_norm": 0.22295789420604706, "learning_rate": 1.290504851983786e-05, "loss": 0.004607439041137695, "step": 2989 }, { "epoch": 0.36282004611090884, "grad_norm": 2.0454721450805664, "learning_rate": 1.2902591819186833e-05, "loss": 0.3613908588886261, "step": 2990 }, { "epoch": 0.36294139060793595, "grad_norm": 3.7352137565612793, "learning_rate": 1.2900135118535808e-05, "loss": 0.25977185368537903, "step": 2991 }, { "epoch": 0.363062735104963, "grad_norm": 3.1388018131256104, "learning_rate": 1.2897678417884782e-05, "loss": 0.4856216311454773, "step": 2992 }, { "epoch": 0.36318407960199006, "grad_norm": 1.5494506359100342, "learning_rate": 1.2895221717233756e-05, "loss": 0.1923268884420395, "step": 2993 }, { "epoch": 0.3633054240990171, "grad_norm": 3.564504861831665, "learning_rate": 1.289276501658273e-05, "loss": 0.30987757444381714, "step": 2994 }, { "epoch": 0.36342676859604417, "grad_norm": 2.160773515701294, "learning_rate": 1.2890308315931705e-05, "loss": 0.21949148178100586, "step": 2995 }, { "epoch": 0.3635481130930712, "grad_norm": 3.99308443069458, "learning_rate": 1.288785161528068e-05, "loss": 0.4553235173225403, "step": 2996 }, { "epoch": 0.3636694575900983, "grad_norm": 2.348376989364624, "learning_rate": 1.2885394914629655e-05, "loss": 0.22087374329566956, "step": 2997 }, { "epoch": 0.36379080208712533, "grad_norm": 1.88888680934906, "learning_rate": 1.288293821397863e-05, "loss": 0.5941238403320312, "step": 2998 }, { "epoch": 0.3639121465841524, "grad_norm": 1.6137655973434448, "learning_rate": 1.2880481513327603e-05, "loss": 0.06201152503490448, "step": 2999 }, { "epoch": 0.3640334910811795, "grad_norm": 2.1788697242736816, "learning_rate": 1.2878024812676578e-05, "loss": 0.2939947545528412, "step": 3000 }, { "epoch": 0.36415483557820655, "grad_norm": 2.3649392127990723, "learning_rate": 1.2875568112025552e-05, "loss": 0.2894429564476013, "step": 3001 }, { "epoch": 0.3642761800752336, "grad_norm": 2.135638475418091, "learning_rate": 1.2873111411374526e-05, "loss": 0.2235684096813202, "step": 3002 }, { "epoch": 0.36439752457226066, "grad_norm": 3.520004987716675, "learning_rate": 1.28706547107235e-05, "loss": 0.17777599394321442, "step": 3003 }, { "epoch": 0.3645188690692877, "grad_norm": 3.2360692024230957, "learning_rate": 1.2868198010072475e-05, "loss": 0.229665145277977, "step": 3004 }, { "epoch": 0.36464021356631476, "grad_norm": 2.8983798027038574, "learning_rate": 1.2865741309421449e-05, "loss": 0.388833224773407, "step": 3005 }, { "epoch": 0.3647615580633418, "grad_norm": 1.9312299489974976, "learning_rate": 1.2863284608770423e-05, "loss": 0.3245135247707367, "step": 3006 }, { "epoch": 0.36488290256036887, "grad_norm": 2.141505718231201, "learning_rate": 1.2860827908119398e-05, "loss": 0.39916810393333435, "step": 3007 }, { "epoch": 0.3650042470573959, "grad_norm": 2.692316770553589, "learning_rate": 1.2858371207468372e-05, "loss": 0.3534470200538635, "step": 3008 }, { "epoch": 0.36512559155442303, "grad_norm": 1.6258231401443481, "learning_rate": 1.2855914506817346e-05, "loss": 0.1111140251159668, "step": 3009 }, { "epoch": 0.3652469360514501, "grad_norm": 1.6851928234100342, "learning_rate": 1.285345780616632e-05, "loss": 0.04014979302883148, "step": 3010 }, { "epoch": 0.36536828054847714, "grad_norm": 2.299107313156128, "learning_rate": 1.2851001105515295e-05, "loss": 0.07421331852674484, "step": 3011 }, { "epoch": 0.3654896250455042, "grad_norm": 2.7299962043762207, "learning_rate": 1.2848544404864269e-05, "loss": 0.23941995203495026, "step": 3012 }, { "epoch": 0.36561096954253125, "grad_norm": 2.1895580291748047, "learning_rate": 1.2846087704213243e-05, "loss": 0.39537790417671204, "step": 3013 }, { "epoch": 0.3657323140395583, "grad_norm": 2.276113748550415, "learning_rate": 1.2843631003562217e-05, "loss": 0.32509279251098633, "step": 3014 }, { "epoch": 0.36585365853658536, "grad_norm": 1.897255539894104, "learning_rate": 1.2841174302911192e-05, "loss": 0.1867866814136505, "step": 3015 }, { "epoch": 0.3659750030336124, "grad_norm": 2.16688871383667, "learning_rate": 1.2838717602260168e-05, "loss": 0.21447589993476868, "step": 3016 }, { "epoch": 0.36609634753063947, "grad_norm": 1.9278409481048584, "learning_rate": 1.2836260901609142e-05, "loss": 0.07187424600124359, "step": 3017 }, { "epoch": 0.3662176920276665, "grad_norm": 3.1750142574310303, "learning_rate": 1.2833804200958116e-05, "loss": 0.36346420645713806, "step": 3018 }, { "epoch": 0.36633903652469363, "grad_norm": 2.207082509994507, "learning_rate": 1.2831347500307087e-05, "loss": 0.5422084927558899, "step": 3019 }, { "epoch": 0.3664603810217207, "grad_norm": 2.3459904193878174, "learning_rate": 1.2828890799656061e-05, "loss": 0.21172483265399933, "step": 3020 }, { "epoch": 0.36658172551874774, "grad_norm": 4.062553405761719, "learning_rate": 1.2826434099005035e-05, "loss": 0.29971086978912354, "step": 3021 }, { "epoch": 0.3667030700157748, "grad_norm": 2.1893389225006104, "learning_rate": 1.2823977398354011e-05, "loss": 0.28227633237838745, "step": 3022 }, { "epoch": 0.36682441451280184, "grad_norm": 2.9444973468780518, "learning_rate": 1.2821520697702986e-05, "loss": 0.6413102149963379, "step": 3023 }, { "epoch": 0.3669457590098289, "grad_norm": 1.4639172554016113, "learning_rate": 1.281906399705196e-05, "loss": 0.03601495176553726, "step": 3024 }, { "epoch": 0.36706710350685595, "grad_norm": 3.3567471504211426, "learning_rate": 1.2816607296400934e-05, "loss": 0.6441450715065002, "step": 3025 }, { "epoch": 0.367188448003883, "grad_norm": 3.2175350189208984, "learning_rate": 1.2814150595749908e-05, "loss": 0.4079563319683075, "step": 3026 }, { "epoch": 0.36730979250091006, "grad_norm": 2.4198379516601562, "learning_rate": 1.2811693895098883e-05, "loss": 0.3430071473121643, "step": 3027 }, { "epoch": 0.36743113699793717, "grad_norm": 2.058039426803589, "learning_rate": 1.2809237194447857e-05, "loss": 0.29600954055786133, "step": 3028 }, { "epoch": 0.3675524814949642, "grad_norm": 1.824734091758728, "learning_rate": 1.2806780493796831e-05, "loss": 0.1772790551185608, "step": 3029 }, { "epoch": 0.3676738259919913, "grad_norm": 2.377044200897217, "learning_rate": 1.2804323793145805e-05, "loss": 0.24278861284255981, "step": 3030 }, { "epoch": 0.36779517048901833, "grad_norm": 2.844923257827759, "learning_rate": 1.280186709249478e-05, "loss": 0.6934537887573242, "step": 3031 }, { "epoch": 0.3679165149860454, "grad_norm": 1.1115227937698364, "learning_rate": 1.2799410391843754e-05, "loss": 0.037027470767498016, "step": 3032 }, { "epoch": 0.36803785948307244, "grad_norm": 1.604121208190918, "learning_rate": 1.2796953691192728e-05, "loss": 0.06987448036670685, "step": 3033 }, { "epoch": 0.3681592039800995, "grad_norm": 2.811095714569092, "learning_rate": 1.2794496990541703e-05, "loss": 0.5348390936851501, "step": 3034 }, { "epoch": 0.36828054847712655, "grad_norm": 2.9516241550445557, "learning_rate": 1.2792040289890677e-05, "loss": 0.13303931057453156, "step": 3035 }, { "epoch": 0.3684018929741536, "grad_norm": 2.7784767150878906, "learning_rate": 1.2789583589239651e-05, "loss": 0.32491278648376465, "step": 3036 }, { "epoch": 0.3685232374711807, "grad_norm": 3.5733752250671387, "learning_rate": 1.2787126888588625e-05, "loss": 0.49746209383010864, "step": 3037 }, { "epoch": 0.36864458196820776, "grad_norm": 3.677290916442871, "learning_rate": 1.27846701879376e-05, "loss": 0.18780988454818726, "step": 3038 }, { "epoch": 0.3687659264652348, "grad_norm": 2.170703411102295, "learning_rate": 1.2782213487286574e-05, "loss": 0.18865230679512024, "step": 3039 }, { "epoch": 0.36888727096226187, "grad_norm": 2.0496461391448975, "learning_rate": 1.2779756786635548e-05, "loss": 0.12246742844581604, "step": 3040 }, { "epoch": 0.3690086154592889, "grad_norm": 4.869044303894043, "learning_rate": 1.2777300085984522e-05, "loss": 0.2805744409561157, "step": 3041 }, { "epoch": 0.369129959956316, "grad_norm": 3.1038382053375244, "learning_rate": 1.2774843385333498e-05, "loss": 0.4670161306858063, "step": 3042 }, { "epoch": 0.36925130445334303, "grad_norm": 2.6496403217315674, "learning_rate": 1.2772386684682473e-05, "loss": 0.1732165813446045, "step": 3043 }, { "epoch": 0.3693726489503701, "grad_norm": 3.3521132469177246, "learning_rate": 1.2769929984031447e-05, "loss": 0.16238215565681458, "step": 3044 }, { "epoch": 0.36949399344739714, "grad_norm": 1.6630042791366577, "learning_rate": 1.2767473283380421e-05, "loss": 0.22390274703502655, "step": 3045 }, { "epoch": 0.3696153379444242, "grad_norm": 2.7344253063201904, "learning_rate": 1.2765016582729395e-05, "loss": 0.04648754373192787, "step": 3046 }, { "epoch": 0.3697366824414513, "grad_norm": 2.4483656883239746, "learning_rate": 1.276255988207837e-05, "loss": 0.23034049570560455, "step": 3047 }, { "epoch": 0.36985802693847836, "grad_norm": 2.579706907272339, "learning_rate": 1.2760103181427344e-05, "loss": 0.4101458489894867, "step": 3048 }, { "epoch": 0.3699793714355054, "grad_norm": 5.1360931396484375, "learning_rate": 1.2757646480776318e-05, "loss": 0.795612633228302, "step": 3049 }, { "epoch": 0.37010071593253246, "grad_norm": 3.743968963623047, "learning_rate": 1.2755189780125292e-05, "loss": 0.34071826934814453, "step": 3050 }, { "epoch": 0.3702220604295595, "grad_norm": 2.5002975463867188, "learning_rate": 1.2752733079474267e-05, "loss": 0.2797744870185852, "step": 3051 }, { "epoch": 0.3703434049265866, "grad_norm": 3.206812620162964, "learning_rate": 1.275027637882324e-05, "loss": 0.31890809535980225, "step": 3052 }, { "epoch": 0.3704647494236136, "grad_norm": 2.2290642261505127, "learning_rate": 1.2747819678172215e-05, "loss": 0.3868074417114258, "step": 3053 }, { "epoch": 0.3705860939206407, "grad_norm": 1.7649569511413574, "learning_rate": 1.274536297752119e-05, "loss": 0.5119009613990784, "step": 3054 }, { "epoch": 0.37070743841766773, "grad_norm": 2.605090856552124, "learning_rate": 1.2742906276870164e-05, "loss": 0.13250136375427246, "step": 3055 }, { "epoch": 0.37082878291469484, "grad_norm": 2.710275650024414, "learning_rate": 1.2740449576219138e-05, "loss": 0.5559759140014648, "step": 3056 }, { "epoch": 0.3709501274117219, "grad_norm": 3.008519172668457, "learning_rate": 1.2737992875568112e-05, "loss": 0.43797487020492554, "step": 3057 }, { "epoch": 0.37107147190874895, "grad_norm": 2.1876819133758545, "learning_rate": 1.2735536174917086e-05, "loss": 0.13243570923805237, "step": 3058 }, { "epoch": 0.371192816405776, "grad_norm": 1.1735800504684448, "learning_rate": 1.273307947426606e-05, "loss": 0.04852079600095749, "step": 3059 }, { "epoch": 0.37131416090280306, "grad_norm": 2.3674979209899902, "learning_rate": 1.2730622773615035e-05, "loss": 0.21951356530189514, "step": 3060 }, { "epoch": 0.3714355053998301, "grad_norm": 2.802356719970703, "learning_rate": 1.272816607296401e-05, "loss": 0.5340931415557861, "step": 3061 }, { "epoch": 0.37155684989685717, "grad_norm": 2.132953643798828, "learning_rate": 1.2725709372312985e-05, "loss": 0.21803082525730133, "step": 3062 }, { "epoch": 0.3716781943938842, "grad_norm": 2.711453676223755, "learning_rate": 1.272325267166196e-05, "loss": 0.20228137075901031, "step": 3063 }, { "epoch": 0.3717995388909113, "grad_norm": 3.1502573490142822, "learning_rate": 1.2720795971010934e-05, "loss": 0.3638427257537842, "step": 3064 }, { "epoch": 0.3719208833879384, "grad_norm": 1.838789463043213, "learning_rate": 1.2718339270359908e-05, "loss": 0.08531152456998825, "step": 3065 }, { "epoch": 0.37204222788496544, "grad_norm": 4.831695556640625, "learning_rate": 1.2715882569708882e-05, "loss": 0.3012272119522095, "step": 3066 }, { "epoch": 0.3721635723819925, "grad_norm": 2.007178783416748, "learning_rate": 1.2713425869057856e-05, "loss": 0.3394854962825775, "step": 3067 }, { "epoch": 0.37228491687901955, "grad_norm": 2.307107448577881, "learning_rate": 1.271096916840683e-05, "loss": 0.19313615560531616, "step": 3068 }, { "epoch": 0.3724062613760466, "grad_norm": 1.7929253578186035, "learning_rate": 1.2708512467755805e-05, "loss": 0.3770360052585602, "step": 3069 }, { "epoch": 0.37252760587307365, "grad_norm": 3.1486785411834717, "learning_rate": 1.270605576710478e-05, "loss": 0.12018324434757233, "step": 3070 }, { "epoch": 0.3726489503701007, "grad_norm": 1.9934414625167847, "learning_rate": 1.2703599066453753e-05, "loss": 0.22126701474189758, "step": 3071 }, { "epoch": 0.37277029486712776, "grad_norm": 2.5344128608703613, "learning_rate": 1.2701142365802728e-05, "loss": 0.4378224313259125, "step": 3072 }, { "epoch": 0.3728916393641548, "grad_norm": 2.389474868774414, "learning_rate": 1.2698685665151702e-05, "loss": 0.15001432597637177, "step": 3073 }, { "epoch": 0.37301298386118187, "grad_norm": 2.823737382888794, "learning_rate": 1.2696228964500676e-05, "loss": 0.2103637009859085, "step": 3074 }, { "epoch": 0.373134328358209, "grad_norm": 1.848333716392517, "learning_rate": 1.269377226384965e-05, "loss": 0.1431073397397995, "step": 3075 }, { "epoch": 0.37325567285523603, "grad_norm": 1.597927212715149, "learning_rate": 1.2691315563198625e-05, "loss": 0.2240583747625351, "step": 3076 }, { "epoch": 0.3733770173522631, "grad_norm": 2.668405055999756, "learning_rate": 1.2688858862547599e-05, "loss": 0.07994963228702545, "step": 3077 }, { "epoch": 0.37349836184929014, "grad_norm": 3.062838554382324, "learning_rate": 1.2686402161896573e-05, "loss": 0.5268748998641968, "step": 3078 }, { "epoch": 0.3736197063463172, "grad_norm": 1.37921142578125, "learning_rate": 1.2683945461245548e-05, "loss": 0.1220514327287674, "step": 3079 }, { "epoch": 0.37374105084334425, "grad_norm": 1.977890133857727, "learning_rate": 1.2681488760594522e-05, "loss": 0.3197315037250519, "step": 3080 }, { "epoch": 0.3738623953403713, "grad_norm": 2.71738862991333, "learning_rate": 1.2679032059943496e-05, "loss": 0.22290587425231934, "step": 3081 }, { "epoch": 0.37398373983739835, "grad_norm": 1.8566792011260986, "learning_rate": 1.2676575359292472e-05, "loss": 0.20437034964561462, "step": 3082 }, { "epoch": 0.3741050843344254, "grad_norm": 3.2051069736480713, "learning_rate": 1.2674118658641446e-05, "loss": 0.22961628437042236, "step": 3083 }, { "epoch": 0.3742264288314525, "grad_norm": 2.850677251815796, "learning_rate": 1.267166195799042e-05, "loss": 0.40382158756256104, "step": 3084 }, { "epoch": 0.37434777332847957, "grad_norm": 2.2020742893218994, "learning_rate": 1.2669205257339395e-05, "loss": 0.08760152012109756, "step": 3085 }, { "epoch": 0.3744691178255066, "grad_norm": 2.463630199432373, "learning_rate": 1.2666748556688369e-05, "loss": 0.5425976514816284, "step": 3086 }, { "epoch": 0.3745904623225337, "grad_norm": 2.6825172901153564, "learning_rate": 1.2664291856037343e-05, "loss": 0.5442331433296204, "step": 3087 }, { "epoch": 0.37471180681956073, "grad_norm": 2.0428361892700195, "learning_rate": 1.2661835155386318e-05, "loss": 0.1842765361070633, "step": 3088 }, { "epoch": 0.3748331513165878, "grad_norm": 2.6168863773345947, "learning_rate": 1.2659378454735292e-05, "loss": 0.17524027824401855, "step": 3089 }, { "epoch": 0.37495449581361484, "grad_norm": 1.9933257102966309, "learning_rate": 1.2656921754084266e-05, "loss": 0.19788393378257751, "step": 3090 }, { "epoch": 0.3750758403106419, "grad_norm": 1.8031022548675537, "learning_rate": 1.265446505343324e-05, "loss": 0.2875882685184479, "step": 3091 }, { "epoch": 0.37519718480766895, "grad_norm": 3.0728745460510254, "learning_rate": 1.2652008352782215e-05, "loss": 0.34630057215690613, "step": 3092 }, { "epoch": 0.37531852930469606, "grad_norm": 3.2881243228912354, "learning_rate": 1.2649551652131189e-05, "loss": 0.4444838762283325, "step": 3093 }, { "epoch": 0.3754398738017231, "grad_norm": 1.610763669013977, "learning_rate": 1.2647094951480163e-05, "loss": 0.08967998623847961, "step": 3094 }, { "epoch": 0.37556121829875017, "grad_norm": 0.7797218561172485, "learning_rate": 1.2644638250829137e-05, "loss": 0.014695637859404087, "step": 3095 }, { "epoch": 0.3756825627957772, "grad_norm": 4.406348705291748, "learning_rate": 1.2642181550178112e-05, "loss": 0.10652171820402145, "step": 3096 }, { "epoch": 0.3758039072928043, "grad_norm": 5.548430919647217, "learning_rate": 1.2639724849527086e-05, "loss": 0.7209188938140869, "step": 3097 }, { "epoch": 0.3759252517898313, "grad_norm": 3.451643705368042, "learning_rate": 1.263726814887606e-05, "loss": 0.31551826000213623, "step": 3098 }, { "epoch": 0.3760465962868584, "grad_norm": 2.1619338989257812, "learning_rate": 1.2634811448225034e-05, "loss": 0.3468658924102783, "step": 3099 }, { "epoch": 0.37616794078388543, "grad_norm": 0.46372610330581665, "learning_rate": 1.2632354747574009e-05, "loss": 0.007035141810774803, "step": 3100 }, { "epoch": 0.3762892852809125, "grad_norm": 3.1669082641601562, "learning_rate": 1.2629898046922983e-05, "loss": 0.5927947759628296, "step": 3101 }, { "epoch": 0.37641062977793954, "grad_norm": 2.1995599269866943, "learning_rate": 1.2627441346271959e-05, "loss": 0.19552944600582123, "step": 3102 }, { "epoch": 0.37653197427496665, "grad_norm": 1.4851335287094116, "learning_rate": 1.2624984645620933e-05, "loss": 0.02476375550031662, "step": 3103 }, { "epoch": 0.3766533187719937, "grad_norm": 3.12634015083313, "learning_rate": 1.2622527944969907e-05, "loss": 0.2813194990158081, "step": 3104 }, { "epoch": 0.37677466326902076, "grad_norm": 2.6773481369018555, "learning_rate": 1.2620071244318882e-05, "loss": 0.20298711955547333, "step": 3105 }, { "epoch": 0.3768960077660478, "grad_norm": 2.8419148921966553, "learning_rate": 1.2617614543667856e-05, "loss": 0.31778767704963684, "step": 3106 }, { "epoch": 0.37701735226307487, "grad_norm": 3.2999582290649414, "learning_rate": 1.261515784301683e-05, "loss": 0.27695170044898987, "step": 3107 }, { "epoch": 0.3771386967601019, "grad_norm": 2.0065877437591553, "learning_rate": 1.2612701142365804e-05, "loss": 0.3481106758117676, "step": 3108 }, { "epoch": 0.377260041257129, "grad_norm": 1.229748249053955, "learning_rate": 1.2610244441714779e-05, "loss": 0.037845637649297714, "step": 3109 }, { "epoch": 0.37738138575415603, "grad_norm": 2.5632731914520264, "learning_rate": 1.2607787741063753e-05, "loss": 0.3110155165195465, "step": 3110 }, { "epoch": 0.3775027302511831, "grad_norm": 3.8191258907318115, "learning_rate": 1.2605331040412727e-05, "loss": 0.5061408281326294, "step": 3111 }, { "epoch": 0.3776240747482102, "grad_norm": 2.7737317085266113, "learning_rate": 1.2602874339761701e-05, "loss": 0.603361964225769, "step": 3112 }, { "epoch": 0.37774541924523725, "grad_norm": 2.004978656768799, "learning_rate": 1.2600417639110676e-05, "loss": 0.19077220559120178, "step": 3113 }, { "epoch": 0.3778667637422643, "grad_norm": 4.966524124145508, "learning_rate": 1.259796093845965e-05, "loss": 0.32509520649909973, "step": 3114 }, { "epoch": 0.37798810823929135, "grad_norm": 4.069063186645508, "learning_rate": 1.2595504237808624e-05, "loss": 0.4654199481010437, "step": 3115 }, { "epoch": 0.3781094527363184, "grad_norm": 2.699005126953125, "learning_rate": 1.2593047537157598e-05, "loss": 0.20681169629096985, "step": 3116 }, { "epoch": 0.37823079723334546, "grad_norm": 2.2115743160247803, "learning_rate": 1.2590590836506573e-05, "loss": 0.4543258249759674, "step": 3117 }, { "epoch": 0.3783521417303725, "grad_norm": 3.3127706050872803, "learning_rate": 1.2588134135855547e-05, "loss": 0.3135000765323639, "step": 3118 }, { "epoch": 0.37847348622739957, "grad_norm": 2.7028489112854004, "learning_rate": 1.2585677435204521e-05, "loss": 0.49060899019241333, "step": 3119 }, { "epoch": 0.3785948307244266, "grad_norm": 4.582715034484863, "learning_rate": 1.2583220734553495e-05, "loss": 0.5623689293861389, "step": 3120 }, { "epoch": 0.37871617522145373, "grad_norm": 2.5746421813964844, "learning_rate": 1.258076403390247e-05, "loss": 0.2465989589691162, "step": 3121 }, { "epoch": 0.3788375197184808, "grad_norm": 4.443078517913818, "learning_rate": 1.2578307333251446e-05, "loss": 0.4048062264919281, "step": 3122 }, { "epoch": 0.37895886421550784, "grad_norm": 2.589857816696167, "learning_rate": 1.257585063260042e-05, "loss": 0.4509902596473694, "step": 3123 }, { "epoch": 0.3790802087125349, "grad_norm": 1.951952576637268, "learning_rate": 1.2573393931949394e-05, "loss": 0.1623089760541916, "step": 3124 }, { "epoch": 0.37920155320956195, "grad_norm": 3.1534080505371094, "learning_rate": 1.2570937231298368e-05, "loss": 0.46682316064834595, "step": 3125 }, { "epoch": 0.379322897706589, "grad_norm": 3.0295398235321045, "learning_rate": 1.2568480530647343e-05, "loss": 0.3204183280467987, "step": 3126 }, { "epoch": 0.37944424220361606, "grad_norm": 2.0212247371673584, "learning_rate": 1.2566023829996317e-05, "loss": 0.17289389669895172, "step": 3127 }, { "epoch": 0.3795655867006431, "grad_norm": 3.1270949840545654, "learning_rate": 1.2563567129345291e-05, "loss": 0.9947634935379028, "step": 3128 }, { "epoch": 0.37968693119767016, "grad_norm": 2.232048273086548, "learning_rate": 1.2561110428694266e-05, "loss": 0.2776552438735962, "step": 3129 }, { "epoch": 0.3798082756946973, "grad_norm": 2.834043502807617, "learning_rate": 1.255865372804324e-05, "loss": 0.13570261001586914, "step": 3130 }, { "epoch": 0.3799296201917243, "grad_norm": 2.6776390075683594, "learning_rate": 1.2556197027392214e-05, "loss": 0.5355879664421082, "step": 3131 }, { "epoch": 0.3800509646887514, "grad_norm": 2.001067638397217, "learning_rate": 1.2553740326741188e-05, "loss": 0.18915635347366333, "step": 3132 }, { "epoch": 0.38017230918577843, "grad_norm": 2.9914603233337402, "learning_rate": 1.2551283626090163e-05, "loss": 0.6213992834091187, "step": 3133 }, { "epoch": 0.3802936536828055, "grad_norm": 2.629340887069702, "learning_rate": 1.2548826925439137e-05, "loss": 0.29846295714378357, "step": 3134 }, { "epoch": 0.38041499817983254, "grad_norm": 3.0676817893981934, "learning_rate": 1.2546370224788111e-05, "loss": 0.16866937279701233, "step": 3135 }, { "epoch": 0.3805363426768596, "grad_norm": 2.5763516426086426, "learning_rate": 1.2543913524137085e-05, "loss": 0.2580344080924988, "step": 3136 }, { "epoch": 0.38065768717388665, "grad_norm": 3.0476651191711426, "learning_rate": 1.254145682348606e-05, "loss": 0.3251637816429138, "step": 3137 }, { "epoch": 0.3807790316709137, "grad_norm": 2.6623525619506836, "learning_rate": 1.2539000122835034e-05, "loss": 0.3432225286960602, "step": 3138 }, { "epoch": 0.38090037616794076, "grad_norm": 2.9788427352905273, "learning_rate": 1.2536543422184008e-05, "loss": 0.17502693831920624, "step": 3139 }, { "epoch": 0.38102172066496787, "grad_norm": 3.0660781860351562, "learning_rate": 1.2534086721532982e-05, "loss": 0.4956677556037903, "step": 3140 }, { "epoch": 0.3811430651619949, "grad_norm": 1.9002922773361206, "learning_rate": 1.2531630020881958e-05, "loss": 0.17276537418365479, "step": 3141 }, { "epoch": 0.381264409659022, "grad_norm": 3.2068376541137695, "learning_rate": 1.2529173320230933e-05, "loss": 0.19760315120220184, "step": 3142 }, { "epoch": 0.38138575415604903, "grad_norm": 2.6643495559692383, "learning_rate": 1.2526716619579907e-05, "loss": 0.29236993193626404, "step": 3143 }, { "epoch": 0.3815070986530761, "grad_norm": 2.8827171325683594, "learning_rate": 1.2524259918928881e-05, "loss": 0.2830185294151306, "step": 3144 }, { "epoch": 0.38162844315010314, "grad_norm": 3.2341325283050537, "learning_rate": 1.2521803218277855e-05, "loss": 0.20774394273757935, "step": 3145 }, { "epoch": 0.3817497876471302, "grad_norm": 2.291086196899414, "learning_rate": 1.251934651762683e-05, "loss": 0.4800224304199219, "step": 3146 }, { "epoch": 0.38187113214415724, "grad_norm": 3.330087661743164, "learning_rate": 1.2516889816975804e-05, "loss": 0.6222670078277588, "step": 3147 }, { "epoch": 0.3819924766411843, "grad_norm": 2.547192335128784, "learning_rate": 1.2514433116324778e-05, "loss": 0.38417696952819824, "step": 3148 }, { "epoch": 0.3821138211382114, "grad_norm": 2.1112709045410156, "learning_rate": 1.2511976415673752e-05, "loss": 0.25194817781448364, "step": 3149 }, { "epoch": 0.38223516563523846, "grad_norm": 3.026857376098633, "learning_rate": 1.2509519715022727e-05, "loss": 0.3098749816417694, "step": 3150 }, { "epoch": 0.3823565101322655, "grad_norm": 1.1321700811386108, "learning_rate": 1.2507063014371701e-05, "loss": 0.059583015739917755, "step": 3151 }, { "epoch": 0.38247785462929257, "grad_norm": 2.0039877891540527, "learning_rate": 1.2504606313720675e-05, "loss": 0.11419906467199326, "step": 3152 }, { "epoch": 0.3825991991263196, "grad_norm": 2.0959770679473877, "learning_rate": 1.250214961306965e-05, "loss": 0.27986180782318115, "step": 3153 }, { "epoch": 0.3827205436233467, "grad_norm": 2.224518060684204, "learning_rate": 1.2499692912418622e-05, "loss": 0.4512319564819336, "step": 3154 }, { "epoch": 0.38284188812037373, "grad_norm": 3.628472328186035, "learning_rate": 1.2497236211767596e-05, "loss": 0.17027032375335693, "step": 3155 }, { "epoch": 0.3829632326174008, "grad_norm": 2.3958566188812256, "learning_rate": 1.249477951111657e-05, "loss": 0.4874890148639679, "step": 3156 }, { "epoch": 0.38308457711442784, "grad_norm": 3.159470319747925, "learning_rate": 1.2492322810465545e-05, "loss": 0.22992083430290222, "step": 3157 }, { "epoch": 0.38320592161145495, "grad_norm": 2.3461990356445312, "learning_rate": 1.2489866109814519e-05, "loss": 0.17536038160324097, "step": 3158 }, { "epoch": 0.383327266108482, "grad_norm": 2.364351749420166, "learning_rate": 1.2487409409163493e-05, "loss": 0.17691849172115326, "step": 3159 }, { "epoch": 0.38344861060550905, "grad_norm": 2.3514404296875, "learning_rate": 1.2484952708512468e-05, "loss": 0.6298539638519287, "step": 3160 }, { "epoch": 0.3835699551025361, "grad_norm": 2.537447452545166, "learning_rate": 1.2482496007861442e-05, "loss": 0.35013389587402344, "step": 3161 }, { "epoch": 0.38369129959956316, "grad_norm": 2.3254945278167725, "learning_rate": 1.2480039307210416e-05, "loss": 0.3717833161354065, "step": 3162 }, { "epoch": 0.3838126440965902, "grad_norm": 2.0248873233795166, "learning_rate": 1.247758260655939e-05, "loss": 0.46691590547561646, "step": 3163 }, { "epoch": 0.38393398859361727, "grad_norm": 2.308234214782715, "learning_rate": 1.2475125905908365e-05, "loss": 0.2653298079967499, "step": 3164 }, { "epoch": 0.3840553330906443, "grad_norm": 2.288908004760742, "learning_rate": 1.2472669205257339e-05, "loss": 0.33068254590034485, "step": 3165 }, { "epoch": 0.3841766775876714, "grad_norm": 2.5129432678222656, "learning_rate": 1.2470212504606313e-05, "loss": 0.7068184018135071, "step": 3166 }, { "epoch": 0.38429802208469843, "grad_norm": 0.04241632670164108, "learning_rate": 1.2467755803955289e-05, "loss": 0.0002795231994241476, "step": 3167 }, { "epoch": 0.38441936658172554, "grad_norm": 2.9558823108673096, "learning_rate": 1.2465299103304263e-05, "loss": 0.5982683897018433, "step": 3168 }, { "epoch": 0.3845407110787526, "grad_norm": 3.4366278648376465, "learning_rate": 1.2462842402653238e-05, "loss": 0.1740218549966812, "step": 3169 }, { "epoch": 0.38466205557577965, "grad_norm": 1.9050742387771606, "learning_rate": 1.2460385702002212e-05, "loss": 0.3764839172363281, "step": 3170 }, { "epoch": 0.3847834000728067, "grad_norm": 2.1938631534576416, "learning_rate": 1.2457929001351186e-05, "loss": 0.22942158579826355, "step": 3171 }, { "epoch": 0.38490474456983376, "grad_norm": 1.5889396667480469, "learning_rate": 1.245547230070016e-05, "loss": 0.4512004852294922, "step": 3172 }, { "epoch": 0.3850260890668608, "grad_norm": 2.1552133560180664, "learning_rate": 1.2453015600049135e-05, "loss": 0.41254958510398865, "step": 3173 }, { "epoch": 0.38514743356388786, "grad_norm": 2.0876870155334473, "learning_rate": 1.2450558899398109e-05, "loss": 0.18605846166610718, "step": 3174 }, { "epoch": 0.3852687780609149, "grad_norm": 1.8509613275527954, "learning_rate": 1.2448102198747083e-05, "loss": 0.054551247507333755, "step": 3175 }, { "epoch": 0.38539012255794197, "grad_norm": 1.963201880455017, "learning_rate": 1.2445645498096057e-05, "loss": 0.11149666458368301, "step": 3176 }, { "epoch": 0.3855114670549691, "grad_norm": 1.8932554721832275, "learning_rate": 1.2443188797445032e-05, "loss": 0.15254680812358856, "step": 3177 }, { "epoch": 0.38563281155199614, "grad_norm": 1.92061448097229, "learning_rate": 1.2440732096794006e-05, "loss": 0.2103467434644699, "step": 3178 }, { "epoch": 0.3857541560490232, "grad_norm": 3.4142160415649414, "learning_rate": 1.243827539614298e-05, "loss": 0.4288268983364105, "step": 3179 }, { "epoch": 0.38587550054605024, "grad_norm": 1.7066508531570435, "learning_rate": 1.2435818695491954e-05, "loss": 0.23310059309005737, "step": 3180 }, { "epoch": 0.3859968450430773, "grad_norm": 1.9355300664901733, "learning_rate": 1.2433361994840929e-05, "loss": 0.1703137308359146, "step": 3181 }, { "epoch": 0.38611818954010435, "grad_norm": 2.196570634841919, "learning_rate": 1.2430905294189903e-05, "loss": 0.3992791175842285, "step": 3182 }, { "epoch": 0.3862395340371314, "grad_norm": 2.8637094497680664, "learning_rate": 1.2428448593538877e-05, "loss": 0.2217373102903366, "step": 3183 }, { "epoch": 0.38636087853415846, "grad_norm": 2.239389657974243, "learning_rate": 1.2425991892887851e-05, "loss": 0.16824842989444733, "step": 3184 }, { "epoch": 0.3864822230311855, "grad_norm": 1.2548781633377075, "learning_rate": 1.2423535192236826e-05, "loss": 0.3385087847709656, "step": 3185 }, { "epoch": 0.3866035675282126, "grad_norm": 1.8939255475997925, "learning_rate": 1.24210784915858e-05, "loss": 0.17056138813495636, "step": 3186 }, { "epoch": 0.3867249120252397, "grad_norm": 2.307015895843506, "learning_rate": 1.2418621790934776e-05, "loss": 0.26844489574432373, "step": 3187 }, { "epoch": 0.38684625652226673, "grad_norm": 2.7086517810821533, "learning_rate": 1.241616509028375e-05, "loss": 0.46166932582855225, "step": 3188 }, { "epoch": 0.3869676010192938, "grad_norm": 1.6944949626922607, "learning_rate": 1.2413708389632724e-05, "loss": 0.25167518854141235, "step": 3189 }, { "epoch": 0.38708894551632084, "grad_norm": 2.3843600749969482, "learning_rate": 1.2411251688981699e-05, "loss": 0.13508471846580505, "step": 3190 }, { "epoch": 0.3872102900133479, "grad_norm": 2.2334437370300293, "learning_rate": 1.2408794988330673e-05, "loss": 0.4638756215572357, "step": 3191 }, { "epoch": 0.38733163451037494, "grad_norm": 1.9633973836898804, "learning_rate": 1.2406338287679647e-05, "loss": 0.28580886125564575, "step": 3192 }, { "epoch": 0.387452979007402, "grad_norm": 2.670125722885132, "learning_rate": 1.2403881587028621e-05, "loss": 0.3812810480594635, "step": 3193 }, { "epoch": 0.38757432350442905, "grad_norm": 1.94724702835083, "learning_rate": 1.2401424886377596e-05, "loss": 0.060899440199136734, "step": 3194 }, { "epoch": 0.3876956680014561, "grad_norm": 3.4984238147735596, "learning_rate": 1.239896818572657e-05, "loss": 0.2988520562648773, "step": 3195 }, { "epoch": 0.3878170124984832, "grad_norm": 3.7518434524536133, "learning_rate": 1.2396511485075544e-05, "loss": 0.4453827738761902, "step": 3196 }, { "epoch": 0.38793835699551027, "grad_norm": 1.590352177619934, "learning_rate": 1.2394054784424518e-05, "loss": 0.14353054761886597, "step": 3197 }, { "epoch": 0.3880597014925373, "grad_norm": 2.1668214797973633, "learning_rate": 1.2391598083773493e-05, "loss": 0.4555373191833496, "step": 3198 }, { "epoch": 0.3881810459895644, "grad_norm": 2.0084173679351807, "learning_rate": 1.2389141383122467e-05, "loss": 0.4028552770614624, "step": 3199 }, { "epoch": 0.38830239048659143, "grad_norm": 2.393423318862915, "learning_rate": 1.2386684682471441e-05, "loss": 0.5026838779449463, "step": 3200 }, { "epoch": 0.3884237349836185, "grad_norm": 2.0486631393432617, "learning_rate": 1.2384227981820415e-05, "loss": 0.24359053373336792, "step": 3201 }, { "epoch": 0.38854507948064554, "grad_norm": 2.0221498012542725, "learning_rate": 1.238177128116939e-05, "loss": 0.2597520053386688, "step": 3202 }, { "epoch": 0.3886664239776726, "grad_norm": 3.5579679012298584, "learning_rate": 1.2379314580518364e-05, "loss": 0.2755480706691742, "step": 3203 }, { "epoch": 0.38878776847469965, "grad_norm": 2.409950017929077, "learning_rate": 1.2376857879867338e-05, "loss": 0.42971426248550415, "step": 3204 }, { "epoch": 0.38890911297172676, "grad_norm": 1.7714725732803345, "learning_rate": 1.2374401179216313e-05, "loss": 0.36897578835487366, "step": 3205 }, { "epoch": 0.3890304574687538, "grad_norm": 1.6995115280151367, "learning_rate": 1.2371944478565287e-05, "loss": 0.10867939889431, "step": 3206 }, { "epoch": 0.38915180196578086, "grad_norm": 3.117710828781128, "learning_rate": 1.2369487777914263e-05, "loss": 0.233770951628685, "step": 3207 }, { "epoch": 0.3892731464628079, "grad_norm": 1.8280388116836548, "learning_rate": 1.2367031077263237e-05, "loss": 0.15694071352481842, "step": 3208 }, { "epoch": 0.38939449095983497, "grad_norm": 2.1613285541534424, "learning_rate": 1.2364574376612211e-05, "loss": 0.2800590991973877, "step": 3209 }, { "epoch": 0.389515835456862, "grad_norm": 3.0443708896636963, "learning_rate": 1.2362117675961185e-05, "loss": 0.2745928466320038, "step": 3210 }, { "epoch": 0.3896371799538891, "grad_norm": 2.057307243347168, "learning_rate": 1.235966097531016e-05, "loss": 0.2623305022716522, "step": 3211 }, { "epoch": 0.38975852445091613, "grad_norm": 2.3027143478393555, "learning_rate": 1.2357204274659134e-05, "loss": 0.22861507534980774, "step": 3212 }, { "epoch": 0.3898798689479432, "grad_norm": 1.321251392364502, "learning_rate": 1.2354747574008108e-05, "loss": 0.048757895827293396, "step": 3213 }, { "epoch": 0.3900012134449703, "grad_norm": 3.1153645515441895, "learning_rate": 1.2352290873357083e-05, "loss": 0.5409466624259949, "step": 3214 }, { "epoch": 0.39012255794199735, "grad_norm": 2.7181615829467773, "learning_rate": 1.2349834172706057e-05, "loss": 0.2796017825603485, "step": 3215 }, { "epoch": 0.3902439024390244, "grad_norm": 2.5121684074401855, "learning_rate": 1.2347377472055031e-05, "loss": 0.11338968575000763, "step": 3216 }, { "epoch": 0.39036524693605146, "grad_norm": 4.269004821777344, "learning_rate": 1.2344920771404005e-05, "loss": 0.6595829129219055, "step": 3217 }, { "epoch": 0.3904865914330785, "grad_norm": 2.7294209003448486, "learning_rate": 1.234246407075298e-05, "loss": 0.0769912600517273, "step": 3218 }, { "epoch": 0.39060793593010557, "grad_norm": 2.528923749923706, "learning_rate": 1.2340007370101954e-05, "loss": 0.1698744297027588, "step": 3219 }, { "epoch": 0.3907292804271326, "grad_norm": 2.590547800064087, "learning_rate": 1.2337550669450928e-05, "loss": 0.16006270051002502, "step": 3220 }, { "epoch": 0.3908506249241597, "grad_norm": 2.1922099590301514, "learning_rate": 1.2335093968799902e-05, "loss": 0.4186220169067383, "step": 3221 }, { "epoch": 0.3909719694211867, "grad_norm": 3.8413069248199463, "learning_rate": 1.2332637268148877e-05, "loss": 0.3306880593299866, "step": 3222 }, { "epoch": 0.39109331391821384, "grad_norm": 2.2855052947998047, "learning_rate": 1.2330180567497851e-05, "loss": 0.1272583305835724, "step": 3223 }, { "epoch": 0.3912146584152409, "grad_norm": 1.7248187065124512, "learning_rate": 1.2327723866846825e-05, "loss": 0.3310267925262451, "step": 3224 }, { "epoch": 0.39133600291226794, "grad_norm": 3.021890163421631, "learning_rate": 1.23252671661958e-05, "loss": 0.0816137045621872, "step": 3225 }, { "epoch": 0.391457347409295, "grad_norm": 1.641603708267212, "learning_rate": 1.2322810465544774e-05, "loss": 0.15734626352787018, "step": 3226 }, { "epoch": 0.39157869190632205, "grad_norm": 2.941344976425171, "learning_rate": 1.232035376489375e-05, "loss": 0.48225948214530945, "step": 3227 }, { "epoch": 0.3917000364033491, "grad_norm": 1.3670870065689087, "learning_rate": 1.2317897064242724e-05, "loss": 0.07061704248189926, "step": 3228 }, { "epoch": 0.39182138090037616, "grad_norm": 2.085196018218994, "learning_rate": 1.2315440363591698e-05, "loss": 0.02575668878853321, "step": 3229 }, { "epoch": 0.3919427253974032, "grad_norm": 2.4026095867156982, "learning_rate": 1.2312983662940672e-05, "loss": 0.6204736828804016, "step": 3230 }, { "epoch": 0.39206406989443027, "grad_norm": 0.7162059545516968, "learning_rate": 1.2310526962289647e-05, "loss": 0.06766770780086517, "step": 3231 }, { "epoch": 0.3921854143914573, "grad_norm": 2.660524606704712, "learning_rate": 1.2308070261638621e-05, "loss": 0.40516796708106995, "step": 3232 }, { "epoch": 0.39230675888848443, "grad_norm": 2.5455191135406494, "learning_rate": 1.2305613560987595e-05, "loss": 0.6203538179397583, "step": 3233 }, { "epoch": 0.3924281033855115, "grad_norm": 2.572350025177002, "learning_rate": 1.230315686033657e-05, "loss": 0.4282997250556946, "step": 3234 }, { "epoch": 0.39254944788253854, "grad_norm": 1.7551993131637573, "learning_rate": 1.2300700159685544e-05, "loss": 0.11783856898546219, "step": 3235 }, { "epoch": 0.3926707923795656, "grad_norm": 2.145592212677002, "learning_rate": 1.2298243459034518e-05, "loss": 0.38956859707832336, "step": 3236 }, { "epoch": 0.39279213687659265, "grad_norm": 2.457643985748291, "learning_rate": 1.2295786758383492e-05, "loss": 0.4253358244895935, "step": 3237 }, { "epoch": 0.3929134813736197, "grad_norm": 3.5604052543640137, "learning_rate": 1.2293330057732466e-05, "loss": 0.3451758325099945, "step": 3238 }, { "epoch": 0.39303482587064675, "grad_norm": 3.7612414360046387, "learning_rate": 1.229087335708144e-05, "loss": 0.48366186022758484, "step": 3239 }, { "epoch": 0.3931561703676738, "grad_norm": 2.9638922214508057, "learning_rate": 1.2288416656430415e-05, "loss": 0.2552471458911896, "step": 3240 }, { "epoch": 0.39327751486470086, "grad_norm": 1.779090404510498, "learning_rate": 1.228595995577939e-05, "loss": 0.22641043365001678, "step": 3241 }, { "epoch": 0.39339885936172797, "grad_norm": 3.6208078861236572, "learning_rate": 1.2283503255128363e-05, "loss": 0.6202171444892883, "step": 3242 }, { "epoch": 0.393520203858755, "grad_norm": 2.2945821285247803, "learning_rate": 1.2281046554477338e-05, "loss": 0.23252303898334503, "step": 3243 }, { "epoch": 0.3936415483557821, "grad_norm": 3.1645302772521973, "learning_rate": 1.2278589853826312e-05, "loss": 0.3423767685890198, "step": 3244 }, { "epoch": 0.39376289285280913, "grad_norm": 2.032697916030884, "learning_rate": 1.2276133153175286e-05, "loss": 0.21341218054294586, "step": 3245 }, { "epoch": 0.3938842373498362, "grad_norm": 2.588686227798462, "learning_rate": 1.227367645252426e-05, "loss": 0.3370200991630554, "step": 3246 }, { "epoch": 0.39400558184686324, "grad_norm": 2.5420618057250977, "learning_rate": 1.2271219751873236e-05, "loss": 0.24502411484718323, "step": 3247 }, { "epoch": 0.3941269263438903, "grad_norm": 1.9366720914840698, "learning_rate": 1.226876305122221e-05, "loss": 0.33743882179260254, "step": 3248 }, { "epoch": 0.39424827084091735, "grad_norm": 1.910409927368164, "learning_rate": 1.2266306350571185e-05, "loss": 0.16318891942501068, "step": 3249 }, { "epoch": 0.3943696153379444, "grad_norm": 4.104035377502441, "learning_rate": 1.226384964992016e-05, "loss": 0.16479748487472534, "step": 3250 }, { "epoch": 0.3944909598349715, "grad_norm": 2.1861987113952637, "learning_rate": 1.2261392949269133e-05, "loss": 0.19104893505573273, "step": 3251 }, { "epoch": 0.39461230433199856, "grad_norm": 2.10404109954834, "learning_rate": 1.2258936248618108e-05, "loss": 0.3565627336502075, "step": 3252 }, { "epoch": 0.3947336488290256, "grad_norm": 1.9464977979660034, "learning_rate": 1.2256479547967082e-05, "loss": 0.18876899778842926, "step": 3253 }, { "epoch": 0.3948549933260527, "grad_norm": 2.625919818878174, "learning_rate": 1.2254022847316056e-05, "loss": 0.2997460663318634, "step": 3254 }, { "epoch": 0.3949763378230797, "grad_norm": 1.9485828876495361, "learning_rate": 1.225156614666503e-05, "loss": 0.15886113047599792, "step": 3255 }, { "epoch": 0.3950976823201068, "grad_norm": 2.5636370182037354, "learning_rate": 1.2249109446014005e-05, "loss": 0.13460613787174225, "step": 3256 }, { "epoch": 0.39521902681713383, "grad_norm": 2.213409662246704, "learning_rate": 1.2246652745362979e-05, "loss": 0.2532368302345276, "step": 3257 }, { "epoch": 0.3953403713141609, "grad_norm": 2.0923726558685303, "learning_rate": 1.2244196044711953e-05, "loss": 0.3443406820297241, "step": 3258 }, { "epoch": 0.39546171581118794, "grad_norm": 2.9076550006866455, "learning_rate": 1.2241739344060928e-05, "loss": 0.3790132999420166, "step": 3259 }, { "epoch": 0.395583060308215, "grad_norm": 3.352142572402954, "learning_rate": 1.2239282643409902e-05, "loss": 0.27705270051956177, "step": 3260 }, { "epoch": 0.3957044048052421, "grad_norm": 2.4780049324035645, "learning_rate": 1.2236825942758876e-05, "loss": 0.22344955801963806, "step": 3261 }, { "epoch": 0.39582574930226916, "grad_norm": 0.6083203554153442, "learning_rate": 1.223436924210785e-05, "loss": 0.008890108205378056, "step": 3262 }, { "epoch": 0.3959470937992962, "grad_norm": 3.4495291709899902, "learning_rate": 1.2231912541456825e-05, "loss": 0.3349066376686096, "step": 3263 }, { "epoch": 0.39606843829632327, "grad_norm": 4.21277379989624, "learning_rate": 1.2229455840805799e-05, "loss": 0.2736901044845581, "step": 3264 }, { "epoch": 0.3961897827933503, "grad_norm": 3.274369239807129, "learning_rate": 1.2226999140154773e-05, "loss": 0.332505464553833, "step": 3265 }, { "epoch": 0.3963111272903774, "grad_norm": 3.3138391971588135, "learning_rate": 1.2224542439503749e-05, "loss": 0.3367203176021576, "step": 3266 }, { "epoch": 0.39643247178740443, "grad_norm": 1.1294124126434326, "learning_rate": 1.2222085738852723e-05, "loss": 0.14279571175575256, "step": 3267 }, { "epoch": 0.3965538162844315, "grad_norm": 2.2251570224761963, "learning_rate": 1.2219629038201698e-05, "loss": 0.20248806476593018, "step": 3268 }, { "epoch": 0.39667516078145854, "grad_norm": 3.620305061340332, "learning_rate": 1.2217172337550672e-05, "loss": 0.2944931387901306, "step": 3269 }, { "epoch": 0.39679650527848565, "grad_norm": 2.076772689819336, "learning_rate": 1.2214715636899646e-05, "loss": 0.2450747936964035, "step": 3270 }, { "epoch": 0.3969178497755127, "grad_norm": 2.4754016399383545, "learning_rate": 1.221225893624862e-05, "loss": 0.329637348651886, "step": 3271 }, { "epoch": 0.39703919427253975, "grad_norm": 3.68548583984375, "learning_rate": 1.2209802235597595e-05, "loss": 0.36103370785713196, "step": 3272 }, { "epoch": 0.3971605387695668, "grad_norm": 3.922680616378784, "learning_rate": 1.2207345534946569e-05, "loss": 0.35007330775260925, "step": 3273 }, { "epoch": 0.39728188326659386, "grad_norm": 3.0613858699798584, "learning_rate": 1.2204888834295543e-05, "loss": 0.5026612877845764, "step": 3274 }, { "epoch": 0.3974032277636209, "grad_norm": 3.63730525970459, "learning_rate": 1.2202432133644517e-05, "loss": 0.5656598806381226, "step": 3275 }, { "epoch": 0.39752457226064797, "grad_norm": 2.1219875812530518, "learning_rate": 1.2199975432993492e-05, "loss": 0.15861688554286957, "step": 3276 }, { "epoch": 0.397645916757675, "grad_norm": 2.8872973918914795, "learning_rate": 1.2197518732342466e-05, "loss": 0.13566479086875916, "step": 3277 }, { "epoch": 0.3977672612547021, "grad_norm": 2.136732816696167, "learning_rate": 1.219506203169144e-05, "loss": 0.14007946848869324, "step": 3278 }, { "epoch": 0.3978886057517292, "grad_norm": 1.4482614994049072, "learning_rate": 1.2192605331040414e-05, "loss": 0.4385533332824707, "step": 3279 }, { "epoch": 0.39800995024875624, "grad_norm": 1.9478120803833008, "learning_rate": 1.2190148630389389e-05, "loss": 0.14154918491840363, "step": 3280 }, { "epoch": 0.3981312947457833, "grad_norm": 2.286846876144409, "learning_rate": 1.2187691929738363e-05, "loss": 0.12173419445753098, "step": 3281 }, { "epoch": 0.39825263924281035, "grad_norm": 1.8455288410186768, "learning_rate": 1.2185235229087337e-05, "loss": 0.08757827430963516, "step": 3282 }, { "epoch": 0.3983739837398374, "grad_norm": 2.537466287612915, "learning_rate": 1.2182778528436311e-05, "loss": 0.32250505685806274, "step": 3283 }, { "epoch": 0.39849532823686445, "grad_norm": 0.4925089478492737, "learning_rate": 1.2180321827785286e-05, "loss": 0.004798689857125282, "step": 3284 }, { "epoch": 0.3986166727338915, "grad_norm": 1.9180618524551392, "learning_rate": 1.217786512713426e-05, "loss": 0.1593484878540039, "step": 3285 }, { "epoch": 0.39873801723091856, "grad_norm": 2.7618894577026367, "learning_rate": 1.2175408426483236e-05, "loss": 0.2754642069339752, "step": 3286 }, { "epoch": 0.3988593617279456, "grad_norm": 3.483588933944702, "learning_rate": 1.217295172583221e-05, "loss": 0.1414378434419632, "step": 3287 }, { "epoch": 0.39898070622497267, "grad_norm": 1.5736106634140015, "learning_rate": 1.2170495025181184e-05, "loss": 0.07543988525867462, "step": 3288 }, { "epoch": 0.3991020507219998, "grad_norm": 3.2746686935424805, "learning_rate": 1.2168038324530159e-05, "loss": 0.3163296580314636, "step": 3289 }, { "epoch": 0.39922339521902683, "grad_norm": 1.879117727279663, "learning_rate": 1.216558162387913e-05, "loss": 0.06840763986110687, "step": 3290 }, { "epoch": 0.3993447397160539, "grad_norm": 2.6469168663024902, "learning_rate": 1.2163124923228104e-05, "loss": 0.283497154712677, "step": 3291 }, { "epoch": 0.39946608421308094, "grad_norm": 2.1057522296905518, "learning_rate": 1.216066822257708e-05, "loss": 0.4513644278049469, "step": 3292 }, { "epoch": 0.399587428710108, "grad_norm": 1.2667713165283203, "learning_rate": 1.2158211521926054e-05, "loss": 0.06725046038627625, "step": 3293 }, { "epoch": 0.39970877320713505, "grad_norm": 1.7996500730514526, "learning_rate": 1.2155754821275028e-05, "loss": 0.0848223939538002, "step": 3294 }, { "epoch": 0.3998301177041621, "grad_norm": 2.1585934162139893, "learning_rate": 1.2153298120624003e-05, "loss": 0.14250634610652924, "step": 3295 }, { "epoch": 0.39995146220118916, "grad_norm": 2.8726449012756348, "learning_rate": 1.2150841419972977e-05, "loss": 0.4488234519958496, "step": 3296 }, { "epoch": 0.4000728066982162, "grad_norm": 3.3992249965667725, "learning_rate": 1.2148384719321951e-05, "loss": 0.06597807258367538, "step": 3297 }, { "epoch": 0.4001941511952433, "grad_norm": 2.812727689743042, "learning_rate": 1.2145928018670925e-05, "loss": 0.34727030992507935, "step": 3298 }, { "epoch": 0.4003154956922704, "grad_norm": 2.010152578353882, "learning_rate": 1.21434713180199e-05, "loss": 0.5472154021263123, "step": 3299 }, { "epoch": 0.4004368401892974, "grad_norm": 2.1349987983703613, "learning_rate": 1.2141014617368874e-05, "loss": 0.34844332933425903, "step": 3300 }, { "epoch": 0.4005581846863245, "grad_norm": 4.817236423492432, "learning_rate": 1.2138557916717848e-05, "loss": 0.3785272240638733, "step": 3301 }, { "epoch": 0.40067952918335153, "grad_norm": 1.7386378049850464, "learning_rate": 1.2136101216066822e-05, "loss": 0.1248244047164917, "step": 3302 }, { "epoch": 0.4008008736803786, "grad_norm": 1.653026819229126, "learning_rate": 1.2133644515415797e-05, "loss": 0.19100800156593323, "step": 3303 }, { "epoch": 0.40092221817740564, "grad_norm": 2.473686695098877, "learning_rate": 1.2131187814764771e-05, "loss": 0.32151561975479126, "step": 3304 }, { "epoch": 0.4010435626744327, "grad_norm": 4.7555670738220215, "learning_rate": 1.2128731114113745e-05, "loss": 0.19386518001556396, "step": 3305 }, { "epoch": 0.40116490717145975, "grad_norm": 2.2356576919555664, "learning_rate": 1.212627441346272e-05, "loss": 0.2218162715435028, "step": 3306 }, { "epoch": 0.40128625166848686, "grad_norm": 4.039072513580322, "learning_rate": 1.2123817712811694e-05, "loss": 0.18677039444446564, "step": 3307 }, { "epoch": 0.4014075961655139, "grad_norm": 3.065251588821411, "learning_rate": 1.2121361012160668e-05, "loss": 0.21303556859493256, "step": 3308 }, { "epoch": 0.40152894066254097, "grad_norm": 2.333803653717041, "learning_rate": 1.2118904311509642e-05, "loss": 0.23278328776359558, "step": 3309 }, { "epoch": 0.401650285159568, "grad_norm": 1.8717297315597534, "learning_rate": 1.2116447610858616e-05, "loss": 0.5052412748336792, "step": 3310 }, { "epoch": 0.4017716296565951, "grad_norm": 2.5327258110046387, "learning_rate": 1.211399091020759e-05, "loss": 0.2127455770969391, "step": 3311 }, { "epoch": 0.40189297415362213, "grad_norm": 3.4019956588745117, "learning_rate": 1.2111534209556567e-05, "loss": 0.25699320435523987, "step": 3312 }, { "epoch": 0.4020143186506492, "grad_norm": 3.1442573070526123, "learning_rate": 1.2109077508905541e-05, "loss": 0.08312302827835083, "step": 3313 }, { "epoch": 0.40213566314767624, "grad_norm": 2.6303045749664307, "learning_rate": 1.2106620808254515e-05, "loss": 0.28288233280181885, "step": 3314 }, { "epoch": 0.4022570076447033, "grad_norm": 2.8890466690063477, "learning_rate": 1.210416410760349e-05, "loss": 0.23453925549983978, "step": 3315 }, { "epoch": 0.4023783521417304, "grad_norm": 2.136404514312744, "learning_rate": 1.2101707406952464e-05, "loss": 0.2528800964355469, "step": 3316 }, { "epoch": 0.40249969663875745, "grad_norm": 3.9638454914093018, "learning_rate": 1.2099250706301438e-05, "loss": 0.25377410650253296, "step": 3317 }, { "epoch": 0.4026210411357845, "grad_norm": 3.9589438438415527, "learning_rate": 1.2096794005650412e-05, "loss": 0.3660302758216858, "step": 3318 }, { "epoch": 0.40274238563281156, "grad_norm": 1.771589994430542, "learning_rate": 1.2094337304999386e-05, "loss": 0.08093957602977753, "step": 3319 }, { "epoch": 0.4028637301298386, "grad_norm": 1.7189016342163086, "learning_rate": 1.209188060434836e-05, "loss": 0.07831019163131714, "step": 3320 }, { "epoch": 0.40298507462686567, "grad_norm": 3.500816583633423, "learning_rate": 1.2089423903697335e-05, "loss": 0.24039025604724884, "step": 3321 }, { "epoch": 0.4031064191238927, "grad_norm": 1.0927573442459106, "learning_rate": 1.208696720304631e-05, "loss": 0.03208920359611511, "step": 3322 }, { "epoch": 0.4032277636209198, "grad_norm": 2.8350446224212646, "learning_rate": 1.2084510502395283e-05, "loss": 0.10441645979881287, "step": 3323 }, { "epoch": 0.40334910811794683, "grad_norm": 2.9019887447357178, "learning_rate": 1.2082053801744258e-05, "loss": 0.3544785976409912, "step": 3324 }, { "epoch": 0.4034704526149739, "grad_norm": 3.5630905628204346, "learning_rate": 1.2079597101093232e-05, "loss": 0.48876744508743286, "step": 3325 }, { "epoch": 0.403591797112001, "grad_norm": 2.5119359493255615, "learning_rate": 1.2077140400442206e-05, "loss": 0.37746191024780273, "step": 3326 }, { "epoch": 0.40371314160902805, "grad_norm": 3.674171209335327, "learning_rate": 1.207468369979118e-05, "loss": 0.13165758550167084, "step": 3327 }, { "epoch": 0.4038344861060551, "grad_norm": 2.2177445888519287, "learning_rate": 1.2072226999140155e-05, "loss": 0.2323538362979889, "step": 3328 }, { "epoch": 0.40395583060308216, "grad_norm": 2.0879428386688232, "learning_rate": 1.2069770298489129e-05, "loss": 0.37489527463912964, "step": 3329 }, { "epoch": 0.4040771751001092, "grad_norm": 3.3663511276245117, "learning_rate": 1.2067313597838103e-05, "loss": 0.28204816579818726, "step": 3330 }, { "epoch": 0.40419851959713626, "grad_norm": 2.811638355255127, "learning_rate": 1.2064856897187078e-05, "loss": 0.2594698965549469, "step": 3331 }, { "epoch": 0.4043198640941633, "grad_norm": 4.1061882972717285, "learning_rate": 1.2062400196536053e-05, "loss": 0.5023982524871826, "step": 3332 }, { "epoch": 0.40444120859119037, "grad_norm": 1.7032650709152222, "learning_rate": 1.2059943495885028e-05, "loss": 0.1312413364648819, "step": 3333 }, { "epoch": 0.4045625530882174, "grad_norm": 2.882871150970459, "learning_rate": 1.2057486795234002e-05, "loss": 0.13748528063297272, "step": 3334 }, { "epoch": 0.40468389758524453, "grad_norm": 2.590442180633545, "learning_rate": 1.2055030094582976e-05, "loss": 0.2229790985584259, "step": 3335 }, { "epoch": 0.4048052420822716, "grad_norm": 2.217602491378784, "learning_rate": 1.205257339393195e-05, "loss": 0.3741094470024109, "step": 3336 }, { "epoch": 0.40492658657929864, "grad_norm": 5.3910322189331055, "learning_rate": 1.2050116693280925e-05, "loss": 0.6367065906524658, "step": 3337 }, { "epoch": 0.4050479310763257, "grad_norm": 2.7664647102355957, "learning_rate": 1.2047659992629899e-05, "loss": 0.21413977444171906, "step": 3338 }, { "epoch": 0.40516927557335275, "grad_norm": 2.5352351665496826, "learning_rate": 1.2045203291978873e-05, "loss": 0.41863587498664856, "step": 3339 }, { "epoch": 0.4052906200703798, "grad_norm": 1.850381851196289, "learning_rate": 1.2042746591327848e-05, "loss": 0.08942709118127823, "step": 3340 }, { "epoch": 0.40541196456740686, "grad_norm": 1.6138914823532104, "learning_rate": 1.2040289890676822e-05, "loss": 0.0864180326461792, "step": 3341 }, { "epoch": 0.4055333090644339, "grad_norm": 2.189840316772461, "learning_rate": 1.2037833190025796e-05, "loss": 0.27191150188446045, "step": 3342 }, { "epoch": 0.40565465356146096, "grad_norm": 2.550299882888794, "learning_rate": 1.203537648937477e-05, "loss": 0.5286152362823486, "step": 3343 }, { "epoch": 0.4057759980584881, "grad_norm": 1.8738741874694824, "learning_rate": 1.2032919788723745e-05, "loss": 0.183305025100708, "step": 3344 }, { "epoch": 0.40589734255551513, "grad_norm": 2.3162126541137695, "learning_rate": 1.2030463088072719e-05, "loss": 0.48604118824005127, "step": 3345 }, { "epoch": 0.4060186870525422, "grad_norm": 1.9461023807525635, "learning_rate": 1.2028006387421693e-05, "loss": 0.1616666615009308, "step": 3346 }, { "epoch": 0.40614003154956924, "grad_norm": 4.0628743171691895, "learning_rate": 1.2025549686770667e-05, "loss": 0.3301737904548645, "step": 3347 }, { "epoch": 0.4062613760465963, "grad_norm": 2.348146438598633, "learning_rate": 1.2023092986119642e-05, "loss": 0.16470575332641602, "step": 3348 }, { "epoch": 0.40638272054362334, "grad_norm": 2.3404624462127686, "learning_rate": 1.2020636285468616e-05, "loss": 0.3218824863433838, "step": 3349 }, { "epoch": 0.4065040650406504, "grad_norm": 1.9611907005310059, "learning_rate": 1.201817958481759e-05, "loss": 0.12972447276115417, "step": 3350 }, { "epoch": 0.40662540953767745, "grad_norm": 2.2058868408203125, "learning_rate": 1.2015722884166564e-05, "loss": 0.10044681280851364, "step": 3351 }, { "epoch": 0.4067467540347045, "grad_norm": 2.242136240005493, "learning_rate": 1.201326618351554e-05, "loss": 0.11007091403007507, "step": 3352 }, { "epoch": 0.40686809853173156, "grad_norm": 0.8104648590087891, "learning_rate": 1.2010809482864515e-05, "loss": 0.01841391623020172, "step": 3353 }, { "epoch": 0.40698944302875867, "grad_norm": 3.7218685150146484, "learning_rate": 1.2008352782213489e-05, "loss": 0.270338237285614, "step": 3354 }, { "epoch": 0.4071107875257857, "grad_norm": 1.942610502243042, "learning_rate": 1.2005896081562463e-05, "loss": 0.22724571824073792, "step": 3355 }, { "epoch": 0.4072321320228128, "grad_norm": 2.6146812438964844, "learning_rate": 1.2003439380911437e-05, "loss": 0.23565348982810974, "step": 3356 }, { "epoch": 0.40735347651983983, "grad_norm": 2.875767707824707, "learning_rate": 1.2000982680260412e-05, "loss": 0.2510961592197418, "step": 3357 }, { "epoch": 0.4074748210168669, "grad_norm": 3.8768150806427, "learning_rate": 1.1998525979609386e-05, "loss": 0.32638803124427795, "step": 3358 }, { "epoch": 0.40759616551389394, "grad_norm": 2.081332206726074, "learning_rate": 1.199606927895836e-05, "loss": 0.1831972599029541, "step": 3359 }, { "epoch": 0.407717510010921, "grad_norm": 2.9332032203674316, "learning_rate": 1.1993612578307334e-05, "loss": 0.22655761241912842, "step": 3360 }, { "epoch": 0.40783885450794805, "grad_norm": 2.714696168899536, "learning_rate": 1.1991155877656309e-05, "loss": 0.22243930399417877, "step": 3361 }, { "epoch": 0.4079601990049751, "grad_norm": 3.028733253479004, "learning_rate": 1.1988699177005283e-05, "loss": 0.10828859359025955, "step": 3362 }, { "epoch": 0.4080815435020022, "grad_norm": 2.400108814239502, "learning_rate": 1.1986242476354257e-05, "loss": 0.4200745224952698, "step": 3363 }, { "epoch": 0.40820288799902926, "grad_norm": 2.8346779346466064, "learning_rate": 1.1983785775703231e-05, "loss": 0.30214208364486694, "step": 3364 }, { "epoch": 0.4083242324960563, "grad_norm": 3.227933883666992, "learning_rate": 1.1981329075052206e-05, "loss": 0.4001484513282776, "step": 3365 }, { "epoch": 0.40844557699308337, "grad_norm": 2.5522067546844482, "learning_rate": 1.197887237440118e-05, "loss": 0.10643468797206879, "step": 3366 }, { "epoch": 0.4085669214901104, "grad_norm": 2.336789608001709, "learning_rate": 1.1976415673750154e-05, "loss": 0.25655195116996765, "step": 3367 }, { "epoch": 0.4086882659871375, "grad_norm": 1.6107679605484009, "learning_rate": 1.1973958973099128e-05, "loss": 0.05966953933238983, "step": 3368 }, { "epoch": 0.40880961048416453, "grad_norm": 2.949903726577759, "learning_rate": 1.1971502272448103e-05, "loss": 0.21616233885288239, "step": 3369 }, { "epoch": 0.4089309549811916, "grad_norm": 0.012700640596449375, "learning_rate": 1.1969045571797077e-05, "loss": 0.00018036008987110108, "step": 3370 }, { "epoch": 0.40905229947821864, "grad_norm": 2.55505108833313, "learning_rate": 1.1966588871146051e-05, "loss": 0.18714764714241028, "step": 3371 }, { "epoch": 0.40917364397524575, "grad_norm": 2.1479029655456543, "learning_rate": 1.1964132170495027e-05, "loss": 0.4700539708137512, "step": 3372 }, { "epoch": 0.4092949884722728, "grad_norm": 3.7351419925689697, "learning_rate": 1.1961675469844001e-05, "loss": 0.051050830632448196, "step": 3373 }, { "epoch": 0.40941633296929986, "grad_norm": 0.914373517036438, "learning_rate": 1.1959218769192976e-05, "loss": 0.026432767510414124, "step": 3374 }, { "epoch": 0.4095376774663269, "grad_norm": 2.449012041091919, "learning_rate": 1.195676206854195e-05, "loss": 0.22211278975009918, "step": 3375 }, { "epoch": 0.40965902196335396, "grad_norm": 2.449822187423706, "learning_rate": 1.1954305367890924e-05, "loss": 0.1475517302751541, "step": 3376 }, { "epoch": 0.409780366460381, "grad_norm": 2.3760457038879395, "learning_rate": 1.1951848667239898e-05, "loss": 0.2158413827419281, "step": 3377 }, { "epoch": 0.40990171095740807, "grad_norm": 3.990077257156372, "learning_rate": 1.1949391966588873e-05, "loss": 0.2949408292770386, "step": 3378 }, { "epoch": 0.4100230554544351, "grad_norm": 2.18121600151062, "learning_rate": 1.1946935265937847e-05, "loss": 0.3253840208053589, "step": 3379 }, { "epoch": 0.4101443999514622, "grad_norm": 4.510829448699951, "learning_rate": 1.1944478565286821e-05, "loss": 0.25793325901031494, "step": 3380 }, { "epoch": 0.41026574444848923, "grad_norm": 1.6633398532867432, "learning_rate": 1.1942021864635796e-05, "loss": 0.23681926727294922, "step": 3381 }, { "epoch": 0.41038708894551634, "grad_norm": 3.3275249004364014, "learning_rate": 1.193956516398477e-05, "loss": 0.314714252948761, "step": 3382 }, { "epoch": 0.4105084334425434, "grad_norm": 1.8083856105804443, "learning_rate": 1.1937108463333744e-05, "loss": 0.1765631139278412, "step": 3383 }, { "epoch": 0.41062977793957045, "grad_norm": 2.6071438789367676, "learning_rate": 1.1934651762682718e-05, "loss": 0.3219744563102722, "step": 3384 }, { "epoch": 0.4107511224365975, "grad_norm": 2.4086461067199707, "learning_rate": 1.1932195062031693e-05, "loss": 0.09224195033311844, "step": 3385 }, { "epoch": 0.41087246693362456, "grad_norm": 0.7400537729263306, "learning_rate": 1.1929738361380667e-05, "loss": 0.009996118023991585, "step": 3386 }, { "epoch": 0.4109938114306516, "grad_norm": 1.6936898231506348, "learning_rate": 1.1927281660729641e-05, "loss": 0.013633204624056816, "step": 3387 }, { "epoch": 0.41111515592767867, "grad_norm": 1.4746376276016235, "learning_rate": 1.1924824960078615e-05, "loss": 0.07111170887947083, "step": 3388 }, { "epoch": 0.4112365004247057, "grad_norm": 1.5193908214569092, "learning_rate": 1.192236825942759e-05, "loss": 0.18472343683242798, "step": 3389 }, { "epoch": 0.4113578449217328, "grad_norm": 2.5291969776153564, "learning_rate": 1.1919911558776564e-05, "loss": 0.6256142854690552, "step": 3390 }, { "epoch": 0.4114791894187599, "grad_norm": 1.5568585395812988, "learning_rate": 1.1917454858125538e-05, "loss": 0.17391648888587952, "step": 3391 }, { "epoch": 0.41160053391578694, "grad_norm": 1.5837876796722412, "learning_rate": 1.1914998157474514e-05, "loss": 0.16797544062137604, "step": 3392 }, { "epoch": 0.411721878412814, "grad_norm": 2.7582831382751465, "learning_rate": 1.1912541456823488e-05, "loss": 0.5750889778137207, "step": 3393 }, { "epoch": 0.41184322290984104, "grad_norm": 2.240612745285034, "learning_rate": 1.1910084756172463e-05, "loss": 0.4195220470428467, "step": 3394 }, { "epoch": 0.4119645674068681, "grad_norm": 3.0088918209075928, "learning_rate": 1.1907628055521437e-05, "loss": 0.2987426519393921, "step": 3395 }, { "epoch": 0.41208591190389515, "grad_norm": 1.6480534076690674, "learning_rate": 1.1905171354870411e-05, "loss": 0.07369241118431091, "step": 3396 }, { "epoch": 0.4122072564009222, "grad_norm": 3.518282175064087, "learning_rate": 1.1902714654219385e-05, "loss": 0.22647514939308167, "step": 3397 }, { "epoch": 0.41232860089794926, "grad_norm": 2.50191068649292, "learning_rate": 1.190025795356836e-05, "loss": 0.5125393867492676, "step": 3398 }, { "epoch": 0.4124499453949763, "grad_norm": 1.9100526571273804, "learning_rate": 1.1897801252917334e-05, "loss": 0.32923686504364014, "step": 3399 }, { "epoch": 0.4125712898920034, "grad_norm": 1.7821210622787476, "learning_rate": 1.1895344552266308e-05, "loss": 0.25902876257896423, "step": 3400 }, { "epoch": 0.4126926343890305, "grad_norm": 1.7797950506210327, "learning_rate": 1.1892887851615282e-05, "loss": 0.14268380403518677, "step": 3401 }, { "epoch": 0.41281397888605753, "grad_norm": 2.540289878845215, "learning_rate": 1.1890431150964257e-05, "loss": 0.24589216709136963, "step": 3402 }, { "epoch": 0.4129353233830846, "grad_norm": 2.3687174320220947, "learning_rate": 1.1887974450313231e-05, "loss": 0.2886613607406616, "step": 3403 }, { "epoch": 0.41305666788011164, "grad_norm": 3.2549076080322266, "learning_rate": 1.1885517749662205e-05, "loss": 0.3263327479362488, "step": 3404 }, { "epoch": 0.4131780123771387, "grad_norm": 3.080291509628296, "learning_rate": 1.188306104901118e-05, "loss": 0.5150718092918396, "step": 3405 }, { "epoch": 0.41329935687416575, "grad_norm": 2.8416271209716797, "learning_rate": 1.1880604348360154e-05, "loss": 0.41582363843917847, "step": 3406 }, { "epoch": 0.4134207013711928, "grad_norm": 1.054196834564209, "learning_rate": 1.1878147647709128e-05, "loss": 0.013622181490063667, "step": 3407 }, { "epoch": 0.41354204586821985, "grad_norm": 2.1204440593719482, "learning_rate": 1.1875690947058102e-05, "loss": 0.11492344737052917, "step": 3408 }, { "epoch": 0.41366339036524696, "grad_norm": 3.895838975906372, "learning_rate": 1.1873234246407076e-05, "loss": 0.3368130624294281, "step": 3409 }, { "epoch": 0.413784734862274, "grad_norm": 1.3537365198135376, "learning_rate": 1.187077754575605e-05, "loss": 0.12048971652984619, "step": 3410 }, { "epoch": 0.41390607935930107, "grad_norm": 1.7345317602157593, "learning_rate": 1.1868320845105027e-05, "loss": 0.23148749768733978, "step": 3411 }, { "epoch": 0.4140274238563281, "grad_norm": 0.9605816602706909, "learning_rate": 1.1865864144454001e-05, "loss": 0.03351011127233505, "step": 3412 }, { "epoch": 0.4141487683533552, "grad_norm": 3.424372673034668, "learning_rate": 1.1863407443802975e-05, "loss": 0.14960801601409912, "step": 3413 }, { "epoch": 0.41427011285038223, "grad_norm": 2.743013381958008, "learning_rate": 1.186095074315195e-05, "loss": 0.47832292318344116, "step": 3414 }, { "epoch": 0.4143914573474093, "grad_norm": 2.6609420776367188, "learning_rate": 1.1858494042500924e-05, "loss": 0.16290368139743805, "step": 3415 }, { "epoch": 0.41451280184443634, "grad_norm": 3.455261707305908, "learning_rate": 1.1856037341849898e-05, "loss": 0.39955049753189087, "step": 3416 }, { "epoch": 0.4146341463414634, "grad_norm": 1.9961707592010498, "learning_rate": 1.1853580641198872e-05, "loss": 0.15304440259933472, "step": 3417 }, { "epoch": 0.41475549083849045, "grad_norm": 5.198159217834473, "learning_rate": 1.1851123940547846e-05, "loss": 0.1540556102991104, "step": 3418 }, { "epoch": 0.41487683533551756, "grad_norm": 2.2172584533691406, "learning_rate": 1.184866723989682e-05, "loss": 0.1485886126756668, "step": 3419 }, { "epoch": 0.4149981798325446, "grad_norm": 2.6809258460998535, "learning_rate": 1.1846210539245795e-05, "loss": 0.17108909785747528, "step": 3420 }, { "epoch": 0.41511952432957167, "grad_norm": 2.615473985671997, "learning_rate": 1.184375383859477e-05, "loss": 0.3039502501487732, "step": 3421 }, { "epoch": 0.4152408688265987, "grad_norm": 0.0017497573280707002, "learning_rate": 1.1841297137943743e-05, "loss": 4.755393456434831e-05, "step": 3422 }, { "epoch": 0.4153622133236258, "grad_norm": 2.4875738620758057, "learning_rate": 1.1838840437292718e-05, "loss": 0.17304940521717072, "step": 3423 }, { "epoch": 0.4154835578206528, "grad_norm": 3.6963891983032227, "learning_rate": 1.1836383736641692e-05, "loss": 0.22823911905288696, "step": 3424 }, { "epoch": 0.4156049023176799, "grad_norm": 2.8694639205932617, "learning_rate": 1.1833927035990666e-05, "loss": 0.2963635325431824, "step": 3425 }, { "epoch": 0.41572624681470693, "grad_norm": 2.4273478984832764, "learning_rate": 1.1831470335339639e-05, "loss": 0.10278123617172241, "step": 3426 }, { "epoch": 0.415847591311734, "grad_norm": 2.7320337295532227, "learning_rate": 1.1829013634688613e-05, "loss": 0.10359422862529755, "step": 3427 }, { "epoch": 0.4159689358087611, "grad_norm": 3.307586193084717, "learning_rate": 1.1826556934037587e-05, "loss": 0.24914634227752686, "step": 3428 }, { "epoch": 0.41609028030578815, "grad_norm": 2.7045950889587402, "learning_rate": 1.1824100233386562e-05, "loss": 0.1354033648967743, "step": 3429 }, { "epoch": 0.4162116248028152, "grad_norm": 3.0522232055664062, "learning_rate": 1.1821643532735536e-05, "loss": 0.24849481880664825, "step": 3430 }, { "epoch": 0.41633296929984226, "grad_norm": 3.298497438430786, "learning_rate": 1.181918683208451e-05, "loss": 0.12789031863212585, "step": 3431 }, { "epoch": 0.4164543137968693, "grad_norm": 3.9013428688049316, "learning_rate": 1.1816730131433484e-05, "loss": 0.49709630012512207, "step": 3432 }, { "epoch": 0.41657565829389637, "grad_norm": 1.721135139465332, "learning_rate": 1.1814273430782459e-05, "loss": 0.42568439245224, "step": 3433 }, { "epoch": 0.4166970027909234, "grad_norm": 1.8467674255371094, "learning_rate": 1.1811816730131433e-05, "loss": 0.43003901839256287, "step": 3434 }, { "epoch": 0.4168183472879505, "grad_norm": 2.3580636978149414, "learning_rate": 1.1809360029480407e-05, "loss": 0.25825563073158264, "step": 3435 }, { "epoch": 0.41693969178497753, "grad_norm": 2.2304062843322754, "learning_rate": 1.1806903328829381e-05, "loss": 0.3182814121246338, "step": 3436 }, { "epoch": 0.41706103628200464, "grad_norm": 3.9392285346984863, "learning_rate": 1.1804446628178357e-05, "loss": 0.07946665585041046, "step": 3437 }, { "epoch": 0.4171823807790317, "grad_norm": 3.439152240753174, "learning_rate": 1.1801989927527332e-05, "loss": 0.4690271019935608, "step": 3438 }, { "epoch": 0.41730372527605875, "grad_norm": 1.1062110662460327, "learning_rate": 1.1799533226876306e-05, "loss": 0.023044677451252937, "step": 3439 }, { "epoch": 0.4174250697730858, "grad_norm": 2.055751085281372, "learning_rate": 1.179707652622528e-05, "loss": 0.3243967890739441, "step": 3440 }, { "epoch": 0.41754641427011285, "grad_norm": 2.848154306411743, "learning_rate": 1.1794619825574254e-05, "loss": 0.411385715007782, "step": 3441 }, { "epoch": 0.4176677587671399, "grad_norm": 2.5012378692626953, "learning_rate": 1.1792163124923229e-05, "loss": 0.1882108449935913, "step": 3442 }, { "epoch": 0.41778910326416696, "grad_norm": 3.05448579788208, "learning_rate": 1.1789706424272203e-05, "loss": 0.3536946773529053, "step": 3443 }, { "epoch": 0.417910447761194, "grad_norm": 3.1110012531280518, "learning_rate": 1.1787249723621177e-05, "loss": 0.18979328870773315, "step": 3444 }, { "epoch": 0.41803179225822107, "grad_norm": 2.1430108547210693, "learning_rate": 1.1784793022970151e-05, "loss": 0.06423166394233704, "step": 3445 }, { "epoch": 0.4181531367552481, "grad_norm": 2.6607182025909424, "learning_rate": 1.1782336322319126e-05, "loss": 0.3381607234477997, "step": 3446 }, { "epoch": 0.41827448125227523, "grad_norm": 4.065653324127197, "learning_rate": 1.17798796216681e-05, "loss": 0.3323563039302826, "step": 3447 }, { "epoch": 0.4183958257493023, "grad_norm": 2.4826183319091797, "learning_rate": 1.1777422921017074e-05, "loss": 0.35323530435562134, "step": 3448 }, { "epoch": 0.41851717024632934, "grad_norm": 1.811853051185608, "learning_rate": 1.1774966220366048e-05, "loss": 0.20018243789672852, "step": 3449 }, { "epoch": 0.4186385147433564, "grad_norm": 3.218059539794922, "learning_rate": 1.1772509519715023e-05, "loss": 0.40599000453948975, "step": 3450 }, { "epoch": 0.41875985924038345, "grad_norm": 2.8796212673187256, "learning_rate": 1.1770052819063997e-05, "loss": 0.3082905411720276, "step": 3451 }, { "epoch": 0.4188812037374105, "grad_norm": 1.919619083404541, "learning_rate": 1.1767596118412971e-05, "loss": 0.12047933042049408, "step": 3452 }, { "epoch": 0.41900254823443756, "grad_norm": 1.6094869375228882, "learning_rate": 1.1765139417761945e-05, "loss": 0.13965997099876404, "step": 3453 }, { "epoch": 0.4191238927314646, "grad_norm": 2.9742469787597656, "learning_rate": 1.176268271711092e-05, "loss": 0.3110111355781555, "step": 3454 }, { "epoch": 0.41924523722849166, "grad_norm": 3.066451072692871, "learning_rate": 1.1760226016459894e-05, "loss": 0.248049795627594, "step": 3455 }, { "epoch": 0.41936658172551877, "grad_norm": 2.063441753387451, "learning_rate": 1.1757769315808868e-05, "loss": 0.19167058169841766, "step": 3456 }, { "epoch": 0.4194879262225458, "grad_norm": 1.94533371925354, "learning_rate": 1.1755312615157844e-05, "loss": 0.12360180914402008, "step": 3457 }, { "epoch": 0.4196092707195729, "grad_norm": 1.6641358137130737, "learning_rate": 1.1752855914506818e-05, "loss": 0.15361237525939941, "step": 3458 }, { "epoch": 0.41973061521659993, "grad_norm": 3.931972026824951, "learning_rate": 1.1750399213855793e-05, "loss": 0.4843231439590454, "step": 3459 }, { "epoch": 0.419851959713627, "grad_norm": 1.9616279602050781, "learning_rate": 1.1747942513204767e-05, "loss": 0.25367042422294617, "step": 3460 }, { "epoch": 0.41997330421065404, "grad_norm": 3.106501340866089, "learning_rate": 1.1745485812553741e-05, "loss": 0.3402540683746338, "step": 3461 }, { "epoch": 0.4200946487076811, "grad_norm": 2.1331770420074463, "learning_rate": 1.1743029111902716e-05, "loss": 0.3224339485168457, "step": 3462 }, { "epoch": 0.42021599320470815, "grad_norm": 2.8695995807647705, "learning_rate": 1.174057241125169e-05, "loss": 0.05880775675177574, "step": 3463 }, { "epoch": 0.4203373377017352, "grad_norm": 2.60664963722229, "learning_rate": 1.1738115710600664e-05, "loss": 0.2266593724489212, "step": 3464 }, { "epoch": 0.4204586821987623, "grad_norm": 2.247929573059082, "learning_rate": 1.1735659009949638e-05, "loss": 0.20880204439163208, "step": 3465 }, { "epoch": 0.42058002669578937, "grad_norm": 1.9216110706329346, "learning_rate": 1.1733202309298613e-05, "loss": 0.2999063730239868, "step": 3466 }, { "epoch": 0.4207013711928164, "grad_norm": 2.366934061050415, "learning_rate": 1.1730745608647587e-05, "loss": 0.14423035085201263, "step": 3467 }, { "epoch": 0.4208227156898435, "grad_norm": 1.9067960977554321, "learning_rate": 1.1728288907996561e-05, "loss": 0.1828007698059082, "step": 3468 }, { "epoch": 0.42094406018687053, "grad_norm": 2.340365409851074, "learning_rate": 1.1725832207345535e-05, "loss": 0.40259379148483276, "step": 3469 }, { "epoch": 0.4210654046838976, "grad_norm": 1.356203317642212, "learning_rate": 1.172337550669451e-05, "loss": 0.04147522151470184, "step": 3470 }, { "epoch": 0.42118674918092464, "grad_norm": 3.8081276416778564, "learning_rate": 1.1720918806043484e-05, "loss": 0.20986762642860413, "step": 3471 }, { "epoch": 0.4213080936779517, "grad_norm": 3.0089476108551025, "learning_rate": 1.1718462105392458e-05, "loss": 0.3639543354511261, "step": 3472 }, { "epoch": 0.42142943817497874, "grad_norm": 2.3719913959503174, "learning_rate": 1.1716005404741432e-05, "loss": 0.10580010712146759, "step": 3473 }, { "epoch": 0.4215507826720058, "grad_norm": 2.1673812866210938, "learning_rate": 1.1713548704090407e-05, "loss": 0.44383132457733154, "step": 3474 }, { "epoch": 0.4216721271690329, "grad_norm": 1.9117655754089355, "learning_rate": 1.1711092003439381e-05, "loss": 0.1181115061044693, "step": 3475 }, { "epoch": 0.42179347166605996, "grad_norm": 2.113288164138794, "learning_rate": 1.1708635302788355e-05, "loss": 0.10157492756843567, "step": 3476 }, { "epoch": 0.421914816163087, "grad_norm": 4.154051303863525, "learning_rate": 1.1706178602137331e-05, "loss": 0.5281915664672852, "step": 3477 }, { "epoch": 0.42203616066011407, "grad_norm": 3.4318158626556396, "learning_rate": 1.1703721901486305e-05, "loss": 0.4258676767349243, "step": 3478 }, { "epoch": 0.4221575051571411, "grad_norm": 2.2328593730926514, "learning_rate": 1.170126520083528e-05, "loss": 0.13055363297462463, "step": 3479 }, { "epoch": 0.4222788496541682, "grad_norm": 3.7509846687316895, "learning_rate": 1.1698808500184254e-05, "loss": 0.24521100521087646, "step": 3480 }, { "epoch": 0.42240019415119523, "grad_norm": 3.5181307792663574, "learning_rate": 1.1696351799533228e-05, "loss": 0.15635553002357483, "step": 3481 }, { "epoch": 0.4225215386482223, "grad_norm": 2.506375789642334, "learning_rate": 1.1693895098882202e-05, "loss": 0.6314314603805542, "step": 3482 }, { "epoch": 0.42264288314524934, "grad_norm": 0.2738441228866577, "learning_rate": 1.1691438398231177e-05, "loss": 0.006706348154693842, "step": 3483 }, { "epoch": 0.42276422764227645, "grad_norm": 1.785003900527954, "learning_rate": 1.1688981697580151e-05, "loss": 0.17614462971687317, "step": 3484 }, { "epoch": 0.4228855721393035, "grad_norm": 2.6342015266418457, "learning_rate": 1.1686524996929125e-05, "loss": 0.2105119228363037, "step": 3485 }, { "epoch": 0.42300691663633055, "grad_norm": 1.8216427564620972, "learning_rate": 1.16840682962781e-05, "loss": 0.260210782289505, "step": 3486 }, { "epoch": 0.4231282611333576, "grad_norm": 4.7217302322387695, "learning_rate": 1.1681611595627074e-05, "loss": 0.3383857011795044, "step": 3487 }, { "epoch": 0.42324960563038466, "grad_norm": 2.028871536254883, "learning_rate": 1.1679154894976048e-05, "loss": 0.25453463196754456, "step": 3488 }, { "epoch": 0.4233709501274117, "grad_norm": 1.85947847366333, "learning_rate": 1.1676698194325022e-05, "loss": 0.22250008583068848, "step": 3489 }, { "epoch": 0.42349229462443877, "grad_norm": 2.306297540664673, "learning_rate": 1.1674241493673996e-05, "loss": 0.28463929891586304, "step": 3490 }, { "epoch": 0.4236136391214658, "grad_norm": 3.397921323776245, "learning_rate": 1.167178479302297e-05, "loss": 0.4102676212787628, "step": 3491 }, { "epoch": 0.4237349836184929, "grad_norm": 2.7147834300994873, "learning_rate": 1.1669328092371945e-05, "loss": 0.37689846754074097, "step": 3492 }, { "epoch": 0.42385632811552, "grad_norm": 2.865588426589966, "learning_rate": 1.166687139172092e-05, "loss": 0.480543315410614, "step": 3493 }, { "epoch": 0.42397767261254704, "grad_norm": 1.8033530712127686, "learning_rate": 1.1664414691069893e-05, "loss": 0.1970994621515274, "step": 3494 }, { "epoch": 0.4240990171095741, "grad_norm": 1.495980143547058, "learning_rate": 1.1661957990418868e-05, "loss": 0.10199486464262009, "step": 3495 }, { "epoch": 0.42422036160660115, "grad_norm": 3.647043466567993, "learning_rate": 1.1659501289767842e-05, "loss": 0.4993443191051483, "step": 3496 }, { "epoch": 0.4243417061036282, "grad_norm": 4.067138195037842, "learning_rate": 1.1657044589116818e-05, "loss": 0.16017641127109528, "step": 3497 }, { "epoch": 0.42446305060065526, "grad_norm": 2.5167953968048096, "learning_rate": 1.1654587888465792e-05, "loss": 0.5309091806411743, "step": 3498 }, { "epoch": 0.4245843950976823, "grad_norm": 3.4322762489318848, "learning_rate": 1.1652131187814766e-05, "loss": 0.34487950801849365, "step": 3499 }, { "epoch": 0.42470573959470936, "grad_norm": 3.0887749195098877, "learning_rate": 1.164967448716374e-05, "loss": 0.4641647934913635, "step": 3500 }, { "epoch": 0.4248270840917364, "grad_norm": 2.39925217628479, "learning_rate": 1.1647217786512715e-05, "loss": 0.19832506775856018, "step": 3501 }, { "epoch": 0.4249484285887635, "grad_norm": 0.8822919726371765, "learning_rate": 1.164476108586169e-05, "loss": 0.024732433259487152, "step": 3502 }, { "epoch": 0.4250697730857906, "grad_norm": 2.3826348781585693, "learning_rate": 1.1642304385210663e-05, "loss": 0.14591310918331146, "step": 3503 }, { "epoch": 0.42519111758281763, "grad_norm": 2.1904237270355225, "learning_rate": 1.1639847684559638e-05, "loss": 0.28078460693359375, "step": 3504 }, { "epoch": 0.4253124620798447, "grad_norm": 2.4217185974121094, "learning_rate": 1.1637390983908612e-05, "loss": 0.1772153079509735, "step": 3505 }, { "epoch": 0.42543380657687174, "grad_norm": 3.663706064224243, "learning_rate": 1.1634934283257586e-05, "loss": 0.3164679706096649, "step": 3506 }, { "epoch": 0.4255551510738988, "grad_norm": 3.5393636226654053, "learning_rate": 1.163247758260656e-05, "loss": 0.36650732159614563, "step": 3507 }, { "epoch": 0.42567649557092585, "grad_norm": 2.4501283168792725, "learning_rate": 1.1630020881955535e-05, "loss": 0.301665723323822, "step": 3508 }, { "epoch": 0.4257978400679529, "grad_norm": 2.496389627456665, "learning_rate": 1.1627564181304509e-05, "loss": 0.4281569719314575, "step": 3509 }, { "epoch": 0.42591918456497996, "grad_norm": 2.8083412647247314, "learning_rate": 1.1625107480653483e-05, "loss": 0.27063703536987305, "step": 3510 }, { "epoch": 0.426040529062007, "grad_norm": 2.149444580078125, "learning_rate": 1.1622650780002458e-05, "loss": 0.19885718822479248, "step": 3511 }, { "epoch": 0.4261618735590341, "grad_norm": 2.8739559650421143, "learning_rate": 1.1620194079351432e-05, "loss": 0.4075099229812622, "step": 3512 }, { "epoch": 0.4262832180560612, "grad_norm": 2.3116772174835205, "learning_rate": 1.1617737378700406e-05, "loss": 0.163457989692688, "step": 3513 }, { "epoch": 0.42640456255308823, "grad_norm": 2.594003438949585, "learning_rate": 1.161528067804938e-05, "loss": 0.48457199335098267, "step": 3514 }, { "epoch": 0.4265259070501153, "grad_norm": 1.6292730569839478, "learning_rate": 1.1612823977398355e-05, "loss": 0.2179834097623825, "step": 3515 }, { "epoch": 0.42664725154714234, "grad_norm": 2.3443987369537354, "learning_rate": 1.1610367276747329e-05, "loss": 0.48786991834640503, "step": 3516 }, { "epoch": 0.4267685960441694, "grad_norm": 2.87321400642395, "learning_rate": 1.1607910576096305e-05, "loss": 0.20168547332286835, "step": 3517 }, { "epoch": 0.42688994054119644, "grad_norm": 3.7508914470672607, "learning_rate": 1.1605453875445279e-05, "loss": 0.18805919587612152, "step": 3518 }, { "epoch": 0.4270112850382235, "grad_norm": 3.1636602878570557, "learning_rate": 1.1602997174794253e-05, "loss": 0.7994831204414368, "step": 3519 }, { "epoch": 0.42713262953525055, "grad_norm": 1.8529301881790161, "learning_rate": 1.1600540474143228e-05, "loss": 0.19495491683483124, "step": 3520 }, { "epoch": 0.42725397403227766, "grad_norm": 3.220113515853882, "learning_rate": 1.1598083773492202e-05, "loss": 0.23272007703781128, "step": 3521 }, { "epoch": 0.4273753185293047, "grad_norm": 2.2415926456451416, "learning_rate": 1.1595627072841176e-05, "loss": 0.18578383326530457, "step": 3522 }, { "epoch": 0.42749666302633177, "grad_norm": 3.9811363220214844, "learning_rate": 1.159317037219015e-05, "loss": 0.5883661508560181, "step": 3523 }, { "epoch": 0.4276180075233588, "grad_norm": 3.611394166946411, "learning_rate": 1.1590713671539125e-05, "loss": 0.29106664657592773, "step": 3524 }, { "epoch": 0.4277393520203859, "grad_norm": 2.861515760421753, "learning_rate": 1.1588256970888099e-05, "loss": 0.13153845071792603, "step": 3525 }, { "epoch": 0.42786069651741293, "grad_norm": 1.6934245824813843, "learning_rate": 1.1585800270237073e-05, "loss": 0.059216201305389404, "step": 3526 }, { "epoch": 0.42798204101444, "grad_norm": 1.712193250656128, "learning_rate": 1.1583343569586047e-05, "loss": 0.0886315107345581, "step": 3527 }, { "epoch": 0.42810338551146704, "grad_norm": 2.9867546558380127, "learning_rate": 1.1580886868935022e-05, "loss": 0.729335606098175, "step": 3528 }, { "epoch": 0.4282247300084941, "grad_norm": 1.6713956594467163, "learning_rate": 1.1578430168283996e-05, "loss": 0.09660333395004272, "step": 3529 }, { "epoch": 0.4283460745055212, "grad_norm": 1.2232511043548584, "learning_rate": 1.157597346763297e-05, "loss": 0.27441084384918213, "step": 3530 }, { "epoch": 0.42846741900254826, "grad_norm": 2.7496514320373535, "learning_rate": 1.1573516766981944e-05, "loss": 0.3395706117153168, "step": 3531 }, { "epoch": 0.4285887634995753, "grad_norm": 2.7180612087249756, "learning_rate": 1.1571060066330919e-05, "loss": 0.2094610333442688, "step": 3532 }, { "epoch": 0.42871010799660236, "grad_norm": 2.590651750564575, "learning_rate": 1.1568603365679893e-05, "loss": 0.3029654026031494, "step": 3533 }, { "epoch": 0.4288314524936294, "grad_norm": 3.976020574569702, "learning_rate": 1.1566146665028867e-05, "loss": 0.51016765832901, "step": 3534 }, { "epoch": 0.42895279699065647, "grad_norm": 1.4675898551940918, "learning_rate": 1.1563689964377841e-05, "loss": 0.11305573582649231, "step": 3535 }, { "epoch": 0.4290741414876835, "grad_norm": 3.136228084564209, "learning_rate": 1.1561233263726816e-05, "loss": 0.5877640843391418, "step": 3536 }, { "epoch": 0.4291954859847106, "grad_norm": 3.057117462158203, "learning_rate": 1.1558776563075792e-05, "loss": 0.32149437069892883, "step": 3537 }, { "epoch": 0.42931683048173763, "grad_norm": 3.842179775238037, "learning_rate": 1.1556319862424766e-05, "loss": 0.2886391282081604, "step": 3538 }, { "epoch": 0.4294381749787647, "grad_norm": 3.0417163372039795, "learning_rate": 1.155386316177374e-05, "loss": 0.30413177609443665, "step": 3539 }, { "epoch": 0.4295595194757918, "grad_norm": 3.1643025875091553, "learning_rate": 1.1551406461122714e-05, "loss": 0.2993037700653076, "step": 3540 }, { "epoch": 0.42968086397281885, "grad_norm": 3.737643003463745, "learning_rate": 1.1548949760471689e-05, "loss": 0.3740388751029968, "step": 3541 }, { "epoch": 0.4298022084698459, "grad_norm": 2.914933204650879, "learning_rate": 1.1546493059820663e-05, "loss": 0.15409384667873383, "step": 3542 }, { "epoch": 0.42992355296687296, "grad_norm": 2.2203798294067383, "learning_rate": 1.1544036359169637e-05, "loss": 0.14025011658668518, "step": 3543 }, { "epoch": 0.4300448974639, "grad_norm": 2.3739166259765625, "learning_rate": 1.1541579658518611e-05, "loss": 0.25144433975219727, "step": 3544 }, { "epoch": 0.43016624196092706, "grad_norm": 2.491156578063965, "learning_rate": 1.1539122957867586e-05, "loss": 0.3260461091995239, "step": 3545 }, { "epoch": 0.4302875864579541, "grad_norm": 2.44917368888855, "learning_rate": 1.153666625721656e-05, "loss": 0.15014734864234924, "step": 3546 }, { "epoch": 0.4304089309549812, "grad_norm": 2.5014209747314453, "learning_rate": 1.1534209556565534e-05, "loss": 0.3493393659591675, "step": 3547 }, { "epoch": 0.4305302754520082, "grad_norm": 2.060030460357666, "learning_rate": 1.1531752855914508e-05, "loss": 0.10436143726110458, "step": 3548 }, { "epoch": 0.43065161994903534, "grad_norm": 3.732374906539917, "learning_rate": 1.1529296155263483e-05, "loss": 0.2870655059814453, "step": 3549 }, { "epoch": 0.4307729644460624, "grad_norm": 2.585165023803711, "learning_rate": 1.1526839454612457e-05, "loss": 0.3311839699745178, "step": 3550 }, { "epoch": 0.43089430894308944, "grad_norm": 1.9173551797866821, "learning_rate": 1.1524382753961431e-05, "loss": 0.08459767699241638, "step": 3551 }, { "epoch": 0.4310156534401165, "grad_norm": 3.1529128551483154, "learning_rate": 1.1521926053310406e-05, "loss": 0.7174496650695801, "step": 3552 }, { "epoch": 0.43113699793714355, "grad_norm": 2.7838709354400635, "learning_rate": 1.151946935265938e-05, "loss": 0.30458956956863403, "step": 3553 }, { "epoch": 0.4312583424341706, "grad_norm": 2.698802947998047, "learning_rate": 1.1517012652008354e-05, "loss": 0.38087013363838196, "step": 3554 }, { "epoch": 0.43137968693119766, "grad_norm": 1.689888834953308, "learning_rate": 1.1514555951357328e-05, "loss": 0.055040039122104645, "step": 3555 }, { "epoch": 0.4315010314282247, "grad_norm": 2.4405455589294434, "learning_rate": 1.1512099250706304e-05, "loss": 0.377621054649353, "step": 3556 }, { "epoch": 0.43162237592525177, "grad_norm": 0.798610508441925, "learning_rate": 1.1509642550055279e-05, "loss": 0.01458574179559946, "step": 3557 }, { "epoch": 0.4317437204222789, "grad_norm": 2.441983222961426, "learning_rate": 1.1507185849404253e-05, "loss": 0.4220641255378723, "step": 3558 }, { "epoch": 0.43186506491930593, "grad_norm": 2.1550843715667725, "learning_rate": 1.1504729148753227e-05, "loss": 0.3988901376724243, "step": 3559 }, { "epoch": 0.431986409416333, "grad_norm": 1.9146445989608765, "learning_rate": 1.1502272448102201e-05, "loss": 0.07315497100353241, "step": 3560 }, { "epoch": 0.43210775391336004, "grad_norm": 2.2415459156036377, "learning_rate": 1.1499815747451172e-05, "loss": 0.3059389889240265, "step": 3561 }, { "epoch": 0.4322290984103871, "grad_norm": 2.4857776165008545, "learning_rate": 1.1497359046800146e-05, "loss": 0.102960005402565, "step": 3562 }, { "epoch": 0.43235044290741415, "grad_norm": 3.687988042831421, "learning_rate": 1.1494902346149122e-05, "loss": 0.08040996640920639, "step": 3563 }, { "epoch": 0.4324717874044412, "grad_norm": 1.5955703258514404, "learning_rate": 1.1492445645498097e-05, "loss": 0.051232174038887024, "step": 3564 }, { "epoch": 0.43259313190146825, "grad_norm": 3.303870439529419, "learning_rate": 1.1489988944847071e-05, "loss": 0.7577817440032959, "step": 3565 }, { "epoch": 0.4327144763984953, "grad_norm": 3.2710652351379395, "learning_rate": 1.1487532244196045e-05, "loss": 0.25319910049438477, "step": 3566 }, { "epoch": 0.43283582089552236, "grad_norm": 2.376044988632202, "learning_rate": 1.148507554354502e-05, "loss": 0.06742098927497864, "step": 3567 }, { "epoch": 0.43295716539254947, "grad_norm": 1.9534738063812256, "learning_rate": 1.1482618842893994e-05, "loss": 0.22318729758262634, "step": 3568 }, { "epoch": 0.4330785098895765, "grad_norm": 2.6536521911621094, "learning_rate": 1.1480162142242968e-05, "loss": 0.29003578424453735, "step": 3569 }, { "epoch": 0.4331998543866036, "grad_norm": 3.3636391162872314, "learning_rate": 1.1477705441591942e-05, "loss": 0.2415037751197815, "step": 3570 }, { "epoch": 0.43332119888363063, "grad_norm": 2.113816261291504, "learning_rate": 1.1475248740940916e-05, "loss": 0.34754252433776855, "step": 3571 }, { "epoch": 0.4334425433806577, "grad_norm": 3.4806878566741943, "learning_rate": 1.147279204028989e-05, "loss": 0.5855320692062378, "step": 3572 }, { "epoch": 0.43356388787768474, "grad_norm": 2.993398427963257, "learning_rate": 1.1470335339638865e-05, "loss": 0.4003051519393921, "step": 3573 }, { "epoch": 0.4336852323747118, "grad_norm": 3.324693441390991, "learning_rate": 1.146787863898784e-05, "loss": 0.16323281824588776, "step": 3574 }, { "epoch": 0.43380657687173885, "grad_norm": 2.242252826690674, "learning_rate": 1.1465421938336813e-05, "loss": 0.07665936648845673, "step": 3575 }, { "epoch": 0.4339279213687659, "grad_norm": 1.906256079673767, "learning_rate": 1.1462965237685788e-05, "loss": 0.17530910670757294, "step": 3576 }, { "epoch": 0.434049265865793, "grad_norm": 1.4897524118423462, "learning_rate": 1.1460508537034762e-05, "loss": 0.0955578088760376, "step": 3577 }, { "epoch": 0.43417061036282006, "grad_norm": 2.944699287414551, "learning_rate": 1.1458051836383736e-05, "loss": 0.2497621476650238, "step": 3578 }, { "epoch": 0.4342919548598471, "grad_norm": 2.2499160766601562, "learning_rate": 1.145559513573271e-05, "loss": 0.5359215140342712, "step": 3579 }, { "epoch": 0.43441329935687417, "grad_norm": 4.141959190368652, "learning_rate": 1.1453138435081685e-05, "loss": 0.7478635311126709, "step": 3580 }, { "epoch": 0.4345346438539012, "grad_norm": 2.1418049335479736, "learning_rate": 1.1450681734430659e-05, "loss": 0.6666457056999207, "step": 3581 }, { "epoch": 0.4346559883509283, "grad_norm": 2.682008981704712, "learning_rate": 1.1448225033779635e-05, "loss": 0.2484360635280609, "step": 3582 }, { "epoch": 0.43477733284795533, "grad_norm": 1.3566113710403442, "learning_rate": 1.144576833312861e-05, "loss": 0.11822110414505005, "step": 3583 }, { "epoch": 0.4348986773449824, "grad_norm": 3.0834574699401855, "learning_rate": 1.1443311632477583e-05, "loss": 0.25057023763656616, "step": 3584 }, { "epoch": 0.43502002184200944, "grad_norm": 1.1841784715652466, "learning_rate": 1.1440854931826558e-05, "loss": 0.0327373705804348, "step": 3585 }, { "epoch": 0.43514136633903655, "grad_norm": 2.461822748184204, "learning_rate": 1.1438398231175532e-05, "loss": 0.19103094935417175, "step": 3586 }, { "epoch": 0.4352627108360636, "grad_norm": 2.851731538772583, "learning_rate": 1.1435941530524506e-05, "loss": 0.37302541732788086, "step": 3587 }, { "epoch": 0.43538405533309066, "grad_norm": 2.4243295192718506, "learning_rate": 1.143348482987348e-05, "loss": 0.32346072793006897, "step": 3588 }, { "epoch": 0.4355053998301177, "grad_norm": 2.8613784313201904, "learning_rate": 1.1431028129222455e-05, "loss": 0.13731716573238373, "step": 3589 }, { "epoch": 0.43562674432714477, "grad_norm": 2.957123279571533, "learning_rate": 1.1428571428571429e-05, "loss": 0.33699917793273926, "step": 3590 }, { "epoch": 0.4357480888241718, "grad_norm": 2.904517412185669, "learning_rate": 1.1426114727920403e-05, "loss": 0.16977430880069733, "step": 3591 }, { "epoch": 0.4358694333211989, "grad_norm": 3.907168388366699, "learning_rate": 1.1423658027269378e-05, "loss": 0.1890479475259781, "step": 3592 }, { "epoch": 0.4359907778182259, "grad_norm": 1.818791389465332, "learning_rate": 1.1421201326618352e-05, "loss": 0.1662425696849823, "step": 3593 }, { "epoch": 0.436112122315253, "grad_norm": 2.139403820037842, "learning_rate": 1.1418744625967326e-05, "loss": 0.3009265959262848, "step": 3594 }, { "epoch": 0.4362334668122801, "grad_norm": 3.330634117126465, "learning_rate": 1.14162879253163e-05, "loss": 0.30554115772247314, "step": 3595 }, { "epoch": 0.43635481130930714, "grad_norm": 2.787034034729004, "learning_rate": 1.1413831224665275e-05, "loss": 0.1728060245513916, "step": 3596 }, { "epoch": 0.4364761558063342, "grad_norm": 3.3264801502227783, "learning_rate": 1.1411374524014249e-05, "loss": 0.27438050508499146, "step": 3597 }, { "epoch": 0.43659750030336125, "grad_norm": 2.9363393783569336, "learning_rate": 1.1408917823363223e-05, "loss": 0.5970642566680908, "step": 3598 }, { "epoch": 0.4367188448003883, "grad_norm": 2.2291409969329834, "learning_rate": 1.1406461122712197e-05, "loss": 0.22181521356105804, "step": 3599 }, { "epoch": 0.43684018929741536, "grad_norm": 2.859792947769165, "learning_rate": 1.1404004422061172e-05, "loss": 0.19904783368110657, "step": 3600 }, { "epoch": 0.4369615337944424, "grad_norm": 3.4355077743530273, "learning_rate": 1.1401547721410146e-05, "loss": 0.7270107269287109, "step": 3601 }, { "epoch": 0.43708287829146947, "grad_norm": 2.798133611679077, "learning_rate": 1.1399091020759122e-05, "loss": 0.2644732594490051, "step": 3602 }, { "epoch": 0.4372042227884965, "grad_norm": 2.2718589305877686, "learning_rate": 1.1396634320108096e-05, "loss": 0.1575520932674408, "step": 3603 }, { "epoch": 0.4373255672855236, "grad_norm": 1.9757939577102661, "learning_rate": 1.139417761945707e-05, "loss": 0.14380985498428345, "step": 3604 }, { "epoch": 0.4374469117825507, "grad_norm": 2.0162603855133057, "learning_rate": 1.1391720918806045e-05, "loss": 0.2779490649700165, "step": 3605 }, { "epoch": 0.43756825627957774, "grad_norm": 3.3149561882019043, "learning_rate": 1.1389264218155019e-05, "loss": 0.1290723830461502, "step": 3606 }, { "epoch": 0.4376896007766048, "grad_norm": 1.8297659158706665, "learning_rate": 1.1386807517503993e-05, "loss": 0.34521085023880005, "step": 3607 }, { "epoch": 0.43781094527363185, "grad_norm": 2.085313320159912, "learning_rate": 1.1384350816852967e-05, "loss": 0.1551712602376938, "step": 3608 }, { "epoch": 0.4379322897706589, "grad_norm": 3.8662731647491455, "learning_rate": 1.1381894116201942e-05, "loss": 0.25557762384414673, "step": 3609 }, { "epoch": 0.43805363426768595, "grad_norm": 2.7187681198120117, "learning_rate": 1.1379437415550916e-05, "loss": 0.24853914976119995, "step": 3610 }, { "epoch": 0.438174978764713, "grad_norm": 2.3490939140319824, "learning_rate": 1.137698071489989e-05, "loss": 0.32234546542167664, "step": 3611 }, { "epoch": 0.43829632326174006, "grad_norm": 2.499971866607666, "learning_rate": 1.1374524014248864e-05, "loss": 0.27525416016578674, "step": 3612 }, { "epoch": 0.4384176677587671, "grad_norm": 1.7944048643112183, "learning_rate": 1.1372067313597839e-05, "loss": 0.10448901355266571, "step": 3613 }, { "epoch": 0.4385390122557942, "grad_norm": 2.5525612831115723, "learning_rate": 1.1369610612946813e-05, "loss": 0.1685064136981964, "step": 3614 }, { "epoch": 0.4386603567528213, "grad_norm": 2.662881851196289, "learning_rate": 1.1367153912295787e-05, "loss": 0.3491094708442688, "step": 3615 }, { "epoch": 0.43878170124984833, "grad_norm": 2.6570043563842773, "learning_rate": 1.1364697211644761e-05, "loss": 0.2924567461013794, "step": 3616 }, { "epoch": 0.4389030457468754, "grad_norm": 2.361314058303833, "learning_rate": 1.1362240510993736e-05, "loss": 0.5655845403671265, "step": 3617 }, { "epoch": 0.43902439024390244, "grad_norm": 3.0596911907196045, "learning_rate": 1.135978381034271e-05, "loss": 0.3180844783782959, "step": 3618 }, { "epoch": 0.4391457347409295, "grad_norm": 3.2376537322998047, "learning_rate": 1.1357327109691684e-05, "loss": 0.48498889803886414, "step": 3619 }, { "epoch": 0.43926707923795655, "grad_norm": 2.4582009315490723, "learning_rate": 1.1354870409040658e-05, "loss": 0.14106501638889313, "step": 3620 }, { "epoch": 0.4393884237349836, "grad_norm": 1.0312790870666504, "learning_rate": 1.1352413708389633e-05, "loss": 0.009895110502839088, "step": 3621 }, { "epoch": 0.43950976823201066, "grad_norm": 2.2700610160827637, "learning_rate": 1.1349957007738609e-05, "loss": 0.1510792374610901, "step": 3622 }, { "epoch": 0.43963111272903777, "grad_norm": 3.85322904586792, "learning_rate": 1.1347500307087583e-05, "loss": 0.6400561928749084, "step": 3623 }, { "epoch": 0.4397524572260648, "grad_norm": 2.530968427658081, "learning_rate": 1.1345043606436557e-05, "loss": 0.3659777343273163, "step": 3624 }, { "epoch": 0.4398738017230919, "grad_norm": 2.400381088256836, "learning_rate": 1.1342586905785531e-05, "loss": 0.09772318601608276, "step": 3625 }, { "epoch": 0.4399951462201189, "grad_norm": 3.9213593006134033, "learning_rate": 1.1340130205134506e-05, "loss": 0.7347447872161865, "step": 3626 }, { "epoch": 0.440116490717146, "grad_norm": 1.5514545440673828, "learning_rate": 1.133767350448348e-05, "loss": 0.09241119772195816, "step": 3627 }, { "epoch": 0.44023783521417303, "grad_norm": 1.6501867771148682, "learning_rate": 1.1335216803832454e-05, "loss": 0.12137887626886368, "step": 3628 }, { "epoch": 0.4403591797112001, "grad_norm": 2.3960394859313965, "learning_rate": 1.1332760103181428e-05, "loss": 0.5893489122390747, "step": 3629 }, { "epoch": 0.44048052420822714, "grad_norm": 0.7135860919952393, "learning_rate": 1.1330303402530403e-05, "loss": 0.019609341397881508, "step": 3630 }, { "epoch": 0.4406018687052542, "grad_norm": 1.8278428316116333, "learning_rate": 1.1327846701879377e-05, "loss": 0.10106104612350464, "step": 3631 }, { "epoch": 0.44072321320228125, "grad_norm": 2.0153684616088867, "learning_rate": 1.1325390001228351e-05, "loss": 0.27083125710487366, "step": 3632 }, { "epoch": 0.44084455769930836, "grad_norm": 3.3400328159332275, "learning_rate": 1.1322933300577326e-05, "loss": 0.11951978504657745, "step": 3633 }, { "epoch": 0.4409659021963354, "grad_norm": 3.1101183891296387, "learning_rate": 1.13204765999263e-05, "loss": 0.3573867082595825, "step": 3634 }, { "epoch": 0.44108724669336247, "grad_norm": 2.616203784942627, "learning_rate": 1.1318019899275274e-05, "loss": 0.17786771059036255, "step": 3635 }, { "epoch": 0.4412085911903895, "grad_norm": 2.3295364379882812, "learning_rate": 1.1315563198624248e-05, "loss": 0.13881252706050873, "step": 3636 }, { "epoch": 0.4413299356874166, "grad_norm": 2.7589056491851807, "learning_rate": 1.1313106497973223e-05, "loss": 0.24190297722816467, "step": 3637 }, { "epoch": 0.44145128018444363, "grad_norm": 5.638766765594482, "learning_rate": 1.1310649797322197e-05, "loss": 0.463576078414917, "step": 3638 }, { "epoch": 0.4415726246814707, "grad_norm": 2.3034234046936035, "learning_rate": 1.1308193096671171e-05, "loss": 0.2793579399585724, "step": 3639 }, { "epoch": 0.44169396917849774, "grad_norm": 3.2416858673095703, "learning_rate": 1.1305736396020145e-05, "loss": 0.4077681005001068, "step": 3640 }, { "epoch": 0.4418153136755248, "grad_norm": 2.0680837631225586, "learning_rate": 1.130327969536912e-05, "loss": 0.2055528461933136, "step": 3641 }, { "epoch": 0.4419366581725519, "grad_norm": 2.0137829780578613, "learning_rate": 1.1300822994718096e-05, "loss": 0.44573357701301575, "step": 3642 }, { "epoch": 0.44205800266957895, "grad_norm": 1.356884479522705, "learning_rate": 1.129836629406707e-05, "loss": 0.10731276869773865, "step": 3643 }, { "epoch": 0.442179347166606, "grad_norm": 1.970157504081726, "learning_rate": 1.1295909593416044e-05, "loss": 0.416285902261734, "step": 3644 }, { "epoch": 0.44230069166363306, "grad_norm": 2.973112106323242, "learning_rate": 1.1293452892765018e-05, "loss": 0.108841173350811, "step": 3645 }, { "epoch": 0.4424220361606601, "grad_norm": 3.15687894821167, "learning_rate": 1.1290996192113993e-05, "loss": 0.5777620077133179, "step": 3646 }, { "epoch": 0.44254338065768717, "grad_norm": 2.8096749782562256, "learning_rate": 1.1288539491462967e-05, "loss": 0.1413717120885849, "step": 3647 }, { "epoch": 0.4426647251547142, "grad_norm": 2.755324125289917, "learning_rate": 1.1286082790811941e-05, "loss": 0.24966059625148773, "step": 3648 }, { "epoch": 0.4427860696517413, "grad_norm": 2.03189754486084, "learning_rate": 1.1283626090160915e-05, "loss": 0.2302841991186142, "step": 3649 }, { "epoch": 0.44290741414876833, "grad_norm": 3.2773029804229736, "learning_rate": 1.128116938950989e-05, "loss": 0.2901148200035095, "step": 3650 }, { "epoch": 0.44302875864579544, "grad_norm": 1.733625054359436, "learning_rate": 1.1278712688858864e-05, "loss": 0.10937804728746414, "step": 3651 }, { "epoch": 0.4431501031428225, "grad_norm": 2.043578624725342, "learning_rate": 1.1276255988207838e-05, "loss": 0.2226874828338623, "step": 3652 }, { "epoch": 0.44327144763984955, "grad_norm": 1.561728596687317, "learning_rate": 1.1273799287556812e-05, "loss": 0.1764751374721527, "step": 3653 }, { "epoch": 0.4433927921368766, "grad_norm": 2.3204519748687744, "learning_rate": 1.1271342586905787e-05, "loss": 0.6837862730026245, "step": 3654 }, { "epoch": 0.44351413663390366, "grad_norm": 2.4015603065490723, "learning_rate": 1.1268885886254761e-05, "loss": 0.5877541303634644, "step": 3655 }, { "epoch": 0.4436354811309307, "grad_norm": 2.2663328647613525, "learning_rate": 1.1266429185603735e-05, "loss": 0.17803741991519928, "step": 3656 }, { "epoch": 0.44375682562795776, "grad_norm": 1.9317059516906738, "learning_rate": 1.126397248495271e-05, "loss": 0.3878974914550781, "step": 3657 }, { "epoch": 0.4438781701249848, "grad_norm": 2.765651226043701, "learning_rate": 1.1261515784301684e-05, "loss": 0.22609782218933105, "step": 3658 }, { "epoch": 0.44399951462201187, "grad_norm": 2.527340888977051, "learning_rate": 1.1259059083650658e-05, "loss": 0.22068405151367188, "step": 3659 }, { "epoch": 0.4441208591190389, "grad_norm": 2.0030107498168945, "learning_rate": 1.1256602382999632e-05, "loss": 0.15071557462215424, "step": 3660 }, { "epoch": 0.44424220361606603, "grad_norm": 1.7133005857467651, "learning_rate": 1.1254145682348606e-05, "loss": 0.060266535729169846, "step": 3661 }, { "epoch": 0.4443635481130931, "grad_norm": 4.328049182891846, "learning_rate": 1.1251688981697582e-05, "loss": 0.27519306540489197, "step": 3662 }, { "epoch": 0.44448489261012014, "grad_norm": 4.070196628570557, "learning_rate": 1.1249232281046557e-05, "loss": 0.19711193442344666, "step": 3663 }, { "epoch": 0.4446062371071472, "grad_norm": 1.8636376857757568, "learning_rate": 1.1246775580395531e-05, "loss": 0.06845342367887497, "step": 3664 }, { "epoch": 0.44472758160417425, "grad_norm": 1.4219677448272705, "learning_rate": 1.1244318879744505e-05, "loss": 0.06279438734054565, "step": 3665 }, { "epoch": 0.4448489261012013, "grad_norm": 2.331526756286621, "learning_rate": 1.124186217909348e-05, "loss": 0.19652298092842102, "step": 3666 }, { "epoch": 0.44497027059822836, "grad_norm": 2.0246334075927734, "learning_rate": 1.1239405478442454e-05, "loss": 0.3149532675743103, "step": 3667 }, { "epoch": 0.4450916150952554, "grad_norm": 1.9124372005462646, "learning_rate": 1.1236948777791428e-05, "loss": 0.25972670316696167, "step": 3668 }, { "epoch": 0.44521295959228246, "grad_norm": 2.798413038253784, "learning_rate": 1.1234492077140402e-05, "loss": 0.2329055815935135, "step": 3669 }, { "epoch": 0.4453343040893096, "grad_norm": 2.784217119216919, "learning_rate": 1.1232035376489376e-05, "loss": 0.13828489184379578, "step": 3670 }, { "epoch": 0.4454556485863366, "grad_norm": 2.5673763751983643, "learning_rate": 1.122957867583835e-05, "loss": 0.5141546726226807, "step": 3671 }, { "epoch": 0.4455769930833637, "grad_norm": 3.2268829345703125, "learning_rate": 1.1227121975187325e-05, "loss": 0.3171578347682953, "step": 3672 }, { "epoch": 0.44569833758039074, "grad_norm": 2.244013786315918, "learning_rate": 1.12246652745363e-05, "loss": 0.20982038974761963, "step": 3673 }, { "epoch": 0.4458196820774178, "grad_norm": 4.174572944641113, "learning_rate": 1.1222208573885273e-05, "loss": 0.5640068054199219, "step": 3674 }, { "epoch": 0.44594102657444484, "grad_norm": 2.7843480110168457, "learning_rate": 1.1219751873234248e-05, "loss": 0.2926955223083496, "step": 3675 }, { "epoch": 0.4460623710714719, "grad_norm": 1.712131142616272, "learning_rate": 1.1217295172583222e-05, "loss": 0.14825302362442017, "step": 3676 }, { "epoch": 0.44618371556849895, "grad_norm": 3.0750234127044678, "learning_rate": 1.1214838471932196e-05, "loss": 0.20352722704410553, "step": 3677 }, { "epoch": 0.446305060065526, "grad_norm": 2.5182766914367676, "learning_rate": 1.121238177128117e-05, "loss": 0.21560010313987732, "step": 3678 }, { "epoch": 0.4464264045625531, "grad_norm": 1.4503228664398193, "learning_rate": 1.1209925070630145e-05, "loss": 0.08557716012001038, "step": 3679 }, { "epoch": 0.44654774905958017, "grad_norm": 2.568263053894043, "learning_rate": 1.1207468369979119e-05, "loss": 0.2553696930408478, "step": 3680 }, { "epoch": 0.4466690935566072, "grad_norm": 3.5506436824798584, "learning_rate": 1.1205011669328095e-05, "loss": 0.5497809648513794, "step": 3681 }, { "epoch": 0.4467904380536343, "grad_norm": 2.676406145095825, "learning_rate": 1.120255496867707e-05, "loss": 0.23912879824638367, "step": 3682 }, { "epoch": 0.44691178255066133, "grad_norm": 2.9485552310943604, "learning_rate": 1.1200098268026044e-05, "loss": 0.2600053548812866, "step": 3683 }, { "epoch": 0.4470331270476884, "grad_norm": 1.826737403869629, "learning_rate": 1.1197641567375018e-05, "loss": 0.22873912751674652, "step": 3684 }, { "epoch": 0.44715447154471544, "grad_norm": 1.9971508979797363, "learning_rate": 1.1195184866723992e-05, "loss": 0.1476428210735321, "step": 3685 }, { "epoch": 0.4472758160417425, "grad_norm": 1.1586400270462036, "learning_rate": 1.1192728166072966e-05, "loss": 0.10401519387960434, "step": 3686 }, { "epoch": 0.44739716053876954, "grad_norm": 3.4623281955718994, "learning_rate": 1.119027146542194e-05, "loss": 0.24992474913597107, "step": 3687 }, { "epoch": 0.44751850503579665, "grad_norm": 3.845836639404297, "learning_rate": 1.1187814764770915e-05, "loss": 0.7359163761138916, "step": 3688 }, { "epoch": 0.4476398495328237, "grad_norm": 2.8226850032806396, "learning_rate": 1.1185358064119889e-05, "loss": 0.24005986750125885, "step": 3689 }, { "epoch": 0.44776119402985076, "grad_norm": 2.2125051021575928, "learning_rate": 1.1182901363468863e-05, "loss": 0.3258327841758728, "step": 3690 }, { "epoch": 0.4478825385268778, "grad_norm": 2.472573757171631, "learning_rate": 1.1180444662817838e-05, "loss": 0.10994560271501541, "step": 3691 }, { "epoch": 0.44800388302390487, "grad_norm": 2.341024398803711, "learning_rate": 1.1177987962166812e-05, "loss": 0.14841987192630768, "step": 3692 }, { "epoch": 0.4481252275209319, "grad_norm": 1.2704229354858398, "learning_rate": 1.1175531261515786e-05, "loss": 0.06925120204687119, "step": 3693 }, { "epoch": 0.448246572017959, "grad_norm": 1.6538794040679932, "learning_rate": 1.117307456086476e-05, "loss": 0.11470181494951248, "step": 3694 }, { "epoch": 0.44836791651498603, "grad_norm": 1.6831903457641602, "learning_rate": 1.1170617860213735e-05, "loss": 0.0720558613538742, "step": 3695 }, { "epoch": 0.4484892610120131, "grad_norm": 1.8351422548294067, "learning_rate": 1.1168161159562709e-05, "loss": 0.17231544852256775, "step": 3696 }, { "epoch": 0.44861060550904014, "grad_norm": 1.7946488857269287, "learning_rate": 1.1165704458911681e-05, "loss": 0.10063959658145905, "step": 3697 }, { "epoch": 0.44873195000606725, "grad_norm": 2.6582419872283936, "learning_rate": 1.1163247758260656e-05, "loss": 0.12853577733039856, "step": 3698 }, { "epoch": 0.4488532945030943, "grad_norm": 3.3695971965789795, "learning_rate": 1.116079105760963e-05, "loss": 0.4992516040802002, "step": 3699 }, { "epoch": 0.44897463900012136, "grad_norm": 3.701185941696167, "learning_rate": 1.1158334356958604e-05, "loss": 0.24211734533309937, "step": 3700 }, { "epoch": 0.4490959834971484, "grad_norm": 4.2144904136657715, "learning_rate": 1.1155877656307578e-05, "loss": 0.49937915802001953, "step": 3701 }, { "epoch": 0.44921732799417546, "grad_norm": 3.507452964782715, "learning_rate": 1.1153420955656553e-05, "loss": 0.4374186396598816, "step": 3702 }, { "epoch": 0.4493386724912025, "grad_norm": 2.2759249210357666, "learning_rate": 1.1150964255005527e-05, "loss": 0.07882027328014374, "step": 3703 }, { "epoch": 0.44946001698822957, "grad_norm": 1.028842568397522, "learning_rate": 1.1148507554354501e-05, "loss": 0.04556459188461304, "step": 3704 }, { "epoch": 0.4495813614852566, "grad_norm": 2.8861923217773438, "learning_rate": 1.1146050853703475e-05, "loss": 0.26581016182899475, "step": 3705 }, { "epoch": 0.4497027059822837, "grad_norm": 3.1814310550689697, "learning_rate": 1.114359415305245e-05, "loss": 0.34447595477104187, "step": 3706 }, { "epoch": 0.4498240504793108, "grad_norm": 2.059791326522827, "learning_rate": 1.1141137452401426e-05, "loss": 0.1574913114309311, "step": 3707 }, { "epoch": 0.44994539497633784, "grad_norm": 1.8999013900756836, "learning_rate": 1.11386807517504e-05, "loss": 0.08208724856376648, "step": 3708 }, { "epoch": 0.4500667394733649, "grad_norm": 2.4744856357574463, "learning_rate": 1.1136224051099374e-05, "loss": 0.21813160181045532, "step": 3709 }, { "epoch": 0.45018808397039195, "grad_norm": 2.708770513534546, "learning_rate": 1.1133767350448348e-05, "loss": 0.6522781252861023, "step": 3710 }, { "epoch": 0.450309428467419, "grad_norm": 2.580425977706909, "learning_rate": 1.1131310649797323e-05, "loss": 0.25760287046432495, "step": 3711 }, { "epoch": 0.45043077296444606, "grad_norm": 1.9656243324279785, "learning_rate": 1.1128853949146297e-05, "loss": 0.1759759485721588, "step": 3712 }, { "epoch": 0.4505521174614731, "grad_norm": 2.028416156768799, "learning_rate": 1.1126397248495271e-05, "loss": 0.12356072664260864, "step": 3713 }, { "epoch": 0.45067346195850017, "grad_norm": 2.8768270015716553, "learning_rate": 1.1123940547844246e-05, "loss": 0.30887824296951294, "step": 3714 }, { "epoch": 0.4507948064555272, "grad_norm": 3.1012911796569824, "learning_rate": 1.112148384719322e-05, "loss": 0.6460905075073242, "step": 3715 }, { "epoch": 0.45091615095255433, "grad_norm": 2.7501220703125, "learning_rate": 1.1119027146542194e-05, "loss": 0.4956667721271515, "step": 3716 }, { "epoch": 0.4510374954495814, "grad_norm": 3.2978079319000244, "learning_rate": 1.1116570445891168e-05, "loss": 0.30553939938545227, "step": 3717 }, { "epoch": 0.45115883994660844, "grad_norm": 1.735008716583252, "learning_rate": 1.1114113745240143e-05, "loss": 0.11469829082489014, "step": 3718 }, { "epoch": 0.4512801844436355, "grad_norm": 1.777876377105713, "learning_rate": 1.1111657044589117e-05, "loss": 0.10907553136348724, "step": 3719 }, { "epoch": 0.45140152894066254, "grad_norm": 2.542064666748047, "learning_rate": 1.1109200343938091e-05, "loss": 0.17916345596313477, "step": 3720 }, { "epoch": 0.4515228734376896, "grad_norm": 1.6052556037902832, "learning_rate": 1.1106743643287065e-05, "loss": 0.029923617839813232, "step": 3721 }, { "epoch": 0.45164421793471665, "grad_norm": 2.0250751972198486, "learning_rate": 1.110428694263604e-05, "loss": 0.07364283502101898, "step": 3722 }, { "epoch": 0.4517655624317437, "grad_norm": 2.5152509212493896, "learning_rate": 1.1101830241985014e-05, "loss": 0.21491853892803192, "step": 3723 }, { "epoch": 0.45188690692877076, "grad_norm": 2.6573703289031982, "learning_rate": 1.1099373541333988e-05, "loss": 0.2361636459827423, "step": 3724 }, { "epoch": 0.4520082514257978, "grad_norm": 2.5982563495635986, "learning_rate": 1.1096916840682962e-05, "loss": 0.2589453458786011, "step": 3725 }, { "epoch": 0.4521295959228249, "grad_norm": 3.372690439224243, "learning_rate": 1.1094460140031937e-05, "loss": 0.3643377721309662, "step": 3726 }, { "epoch": 0.452250940419852, "grad_norm": 2.9856884479522705, "learning_rate": 1.1092003439380913e-05, "loss": 0.4071636497974396, "step": 3727 }, { "epoch": 0.45237228491687903, "grad_norm": 3.358541965484619, "learning_rate": 1.1089546738729887e-05, "loss": 0.33494922518730164, "step": 3728 }, { "epoch": 0.4524936294139061, "grad_norm": 1.913957953453064, "learning_rate": 1.1087090038078861e-05, "loss": 0.0629691556096077, "step": 3729 }, { "epoch": 0.45261497391093314, "grad_norm": 2.1571216583251953, "learning_rate": 1.1084633337427835e-05, "loss": 0.26977357268333435, "step": 3730 }, { "epoch": 0.4527363184079602, "grad_norm": 2.607900381088257, "learning_rate": 1.108217663677681e-05, "loss": 0.10467074811458588, "step": 3731 }, { "epoch": 0.45285766290498725, "grad_norm": 1.5066871643066406, "learning_rate": 1.1079719936125784e-05, "loss": 0.12838679552078247, "step": 3732 }, { "epoch": 0.4529790074020143, "grad_norm": 2.9027323722839355, "learning_rate": 1.1077263235474758e-05, "loss": 0.5089572072029114, "step": 3733 }, { "epoch": 0.45310035189904135, "grad_norm": 2.9095492362976074, "learning_rate": 1.1074806534823732e-05, "loss": 0.20720970630645752, "step": 3734 }, { "epoch": 0.45322169639606846, "grad_norm": 1.759684681892395, "learning_rate": 1.1072349834172707e-05, "loss": 0.4124320447444916, "step": 3735 }, { "epoch": 0.4533430408930955, "grad_norm": 4.119267463684082, "learning_rate": 1.1069893133521681e-05, "loss": 0.26363465189933777, "step": 3736 }, { "epoch": 0.45346438539012257, "grad_norm": 2.257187604904175, "learning_rate": 1.1067436432870655e-05, "loss": 0.2994039058685303, "step": 3737 }, { "epoch": 0.4535857298871496, "grad_norm": 1.8574036359786987, "learning_rate": 1.106497973221963e-05, "loss": 0.1107100248336792, "step": 3738 }, { "epoch": 0.4537070743841767, "grad_norm": 1.9110321998596191, "learning_rate": 1.1062523031568604e-05, "loss": 0.18646883964538574, "step": 3739 }, { "epoch": 0.45382841888120373, "grad_norm": 2.2259957790374756, "learning_rate": 1.1060066330917578e-05, "loss": 0.12754613161087036, "step": 3740 }, { "epoch": 0.4539497633782308, "grad_norm": 2.3074288368225098, "learning_rate": 1.1057609630266552e-05, "loss": 0.11077870428562164, "step": 3741 }, { "epoch": 0.45407110787525784, "grad_norm": 2.2520663738250732, "learning_rate": 1.1055152929615526e-05, "loss": 0.16683274507522583, "step": 3742 }, { "epoch": 0.4541924523722849, "grad_norm": 2.3819658756256104, "learning_rate": 1.10526962289645e-05, "loss": 0.17191553115844727, "step": 3743 }, { "epoch": 0.454313796869312, "grad_norm": 2.6728708744049072, "learning_rate": 1.1050239528313475e-05, "loss": 0.27221840620040894, "step": 3744 }, { "epoch": 0.45443514136633906, "grad_norm": 1.5359383821487427, "learning_rate": 1.104778282766245e-05, "loss": 0.0838262066245079, "step": 3745 }, { "epoch": 0.4545564858633661, "grad_norm": 2.8321125507354736, "learning_rate": 1.1045326127011423e-05, "loss": 0.3541053533554077, "step": 3746 }, { "epoch": 0.45467783036039316, "grad_norm": 2.7794976234436035, "learning_rate": 1.10428694263604e-05, "loss": 0.31279149651527405, "step": 3747 }, { "epoch": 0.4547991748574202, "grad_norm": 1.9179726839065552, "learning_rate": 1.1040412725709374e-05, "loss": 0.06787727028131485, "step": 3748 }, { "epoch": 0.4549205193544473, "grad_norm": 2.6522345542907715, "learning_rate": 1.1037956025058348e-05, "loss": 0.14771534502506256, "step": 3749 }, { "epoch": 0.4550418638514743, "grad_norm": 1.329545497894287, "learning_rate": 1.1035499324407322e-05, "loss": 0.0695379301905632, "step": 3750 }, { "epoch": 0.4551632083485014, "grad_norm": 2.4873099327087402, "learning_rate": 1.1033042623756296e-05, "loss": 0.10876917839050293, "step": 3751 }, { "epoch": 0.45528455284552843, "grad_norm": 1.5930664539337158, "learning_rate": 1.103058592310527e-05, "loss": 0.08490962535142899, "step": 3752 }, { "epoch": 0.4554058973425555, "grad_norm": 1.8572665452957153, "learning_rate": 1.1028129222454245e-05, "loss": 0.4095427989959717, "step": 3753 }, { "epoch": 0.4555272418395826, "grad_norm": 0.954850435256958, "learning_rate": 1.102567252180322e-05, "loss": 0.02159183658659458, "step": 3754 }, { "epoch": 0.45564858633660965, "grad_norm": 2.3393404483795166, "learning_rate": 1.1023215821152193e-05, "loss": 0.49459564685821533, "step": 3755 }, { "epoch": 0.4557699308336367, "grad_norm": 3.7105650901794434, "learning_rate": 1.1020759120501168e-05, "loss": 0.504112720489502, "step": 3756 }, { "epoch": 0.45589127533066376, "grad_norm": 1.7900317907333374, "learning_rate": 1.1018302419850142e-05, "loss": 0.04191075637936592, "step": 3757 }, { "epoch": 0.4560126198276908, "grad_norm": 2.862607717514038, "learning_rate": 1.1015845719199116e-05, "loss": 0.2582750618457794, "step": 3758 }, { "epoch": 0.45613396432471787, "grad_norm": 2.3275985717773438, "learning_rate": 1.101338901854809e-05, "loss": 0.29841142892837524, "step": 3759 }, { "epoch": 0.4562553088217449, "grad_norm": 3.050267219543457, "learning_rate": 1.1010932317897065e-05, "loss": 0.12717841565608978, "step": 3760 }, { "epoch": 0.456376653318772, "grad_norm": 3.191488265991211, "learning_rate": 1.1008475617246039e-05, "loss": 0.16273337602615356, "step": 3761 }, { "epoch": 0.45649799781579903, "grad_norm": 1.3571722507476807, "learning_rate": 1.1006018916595013e-05, "loss": 0.19462181627750397, "step": 3762 }, { "epoch": 0.45661934231282614, "grad_norm": 4.811699390411377, "learning_rate": 1.1003562215943988e-05, "loss": 0.24823786318302155, "step": 3763 }, { "epoch": 0.4567406868098532, "grad_norm": 3.2369728088378906, "learning_rate": 1.1001105515292962e-05, "loss": 0.2375682145357132, "step": 3764 }, { "epoch": 0.45686203130688025, "grad_norm": 2.953446388244629, "learning_rate": 1.0998648814641936e-05, "loss": 0.2989498972892761, "step": 3765 }, { "epoch": 0.4569833758039073, "grad_norm": 2.2972118854522705, "learning_rate": 1.099619211399091e-05, "loss": 0.23729734122753143, "step": 3766 }, { "epoch": 0.45710472030093435, "grad_norm": 3.0327601432800293, "learning_rate": 1.0993735413339886e-05, "loss": 0.1904260814189911, "step": 3767 }, { "epoch": 0.4572260647979614, "grad_norm": 3.116729497909546, "learning_rate": 1.099127871268886e-05, "loss": 0.2167295217514038, "step": 3768 }, { "epoch": 0.45734740929498846, "grad_norm": 2.1636672019958496, "learning_rate": 1.0988822012037835e-05, "loss": 0.1741916388273239, "step": 3769 }, { "epoch": 0.4574687537920155, "grad_norm": 3.252021551132202, "learning_rate": 1.0986365311386809e-05, "loss": 0.2020607441663742, "step": 3770 }, { "epoch": 0.45759009828904257, "grad_norm": 3.535862922668457, "learning_rate": 1.0983908610735783e-05, "loss": 0.17055633664131165, "step": 3771 }, { "epoch": 0.4577114427860697, "grad_norm": 3.0886454582214355, "learning_rate": 1.0981451910084758e-05, "loss": 0.23023851215839386, "step": 3772 }, { "epoch": 0.45783278728309673, "grad_norm": 2.175285577774048, "learning_rate": 1.0978995209433732e-05, "loss": 0.151248961687088, "step": 3773 }, { "epoch": 0.4579541317801238, "grad_norm": 1.763638973236084, "learning_rate": 1.0976538508782706e-05, "loss": 0.08291637152433395, "step": 3774 }, { "epoch": 0.45807547627715084, "grad_norm": 1.2859675884246826, "learning_rate": 1.097408180813168e-05, "loss": 0.04810434579849243, "step": 3775 }, { "epoch": 0.4581968207741779, "grad_norm": 3.772589683532715, "learning_rate": 1.0971625107480655e-05, "loss": 0.35038936138153076, "step": 3776 }, { "epoch": 0.45831816527120495, "grad_norm": 3.2569420337677, "learning_rate": 1.0969168406829629e-05, "loss": 0.41661763191223145, "step": 3777 }, { "epoch": 0.458439509768232, "grad_norm": 3.516561269760132, "learning_rate": 1.0966711706178603e-05, "loss": 0.2825745940208435, "step": 3778 }, { "epoch": 0.45856085426525905, "grad_norm": 4.072770118713379, "learning_rate": 1.0964255005527577e-05, "loss": 0.6777939200401306, "step": 3779 }, { "epoch": 0.4586821987622861, "grad_norm": 2.922475576400757, "learning_rate": 1.0961798304876552e-05, "loss": 0.07564674317836761, "step": 3780 }, { "epoch": 0.45880354325931316, "grad_norm": 2.678272008895874, "learning_rate": 1.0959341604225526e-05, "loss": 0.2217925786972046, "step": 3781 }, { "epoch": 0.45892488775634027, "grad_norm": 2.3827056884765625, "learning_rate": 1.09568849035745e-05, "loss": 0.3195100426673889, "step": 3782 }, { "epoch": 0.4590462322533673, "grad_norm": 2.139965057373047, "learning_rate": 1.0954428202923474e-05, "loss": 0.33632519841194153, "step": 3783 }, { "epoch": 0.4591675767503944, "grad_norm": 3.4572787284851074, "learning_rate": 1.0951971502272449e-05, "loss": 0.5048207640647888, "step": 3784 }, { "epoch": 0.45928892124742143, "grad_norm": 2.216235876083374, "learning_rate": 1.0949514801621423e-05, "loss": 0.18083621561527252, "step": 3785 }, { "epoch": 0.4594102657444485, "grad_norm": 2.518138885498047, "learning_rate": 1.0947058100970397e-05, "loss": 0.40386083722114563, "step": 3786 }, { "epoch": 0.45953161024147554, "grad_norm": 1.9466145038604736, "learning_rate": 1.0944601400319373e-05, "loss": 0.15552747249603271, "step": 3787 }, { "epoch": 0.4596529547385026, "grad_norm": 1.848572850227356, "learning_rate": 1.0942144699668347e-05, "loss": 0.10546831786632538, "step": 3788 }, { "epoch": 0.45977429923552965, "grad_norm": 2.860260248184204, "learning_rate": 1.0939687999017322e-05, "loss": 0.34014883637428284, "step": 3789 }, { "epoch": 0.4598956437325567, "grad_norm": 3.1662676334381104, "learning_rate": 1.0937231298366296e-05, "loss": 0.45574745535850525, "step": 3790 }, { "epoch": 0.4600169882295838, "grad_norm": 3.0280942916870117, "learning_rate": 1.093477459771527e-05, "loss": 0.6346323490142822, "step": 3791 }, { "epoch": 0.46013833272661087, "grad_norm": 2.906144380569458, "learning_rate": 1.0932317897064244e-05, "loss": 0.4477047324180603, "step": 3792 }, { "epoch": 0.4602596772236379, "grad_norm": 3.9944026470184326, "learning_rate": 1.0929861196413219e-05, "loss": 0.4693850576877594, "step": 3793 }, { "epoch": 0.460381021720665, "grad_norm": 1.2766904830932617, "learning_rate": 1.0927404495762193e-05, "loss": 0.013830102048814297, "step": 3794 }, { "epoch": 0.460502366217692, "grad_norm": 3.3384554386138916, "learning_rate": 1.0924947795111167e-05, "loss": 0.4867768883705139, "step": 3795 }, { "epoch": 0.4606237107147191, "grad_norm": 2.780524969100952, "learning_rate": 1.0922491094460141e-05, "loss": 0.5420291423797607, "step": 3796 }, { "epoch": 0.46074505521174614, "grad_norm": 1.9150787591934204, "learning_rate": 1.0920034393809116e-05, "loss": 0.10740383714437485, "step": 3797 }, { "epoch": 0.4608663997087732, "grad_norm": 2.6779415607452393, "learning_rate": 1.091757769315809e-05, "loss": 0.11736059933900833, "step": 3798 }, { "epoch": 0.46098774420580024, "grad_norm": 2.689134359359741, "learning_rate": 1.0915120992507064e-05, "loss": 0.5773141980171204, "step": 3799 }, { "epoch": 0.46110908870282735, "grad_norm": 2.030559539794922, "learning_rate": 1.0912664291856038e-05, "loss": 0.1807558387517929, "step": 3800 }, { "epoch": 0.4612304331998544, "grad_norm": 2.132622718811035, "learning_rate": 1.0910207591205013e-05, "loss": 0.3821357488632202, "step": 3801 }, { "epoch": 0.46135177769688146, "grad_norm": 3.906198501586914, "learning_rate": 1.0907750890553987e-05, "loss": 0.34439414739608765, "step": 3802 }, { "epoch": 0.4614731221939085, "grad_norm": 3.38917875289917, "learning_rate": 1.0905294189902961e-05, "loss": 0.2816096246242523, "step": 3803 }, { "epoch": 0.46159446669093557, "grad_norm": 2.7078585624694824, "learning_rate": 1.0902837489251936e-05, "loss": 0.2726061940193176, "step": 3804 }, { "epoch": 0.4617158111879626, "grad_norm": 2.956815242767334, "learning_rate": 1.090038078860091e-05, "loss": 0.19974419474601746, "step": 3805 }, { "epoch": 0.4618371556849897, "grad_norm": 2.5726802349090576, "learning_rate": 1.0897924087949884e-05, "loss": 0.1582794338464737, "step": 3806 }, { "epoch": 0.46195850018201673, "grad_norm": 2.217891216278076, "learning_rate": 1.089546738729886e-05, "loss": 0.4006637632846832, "step": 3807 }, { "epoch": 0.4620798446790438, "grad_norm": 2.2818827629089355, "learning_rate": 1.0893010686647834e-05, "loss": 0.38829168677330017, "step": 3808 }, { "epoch": 0.4622011891760709, "grad_norm": 3.294948101043701, "learning_rate": 1.0890553985996809e-05, "loss": 0.42471641302108765, "step": 3809 }, { "epoch": 0.46232253367309795, "grad_norm": 1.1976569890975952, "learning_rate": 1.0888097285345783e-05, "loss": 0.08018773794174194, "step": 3810 }, { "epoch": 0.462443878170125, "grad_norm": 1.8881957530975342, "learning_rate": 1.0885640584694757e-05, "loss": 0.09716351330280304, "step": 3811 }, { "epoch": 0.46256522266715205, "grad_norm": 1.2837885618209839, "learning_rate": 1.0883183884043731e-05, "loss": 0.04680376499891281, "step": 3812 }, { "epoch": 0.4626865671641791, "grad_norm": 2.924452304840088, "learning_rate": 1.0880727183392706e-05, "loss": 0.36568957567214966, "step": 3813 }, { "epoch": 0.46280791166120616, "grad_norm": 2.9263999462127686, "learning_rate": 1.087827048274168e-05, "loss": 0.31210649013519287, "step": 3814 }, { "epoch": 0.4629292561582332, "grad_norm": 1.9239130020141602, "learning_rate": 1.0875813782090654e-05, "loss": 0.2763766050338745, "step": 3815 }, { "epoch": 0.46305060065526027, "grad_norm": 2.8812992572784424, "learning_rate": 1.0873357081439628e-05, "loss": 0.1720142662525177, "step": 3816 }, { "epoch": 0.4631719451522873, "grad_norm": 2.3239588737487793, "learning_rate": 1.0870900380788603e-05, "loss": 0.3543533980846405, "step": 3817 }, { "epoch": 0.4632932896493144, "grad_norm": 1.8847872018814087, "learning_rate": 1.0868443680137577e-05, "loss": 0.09396478533744812, "step": 3818 }, { "epoch": 0.4634146341463415, "grad_norm": 3.816448450088501, "learning_rate": 1.0865986979486551e-05, "loss": 0.08851034939289093, "step": 3819 }, { "epoch": 0.46353597864336854, "grad_norm": 2.910249710083008, "learning_rate": 1.0863530278835525e-05, "loss": 0.4211457669734955, "step": 3820 }, { "epoch": 0.4636573231403956, "grad_norm": 2.403411388397217, "learning_rate": 1.08610735781845e-05, "loss": 0.21749405562877655, "step": 3821 }, { "epoch": 0.46377866763742265, "grad_norm": 2.843613862991333, "learning_rate": 1.0858616877533474e-05, "loss": 0.43604692816734314, "step": 3822 }, { "epoch": 0.4639000121344497, "grad_norm": 2.7551069259643555, "learning_rate": 1.0856160176882448e-05, "loss": 0.3502194583415985, "step": 3823 }, { "epoch": 0.46402135663147676, "grad_norm": 3.504392385482788, "learning_rate": 1.0853703476231422e-05, "loss": 0.2447194755077362, "step": 3824 }, { "epoch": 0.4641427011285038, "grad_norm": 1.3589024543762207, "learning_rate": 1.0851246775580397e-05, "loss": 0.06629391759634018, "step": 3825 }, { "epoch": 0.46426404562553086, "grad_norm": 2.4835751056671143, "learning_rate": 1.0848790074929373e-05, "loss": 0.3067898452281952, "step": 3826 }, { "epoch": 0.4643853901225579, "grad_norm": 0.09892398864030838, "learning_rate": 1.0846333374278347e-05, "loss": 0.0005673590349033475, "step": 3827 }, { "epoch": 0.464506734619585, "grad_norm": 0.376726359128952, "learning_rate": 1.0843876673627321e-05, "loss": 0.0030884118750691414, "step": 3828 }, { "epoch": 0.4646280791166121, "grad_norm": 3.054842710494995, "learning_rate": 1.0841419972976295e-05, "loss": 0.3230999708175659, "step": 3829 }, { "epoch": 0.46474942361363913, "grad_norm": 2.944398880004883, "learning_rate": 1.083896327232527e-05, "loss": 0.26404961943626404, "step": 3830 }, { "epoch": 0.4648707681106662, "grad_norm": 2.2208971977233887, "learning_rate": 1.0836506571674244e-05, "loss": 0.12686988711357117, "step": 3831 }, { "epoch": 0.46499211260769324, "grad_norm": 1.7423559427261353, "learning_rate": 1.0834049871023218e-05, "loss": 0.11456757038831711, "step": 3832 }, { "epoch": 0.4651134571047203, "grad_norm": 1.8023579120635986, "learning_rate": 1.083159317037219e-05, "loss": 0.03461475670337677, "step": 3833 }, { "epoch": 0.46523480160174735, "grad_norm": 4.159533500671387, "learning_rate": 1.0829136469721165e-05, "loss": 0.6996920704841614, "step": 3834 }, { "epoch": 0.4653561460987744, "grad_norm": 0.8902222514152527, "learning_rate": 1.082667976907014e-05, "loss": 0.015484759584069252, "step": 3835 }, { "epoch": 0.46547749059580146, "grad_norm": 3.078047275543213, "learning_rate": 1.0824223068419113e-05, "loss": 0.031939297914505005, "step": 3836 }, { "epoch": 0.46559883509282857, "grad_norm": 3.063530683517456, "learning_rate": 1.0821766367768088e-05, "loss": 0.6199505925178528, "step": 3837 }, { "epoch": 0.4657201795898556, "grad_norm": 2.463263988494873, "learning_rate": 1.0819309667117062e-05, "loss": 0.2926793098449707, "step": 3838 }, { "epoch": 0.4658415240868827, "grad_norm": 2.8171894550323486, "learning_rate": 1.0816852966466036e-05, "loss": 0.3186990022659302, "step": 3839 }, { "epoch": 0.46596286858390973, "grad_norm": 2.6074156761169434, "learning_rate": 1.081439626581501e-05, "loss": 0.1860102415084839, "step": 3840 }, { "epoch": 0.4660842130809368, "grad_norm": 2.959845542907715, "learning_rate": 1.0811939565163985e-05, "loss": 0.20030447840690613, "step": 3841 }, { "epoch": 0.46620555757796384, "grad_norm": 2.546820878982544, "learning_rate": 1.0809482864512959e-05, "loss": 0.2492934912443161, "step": 3842 }, { "epoch": 0.4663269020749909, "grad_norm": 2.02109432220459, "learning_rate": 1.0807026163861933e-05, "loss": 0.080159492790699, "step": 3843 }, { "epoch": 0.46644824657201794, "grad_norm": 7.214606761932373, "learning_rate": 1.0804569463210908e-05, "loss": 0.31246402859687805, "step": 3844 }, { "epoch": 0.466569591069045, "grad_norm": 3.7370619773864746, "learning_rate": 1.0802112762559882e-05, "loss": 0.292080283164978, "step": 3845 }, { "epoch": 0.46669093556607205, "grad_norm": 4.298639297485352, "learning_rate": 1.0799656061908856e-05, "loss": 0.2018800675868988, "step": 3846 }, { "epoch": 0.46681228006309916, "grad_norm": 2.9732825756073, "learning_rate": 1.079719936125783e-05, "loss": 0.12097842991352081, "step": 3847 }, { "epoch": 0.4669336245601262, "grad_norm": 3.72353196144104, "learning_rate": 1.0794742660606805e-05, "loss": 0.739285409450531, "step": 3848 }, { "epoch": 0.46705496905715327, "grad_norm": 4.16900110244751, "learning_rate": 1.0792285959955779e-05, "loss": 0.27454543113708496, "step": 3849 }, { "epoch": 0.4671763135541803, "grad_norm": 3.276912212371826, "learning_rate": 1.0789829259304753e-05, "loss": 0.1591143012046814, "step": 3850 }, { "epoch": 0.4672976580512074, "grad_norm": 3.6436712741851807, "learning_rate": 1.0787372558653727e-05, "loss": 0.5543659925460815, "step": 3851 }, { "epoch": 0.46741900254823443, "grad_norm": 2.5346391201019287, "learning_rate": 1.0784915858002703e-05, "loss": 0.10443320870399475, "step": 3852 }, { "epoch": 0.4675403470452615, "grad_norm": 2.5267155170440674, "learning_rate": 1.0782459157351678e-05, "loss": 0.18570610880851746, "step": 3853 }, { "epoch": 0.46766169154228854, "grad_norm": 2.378736972808838, "learning_rate": 1.0780002456700652e-05, "loss": 0.1752547025680542, "step": 3854 }, { "epoch": 0.4677830360393156, "grad_norm": 2.7192838191986084, "learning_rate": 1.0777545756049626e-05, "loss": 0.07624950259923935, "step": 3855 }, { "epoch": 0.4679043805363427, "grad_norm": 2.262993097305298, "learning_rate": 1.07750890553986e-05, "loss": 0.16422806680202484, "step": 3856 }, { "epoch": 0.46802572503336975, "grad_norm": 1.9306615591049194, "learning_rate": 1.0772632354747575e-05, "loss": 0.285910427570343, "step": 3857 }, { "epoch": 0.4681470695303968, "grad_norm": 3.803337812423706, "learning_rate": 1.0770175654096549e-05, "loss": 0.3446381688117981, "step": 3858 }, { "epoch": 0.46826841402742386, "grad_norm": 2.9336376190185547, "learning_rate": 1.0767718953445523e-05, "loss": 0.25159865617752075, "step": 3859 }, { "epoch": 0.4683897585244509, "grad_norm": 2.7131705284118652, "learning_rate": 1.0765262252794497e-05, "loss": 0.22185799479484558, "step": 3860 }, { "epoch": 0.46851110302147797, "grad_norm": 3.0690999031066895, "learning_rate": 1.0762805552143472e-05, "loss": 0.3870882987976074, "step": 3861 }, { "epoch": 0.468632447518505, "grad_norm": 2.561528205871582, "learning_rate": 1.0760348851492446e-05, "loss": 0.6698027849197388, "step": 3862 }, { "epoch": 0.4687537920155321, "grad_norm": 3.0866506099700928, "learning_rate": 1.075789215084142e-05, "loss": 0.3074035346508026, "step": 3863 }, { "epoch": 0.46887513651255913, "grad_norm": 1.8056470155715942, "learning_rate": 1.0755435450190394e-05, "loss": 0.06750544160604477, "step": 3864 }, { "epoch": 0.46899648100958624, "grad_norm": 3.010787010192871, "learning_rate": 1.0752978749539369e-05, "loss": 0.5204322338104248, "step": 3865 }, { "epoch": 0.4691178255066133, "grad_norm": 2.996385097503662, "learning_rate": 1.0750522048888343e-05, "loss": 0.24821801483631134, "step": 3866 }, { "epoch": 0.46923917000364035, "grad_norm": 2.704739809036255, "learning_rate": 1.0748065348237317e-05, "loss": 0.29266121983528137, "step": 3867 }, { "epoch": 0.4693605145006674, "grad_norm": 2.354991912841797, "learning_rate": 1.0745608647586291e-05, "loss": 0.12653499841690063, "step": 3868 }, { "epoch": 0.46948185899769446, "grad_norm": 3.1913106441497803, "learning_rate": 1.0743151946935266e-05, "loss": 0.3769054114818573, "step": 3869 }, { "epoch": 0.4696032034947215, "grad_norm": 2.2342827320098877, "learning_rate": 1.074069524628424e-05, "loss": 0.14861273765563965, "step": 3870 }, { "epoch": 0.46972454799174856, "grad_norm": 2.3855037689208984, "learning_rate": 1.0738238545633214e-05, "loss": 0.16335630416870117, "step": 3871 }, { "epoch": 0.4698458924887756, "grad_norm": 3.1305956840515137, "learning_rate": 1.073578184498219e-05, "loss": 0.3295513987541199, "step": 3872 }, { "epoch": 0.46996723698580267, "grad_norm": 1.3403748273849487, "learning_rate": 1.0733325144331164e-05, "loss": 0.048968229442834854, "step": 3873 }, { "epoch": 0.4700885814828297, "grad_norm": 3.024962902069092, "learning_rate": 1.0730868443680139e-05, "loss": 0.6027660369873047, "step": 3874 }, { "epoch": 0.47020992597985684, "grad_norm": 2.7007224559783936, "learning_rate": 1.0728411743029113e-05, "loss": 0.40500205755233765, "step": 3875 }, { "epoch": 0.4703312704768839, "grad_norm": 2.693378210067749, "learning_rate": 1.0725955042378087e-05, "loss": 0.4564482271671295, "step": 3876 }, { "epoch": 0.47045261497391094, "grad_norm": 1.9114058017730713, "learning_rate": 1.0723498341727061e-05, "loss": 0.2021862268447876, "step": 3877 }, { "epoch": 0.470573959470938, "grad_norm": 1.7047556638717651, "learning_rate": 1.0721041641076036e-05, "loss": 0.2426297515630722, "step": 3878 }, { "epoch": 0.47069530396796505, "grad_norm": 2.2682855129241943, "learning_rate": 1.071858494042501e-05, "loss": 0.1504073441028595, "step": 3879 }, { "epoch": 0.4708166484649921, "grad_norm": 3.064701557159424, "learning_rate": 1.0716128239773984e-05, "loss": 0.40392714738845825, "step": 3880 }, { "epoch": 0.47093799296201916, "grad_norm": 2.480076789855957, "learning_rate": 1.0713671539122958e-05, "loss": 0.28264525532722473, "step": 3881 }, { "epoch": 0.4710593374590462, "grad_norm": 0.01469326764345169, "learning_rate": 1.0711214838471933e-05, "loss": 0.0003070329548791051, "step": 3882 }, { "epoch": 0.47118068195607327, "grad_norm": 2.8371729850769043, "learning_rate": 1.0708758137820907e-05, "loss": 0.35440123081207275, "step": 3883 }, { "epoch": 0.4713020264531004, "grad_norm": 3.269803285598755, "learning_rate": 1.0706301437169881e-05, "loss": 0.41059374809265137, "step": 3884 }, { "epoch": 0.47142337095012743, "grad_norm": 2.0703015327453613, "learning_rate": 1.0703844736518856e-05, "loss": 0.07132905721664429, "step": 3885 }, { "epoch": 0.4715447154471545, "grad_norm": 2.3490655422210693, "learning_rate": 1.070138803586783e-05, "loss": 0.49852243065834045, "step": 3886 }, { "epoch": 0.47166605994418154, "grad_norm": 1.5123106241226196, "learning_rate": 1.0698931335216804e-05, "loss": 0.1113678365945816, "step": 3887 }, { "epoch": 0.4717874044412086, "grad_norm": 3.1069955825805664, "learning_rate": 1.0696474634565778e-05, "loss": 0.3869853615760803, "step": 3888 }, { "epoch": 0.47190874893823564, "grad_norm": 2.830869674682617, "learning_rate": 1.0694017933914753e-05, "loss": 0.48214244842529297, "step": 3889 }, { "epoch": 0.4720300934352627, "grad_norm": 1.855278730392456, "learning_rate": 1.0691561233263727e-05, "loss": 0.056116193532943726, "step": 3890 }, { "epoch": 0.47215143793228975, "grad_norm": 4.9797749519348145, "learning_rate": 1.0689104532612701e-05, "loss": 0.41455674171447754, "step": 3891 }, { "epoch": 0.4722727824293168, "grad_norm": 2.3777003288269043, "learning_rate": 1.0686647831961677e-05, "loss": 0.2299240231513977, "step": 3892 }, { "epoch": 0.4723941269263439, "grad_norm": 0.6550430655479431, "learning_rate": 1.0684191131310651e-05, "loss": 0.002911692252382636, "step": 3893 }, { "epoch": 0.47251547142337097, "grad_norm": 2.060988187789917, "learning_rate": 1.0681734430659626e-05, "loss": 0.13176614046096802, "step": 3894 }, { "epoch": 0.472636815920398, "grad_norm": 3.4118447303771973, "learning_rate": 1.06792777300086e-05, "loss": 0.23221538960933685, "step": 3895 }, { "epoch": 0.4727581604174251, "grad_norm": 2.803123950958252, "learning_rate": 1.0676821029357574e-05, "loss": 0.16176444292068481, "step": 3896 }, { "epoch": 0.47287950491445213, "grad_norm": 1.795917272567749, "learning_rate": 1.0674364328706548e-05, "loss": 0.4585912823677063, "step": 3897 }, { "epoch": 0.4730008494114792, "grad_norm": 2.3427181243896484, "learning_rate": 1.0671907628055523e-05, "loss": 0.34860968589782715, "step": 3898 }, { "epoch": 0.47312219390850624, "grad_norm": 1.4768927097320557, "learning_rate": 1.0669450927404497e-05, "loss": 0.09793134033679962, "step": 3899 }, { "epoch": 0.4732435384055333, "grad_norm": 2.1842041015625, "learning_rate": 1.0666994226753471e-05, "loss": 0.4446258544921875, "step": 3900 }, { "epoch": 0.47336488290256035, "grad_norm": 2.9729180335998535, "learning_rate": 1.0664537526102445e-05, "loss": 0.3867642283439636, "step": 3901 }, { "epoch": 0.47348622739958746, "grad_norm": 3.0708909034729004, "learning_rate": 1.066208082545142e-05, "loss": 0.2150513231754303, "step": 3902 }, { "epoch": 0.4736075718966145, "grad_norm": 1.3335891962051392, "learning_rate": 1.0659624124800394e-05, "loss": 0.021012621000409126, "step": 3903 }, { "epoch": 0.47372891639364156, "grad_norm": 2.9141807556152344, "learning_rate": 1.0657167424149368e-05, "loss": 0.6602778434753418, "step": 3904 }, { "epoch": 0.4738502608906686, "grad_norm": 2.237032890319824, "learning_rate": 1.0654710723498342e-05, "loss": 0.26512467861175537, "step": 3905 }, { "epoch": 0.47397160538769567, "grad_norm": 2.4029390811920166, "learning_rate": 1.0652254022847317e-05, "loss": 0.2347608208656311, "step": 3906 }, { "epoch": 0.4740929498847227, "grad_norm": 2.534771680831909, "learning_rate": 1.0649797322196291e-05, "loss": 0.0783708393573761, "step": 3907 }, { "epoch": 0.4742142943817498, "grad_norm": 2.047633647918701, "learning_rate": 1.0647340621545265e-05, "loss": 0.1865750551223755, "step": 3908 }, { "epoch": 0.47433563887877683, "grad_norm": 1.6869794130325317, "learning_rate": 1.064488392089424e-05, "loss": 0.12035293877124786, "step": 3909 }, { "epoch": 0.4744569833758039, "grad_norm": 3.8962488174438477, "learning_rate": 1.0642427220243214e-05, "loss": 0.4050816595554352, "step": 3910 }, { "epoch": 0.47457832787283094, "grad_norm": 1.9148086309432983, "learning_rate": 1.0639970519592188e-05, "loss": 0.10218142718076706, "step": 3911 }, { "epoch": 0.47469967236985805, "grad_norm": 2.1516618728637695, "learning_rate": 1.0637513818941164e-05, "loss": 0.23401297628879547, "step": 3912 }, { "epoch": 0.4748210168668851, "grad_norm": 2.0974016189575195, "learning_rate": 1.0635057118290138e-05, "loss": 0.4570923447608948, "step": 3913 }, { "epoch": 0.47494236136391216, "grad_norm": 3.9381473064422607, "learning_rate": 1.0632600417639112e-05, "loss": 0.4090957045555115, "step": 3914 }, { "epoch": 0.4750637058609392, "grad_norm": 3.0685372352600098, "learning_rate": 1.0630143716988087e-05, "loss": 0.3796621859073639, "step": 3915 }, { "epoch": 0.47518505035796627, "grad_norm": 1.3861891031265259, "learning_rate": 1.0627687016337061e-05, "loss": 0.14838837087154388, "step": 3916 }, { "epoch": 0.4753063948549933, "grad_norm": 2.1783580780029297, "learning_rate": 1.0625230315686035e-05, "loss": 0.4516194462776184, "step": 3917 }, { "epoch": 0.4754277393520204, "grad_norm": 2.5179131031036377, "learning_rate": 1.062277361503501e-05, "loss": 0.36951062083244324, "step": 3918 }, { "epoch": 0.4755490838490474, "grad_norm": 6.605739593505859, "learning_rate": 1.0620316914383984e-05, "loss": 0.3154905438423157, "step": 3919 }, { "epoch": 0.4756704283460745, "grad_norm": 0.9335907697677612, "learning_rate": 1.0617860213732958e-05, "loss": 0.049954358488321304, "step": 3920 }, { "epoch": 0.4757917728431016, "grad_norm": 2.6936075687408447, "learning_rate": 1.0615403513081932e-05, "loss": 0.16908985376358032, "step": 3921 }, { "epoch": 0.47591311734012864, "grad_norm": 1.6764365434646606, "learning_rate": 1.0612946812430906e-05, "loss": 0.26403748989105225, "step": 3922 }, { "epoch": 0.4760344618371557, "grad_norm": 5.9298577308654785, "learning_rate": 1.061049011177988e-05, "loss": 0.08351560682058334, "step": 3923 }, { "epoch": 0.47615580633418275, "grad_norm": 2.9468815326690674, "learning_rate": 1.0608033411128855e-05, "loss": 0.360771119594574, "step": 3924 }, { "epoch": 0.4762771508312098, "grad_norm": 1.9476665258407593, "learning_rate": 1.060557671047783e-05, "loss": 0.3027259409427643, "step": 3925 }, { "epoch": 0.47639849532823686, "grad_norm": 4.023041725158691, "learning_rate": 1.0603120009826803e-05, "loss": 0.4723225235939026, "step": 3926 }, { "epoch": 0.4765198398252639, "grad_norm": 1.9701441526412964, "learning_rate": 1.0600663309175778e-05, "loss": 0.29517021775245667, "step": 3927 }, { "epoch": 0.47664118432229097, "grad_norm": 1.5217077732086182, "learning_rate": 1.0598206608524752e-05, "loss": 0.10690990835428238, "step": 3928 }, { "epoch": 0.476762528819318, "grad_norm": 3.051661252975464, "learning_rate": 1.0595749907873726e-05, "loss": 0.3336365520954132, "step": 3929 }, { "epoch": 0.47688387331634513, "grad_norm": 2.147693634033203, "learning_rate": 1.05932932072227e-05, "loss": 0.5873215198516846, "step": 3930 }, { "epoch": 0.4770052178133722, "grad_norm": 2.739251136779785, "learning_rate": 1.0590836506571675e-05, "loss": 0.34239891171455383, "step": 3931 }, { "epoch": 0.47712656231039924, "grad_norm": 4.637502670288086, "learning_rate": 1.058837980592065e-05, "loss": 0.11938660591840744, "step": 3932 }, { "epoch": 0.4772479068074263, "grad_norm": 3.1877243518829346, "learning_rate": 1.0585923105269625e-05, "loss": 0.7206730842590332, "step": 3933 }, { "epoch": 0.47736925130445335, "grad_norm": 2.642648935317993, "learning_rate": 1.05834664046186e-05, "loss": 0.24321579933166504, "step": 3934 }, { "epoch": 0.4774905958014804, "grad_norm": 2.448634624481201, "learning_rate": 1.0581009703967574e-05, "loss": 0.22593624889850616, "step": 3935 }, { "epoch": 0.47761194029850745, "grad_norm": 2.391491174697876, "learning_rate": 1.0578553003316548e-05, "loss": 0.33686891198158264, "step": 3936 }, { "epoch": 0.4777332847955345, "grad_norm": 3.104855537414551, "learning_rate": 1.0576096302665522e-05, "loss": 0.4050844609737396, "step": 3937 }, { "epoch": 0.47785462929256156, "grad_norm": 4.130711555480957, "learning_rate": 1.0573639602014496e-05, "loss": 0.2961690127849579, "step": 3938 }, { "epoch": 0.4779759737895886, "grad_norm": 3.7535223960876465, "learning_rate": 1.057118290136347e-05, "loss": 0.1767973005771637, "step": 3939 }, { "epoch": 0.4780973182866157, "grad_norm": 1.657116413116455, "learning_rate": 1.0568726200712445e-05, "loss": 0.06132087856531143, "step": 3940 }, { "epoch": 0.4782186627836428, "grad_norm": 1.1180098056793213, "learning_rate": 1.0566269500061419e-05, "loss": 0.07620692998170853, "step": 3941 }, { "epoch": 0.47834000728066983, "grad_norm": 2.728780746459961, "learning_rate": 1.0563812799410393e-05, "loss": 0.299428254365921, "step": 3942 }, { "epoch": 0.4784613517776969, "grad_norm": 2.388319969177246, "learning_rate": 1.0561356098759368e-05, "loss": 0.147850900888443, "step": 3943 }, { "epoch": 0.47858269627472394, "grad_norm": 2.5944557189941406, "learning_rate": 1.0558899398108342e-05, "loss": 0.2355179488658905, "step": 3944 }, { "epoch": 0.478704040771751, "grad_norm": 2.288933753967285, "learning_rate": 1.0556442697457316e-05, "loss": 0.33115917444229126, "step": 3945 }, { "epoch": 0.47882538526877805, "grad_norm": 3.3465325832366943, "learning_rate": 1.055398599680629e-05, "loss": 0.5318613052368164, "step": 3946 }, { "epoch": 0.4789467297658051, "grad_norm": 1.7699624300003052, "learning_rate": 1.0551529296155265e-05, "loss": 0.13044926524162292, "step": 3947 }, { "epoch": 0.47906807426283216, "grad_norm": 2.82124924659729, "learning_rate": 1.0549072595504239e-05, "loss": 0.47051340341567993, "step": 3948 }, { "epoch": 0.47918941875985926, "grad_norm": 2.446516990661621, "learning_rate": 1.0546615894853213e-05, "loss": 0.526097297668457, "step": 3949 }, { "epoch": 0.4793107632568863, "grad_norm": 2.2702369689941406, "learning_rate": 1.0544159194202187e-05, "loss": 0.4103502631187439, "step": 3950 }, { "epoch": 0.4794321077539134, "grad_norm": 2.536848783493042, "learning_rate": 1.0541702493551162e-05, "loss": 0.2067994773387909, "step": 3951 }, { "epoch": 0.4795534522509404, "grad_norm": 3.593503952026367, "learning_rate": 1.0539245792900138e-05, "loss": 0.181950181722641, "step": 3952 }, { "epoch": 0.4796747967479675, "grad_norm": 3.185767889022827, "learning_rate": 1.0536789092249112e-05, "loss": 0.18063871562480927, "step": 3953 }, { "epoch": 0.47979614124499453, "grad_norm": 1.8873003721237183, "learning_rate": 1.0534332391598086e-05, "loss": 0.18147023022174835, "step": 3954 }, { "epoch": 0.4799174857420216, "grad_norm": 2.7884440422058105, "learning_rate": 1.053187569094706e-05, "loss": 0.22004428505897522, "step": 3955 }, { "epoch": 0.48003883023904864, "grad_norm": 3.063300848007202, "learning_rate": 1.0529418990296035e-05, "loss": 0.3766789436340332, "step": 3956 }, { "epoch": 0.4801601747360757, "grad_norm": 2.764601707458496, "learning_rate": 1.0526962289645009e-05, "loss": 0.26879075169563293, "step": 3957 }, { "epoch": 0.4802815192331028, "grad_norm": 1.9297682046890259, "learning_rate": 1.0524505588993983e-05, "loss": 0.14930634200572968, "step": 3958 }, { "epoch": 0.48040286373012986, "grad_norm": 1.9681339263916016, "learning_rate": 1.0522048888342957e-05, "loss": 0.397515207529068, "step": 3959 }, { "epoch": 0.4805242082271569, "grad_norm": 1.9679991006851196, "learning_rate": 1.0519592187691932e-05, "loss": 0.1813061386346817, "step": 3960 }, { "epoch": 0.48064555272418397, "grad_norm": 2.6101455688476562, "learning_rate": 1.0517135487040906e-05, "loss": 0.2768298089504242, "step": 3961 }, { "epoch": 0.480766897221211, "grad_norm": 2.4954378604888916, "learning_rate": 1.051467878638988e-05, "loss": 0.3034391403198242, "step": 3962 }, { "epoch": 0.4808882417182381, "grad_norm": 4.449526786804199, "learning_rate": 1.0512222085738854e-05, "loss": 0.38359954953193665, "step": 3963 }, { "epoch": 0.48100958621526513, "grad_norm": 2.071256160736084, "learning_rate": 1.0509765385087829e-05, "loss": 0.3157460391521454, "step": 3964 }, { "epoch": 0.4811309307122922, "grad_norm": 2.6139607429504395, "learning_rate": 1.0507308684436803e-05, "loss": 0.30131807923316956, "step": 3965 }, { "epoch": 0.48125227520931924, "grad_norm": 0.8780409097671509, "learning_rate": 1.0504851983785777e-05, "loss": 0.01298319548368454, "step": 3966 }, { "epoch": 0.4813736197063463, "grad_norm": 1.8879942893981934, "learning_rate": 1.0502395283134751e-05, "loss": 0.08332539349794388, "step": 3967 }, { "epoch": 0.4814949642033734, "grad_norm": 3.2966036796569824, "learning_rate": 1.0499938582483724e-05, "loss": 0.34805336594581604, "step": 3968 }, { "epoch": 0.48161630870040045, "grad_norm": 2.2880709171295166, "learning_rate": 1.0497481881832698e-05, "loss": 0.18685874342918396, "step": 3969 }, { "epoch": 0.4817376531974275, "grad_norm": 4.809329986572266, "learning_rate": 1.0495025181181673e-05, "loss": 0.2955242991447449, "step": 3970 }, { "epoch": 0.48185899769445456, "grad_norm": 1.616361379623413, "learning_rate": 1.0492568480530647e-05, "loss": 0.2602810859680176, "step": 3971 }, { "epoch": 0.4819803421914816, "grad_norm": 1.6564544439315796, "learning_rate": 1.0490111779879621e-05, "loss": 0.09533637762069702, "step": 3972 }, { "epoch": 0.48210168668850867, "grad_norm": 1.8999651670455933, "learning_rate": 1.0487655079228595e-05, "loss": 0.21959394216537476, "step": 3973 }, { "epoch": 0.4822230311855357, "grad_norm": 1.5243175029754639, "learning_rate": 1.048519837857757e-05, "loss": 0.07984542846679688, "step": 3974 }, { "epoch": 0.4823443756825628, "grad_norm": 3.108914613723755, "learning_rate": 1.0482741677926544e-05, "loss": 0.5572518110275269, "step": 3975 }, { "epoch": 0.48246572017958983, "grad_norm": 2.040226697921753, "learning_rate": 1.0480284977275518e-05, "loss": 0.33752697706222534, "step": 3976 }, { "epoch": 0.48258706467661694, "grad_norm": 3.3626208305358887, "learning_rate": 1.0477828276624492e-05, "loss": 0.24655762314796448, "step": 3977 }, { "epoch": 0.482708409173644, "grad_norm": 4.025614261627197, "learning_rate": 1.0475371575973468e-05, "loss": 0.4968215525150299, "step": 3978 }, { "epoch": 0.48282975367067105, "grad_norm": 3.119117021560669, "learning_rate": 1.0472914875322443e-05, "loss": 0.6192764639854431, "step": 3979 }, { "epoch": 0.4829510981676981, "grad_norm": 3.2061896324157715, "learning_rate": 1.0470458174671417e-05, "loss": 0.24846620857715607, "step": 3980 }, { "epoch": 0.48307244266472515, "grad_norm": 1.9638543128967285, "learning_rate": 1.0468001474020391e-05, "loss": 0.35574042797088623, "step": 3981 }, { "epoch": 0.4831937871617522, "grad_norm": 3.0407400131225586, "learning_rate": 1.0465544773369365e-05, "loss": 0.2249983549118042, "step": 3982 }, { "epoch": 0.48331513165877926, "grad_norm": 2.090866804122925, "learning_rate": 1.046308807271834e-05, "loss": 0.2959258556365967, "step": 3983 }, { "epoch": 0.4834364761558063, "grad_norm": 2.555753707885742, "learning_rate": 1.0460631372067314e-05, "loss": 0.15021148324012756, "step": 3984 }, { "epoch": 0.48355782065283337, "grad_norm": 2.147218704223633, "learning_rate": 1.0458174671416288e-05, "loss": 0.2335168421268463, "step": 3985 }, { "epoch": 0.4836791651498605, "grad_norm": 2.133526086807251, "learning_rate": 1.0455717970765262e-05, "loss": 0.2494381070137024, "step": 3986 }, { "epoch": 0.48380050964688753, "grad_norm": 2.633037805557251, "learning_rate": 1.0453261270114237e-05, "loss": 0.4223847985267639, "step": 3987 }, { "epoch": 0.4839218541439146, "grad_norm": 1.964646577835083, "learning_rate": 1.0450804569463211e-05, "loss": 0.015639489516615868, "step": 3988 }, { "epoch": 0.48404319864094164, "grad_norm": 2.0672714710235596, "learning_rate": 1.0448347868812185e-05, "loss": 0.35201549530029297, "step": 3989 }, { "epoch": 0.4841645431379687, "grad_norm": 2.8944873809814453, "learning_rate": 1.044589116816116e-05, "loss": 0.36844390630722046, "step": 3990 }, { "epoch": 0.48428588763499575, "grad_norm": 2.2492825984954834, "learning_rate": 1.0443434467510134e-05, "loss": 0.159017875790596, "step": 3991 }, { "epoch": 0.4844072321320228, "grad_norm": 2.8350884914398193, "learning_rate": 1.0440977766859108e-05, "loss": 0.20941796898841858, "step": 3992 }, { "epoch": 0.48452857662904986, "grad_norm": 2.2333664894104004, "learning_rate": 1.0438521066208082e-05, "loss": 0.17051367461681366, "step": 3993 }, { "epoch": 0.4846499211260769, "grad_norm": 3.7527835369110107, "learning_rate": 1.0436064365557056e-05, "loss": 0.34921470284461975, "step": 3994 }, { "epoch": 0.484771265623104, "grad_norm": 0.799025297164917, "learning_rate": 1.043360766490603e-05, "loss": 0.02540355548262596, "step": 3995 }, { "epoch": 0.4848926101201311, "grad_norm": 2.9625139236450195, "learning_rate": 1.0431150964255005e-05, "loss": 0.31718266010284424, "step": 3996 }, { "epoch": 0.4850139546171581, "grad_norm": 6.872302532196045, "learning_rate": 1.0428694263603981e-05, "loss": 0.2765273451805115, "step": 3997 }, { "epoch": 0.4851352991141852, "grad_norm": 1.9910902976989746, "learning_rate": 1.0426237562952955e-05, "loss": 0.06150764226913452, "step": 3998 }, { "epoch": 0.48525664361121224, "grad_norm": 2.3983571529388428, "learning_rate": 1.042378086230193e-05, "loss": 0.46947625279426575, "step": 3999 }, { "epoch": 0.4853779881082393, "grad_norm": 2.5228049755096436, "learning_rate": 1.0421324161650904e-05, "loss": 0.29351717233657837, "step": 4000 }, { "epoch": 0.48549933260526634, "grad_norm": 2.956768035888672, "learning_rate": 1.0418867460999878e-05, "loss": 0.6193536520004272, "step": 4001 }, { "epoch": 0.4856206771022934, "grad_norm": 1.801517128944397, "learning_rate": 1.0416410760348852e-05, "loss": 0.1959724724292755, "step": 4002 }, { "epoch": 0.48574202159932045, "grad_norm": 2.0595669746398926, "learning_rate": 1.0413954059697826e-05, "loss": 0.2087545394897461, "step": 4003 }, { "epoch": 0.4858633660963475, "grad_norm": 3.764439821243286, "learning_rate": 1.04114973590468e-05, "loss": 0.3073838949203491, "step": 4004 }, { "epoch": 0.4859847105933746, "grad_norm": 2.4153034687042236, "learning_rate": 1.0409040658395775e-05, "loss": 0.7671661376953125, "step": 4005 }, { "epoch": 0.48610605509040167, "grad_norm": 2.417722225189209, "learning_rate": 1.040658395774475e-05, "loss": 0.08856865018606186, "step": 4006 }, { "epoch": 0.4862273995874287, "grad_norm": 3.1890218257904053, "learning_rate": 1.0404127257093723e-05, "loss": 0.38545364141464233, "step": 4007 }, { "epoch": 0.4863487440844558, "grad_norm": 3.0509095191955566, "learning_rate": 1.0401670556442698e-05, "loss": 0.1537172496318817, "step": 4008 }, { "epoch": 0.48647008858148283, "grad_norm": 2.6010284423828125, "learning_rate": 1.0399213855791672e-05, "loss": 0.1654970794916153, "step": 4009 }, { "epoch": 0.4865914330785099, "grad_norm": 2.0901601314544678, "learning_rate": 1.0396757155140646e-05, "loss": 0.023962242528796196, "step": 4010 }, { "epoch": 0.48671277757553694, "grad_norm": 2.0741961002349854, "learning_rate": 1.039430045448962e-05, "loss": 0.2669495642185211, "step": 4011 }, { "epoch": 0.486834122072564, "grad_norm": 2.757509469985962, "learning_rate": 1.0391843753838595e-05, "loss": 0.23018372058868408, "step": 4012 }, { "epoch": 0.48695546656959104, "grad_norm": 9.108535766601562, "learning_rate": 1.0389387053187569e-05, "loss": 0.25134938955307007, "step": 4013 }, { "epoch": 0.48707681106661815, "grad_norm": 2.496730089187622, "learning_rate": 1.0386930352536543e-05, "loss": 0.20971739292144775, "step": 4014 }, { "epoch": 0.4871981555636452, "grad_norm": 2.4744932651519775, "learning_rate": 1.0384473651885518e-05, "loss": 0.4026094079017639, "step": 4015 }, { "epoch": 0.48731950006067226, "grad_norm": 3.44887113571167, "learning_rate": 1.0382016951234492e-05, "loss": 0.3604486584663391, "step": 4016 }, { "epoch": 0.4874408445576993, "grad_norm": 2.9968016147613525, "learning_rate": 1.0379560250583468e-05, "loss": 0.26460587978363037, "step": 4017 }, { "epoch": 0.48756218905472637, "grad_norm": 2.093482494354248, "learning_rate": 1.0377103549932442e-05, "loss": 0.15084385871887207, "step": 4018 }, { "epoch": 0.4876835335517534, "grad_norm": 1.8410183191299438, "learning_rate": 1.0374646849281416e-05, "loss": 0.11805377155542374, "step": 4019 }, { "epoch": 0.4878048780487805, "grad_norm": 2.6056156158447266, "learning_rate": 1.037219014863039e-05, "loss": 0.15497414767742157, "step": 4020 }, { "epoch": 0.48792622254580753, "grad_norm": 2.131641387939453, "learning_rate": 1.0369733447979365e-05, "loss": 0.15796193480491638, "step": 4021 }, { "epoch": 0.4880475670428346, "grad_norm": 2.2191338539123535, "learning_rate": 1.0367276747328339e-05, "loss": 0.3430430293083191, "step": 4022 }, { "epoch": 0.4881689115398617, "grad_norm": 2.9782004356384277, "learning_rate": 1.0364820046677313e-05, "loss": 0.21719008684158325, "step": 4023 }, { "epoch": 0.48829025603688875, "grad_norm": 0.4477785527706146, "learning_rate": 1.0362363346026288e-05, "loss": 0.007014643866568804, "step": 4024 }, { "epoch": 0.4884116005339158, "grad_norm": 4.277488708496094, "learning_rate": 1.0359906645375262e-05, "loss": 0.42528635263442993, "step": 4025 }, { "epoch": 0.48853294503094286, "grad_norm": 3.6790611743927, "learning_rate": 1.0357449944724236e-05, "loss": 0.30435433983802795, "step": 4026 }, { "epoch": 0.4886542895279699, "grad_norm": 2.9960482120513916, "learning_rate": 1.035499324407321e-05, "loss": 0.5715393424034119, "step": 4027 }, { "epoch": 0.48877563402499696, "grad_norm": 3.9691762924194336, "learning_rate": 1.0352536543422185e-05, "loss": 0.20012164115905762, "step": 4028 }, { "epoch": 0.488896978522024, "grad_norm": 3.4209837913513184, "learning_rate": 1.0350079842771159e-05, "loss": 0.29357442259788513, "step": 4029 }, { "epoch": 0.48901832301905107, "grad_norm": 3.4433178901672363, "learning_rate": 1.0347623142120133e-05, "loss": 0.2870279550552368, "step": 4030 }, { "epoch": 0.4891396675160781, "grad_norm": 2.3300018310546875, "learning_rate": 1.0345166441469107e-05, "loss": 0.32107627391815186, "step": 4031 }, { "epoch": 0.4892610120131052, "grad_norm": 3.5191640853881836, "learning_rate": 1.0342709740818082e-05, "loss": 0.5115379095077515, "step": 4032 }, { "epoch": 0.4893823565101323, "grad_norm": 3.82517147064209, "learning_rate": 1.0340253040167056e-05, "loss": 0.16688136756420135, "step": 4033 }, { "epoch": 0.48950370100715934, "grad_norm": 2.6024441719055176, "learning_rate": 1.033779633951603e-05, "loss": 0.5628013014793396, "step": 4034 }, { "epoch": 0.4896250455041864, "grad_norm": 1.8036068677902222, "learning_rate": 1.0335339638865004e-05, "loss": 0.13555356860160828, "step": 4035 }, { "epoch": 0.48974639000121345, "grad_norm": 2.815880298614502, "learning_rate": 1.0332882938213979e-05, "loss": 0.2016706019639969, "step": 4036 }, { "epoch": 0.4898677344982405, "grad_norm": 2.8223869800567627, "learning_rate": 1.0330426237562955e-05, "loss": 0.3089810013771057, "step": 4037 }, { "epoch": 0.48998907899526756, "grad_norm": 3.0222620964050293, "learning_rate": 1.0327969536911929e-05, "loss": 0.4708369970321655, "step": 4038 }, { "epoch": 0.4901104234922946, "grad_norm": 0.8015201687812805, "learning_rate": 1.0325512836260903e-05, "loss": 0.016762444749474525, "step": 4039 }, { "epoch": 0.49023176798932167, "grad_norm": 2.1054742336273193, "learning_rate": 1.0323056135609877e-05, "loss": 0.2451382279396057, "step": 4040 }, { "epoch": 0.4903531124863487, "grad_norm": 2.0531117916107178, "learning_rate": 1.0320599434958852e-05, "loss": 0.16565163433551788, "step": 4041 }, { "epoch": 0.49047445698337583, "grad_norm": 2.1806256771087646, "learning_rate": 1.0318142734307826e-05, "loss": 0.2720821499824524, "step": 4042 }, { "epoch": 0.4905958014804029, "grad_norm": 2.54463791847229, "learning_rate": 1.03156860336568e-05, "loss": 0.44434866309165955, "step": 4043 }, { "epoch": 0.49071714597742994, "grad_norm": 2.0366435050964355, "learning_rate": 1.0313229333005774e-05, "loss": 0.31781578063964844, "step": 4044 }, { "epoch": 0.490838490474457, "grad_norm": 1.9263622760772705, "learning_rate": 1.0310772632354749e-05, "loss": 0.05168451368808746, "step": 4045 }, { "epoch": 0.49095983497148404, "grad_norm": 3.3703081607818604, "learning_rate": 1.0308315931703723e-05, "loss": 0.20190204679965973, "step": 4046 }, { "epoch": 0.4910811794685111, "grad_norm": 0.9096542596817017, "learning_rate": 1.0305859231052697e-05, "loss": 0.05566013231873512, "step": 4047 }, { "epoch": 0.49120252396553815, "grad_norm": 2.569469690322876, "learning_rate": 1.0303402530401671e-05, "loss": 0.1524551659822464, "step": 4048 }, { "epoch": 0.4913238684625652, "grad_norm": 3.883267879486084, "learning_rate": 1.0300945829750646e-05, "loss": 0.5030513405799866, "step": 4049 }, { "epoch": 0.49144521295959226, "grad_norm": 1.2005629539489746, "learning_rate": 1.029848912909962e-05, "loss": 0.04238266497850418, "step": 4050 }, { "epoch": 0.49156655745661937, "grad_norm": 2.4214656352996826, "learning_rate": 1.0296032428448594e-05, "loss": 0.11391811817884445, "step": 4051 }, { "epoch": 0.4916879019536464, "grad_norm": 2.980701446533203, "learning_rate": 1.0293575727797568e-05, "loss": 0.14839670062065125, "step": 4052 }, { "epoch": 0.4918092464506735, "grad_norm": 2.5335092544555664, "learning_rate": 1.0291119027146543e-05, "loss": 0.17934875190258026, "step": 4053 }, { "epoch": 0.49193059094770053, "grad_norm": 3.0248172283172607, "learning_rate": 1.0288662326495517e-05, "loss": 0.394993394613266, "step": 4054 }, { "epoch": 0.4920519354447276, "grad_norm": 2.9761300086975098, "learning_rate": 1.0286205625844491e-05, "loss": 0.40359243750572205, "step": 4055 }, { "epoch": 0.49217327994175464, "grad_norm": 3.4035818576812744, "learning_rate": 1.0283748925193466e-05, "loss": 0.6626853942871094, "step": 4056 }, { "epoch": 0.4922946244387817, "grad_norm": 3.1175756454467773, "learning_rate": 1.0281292224542441e-05, "loss": 0.1597563922405243, "step": 4057 }, { "epoch": 0.49241596893580875, "grad_norm": 0.5844665169715881, "learning_rate": 1.0278835523891416e-05, "loss": 0.012030395679175854, "step": 4058 }, { "epoch": 0.4925373134328358, "grad_norm": 3.8436920642852783, "learning_rate": 1.027637882324039e-05, "loss": 0.6617082357406616, "step": 4059 }, { "epoch": 0.49265865792986285, "grad_norm": 0.1531480997800827, "learning_rate": 1.0273922122589364e-05, "loss": 0.0010612016776576638, "step": 4060 }, { "epoch": 0.49278000242688996, "grad_norm": 2.8751049041748047, "learning_rate": 1.0271465421938339e-05, "loss": 0.1708884984254837, "step": 4061 }, { "epoch": 0.492901346923917, "grad_norm": 3.7742342948913574, "learning_rate": 1.0269008721287313e-05, "loss": 0.30525243282318115, "step": 4062 }, { "epoch": 0.49302269142094407, "grad_norm": 2.9056217670440674, "learning_rate": 1.0266552020636287e-05, "loss": 0.3002099096775055, "step": 4063 }, { "epoch": 0.4931440359179711, "grad_norm": 1.3139050006866455, "learning_rate": 1.0264095319985261e-05, "loss": 0.119903564453125, "step": 4064 }, { "epoch": 0.4932653804149982, "grad_norm": 3.280710458755493, "learning_rate": 1.0261638619334236e-05, "loss": 0.20150858163833618, "step": 4065 }, { "epoch": 0.49338672491202523, "grad_norm": 3.2768208980560303, "learning_rate": 1.025918191868321e-05, "loss": 0.2754751443862915, "step": 4066 }, { "epoch": 0.4935080694090523, "grad_norm": 0.588826596736908, "learning_rate": 1.0256725218032184e-05, "loss": 0.015325482934713364, "step": 4067 }, { "epoch": 0.49362941390607934, "grad_norm": 7.418586254119873, "learning_rate": 1.0254268517381158e-05, "loss": 0.21450486779212952, "step": 4068 }, { "epoch": 0.4937507584031064, "grad_norm": 1.8046109676361084, "learning_rate": 1.0251811816730133e-05, "loss": 0.06981750577688217, "step": 4069 }, { "epoch": 0.4938721029001335, "grad_norm": 4.094432353973389, "learning_rate": 1.0249355116079107e-05, "loss": 0.2268960177898407, "step": 4070 }, { "epoch": 0.49399344739716056, "grad_norm": 2.455014228820801, "learning_rate": 1.0246898415428081e-05, "loss": 0.5886940360069275, "step": 4071 }, { "epoch": 0.4941147918941876, "grad_norm": 2.8781957626342773, "learning_rate": 1.0244441714777055e-05, "loss": 0.2283071130514145, "step": 4072 }, { "epoch": 0.49423613639121466, "grad_norm": 2.245194673538208, "learning_rate": 1.024198501412603e-05, "loss": 0.1047191396355629, "step": 4073 }, { "epoch": 0.4943574808882417, "grad_norm": 2.2549569606781006, "learning_rate": 1.0239528313475004e-05, "loss": 0.15678414702415466, "step": 4074 }, { "epoch": 0.49447882538526877, "grad_norm": 3.263427495956421, "learning_rate": 1.0237071612823978e-05, "loss": 0.11827545613050461, "step": 4075 }, { "epoch": 0.4946001698822958, "grad_norm": 3.4566428661346436, "learning_rate": 1.0234614912172952e-05, "loss": 0.5960505604743958, "step": 4076 }, { "epoch": 0.4947215143793229, "grad_norm": 2.172283172607422, "learning_rate": 1.0232158211521928e-05, "loss": 0.152704656124115, "step": 4077 }, { "epoch": 0.49484285887634993, "grad_norm": 2.2562057971954346, "learning_rate": 1.0229701510870903e-05, "loss": 0.23249760270118713, "step": 4078 }, { "epoch": 0.49496420337337704, "grad_norm": 2.9587838649749756, "learning_rate": 1.0227244810219877e-05, "loss": 0.15634280443191528, "step": 4079 }, { "epoch": 0.4950855478704041, "grad_norm": 4.509875774383545, "learning_rate": 1.0224788109568851e-05, "loss": 0.4428016245365143, "step": 4080 }, { "epoch": 0.49520689236743115, "grad_norm": 2.2916460037231445, "learning_rate": 1.0222331408917825e-05, "loss": 0.10448870062828064, "step": 4081 }, { "epoch": 0.4953282368644582, "grad_norm": 2.21024751663208, "learning_rate": 1.02198747082668e-05, "loss": 0.13139306008815765, "step": 4082 }, { "epoch": 0.49544958136148526, "grad_norm": 3.4574575424194336, "learning_rate": 1.0217418007615774e-05, "loss": 0.31435874104499817, "step": 4083 }, { "epoch": 0.4955709258585123, "grad_norm": 2.510141372680664, "learning_rate": 1.0214961306964748e-05, "loss": 0.19896230101585388, "step": 4084 }, { "epoch": 0.49569227035553937, "grad_norm": 4.263970851898193, "learning_rate": 1.0212504606313722e-05, "loss": 0.3680446147918701, "step": 4085 }, { "epoch": 0.4958136148525664, "grad_norm": 3.2049477100372314, "learning_rate": 1.0210047905662697e-05, "loss": 0.2639424800872803, "step": 4086 }, { "epoch": 0.4959349593495935, "grad_norm": 0.0013051891000941396, "learning_rate": 1.0207591205011671e-05, "loss": 2.018272061832249e-05, "step": 4087 }, { "epoch": 0.4960563038466206, "grad_norm": 3.0153448581695557, "learning_rate": 1.0205134504360645e-05, "loss": 0.34443244338035583, "step": 4088 }, { "epoch": 0.49617764834364764, "grad_norm": 1.970654845237732, "learning_rate": 1.020267780370962e-05, "loss": 0.12577636539936066, "step": 4089 }, { "epoch": 0.4962989928406747, "grad_norm": 2.5999841690063477, "learning_rate": 1.0200221103058594e-05, "loss": 0.5478004217147827, "step": 4090 }, { "epoch": 0.49642033733770174, "grad_norm": 2.0796852111816406, "learning_rate": 1.0197764402407568e-05, "loss": 0.22136391699314117, "step": 4091 }, { "epoch": 0.4965416818347288, "grad_norm": 3.5338828563690186, "learning_rate": 1.0195307701756542e-05, "loss": 0.34463879466056824, "step": 4092 }, { "epoch": 0.49666302633175585, "grad_norm": 2.144592761993408, "learning_rate": 1.0192851001105516e-05, "loss": 0.08712837845087051, "step": 4093 }, { "epoch": 0.4967843708287829, "grad_norm": 3.4841408729553223, "learning_rate": 1.019039430045449e-05, "loss": 0.4265940189361572, "step": 4094 }, { "epoch": 0.49690571532580996, "grad_norm": 1.3534290790557861, "learning_rate": 1.0187937599803465e-05, "loss": 0.05520536005496979, "step": 4095 }, { "epoch": 0.497027059822837, "grad_norm": 2.2348973751068115, "learning_rate": 1.0185480899152441e-05, "loss": 0.05542955920100212, "step": 4096 }, { "epoch": 0.49714840431986407, "grad_norm": 4.560401439666748, "learning_rate": 1.0183024198501415e-05, "loss": 0.6386822462081909, "step": 4097 }, { "epoch": 0.4972697488168912, "grad_norm": 1.7751681804656982, "learning_rate": 1.018056749785039e-05, "loss": 0.39322564005851746, "step": 4098 }, { "epoch": 0.49739109331391823, "grad_norm": 1.1833059787750244, "learning_rate": 1.0178110797199364e-05, "loss": 0.052323970943689346, "step": 4099 }, { "epoch": 0.4975124378109453, "grad_norm": 3.6777946949005127, "learning_rate": 1.0175654096548338e-05, "loss": 0.24734443426132202, "step": 4100 }, { "epoch": 0.49763378230797234, "grad_norm": 2.0843570232391357, "learning_rate": 1.0173197395897312e-05, "loss": 0.37874841690063477, "step": 4101 }, { "epoch": 0.4977551268049994, "grad_norm": 4.529017448425293, "learning_rate": 1.0170740695246286e-05, "loss": 0.3926376700401306, "step": 4102 }, { "epoch": 0.49787647130202645, "grad_norm": 1.7081705331802368, "learning_rate": 1.016828399459526e-05, "loss": 0.06437855213880539, "step": 4103 }, { "epoch": 0.4979978157990535, "grad_norm": 1.9430485963821411, "learning_rate": 1.0165827293944233e-05, "loss": 0.5112993121147156, "step": 4104 }, { "epoch": 0.49811916029608055, "grad_norm": 4.296749591827393, "learning_rate": 1.0163370593293208e-05, "loss": 0.4143819808959961, "step": 4105 }, { "epoch": 0.4982405047931076, "grad_norm": 2.0550286769866943, "learning_rate": 1.0160913892642182e-05, "loss": 0.28097832202911377, "step": 4106 }, { "epoch": 0.4983618492901347, "grad_norm": 2.422853946685791, "learning_rate": 1.0158457191991156e-05, "loss": 0.0879950225353241, "step": 4107 }, { "epoch": 0.49848319378716177, "grad_norm": 2.408190965652466, "learning_rate": 1.015600049134013e-05, "loss": 0.7080591917037964, "step": 4108 }, { "epoch": 0.4986045382841888, "grad_norm": 3.7219440937042236, "learning_rate": 1.0153543790689105e-05, "loss": 0.731609582901001, "step": 4109 }, { "epoch": 0.4987258827812159, "grad_norm": 4.751108169555664, "learning_rate": 1.0151087090038079e-05, "loss": 0.36133718490600586, "step": 4110 }, { "epoch": 0.49884722727824293, "grad_norm": 2.3252601623535156, "learning_rate": 1.0148630389387053e-05, "loss": 0.4636101722717285, "step": 4111 }, { "epoch": 0.49896857177527, "grad_norm": 3.084789276123047, "learning_rate": 1.0146173688736027e-05, "loss": 0.5244260430335999, "step": 4112 }, { "epoch": 0.49908991627229704, "grad_norm": 4.178112983703613, "learning_rate": 1.0143716988085002e-05, "loss": 0.3125312328338623, "step": 4113 }, { "epoch": 0.4992112607693241, "grad_norm": 2.446272134780884, "learning_rate": 1.0141260287433976e-05, "loss": 0.2685425877571106, "step": 4114 }, { "epoch": 0.49933260526635115, "grad_norm": 2.1826047897338867, "learning_rate": 1.013880358678295e-05, "loss": 0.04579189047217369, "step": 4115 }, { "epoch": 0.49945394976337826, "grad_norm": 1.8496407270431519, "learning_rate": 1.0136346886131924e-05, "loss": 0.44226139783859253, "step": 4116 }, { "epoch": 0.4995752942604053, "grad_norm": 2.2133660316467285, "learning_rate": 1.0133890185480899e-05, "loss": 0.24514491856098175, "step": 4117 }, { "epoch": 0.49969663875743237, "grad_norm": 2.562633514404297, "learning_rate": 1.0131433484829873e-05, "loss": 0.09214141964912415, "step": 4118 }, { "epoch": 0.4998179832544594, "grad_norm": 2.914689302444458, "learning_rate": 1.0128976784178847e-05, "loss": 0.21326091885566711, "step": 4119 }, { "epoch": 0.4999393277514865, "grad_norm": 2.9168035984039307, "learning_rate": 1.0126520083527821e-05, "loss": 0.1721213310956955, "step": 4120 }, { "epoch": 0.5000606722485136, "grad_norm": 1.4866218566894531, "learning_rate": 1.0124063382876796e-05, "loss": 0.10295268148183823, "step": 4121 }, { "epoch": 0.5001820167455406, "grad_norm": 2.614938497543335, "learning_rate": 1.0121606682225772e-05, "loss": 0.2343529611825943, "step": 4122 }, { "epoch": 0.5003033612425677, "grad_norm": 4.006135940551758, "learning_rate": 1.0119149981574746e-05, "loss": 0.35155296325683594, "step": 4123 }, { "epoch": 0.5004247057395947, "grad_norm": 2.4474079608917236, "learning_rate": 1.011669328092372e-05, "loss": 0.1790582537651062, "step": 4124 }, { "epoch": 0.5005460502366218, "grad_norm": 3.1768453121185303, "learning_rate": 1.0114236580272694e-05, "loss": 0.5567673444747925, "step": 4125 }, { "epoch": 0.5006673947336489, "grad_norm": 2.021885871887207, "learning_rate": 1.0111779879621669e-05, "loss": 0.22571390867233276, "step": 4126 }, { "epoch": 0.5007887392306759, "grad_norm": 2.4576969146728516, "learning_rate": 1.0109323178970643e-05, "loss": 0.2387232631444931, "step": 4127 }, { "epoch": 0.500910083727703, "grad_norm": 1.508373737335205, "learning_rate": 1.0106866478319617e-05, "loss": 0.03977758064866066, "step": 4128 }, { "epoch": 0.50103142822473, "grad_norm": 3.192394971847534, "learning_rate": 1.0104409777668591e-05, "loss": 0.748091995716095, "step": 4129 }, { "epoch": 0.5011527727217571, "grad_norm": 2.3539927005767822, "learning_rate": 1.0101953077017566e-05, "loss": 0.5486544370651245, "step": 4130 }, { "epoch": 0.5012741172187841, "grad_norm": 3.171369791030884, "learning_rate": 1.009949637636654e-05, "loss": 0.4201990067958832, "step": 4131 }, { "epoch": 0.5013954617158112, "grad_norm": 3.3497774600982666, "learning_rate": 1.0097039675715514e-05, "loss": 0.5252123475074768, "step": 4132 }, { "epoch": 0.5015168062128382, "grad_norm": 2.970364809036255, "learning_rate": 1.0094582975064488e-05, "loss": 0.7375112175941467, "step": 4133 }, { "epoch": 0.5016381507098653, "grad_norm": 2.1404271125793457, "learning_rate": 1.0092126274413463e-05, "loss": 0.16470035910606384, "step": 4134 }, { "epoch": 0.5017594952068923, "grad_norm": 4.011536598205566, "learning_rate": 1.0089669573762437e-05, "loss": 0.23924729228019714, "step": 4135 }, { "epoch": 0.5018808397039194, "grad_norm": 2.813760280609131, "learning_rate": 1.0087212873111411e-05, "loss": 0.2731741666793823, "step": 4136 }, { "epoch": 0.5020021842009464, "grad_norm": 3.419562339782715, "learning_rate": 1.0084756172460386e-05, "loss": 0.27091479301452637, "step": 4137 }, { "epoch": 0.5021235286979735, "grad_norm": 5.356061935424805, "learning_rate": 1.008229947180936e-05, "loss": 0.2529284954071045, "step": 4138 }, { "epoch": 0.5022448731950006, "grad_norm": 2.764036178588867, "learning_rate": 1.0079842771158334e-05, "loss": 0.08217710256576538, "step": 4139 }, { "epoch": 0.5023662176920277, "grad_norm": 1.5529385805130005, "learning_rate": 1.0077386070507308e-05, "loss": 0.14652186632156372, "step": 4140 }, { "epoch": 0.5024875621890548, "grad_norm": 2.565126895904541, "learning_rate": 1.0074929369856283e-05, "loss": 0.22693905234336853, "step": 4141 }, { "epoch": 0.5026089066860818, "grad_norm": 1.599971055984497, "learning_rate": 1.0072472669205259e-05, "loss": 0.13558928668498993, "step": 4142 }, { "epoch": 0.5027302511831089, "grad_norm": 3.5298283100128174, "learning_rate": 1.0070015968554233e-05, "loss": 0.3867851495742798, "step": 4143 }, { "epoch": 0.5028515956801359, "grad_norm": 1.9586138725280762, "learning_rate": 1.0067559267903207e-05, "loss": 0.15402142703533173, "step": 4144 }, { "epoch": 0.502972940177163, "grad_norm": 2.8137459754943848, "learning_rate": 1.0065102567252181e-05, "loss": 0.24119551479816437, "step": 4145 }, { "epoch": 0.50309428467419, "grad_norm": 2.008090019226074, "learning_rate": 1.0062645866601156e-05, "loss": 0.10239668190479279, "step": 4146 }, { "epoch": 0.5032156291712171, "grad_norm": 2.0211710929870605, "learning_rate": 1.006018916595013e-05, "loss": 0.22735410928726196, "step": 4147 }, { "epoch": 0.5033369736682441, "grad_norm": 1.2321698665618896, "learning_rate": 1.0057732465299104e-05, "loss": 0.0534224808216095, "step": 4148 }, { "epoch": 0.5034583181652712, "grad_norm": 1.8673452138900757, "learning_rate": 1.0055275764648078e-05, "loss": 0.10090601444244385, "step": 4149 }, { "epoch": 0.5035796626622983, "grad_norm": 2.7299022674560547, "learning_rate": 1.0052819063997053e-05, "loss": 0.30227774381637573, "step": 4150 }, { "epoch": 0.5037010071593253, "grad_norm": 3.299987316131592, "learning_rate": 1.0050362363346027e-05, "loss": 0.37869489192962646, "step": 4151 }, { "epoch": 0.5038223516563524, "grad_norm": 3.1947779655456543, "learning_rate": 1.0047905662695001e-05, "loss": 0.28902101516723633, "step": 4152 }, { "epoch": 0.5039436961533794, "grad_norm": 2.723618268966675, "learning_rate": 1.0045448962043975e-05, "loss": 0.3364158868789673, "step": 4153 }, { "epoch": 0.5040650406504065, "grad_norm": 2.876434087753296, "learning_rate": 1.004299226139295e-05, "loss": 0.21820318698883057, "step": 4154 }, { "epoch": 0.5041863851474335, "grad_norm": 2.373448133468628, "learning_rate": 1.0040535560741924e-05, "loss": 0.20106640458106995, "step": 4155 }, { "epoch": 0.5043077296444606, "grad_norm": 3.047008514404297, "learning_rate": 1.0038078860090898e-05, "loss": 0.4500592052936554, "step": 4156 }, { "epoch": 0.5044290741414876, "grad_norm": 3.0404393672943115, "learning_rate": 1.0035622159439872e-05, "loss": 0.38422954082489014, "step": 4157 }, { "epoch": 0.5045504186385148, "grad_norm": 2.329564094543457, "learning_rate": 1.0033165458788847e-05, "loss": 0.06831081211566925, "step": 4158 }, { "epoch": 0.5046717631355419, "grad_norm": 2.9787800312042236, "learning_rate": 1.0030708758137821e-05, "loss": 0.34328189492225647, "step": 4159 }, { "epoch": 0.5047931076325689, "grad_norm": 2.1107780933380127, "learning_rate": 1.0028252057486795e-05, "loss": 0.5161923766136169, "step": 4160 }, { "epoch": 0.504914452129596, "grad_norm": 3.553253650665283, "learning_rate": 1.002579535683577e-05, "loss": 0.3942931890487671, "step": 4161 }, { "epoch": 0.505035796626623, "grad_norm": 4.448299407958984, "learning_rate": 1.0023338656184745e-05, "loss": 0.13006842136383057, "step": 4162 }, { "epoch": 0.5051571411236501, "grad_norm": 2.6870155334472656, "learning_rate": 1.002088195553372e-05, "loss": 0.22197997570037842, "step": 4163 }, { "epoch": 0.5052784856206771, "grad_norm": 2.8157153129577637, "learning_rate": 1.0018425254882694e-05, "loss": 0.2694483995437622, "step": 4164 }, { "epoch": 0.5053998301177042, "grad_norm": 2.664857864379883, "learning_rate": 1.0015968554231668e-05, "loss": 0.2562234401702881, "step": 4165 }, { "epoch": 0.5055211746147312, "grad_norm": 3.342111110687256, "learning_rate": 1.0013511853580642e-05, "loss": 0.44710221886634827, "step": 4166 }, { "epoch": 0.5056425191117583, "grad_norm": 1.6096012592315674, "learning_rate": 1.0011055152929617e-05, "loss": 0.12671571969985962, "step": 4167 }, { "epoch": 0.5057638636087853, "grad_norm": 2.7451324462890625, "learning_rate": 1.0008598452278591e-05, "loss": 0.27547794580459595, "step": 4168 }, { "epoch": 0.5058852081058124, "grad_norm": 2.3084189891815186, "learning_rate": 1.0006141751627565e-05, "loss": 0.2316734492778778, "step": 4169 }, { "epoch": 0.5060065526028394, "grad_norm": 2.081693410873413, "learning_rate": 1.000368505097654e-05, "loss": 0.16185830533504486, "step": 4170 }, { "epoch": 0.5061278970998665, "grad_norm": 2.6320300102233887, "learning_rate": 1.0001228350325514e-05, "loss": 0.19916628301143646, "step": 4171 }, { "epoch": 0.5062492415968936, "grad_norm": 2.3653316497802734, "learning_rate": 9.998771649674488e-06, "loss": 0.18337592482566833, "step": 4172 }, { "epoch": 0.5063705860939206, "grad_norm": 1.5738928318023682, "learning_rate": 9.996314949023462e-06, "loss": 0.06333501636981964, "step": 4173 }, { "epoch": 0.5064919305909477, "grad_norm": 3.4989211559295654, "learning_rate": 9.993858248372436e-06, "loss": 0.47226548194885254, "step": 4174 }, { "epoch": 0.5066132750879747, "grad_norm": 2.346409320831299, "learning_rate": 9.99140154772141e-06, "loss": 0.45269617438316345, "step": 4175 }, { "epoch": 0.5067346195850018, "grad_norm": 2.2800467014312744, "learning_rate": 9.988944847070385e-06, "loss": 0.2113410234451294, "step": 4176 }, { "epoch": 0.5068559640820289, "grad_norm": 2.343733549118042, "learning_rate": 9.98648814641936e-06, "loss": 0.22176428139209747, "step": 4177 }, { "epoch": 0.506977308579056, "grad_norm": 1.4506417512893677, "learning_rate": 9.984031445768333e-06, "loss": 0.1998836249113083, "step": 4178 }, { "epoch": 0.507098653076083, "grad_norm": 1.370615839958191, "learning_rate": 9.981574745117308e-06, "loss": 0.0651252418756485, "step": 4179 }, { "epoch": 0.5072199975731101, "grad_norm": 2.417644500732422, "learning_rate": 9.979118044466282e-06, "loss": 0.1573028713464737, "step": 4180 }, { "epoch": 0.5073413420701371, "grad_norm": 2.660836696624756, "learning_rate": 9.976661343815256e-06, "loss": 0.6236342787742615, "step": 4181 }, { "epoch": 0.5074626865671642, "grad_norm": 4.117074966430664, "learning_rate": 9.974204643164232e-06, "loss": 0.3315301835536957, "step": 4182 }, { "epoch": 0.5075840310641913, "grad_norm": 1.6318374872207642, "learning_rate": 9.971747942513206e-06, "loss": 0.08524307608604431, "step": 4183 }, { "epoch": 0.5077053755612183, "grad_norm": 3.2868754863739014, "learning_rate": 9.96929124186218e-06, "loss": 0.17342756688594818, "step": 4184 }, { "epoch": 0.5078267200582454, "grad_norm": 2.7353427410125732, "learning_rate": 9.966834541211155e-06, "loss": 0.3902250826358795, "step": 4185 }, { "epoch": 0.5079480645552724, "grad_norm": 0.9674828052520752, "learning_rate": 9.96437784056013e-06, "loss": 0.0245108213275671, "step": 4186 }, { "epoch": 0.5080694090522995, "grad_norm": 2.5527987480163574, "learning_rate": 9.961921139909104e-06, "loss": 0.14701351523399353, "step": 4187 }, { "epoch": 0.5081907535493265, "grad_norm": 2.8111016750335693, "learning_rate": 9.959464439258078e-06, "loss": 0.7971981763839722, "step": 4188 }, { "epoch": 0.5083120980463536, "grad_norm": 2.606820583343506, "learning_rate": 9.957007738607052e-06, "loss": 0.39198192954063416, "step": 4189 }, { "epoch": 0.5084334425433806, "grad_norm": 1.9609767198562622, "learning_rate": 9.954551037956026e-06, "loss": 0.11993835866451263, "step": 4190 }, { "epoch": 0.5085547870404077, "grad_norm": 2.2802999019622803, "learning_rate": 9.952094337305e-06, "loss": 0.07411612570285797, "step": 4191 }, { "epoch": 0.5086761315374347, "grad_norm": 2.68603253364563, "learning_rate": 9.949637636653975e-06, "loss": 0.25573959946632385, "step": 4192 }, { "epoch": 0.5087974760344618, "grad_norm": 4.1121296882629395, "learning_rate": 9.947180936002949e-06, "loss": 0.5706912279129028, "step": 4193 }, { "epoch": 0.5089188205314888, "grad_norm": 4.367928981781006, "learning_rate": 9.944724235351923e-06, "loss": 0.3496420979499817, "step": 4194 }, { "epoch": 0.509040165028516, "grad_norm": 2.5021324157714844, "learning_rate": 9.942267534700898e-06, "loss": 0.37761035561561584, "step": 4195 }, { "epoch": 0.5091615095255431, "grad_norm": 2.2599778175354004, "learning_rate": 9.939810834049872e-06, "loss": 0.23744657635688782, "step": 4196 }, { "epoch": 0.5092828540225701, "grad_norm": 3.6730966567993164, "learning_rate": 9.937354133398846e-06, "loss": 0.5212063789367676, "step": 4197 }, { "epoch": 0.5094041985195972, "grad_norm": 1.9374099969863892, "learning_rate": 9.93489743274782e-06, "loss": 0.10793831944465637, "step": 4198 }, { "epoch": 0.5095255430166242, "grad_norm": 1.5751670598983765, "learning_rate": 9.932440732096795e-06, "loss": 0.2692280411720276, "step": 4199 }, { "epoch": 0.5096468875136513, "grad_norm": 3.1226181983947754, "learning_rate": 9.929984031445769e-06, "loss": 0.2043701857328415, "step": 4200 }, { "epoch": 0.5097682320106783, "grad_norm": 4.093362808227539, "learning_rate": 9.927527330794743e-06, "loss": 0.3448679447174072, "step": 4201 }, { "epoch": 0.5098895765077054, "grad_norm": 2.0513975620269775, "learning_rate": 9.925070630143719e-06, "loss": 0.1077132374048233, "step": 4202 }, { "epoch": 0.5100109210047324, "grad_norm": 2.1867973804473877, "learning_rate": 9.922613929492693e-06, "loss": 0.3904503881931305, "step": 4203 }, { "epoch": 0.5101322655017595, "grad_norm": 1.4725761413574219, "learning_rate": 9.920157228841668e-06, "loss": 0.24507243931293488, "step": 4204 }, { "epoch": 0.5102536099987866, "grad_norm": 2.098909378051758, "learning_rate": 9.917700528190642e-06, "loss": 0.5881178379058838, "step": 4205 }, { "epoch": 0.5103749544958136, "grad_norm": 2.224513292312622, "learning_rate": 9.915243827539614e-06, "loss": 0.3186401128768921, "step": 4206 }, { "epoch": 0.5104962989928407, "grad_norm": 2.4195799827575684, "learning_rate": 9.912787126888589e-06, "loss": 0.4775788187980652, "step": 4207 }, { "epoch": 0.5106176434898677, "grad_norm": 4.721523284912109, "learning_rate": 9.910330426237563e-06, "loss": 0.22731582820415497, "step": 4208 }, { "epoch": 0.5107389879868948, "grad_norm": 2.256333351135254, "learning_rate": 9.907873725586537e-06, "loss": 0.21163173019886017, "step": 4209 }, { "epoch": 0.5108603324839218, "grad_norm": 1.7789000272750854, "learning_rate": 9.905417024935511e-06, "loss": 0.1547716110944748, "step": 4210 }, { "epoch": 0.5109816769809489, "grad_norm": 1.0651543140411377, "learning_rate": 9.902960324284486e-06, "loss": 0.02926081232726574, "step": 4211 }, { "epoch": 0.5111030214779759, "grad_norm": 2.000568151473999, "learning_rate": 9.90050362363346e-06, "loss": 0.11307431757450104, "step": 4212 }, { "epoch": 0.511224365975003, "grad_norm": 1.7287936210632324, "learning_rate": 9.898046922982434e-06, "loss": 0.07569710165262222, "step": 4213 }, { "epoch": 0.5113457104720301, "grad_norm": 3.9824037551879883, "learning_rate": 9.895590222331408e-06, "loss": 0.428244411945343, "step": 4214 }, { "epoch": 0.5114670549690572, "grad_norm": 2.6710071563720703, "learning_rate": 9.893133521680384e-06, "loss": 0.39466023445129395, "step": 4215 }, { "epoch": 0.5115883994660843, "grad_norm": 3.226755380630493, "learning_rate": 9.890676821029359e-06, "loss": 0.6697176098823547, "step": 4216 }, { "epoch": 0.5117097439631113, "grad_norm": 3.4830660820007324, "learning_rate": 9.888220120378333e-06, "loss": 0.3048277497291565, "step": 4217 }, { "epoch": 0.5118310884601384, "grad_norm": 3.67615008354187, "learning_rate": 9.885763419727307e-06, "loss": 0.20277659595012665, "step": 4218 }, { "epoch": 0.5119524329571654, "grad_norm": 3.556042194366455, "learning_rate": 9.883306719076281e-06, "loss": 0.13591155409812927, "step": 4219 }, { "epoch": 0.5120737774541925, "grad_norm": 5.140079975128174, "learning_rate": 9.880850018425256e-06, "loss": 0.2648215889930725, "step": 4220 }, { "epoch": 0.5121951219512195, "grad_norm": 2.4776129722595215, "learning_rate": 9.87839331777423e-06, "loss": 0.15774701535701752, "step": 4221 }, { "epoch": 0.5123164664482466, "grad_norm": 1.7563296556472778, "learning_rate": 9.875936617123204e-06, "loss": 0.38825228810310364, "step": 4222 }, { "epoch": 0.5124378109452736, "grad_norm": 3.5801842212677, "learning_rate": 9.873479916472179e-06, "loss": 0.4334130883216858, "step": 4223 }, { "epoch": 0.5125591554423007, "grad_norm": 2.9012908935546875, "learning_rate": 9.871023215821153e-06, "loss": 0.5523781776428223, "step": 4224 }, { "epoch": 0.5126804999393277, "grad_norm": 2.4513423442840576, "learning_rate": 9.868566515170127e-06, "loss": 0.24379047751426697, "step": 4225 }, { "epoch": 0.5128018444363548, "grad_norm": 2.5361316204071045, "learning_rate": 9.866109814519101e-06, "loss": 0.19307921826839447, "step": 4226 }, { "epoch": 0.5129231889333818, "grad_norm": 1.9908452033996582, "learning_rate": 9.863653113868076e-06, "loss": 0.15932908654212952, "step": 4227 }, { "epoch": 0.5130445334304089, "grad_norm": 2.6058144569396973, "learning_rate": 9.86119641321705e-06, "loss": 0.2265259474515915, "step": 4228 }, { "epoch": 0.513165877927436, "grad_norm": 2.817314624786377, "learning_rate": 9.858739712566024e-06, "loss": 0.21313747763633728, "step": 4229 }, { "epoch": 0.513287222424463, "grad_norm": 2.4328365325927734, "learning_rate": 9.856283011914998e-06, "loss": 0.5157303810119629, "step": 4230 }, { "epoch": 0.5134085669214901, "grad_norm": 2.163607358932495, "learning_rate": 9.853826311263973e-06, "loss": 0.37992095947265625, "step": 4231 }, { "epoch": 0.5135299114185171, "grad_norm": 2.0889792442321777, "learning_rate": 9.851369610612947e-06, "loss": 0.4817541539669037, "step": 4232 }, { "epoch": 0.5136512559155443, "grad_norm": 1.5885951519012451, "learning_rate": 9.848912909961921e-06, "loss": 0.18071448802947998, "step": 4233 }, { "epoch": 0.5137726004125713, "grad_norm": 2.8798556327819824, "learning_rate": 9.846456209310895e-06, "loss": 0.19292524456977844, "step": 4234 }, { "epoch": 0.5138939449095984, "grad_norm": 2.3206210136413574, "learning_rate": 9.843999508659871e-06, "loss": 0.1239616870880127, "step": 4235 }, { "epoch": 0.5140152894066254, "grad_norm": 1.886003851890564, "learning_rate": 9.841542808008846e-06, "loss": 0.09364642202854156, "step": 4236 }, { "epoch": 0.5141366339036525, "grad_norm": 2.5909759998321533, "learning_rate": 9.83908610735782e-06, "loss": 0.1589403599500656, "step": 4237 }, { "epoch": 0.5142579784006795, "grad_norm": 2.7184417247772217, "learning_rate": 9.836629406706794e-06, "loss": 0.18991923332214355, "step": 4238 }, { "epoch": 0.5143793228977066, "grad_norm": 2.65834903717041, "learning_rate": 9.834172706055768e-06, "loss": 0.21206572651863098, "step": 4239 }, { "epoch": 0.5145006673947337, "grad_norm": 1.6865746974945068, "learning_rate": 9.831716005404743e-06, "loss": 0.049991779029369354, "step": 4240 }, { "epoch": 0.5146220118917607, "grad_norm": 1.896868348121643, "learning_rate": 9.829259304753717e-06, "loss": 0.12384995073080063, "step": 4241 }, { "epoch": 0.5147433563887878, "grad_norm": 2.037418842315674, "learning_rate": 9.826802604102691e-06, "loss": 0.20679543912410736, "step": 4242 }, { "epoch": 0.5148647008858148, "grad_norm": 2.9814274311065674, "learning_rate": 9.824345903451665e-06, "loss": 0.32943928241729736, "step": 4243 }, { "epoch": 0.5149860453828419, "grad_norm": 1.3780646324157715, "learning_rate": 9.82188920280064e-06, "loss": 0.05411846563220024, "step": 4244 }, { "epoch": 0.5151073898798689, "grad_norm": 1.7106112241744995, "learning_rate": 9.819432502149614e-06, "loss": 0.16935984790325165, "step": 4245 }, { "epoch": 0.515228734376896, "grad_norm": 2.7039034366607666, "learning_rate": 9.816975801498588e-06, "loss": 0.214540496468544, "step": 4246 }, { "epoch": 0.515350078873923, "grad_norm": 3.957542896270752, "learning_rate": 9.814519100847562e-06, "loss": 0.31250113248825073, "step": 4247 }, { "epoch": 0.5154714233709501, "grad_norm": 2.3784372806549072, "learning_rate": 9.812062400196537e-06, "loss": 0.278774231672287, "step": 4248 }, { "epoch": 0.5155927678679771, "grad_norm": 0.0030853315256536007, "learning_rate": 9.809605699545511e-06, "loss": 4.5005966967437416e-05, "step": 4249 }, { "epoch": 0.5157141123650042, "grad_norm": 2.263526201248169, "learning_rate": 9.807148998894485e-06, "loss": 0.33649060130119324, "step": 4250 }, { "epoch": 0.5158354568620314, "grad_norm": 1.9608598947525024, "learning_rate": 9.80469229824346e-06, "loss": 0.28678780794143677, "step": 4251 }, { "epoch": 0.5159568013590584, "grad_norm": 3.994715690612793, "learning_rate": 9.802235597592434e-06, "loss": 0.7879469394683838, "step": 4252 }, { "epoch": 0.5160781458560855, "grad_norm": 3.5545449256896973, "learning_rate": 9.799778896941408e-06, "loss": 0.39630916714668274, "step": 4253 }, { "epoch": 0.5161994903531125, "grad_norm": 3.012026786804199, "learning_rate": 9.797322196290384e-06, "loss": 0.5700284838676453, "step": 4254 }, { "epoch": 0.5163208348501396, "grad_norm": 2.7505388259887695, "learning_rate": 9.794865495639358e-06, "loss": 0.22359231114387512, "step": 4255 }, { "epoch": 0.5164421793471666, "grad_norm": 2.802236557006836, "learning_rate": 9.792408794988332e-06, "loss": 0.2393806278705597, "step": 4256 }, { "epoch": 0.5165635238441937, "grad_norm": 3.74605655670166, "learning_rate": 9.789952094337307e-06, "loss": 0.18059270083904266, "step": 4257 }, { "epoch": 0.5166848683412207, "grad_norm": 2.596965789794922, "learning_rate": 9.787495393686281e-06, "loss": 0.3215555250644684, "step": 4258 }, { "epoch": 0.5168062128382478, "grad_norm": 5.046637535095215, "learning_rate": 9.785038693035255e-06, "loss": 0.4464711844921112, "step": 4259 }, { "epoch": 0.5169275573352748, "grad_norm": 3.1214942932128906, "learning_rate": 9.78258199238423e-06, "loss": 0.4625573456287384, "step": 4260 }, { "epoch": 0.5170489018323019, "grad_norm": 2.0416691303253174, "learning_rate": 9.780125291733204e-06, "loss": 0.17834597826004028, "step": 4261 }, { "epoch": 0.517170246329329, "grad_norm": 2.4989962577819824, "learning_rate": 9.777668591082178e-06, "loss": 0.27045705914497375, "step": 4262 }, { "epoch": 0.517291590826356, "grad_norm": 1.2243449687957764, "learning_rate": 9.775211890431152e-06, "loss": 0.1048809215426445, "step": 4263 }, { "epoch": 0.5174129353233831, "grad_norm": 2.5428357124328613, "learning_rate": 9.772755189780126e-06, "loss": 0.15796953439712524, "step": 4264 }, { "epoch": 0.5175342798204101, "grad_norm": 2.9616146087646484, "learning_rate": 9.7702984891291e-06, "loss": 0.3846844434738159, "step": 4265 }, { "epoch": 0.5176556243174372, "grad_norm": 1.7791976928710938, "learning_rate": 9.767841788478075e-06, "loss": 0.03959834203124046, "step": 4266 }, { "epoch": 0.5177769688144642, "grad_norm": 4.364762783050537, "learning_rate": 9.76538508782705e-06, "loss": 0.6386626958847046, "step": 4267 }, { "epoch": 0.5178983133114913, "grad_norm": 5.407717704772949, "learning_rate": 9.762928387176024e-06, "loss": 0.40696191787719727, "step": 4268 }, { "epoch": 0.5180196578085183, "grad_norm": 3.397351026535034, "learning_rate": 9.760471686524998e-06, "loss": 0.18888014554977417, "step": 4269 }, { "epoch": 0.5181410023055455, "grad_norm": 2.804692029953003, "learning_rate": 9.758014985873972e-06, "loss": 0.5099548697471619, "step": 4270 }, { "epoch": 0.5182623468025725, "grad_norm": 0.03131352737545967, "learning_rate": 9.755558285222946e-06, "loss": 0.00019634910859167576, "step": 4271 }, { "epoch": 0.5183836912995996, "grad_norm": 3.4323160648345947, "learning_rate": 9.75310158457192e-06, "loss": 0.49300116300582886, "step": 4272 }, { "epoch": 0.5185050357966267, "grad_norm": 3.964902877807617, "learning_rate": 9.750644883920895e-06, "loss": 0.2962758541107178, "step": 4273 }, { "epoch": 0.5186263802936537, "grad_norm": 3.8886382579803467, "learning_rate": 9.748188183269869e-06, "loss": 0.14708931744098663, "step": 4274 }, { "epoch": 0.5187477247906808, "grad_norm": 2.3202738761901855, "learning_rate": 9.745731482618843e-06, "loss": 0.2117128074169159, "step": 4275 }, { "epoch": 0.5188690692877078, "grad_norm": 1.887408971786499, "learning_rate": 9.743274781967818e-06, "loss": 0.36363089084625244, "step": 4276 }, { "epoch": 0.5189904137847349, "grad_norm": 2.5797910690307617, "learning_rate": 9.740818081316792e-06, "loss": 0.11557850241661072, "step": 4277 }, { "epoch": 0.5191117582817619, "grad_norm": 1.6288282871246338, "learning_rate": 9.738361380665766e-06, "loss": 0.19051632285118103, "step": 4278 }, { "epoch": 0.519233102778789, "grad_norm": 3.4741971492767334, "learning_rate": 9.73590468001474e-06, "loss": 0.2764098346233368, "step": 4279 }, { "epoch": 0.519354447275816, "grad_norm": 3.539687395095825, "learning_rate": 9.733447979363715e-06, "loss": 0.23989051580429077, "step": 4280 }, { "epoch": 0.5194757917728431, "grad_norm": 4.374762535095215, "learning_rate": 9.730991278712689e-06, "loss": 0.3245375156402588, "step": 4281 }, { "epoch": 0.5195971362698701, "grad_norm": 2.440361976623535, "learning_rate": 9.728534578061663e-06, "loss": 0.21470434963703156, "step": 4282 }, { "epoch": 0.5197184807668972, "grad_norm": 2.289381742477417, "learning_rate": 9.726077877410637e-06, "loss": 0.08997684717178345, "step": 4283 }, { "epoch": 0.5198398252639242, "grad_norm": 3.192652463912964, "learning_rate": 9.723621176759612e-06, "loss": 0.4521543085575104, "step": 4284 }, { "epoch": 0.5199611697609513, "grad_norm": 2.989790678024292, "learning_rate": 9.721164476108586e-06, "loss": 0.1688225269317627, "step": 4285 }, { "epoch": 0.5200825142579784, "grad_norm": 3.2835798263549805, "learning_rate": 9.71870777545756e-06, "loss": 0.33371561765670776, "step": 4286 }, { "epoch": 0.5202038587550054, "grad_norm": 2.888363838195801, "learning_rate": 9.716251074806536e-06, "loss": 0.3828931152820587, "step": 4287 }, { "epoch": 0.5203252032520326, "grad_norm": 3.026040554046631, "learning_rate": 9.71379437415551e-06, "loss": 0.27714061737060547, "step": 4288 }, { "epoch": 0.5204465477490596, "grad_norm": 2.5769591331481934, "learning_rate": 9.711337673504485e-06, "loss": 0.12180380523204803, "step": 4289 }, { "epoch": 0.5205678922460867, "grad_norm": 1.3926194906234741, "learning_rate": 9.708880972853459e-06, "loss": 0.13883787393569946, "step": 4290 }, { "epoch": 0.5206892367431137, "grad_norm": 4.918461322784424, "learning_rate": 9.706424272202433e-06, "loss": 0.5623934268951416, "step": 4291 }, { "epoch": 0.5208105812401408, "grad_norm": 4.004831314086914, "learning_rate": 9.703967571551407e-06, "loss": 0.6053475737571716, "step": 4292 }, { "epoch": 0.5209319257371678, "grad_norm": 1.6555696725845337, "learning_rate": 9.701510870900382e-06, "loss": 0.04169592261314392, "step": 4293 }, { "epoch": 0.5210532702341949, "grad_norm": 1.952047348022461, "learning_rate": 9.699054170249356e-06, "loss": 0.25566381216049194, "step": 4294 }, { "epoch": 0.521174614731222, "grad_norm": 3.1777448654174805, "learning_rate": 9.69659746959833e-06, "loss": 0.3138297200202942, "step": 4295 }, { "epoch": 0.521295959228249, "grad_norm": 1.9265861511230469, "learning_rate": 9.694140768947304e-06, "loss": 0.204679936170578, "step": 4296 }, { "epoch": 0.5214173037252761, "grad_norm": 2.556940793991089, "learning_rate": 9.691684068296279e-06, "loss": 0.47050511837005615, "step": 4297 }, { "epoch": 0.5215386482223031, "grad_norm": 2.059161901473999, "learning_rate": 9.689227367645253e-06, "loss": 0.05421885475516319, "step": 4298 }, { "epoch": 0.5216599927193302, "grad_norm": 1.7835500240325928, "learning_rate": 9.686770666994227e-06, "loss": 0.35009118914604187, "step": 4299 }, { "epoch": 0.5217813372163572, "grad_norm": 2.996112585067749, "learning_rate": 9.684313966343201e-06, "loss": 0.3362860083580017, "step": 4300 }, { "epoch": 0.5219026817133843, "grad_norm": 2.280832290649414, "learning_rate": 9.681857265692176e-06, "loss": 0.4567345976829529, "step": 4301 }, { "epoch": 0.5220240262104113, "grad_norm": 2.7124814987182617, "learning_rate": 9.67940056504115e-06, "loss": 0.4585801959037781, "step": 4302 }, { "epoch": 0.5221453707074384, "grad_norm": 1.92635977268219, "learning_rate": 9.676943864390124e-06, "loss": 0.40402501821517944, "step": 4303 }, { "epoch": 0.5222667152044654, "grad_norm": 2.1645712852478027, "learning_rate": 9.674487163739098e-06, "loss": 0.10599954426288605, "step": 4304 }, { "epoch": 0.5223880597014925, "grad_norm": 2.7209508419036865, "learning_rate": 9.672030463088073e-06, "loss": 0.17675229907035828, "step": 4305 }, { "epoch": 0.5225094041985195, "grad_norm": 1.6960214376449585, "learning_rate": 9.669573762437047e-06, "loss": 0.12912413477897644, "step": 4306 }, { "epoch": 0.5226307486955467, "grad_norm": 3.264946460723877, "learning_rate": 9.667117061786023e-06, "loss": 0.5050619840621948, "step": 4307 }, { "epoch": 0.5227520931925738, "grad_norm": 2.822012424468994, "learning_rate": 9.664660361134997e-06, "loss": 0.3715551793575287, "step": 4308 }, { "epoch": 0.5228734376896008, "grad_norm": 2.3768510818481445, "learning_rate": 9.662203660483971e-06, "loss": 0.6279295682907104, "step": 4309 }, { "epoch": 0.5229947821866279, "grad_norm": 3.032176971435547, "learning_rate": 9.659746959832946e-06, "loss": 0.5859978199005127, "step": 4310 }, { "epoch": 0.5231161266836549, "grad_norm": 2.812337636947632, "learning_rate": 9.65729025918192e-06, "loss": 0.31946492195129395, "step": 4311 }, { "epoch": 0.523237471180682, "grad_norm": 0.7264633774757385, "learning_rate": 9.654833558530894e-06, "loss": 0.027655793353915215, "step": 4312 }, { "epoch": 0.523358815677709, "grad_norm": 3.169421672821045, "learning_rate": 9.652376857879869e-06, "loss": 0.7980327010154724, "step": 4313 }, { "epoch": 0.5234801601747361, "grad_norm": 1.768828272819519, "learning_rate": 9.649920157228843e-06, "loss": 0.032245345413684845, "step": 4314 }, { "epoch": 0.5236015046717631, "grad_norm": 3.845825433731079, "learning_rate": 9.647463456577817e-06, "loss": 0.29784584045410156, "step": 4315 }, { "epoch": 0.5237228491687902, "grad_norm": 2.5543646812438965, "learning_rate": 9.645006755926791e-06, "loss": 0.6837296485900879, "step": 4316 }, { "epoch": 0.5238441936658172, "grad_norm": 1.9198970794677734, "learning_rate": 9.642550055275766e-06, "loss": 0.3972179889678955, "step": 4317 }, { "epoch": 0.5239655381628443, "grad_norm": 0.12180625647306442, "learning_rate": 9.64009335462474e-06, "loss": 0.0015332770999521017, "step": 4318 }, { "epoch": 0.5240868826598714, "grad_norm": 2.028900384902954, "learning_rate": 9.637636653973714e-06, "loss": 0.09115578979253769, "step": 4319 }, { "epoch": 0.5242082271568984, "grad_norm": 2.7641420364379883, "learning_rate": 9.635179953322688e-06, "loss": 0.19612711668014526, "step": 4320 }, { "epoch": 0.5243295716539255, "grad_norm": 2.5495553016662598, "learning_rate": 9.632723252671663e-06, "loss": 0.32222750782966614, "step": 4321 }, { "epoch": 0.5244509161509525, "grad_norm": 1.5696449279785156, "learning_rate": 9.630266552020637e-06, "loss": 0.15393958985805511, "step": 4322 }, { "epoch": 0.5245722606479796, "grad_norm": 2.9041662216186523, "learning_rate": 9.627809851369611e-06, "loss": 0.24877168238162994, "step": 4323 }, { "epoch": 0.5246936051450066, "grad_norm": 1.3056710958480835, "learning_rate": 9.625353150718585e-06, "loss": 0.048518288880586624, "step": 4324 }, { "epoch": 0.5248149496420337, "grad_norm": 3.565513849258423, "learning_rate": 9.62289645006756e-06, "loss": 0.16031083464622498, "step": 4325 }, { "epoch": 0.5249362941390608, "grad_norm": 1.9159208536148071, "learning_rate": 9.620439749416534e-06, "loss": 0.10559926182031631, "step": 4326 }, { "epoch": 0.5250576386360879, "grad_norm": 5.760512351989746, "learning_rate": 9.61798304876551e-06, "loss": 0.6371904611587524, "step": 4327 }, { "epoch": 0.525178983133115, "grad_norm": 1.0325922966003418, "learning_rate": 9.615526348114484e-06, "loss": 0.03919842839241028, "step": 4328 }, { "epoch": 0.525300327630142, "grad_norm": 3.09704852104187, "learning_rate": 9.613069647463458e-06, "loss": 0.5220425128936768, "step": 4329 }, { "epoch": 0.5254216721271691, "grad_norm": 3.4370083808898926, "learning_rate": 9.610612946812433e-06, "loss": 0.6814584732055664, "step": 4330 }, { "epoch": 0.5255430166241961, "grad_norm": 2.6540093421936035, "learning_rate": 9.608156246161407e-06, "loss": 0.3233155608177185, "step": 4331 }, { "epoch": 0.5256643611212232, "grad_norm": 5.390744209289551, "learning_rate": 9.605699545510381e-06, "loss": 0.20254011452198029, "step": 4332 }, { "epoch": 0.5257857056182502, "grad_norm": 2.6013028621673584, "learning_rate": 9.603242844859355e-06, "loss": 0.25573745369911194, "step": 4333 }, { "epoch": 0.5259070501152773, "grad_norm": 3.3531880378723145, "learning_rate": 9.60078614420833e-06, "loss": 0.33559533953666687, "step": 4334 }, { "epoch": 0.5260283946123043, "grad_norm": 3.2431600093841553, "learning_rate": 9.598329443557304e-06, "loss": 0.1592082381248474, "step": 4335 }, { "epoch": 0.5261497391093314, "grad_norm": 1.8179652690887451, "learning_rate": 9.595872742906278e-06, "loss": 0.11027230322360992, "step": 4336 }, { "epoch": 0.5262710836063584, "grad_norm": 1.0300358533859253, "learning_rate": 9.593416042255252e-06, "loss": 0.014166835695505142, "step": 4337 }, { "epoch": 0.5263924281033855, "grad_norm": 2.4130702018737793, "learning_rate": 9.590959341604227e-06, "loss": 0.34523141384124756, "step": 4338 }, { "epoch": 0.5265137726004125, "grad_norm": 2.0706870555877686, "learning_rate": 9.588502640953201e-06, "loss": 0.08468206226825714, "step": 4339 }, { "epoch": 0.5266351170974396, "grad_norm": 2.360323190689087, "learning_rate": 9.586045940302175e-06, "loss": 0.03960234671831131, "step": 4340 }, { "epoch": 0.5267564615944667, "grad_norm": 2.7765705585479736, "learning_rate": 9.58358923965115e-06, "loss": 0.06560096144676208, "step": 4341 }, { "epoch": 0.5268778060914937, "grad_norm": 2.8075785636901855, "learning_rate": 9.581132539000124e-06, "loss": 0.13630546629428864, "step": 4342 }, { "epoch": 0.5269991505885208, "grad_norm": 2.0789291858673096, "learning_rate": 9.578675838349098e-06, "loss": 0.23830987513065338, "step": 4343 }, { "epoch": 0.5271204950855479, "grad_norm": 3.374202013015747, "learning_rate": 9.576219137698072e-06, "loss": 0.2440771758556366, "step": 4344 }, { "epoch": 0.527241839582575, "grad_norm": 3.7407407760620117, "learning_rate": 9.573762437047046e-06, "loss": 0.38453179597854614, "step": 4345 }, { "epoch": 0.527363184079602, "grad_norm": 3.284327745437622, "learning_rate": 9.57130573639602e-06, "loss": 0.4602471590042114, "step": 4346 }, { "epoch": 0.5274845285766291, "grad_norm": 2.6015002727508545, "learning_rate": 9.568849035744995e-06, "loss": 0.37477612495422363, "step": 4347 }, { "epoch": 0.5276058730736561, "grad_norm": 2.2609827518463135, "learning_rate": 9.56639233509397e-06, "loss": 0.4559963345527649, "step": 4348 }, { "epoch": 0.5277272175706832, "grad_norm": 2.531146287918091, "learning_rate": 9.563935634442944e-06, "loss": 0.05817968770861626, "step": 4349 }, { "epoch": 0.5278485620677102, "grad_norm": 2.047569990158081, "learning_rate": 9.561478933791918e-06, "loss": 0.11833447217941284, "step": 4350 }, { "epoch": 0.5279699065647373, "grad_norm": 1.1371315717697144, "learning_rate": 9.559022233140892e-06, "loss": 0.019302822649478912, "step": 4351 }, { "epoch": 0.5280912510617644, "grad_norm": 3.8380463123321533, "learning_rate": 9.556565532489866e-06, "loss": 0.3412572145462036, "step": 4352 }, { "epoch": 0.5282125955587914, "grad_norm": 2.6884429454803467, "learning_rate": 9.55410883183884e-06, "loss": 0.35121381282806396, "step": 4353 }, { "epoch": 0.5283339400558185, "grad_norm": 3.436293125152588, "learning_rate": 9.551652131187815e-06, "loss": 0.2901090383529663, "step": 4354 }, { "epoch": 0.5284552845528455, "grad_norm": 2.5959036350250244, "learning_rate": 9.549195430536789e-06, "loss": 0.46066898107528687, "step": 4355 }, { "epoch": 0.5285766290498726, "grad_norm": 3.0636565685272217, "learning_rate": 9.546738729885763e-06, "loss": 0.20460449159145355, "step": 4356 }, { "epoch": 0.5286979735468996, "grad_norm": 1.1614396572113037, "learning_rate": 9.544282029234738e-06, "loss": 0.020782146602869034, "step": 4357 }, { "epoch": 0.5288193180439267, "grad_norm": 2.305600881576538, "learning_rate": 9.541825328583712e-06, "loss": 0.18629641830921173, "step": 4358 }, { "epoch": 0.5289406625409537, "grad_norm": 3.2714695930480957, "learning_rate": 9.539368627932686e-06, "loss": 0.22634369134902954, "step": 4359 }, { "epoch": 0.5290620070379808, "grad_norm": 1.6404998302459717, "learning_rate": 9.536911927281662e-06, "loss": 0.3201233446598053, "step": 4360 }, { "epoch": 0.5291833515350078, "grad_norm": 1.8846904039382935, "learning_rate": 9.534455226630636e-06, "loss": 0.18915316462516785, "step": 4361 }, { "epoch": 0.5293046960320349, "grad_norm": 3.434107542037964, "learning_rate": 9.53199852597961e-06, "loss": 0.4823604226112366, "step": 4362 }, { "epoch": 0.5294260405290621, "grad_norm": 3.5646657943725586, "learning_rate": 9.529541825328585e-06, "loss": 0.21305254101753235, "step": 4363 }, { "epoch": 0.5295473850260891, "grad_norm": 0.12773068249225616, "learning_rate": 9.527085124677559e-06, "loss": 0.001985025592148304, "step": 4364 }, { "epoch": 0.5296687295231162, "grad_norm": 2.2520675659179688, "learning_rate": 9.524628424026533e-06, "loss": 0.11278132349252701, "step": 4365 }, { "epoch": 0.5297900740201432, "grad_norm": 2.376042127609253, "learning_rate": 9.522171723375508e-06, "loss": 0.1810624599456787, "step": 4366 }, { "epoch": 0.5299114185171703, "grad_norm": 3.7654926776885986, "learning_rate": 9.519715022724482e-06, "loss": 0.2707492709159851, "step": 4367 }, { "epoch": 0.5300327630141973, "grad_norm": 2.7538743019104004, "learning_rate": 9.517258322073456e-06, "loss": 0.37106794118881226, "step": 4368 }, { "epoch": 0.5301541075112244, "grad_norm": 4.071906566619873, "learning_rate": 9.51480162142243e-06, "loss": 0.8431082963943481, "step": 4369 }, { "epoch": 0.5302754520082514, "grad_norm": 1.596279263496399, "learning_rate": 9.512344920771405e-06, "loss": 0.029040779918432236, "step": 4370 }, { "epoch": 0.5303967965052785, "grad_norm": 2.6431918144226074, "learning_rate": 9.509888220120379e-06, "loss": 0.44360002875328064, "step": 4371 }, { "epoch": 0.5305181410023055, "grad_norm": 3.127549648284912, "learning_rate": 9.507431519469353e-06, "loss": 0.5893381834030151, "step": 4372 }, { "epoch": 0.5306394854993326, "grad_norm": 5.788398265838623, "learning_rate": 9.504974818818327e-06, "loss": 0.34285157918930054, "step": 4373 }, { "epoch": 0.5307608299963597, "grad_norm": 2.535609006881714, "learning_rate": 9.502518118167302e-06, "loss": 0.49192413687705994, "step": 4374 }, { "epoch": 0.5308821744933867, "grad_norm": 0.1654292345046997, "learning_rate": 9.500061417516276e-06, "loss": 0.0028806175105273724, "step": 4375 }, { "epoch": 0.5310035189904138, "grad_norm": 3.411996603012085, "learning_rate": 9.49760471686525e-06, "loss": 0.2368258833885193, "step": 4376 }, { "epoch": 0.5311248634874408, "grad_norm": 4.247682571411133, "learning_rate": 9.495148016214224e-06, "loss": 0.3538593649864197, "step": 4377 }, { "epoch": 0.5312462079844679, "grad_norm": 3.0858328342437744, "learning_rate": 9.492691315563199e-06, "loss": 0.44344794750213623, "step": 4378 }, { "epoch": 0.5313675524814949, "grad_norm": 3.902742862701416, "learning_rate": 9.490234614912175e-06, "loss": 0.3760649561882019, "step": 4379 }, { "epoch": 0.531488896978522, "grad_norm": 2.0287985801696777, "learning_rate": 9.487777914261149e-06, "loss": 0.2186664342880249, "step": 4380 }, { "epoch": 0.5316102414755491, "grad_norm": 2.9189248085021973, "learning_rate": 9.485321213610123e-06, "loss": 0.10675910115242004, "step": 4381 }, { "epoch": 0.5317315859725762, "grad_norm": 0.0024930962827056646, "learning_rate": 9.482864512959097e-06, "loss": 2.8372218366712332e-05, "step": 4382 }, { "epoch": 0.5318529304696032, "grad_norm": 2.709022283554077, "learning_rate": 9.480407812308072e-06, "loss": 0.15575909614562988, "step": 4383 }, { "epoch": 0.5319742749666303, "grad_norm": 3.228724241256714, "learning_rate": 9.477951111657046e-06, "loss": 0.2679954469203949, "step": 4384 }, { "epoch": 0.5320956194636574, "grad_norm": 2.18173885345459, "learning_rate": 9.47549441100602e-06, "loss": 0.22904042899608612, "step": 4385 }, { "epoch": 0.5322169639606844, "grad_norm": 4.54716157913208, "learning_rate": 9.473037710354994e-06, "loss": 0.19969333708286285, "step": 4386 }, { "epoch": 0.5323383084577115, "grad_norm": 2.694363594055176, "learning_rate": 9.470581009703969e-06, "loss": 0.4748269021511078, "step": 4387 }, { "epoch": 0.5324596529547385, "grad_norm": 2.9783759117126465, "learning_rate": 9.468124309052943e-06, "loss": 0.22778832912445068, "step": 4388 }, { "epoch": 0.5325809974517656, "grad_norm": 3.478226900100708, "learning_rate": 9.465667608401917e-06, "loss": 0.25991737842559814, "step": 4389 }, { "epoch": 0.5327023419487926, "grad_norm": 2.653726816177368, "learning_rate": 9.463210907750891e-06, "loss": 0.3041546940803528, "step": 4390 }, { "epoch": 0.5328236864458197, "grad_norm": 2.022184371948242, "learning_rate": 9.460754207099866e-06, "loss": 0.3324507176876068, "step": 4391 }, { "epoch": 0.5329450309428467, "grad_norm": 2.930952548980713, "learning_rate": 9.45829750644884e-06, "loss": 0.4074629843235016, "step": 4392 }, { "epoch": 0.5330663754398738, "grad_norm": 3.4578254222869873, "learning_rate": 9.455840805797814e-06, "loss": 0.6086788177490234, "step": 4393 }, { "epoch": 0.5331877199369008, "grad_norm": 4.706747055053711, "learning_rate": 9.453384105146789e-06, "loss": 0.17019282281398773, "step": 4394 }, { "epoch": 0.5333090644339279, "grad_norm": 4.258393287658691, "learning_rate": 9.450927404495763e-06, "loss": 0.31556954979896545, "step": 4395 }, { "epoch": 0.533430408930955, "grad_norm": 2.725116014480591, "learning_rate": 9.448470703844737e-06, "loss": 0.07249310612678528, "step": 4396 }, { "epoch": 0.533551753427982, "grad_norm": 1.6573102474212646, "learning_rate": 9.446014003193711e-06, "loss": 0.11331679672002792, "step": 4397 }, { "epoch": 0.533673097925009, "grad_norm": 0.0010044585214927793, "learning_rate": 9.443557302542686e-06, "loss": 2.74689809884876e-05, "step": 4398 }, { "epoch": 0.5337944424220361, "grad_norm": 2.479907751083374, "learning_rate": 9.441100601891661e-06, "loss": 0.1212753877043724, "step": 4399 }, { "epoch": 0.5339157869190633, "grad_norm": 2.909217119216919, "learning_rate": 9.438643901240636e-06, "loss": 0.1047779843211174, "step": 4400 }, { "epoch": 0.5340371314160903, "grad_norm": 3.9293336868286133, "learning_rate": 9.43618720058961e-06, "loss": 0.2681064009666443, "step": 4401 }, { "epoch": 0.5341584759131174, "grad_norm": 2.899937868118286, "learning_rate": 9.433730499938584e-06, "loss": 0.29302269220352173, "step": 4402 }, { "epoch": 0.5342798204101444, "grad_norm": 3.3628416061401367, "learning_rate": 9.431273799287559e-06, "loss": 0.16513022780418396, "step": 4403 }, { "epoch": 0.5344011649071715, "grad_norm": 3.774719476699829, "learning_rate": 9.428817098636533e-06, "loss": 0.550164520740509, "step": 4404 }, { "epoch": 0.5345225094041985, "grad_norm": 2.9464266300201416, "learning_rate": 9.426360397985507e-06, "loss": 0.2272089123725891, "step": 4405 }, { "epoch": 0.5346438539012256, "grad_norm": 2.589801549911499, "learning_rate": 9.423903697334481e-06, "loss": 0.35708191990852356, "step": 4406 }, { "epoch": 0.5347651983982527, "grad_norm": 1.8263518810272217, "learning_rate": 9.421446996683456e-06, "loss": 0.1987764686346054, "step": 4407 }, { "epoch": 0.5348865428952797, "grad_norm": 2.7768590450286865, "learning_rate": 9.41899029603243e-06, "loss": 0.47926926612854004, "step": 4408 }, { "epoch": 0.5350078873923068, "grad_norm": 2.7337265014648438, "learning_rate": 9.416533595381402e-06, "loss": 0.14657065272331238, "step": 4409 }, { "epoch": 0.5351292318893338, "grad_norm": 0.5382618308067322, "learning_rate": 9.414076894730377e-06, "loss": 0.020921194925904274, "step": 4410 }, { "epoch": 0.5352505763863609, "grad_norm": 4.957176685333252, "learning_rate": 9.411620194079351e-06, "loss": 0.2991045415401459, "step": 4411 }, { "epoch": 0.5353719208833879, "grad_norm": 2.611210823059082, "learning_rate": 9.409163493428327e-06, "loss": 0.16011947393417358, "step": 4412 }, { "epoch": 0.535493265380415, "grad_norm": 3.07895827293396, "learning_rate": 9.406706792777301e-06, "loss": 0.16234822571277618, "step": 4413 }, { "epoch": 0.535614609877442, "grad_norm": 3.446093797683716, "learning_rate": 9.404250092126275e-06, "loss": 0.35477128624916077, "step": 4414 }, { "epoch": 0.5357359543744691, "grad_norm": 3.2842867374420166, "learning_rate": 9.40179339147525e-06, "loss": 0.33142784237861633, "step": 4415 }, { "epoch": 0.5358572988714961, "grad_norm": 3.6377499103546143, "learning_rate": 9.399336690824224e-06, "loss": 0.2857425808906555, "step": 4416 }, { "epoch": 0.5359786433685232, "grad_norm": 0.6414675116539001, "learning_rate": 9.396879990173198e-06, "loss": 0.010555651970207691, "step": 4417 }, { "epoch": 0.5360999878655502, "grad_norm": 3.3899612426757812, "learning_rate": 9.394423289522172e-06, "loss": 0.5796612501144409, "step": 4418 }, { "epoch": 0.5362213323625774, "grad_norm": 1.855065107345581, "learning_rate": 9.391966588871147e-06, "loss": 0.3373965919017792, "step": 4419 }, { "epoch": 0.5363426768596045, "grad_norm": 2.497534990310669, "learning_rate": 9.389509888220121e-06, "loss": 0.1768612563610077, "step": 4420 }, { "epoch": 0.5364640213566315, "grad_norm": 3.088486433029175, "learning_rate": 9.387053187569095e-06, "loss": 0.22879958152770996, "step": 4421 }, { "epoch": 0.5365853658536586, "grad_norm": 2.7292346954345703, "learning_rate": 9.38459648691807e-06, "loss": 0.28809696435928345, "step": 4422 }, { "epoch": 0.5367067103506856, "grad_norm": 2.9703803062438965, "learning_rate": 9.382139786267044e-06, "loss": 0.34471648931503296, "step": 4423 }, { "epoch": 0.5368280548477127, "grad_norm": 4.7264509201049805, "learning_rate": 9.379683085616018e-06, "loss": 0.3024921417236328, "step": 4424 }, { "epoch": 0.5369493993447397, "grad_norm": 2.1945624351501465, "learning_rate": 9.377226384964992e-06, "loss": 0.09563179314136505, "step": 4425 }, { "epoch": 0.5370707438417668, "grad_norm": 1.919366717338562, "learning_rate": 9.374769684313966e-06, "loss": 0.07395078241825104, "step": 4426 }, { "epoch": 0.5371920883387938, "grad_norm": 3.039940357208252, "learning_rate": 9.37231298366294e-06, "loss": 0.49841296672821045, "step": 4427 }, { "epoch": 0.5373134328358209, "grad_norm": 2.701833963394165, "learning_rate": 9.369856283011915e-06, "loss": 0.08298008143901825, "step": 4428 }, { "epoch": 0.537434777332848, "grad_norm": 2.517366409301758, "learning_rate": 9.36739958236089e-06, "loss": 0.07566508650779724, "step": 4429 }, { "epoch": 0.537556121829875, "grad_norm": 2.2342193126678467, "learning_rate": 9.364942881709864e-06, "loss": 0.20025718212127686, "step": 4430 }, { "epoch": 0.537677466326902, "grad_norm": 3.664217710494995, "learning_rate": 9.362486181058838e-06, "loss": 0.2784649729728699, "step": 4431 }, { "epoch": 0.5377988108239291, "grad_norm": 2.7283802032470703, "learning_rate": 9.360029480407814e-06, "loss": 0.18996834754943848, "step": 4432 }, { "epoch": 0.5379201553209562, "grad_norm": 2.188549041748047, "learning_rate": 9.357572779756788e-06, "loss": 0.3108798861503601, "step": 4433 }, { "epoch": 0.5380414998179832, "grad_norm": 2.8081154823303223, "learning_rate": 9.355116079105762e-06, "loss": 0.17977400124073029, "step": 4434 }, { "epoch": 0.5381628443150103, "grad_norm": 2.873342514038086, "learning_rate": 9.352659378454736e-06, "loss": 0.21185101568698883, "step": 4435 }, { "epoch": 0.5382841888120373, "grad_norm": 1.9519914388656616, "learning_rate": 9.35020267780371e-06, "loss": 0.17341850697994232, "step": 4436 }, { "epoch": 0.5384055333090645, "grad_norm": 2.0728065967559814, "learning_rate": 9.347745977152685e-06, "loss": 0.23260879516601562, "step": 4437 }, { "epoch": 0.5385268778060915, "grad_norm": 2.554821491241455, "learning_rate": 9.34528927650166e-06, "loss": 0.29856768250465393, "step": 4438 }, { "epoch": 0.5386482223031186, "grad_norm": 2.138981342315674, "learning_rate": 9.342832575850634e-06, "loss": 0.1678609549999237, "step": 4439 }, { "epoch": 0.5387695668001456, "grad_norm": 1.6926543712615967, "learning_rate": 9.340375875199608e-06, "loss": 0.12643671035766602, "step": 4440 }, { "epoch": 0.5388909112971727, "grad_norm": 2.993431806564331, "learning_rate": 9.337919174548582e-06, "loss": 0.7881591320037842, "step": 4441 }, { "epoch": 0.5390122557941998, "grad_norm": 2.8974554538726807, "learning_rate": 9.335462473897556e-06, "loss": 0.5474866032600403, "step": 4442 }, { "epoch": 0.5391336002912268, "grad_norm": 3.2219316959381104, "learning_rate": 9.33300577324653e-06, "loss": 0.45903801918029785, "step": 4443 }, { "epoch": 0.5392549447882539, "grad_norm": 3.2341408729553223, "learning_rate": 9.330549072595505e-06, "loss": 0.5835304856300354, "step": 4444 }, { "epoch": 0.5393762892852809, "grad_norm": 3.9189608097076416, "learning_rate": 9.328092371944479e-06, "loss": 0.24262051284313202, "step": 4445 }, { "epoch": 0.539497633782308, "grad_norm": 1.1636987924575806, "learning_rate": 9.325635671293453e-06, "loss": 0.09128616750240326, "step": 4446 }, { "epoch": 0.539618978279335, "grad_norm": 5.299190044403076, "learning_rate": 9.323178970642428e-06, "loss": 0.23806308209896088, "step": 4447 }, { "epoch": 0.5397403227763621, "grad_norm": 2.0878846645355225, "learning_rate": 9.320722269991402e-06, "loss": 0.10870083421468735, "step": 4448 }, { "epoch": 0.5398616672733891, "grad_norm": 3.6392018795013428, "learning_rate": 9.318265569340376e-06, "loss": 0.0658004954457283, "step": 4449 }, { "epoch": 0.5399830117704162, "grad_norm": 0.6597356796264648, "learning_rate": 9.31580886868935e-06, "loss": 0.013893425464630127, "step": 4450 }, { "epoch": 0.5401043562674432, "grad_norm": 2.757697582244873, "learning_rate": 9.313352168038325e-06, "loss": 0.30158179998397827, "step": 4451 }, { "epoch": 0.5402257007644703, "grad_norm": 3.4862139225006104, "learning_rate": 9.3108954673873e-06, "loss": 0.4102773070335388, "step": 4452 }, { "epoch": 0.5403470452614973, "grad_norm": 4.629420280456543, "learning_rate": 9.308438766736275e-06, "loss": 0.2778346836566925, "step": 4453 }, { "epoch": 0.5404683897585244, "grad_norm": 2.4873886108398438, "learning_rate": 9.305982066085249e-06, "loss": 0.32104575634002686, "step": 4454 }, { "epoch": 0.5405897342555515, "grad_norm": 2.1146411895751953, "learning_rate": 9.303525365434223e-06, "loss": 0.22613663971424103, "step": 4455 }, { "epoch": 0.5407110787525786, "grad_norm": 4.303106307983398, "learning_rate": 9.301068664783198e-06, "loss": 0.3541973829269409, "step": 4456 }, { "epoch": 0.5408324232496057, "grad_norm": 2.4047317504882812, "learning_rate": 9.298611964132172e-06, "loss": 0.3012484014034271, "step": 4457 }, { "epoch": 0.5409537677466327, "grad_norm": 3.0852818489074707, "learning_rate": 9.296155263481146e-06, "loss": 0.20283588767051697, "step": 4458 }, { "epoch": 0.5410751122436598, "grad_norm": 2.433861494064331, "learning_rate": 9.29369856283012e-06, "loss": 0.0741279199719429, "step": 4459 }, { "epoch": 0.5411964567406868, "grad_norm": 1.134724736213684, "learning_rate": 9.291241862179095e-06, "loss": 0.012104371562600136, "step": 4460 }, { "epoch": 0.5413178012377139, "grad_norm": 2.0270988941192627, "learning_rate": 9.288785161528069e-06, "loss": 0.10126547515392303, "step": 4461 }, { "epoch": 0.541439145734741, "grad_norm": 2.4997951984405518, "learning_rate": 9.286328460877043e-06, "loss": 0.2210891842842102, "step": 4462 }, { "epoch": 0.541560490231768, "grad_norm": 3.305931329727173, "learning_rate": 9.283871760226017e-06, "loss": 0.31861066818237305, "step": 4463 }, { "epoch": 0.541681834728795, "grad_norm": 2.4003255367279053, "learning_rate": 9.281415059574992e-06, "loss": 0.31306779384613037, "step": 4464 }, { "epoch": 0.5418031792258221, "grad_norm": 2.8415191173553467, "learning_rate": 9.278958358923966e-06, "loss": 0.22130592167377472, "step": 4465 }, { "epoch": 0.5419245237228492, "grad_norm": 3.1347544193267822, "learning_rate": 9.27650165827294e-06, "loss": 0.2920812964439392, "step": 4466 }, { "epoch": 0.5420458682198762, "grad_norm": 3.6265780925750732, "learning_rate": 9.274044957621914e-06, "loss": 0.20269900560379028, "step": 4467 }, { "epoch": 0.5421672127169033, "grad_norm": 2.0227572917938232, "learning_rate": 9.271588256970889e-06, "loss": 0.23353327810764313, "step": 4468 }, { "epoch": 0.5422885572139303, "grad_norm": 3.0727126598358154, "learning_rate": 9.269131556319863e-06, "loss": 0.12812408804893494, "step": 4469 }, { "epoch": 0.5424099017109574, "grad_norm": 1.4131548404693604, "learning_rate": 9.266674855668837e-06, "loss": 0.08903699368238449, "step": 4470 }, { "epoch": 0.5425312462079844, "grad_norm": 2.436211347579956, "learning_rate": 9.264218155017811e-06, "loss": 0.3985083997249603, "step": 4471 }, { "epoch": 0.5426525907050115, "grad_norm": 2.4969236850738525, "learning_rate": 9.261761454366787e-06, "loss": 0.17769329249858856, "step": 4472 }, { "epoch": 0.5427739352020385, "grad_norm": 2.9740588665008545, "learning_rate": 9.259304753715762e-06, "loss": 0.1564944088459015, "step": 4473 }, { "epoch": 0.5428952796990657, "grad_norm": 2.5118603706359863, "learning_rate": 9.256848053064736e-06, "loss": 0.17297369241714478, "step": 4474 }, { "epoch": 0.5430166241960928, "grad_norm": 2.7014503479003906, "learning_rate": 9.25439135241371e-06, "loss": 0.20029833912849426, "step": 4475 }, { "epoch": 0.5431379686931198, "grad_norm": 2.8678176403045654, "learning_rate": 9.251934651762684e-06, "loss": 0.18959420919418335, "step": 4476 }, { "epoch": 0.5432593131901469, "grad_norm": 2.1272525787353516, "learning_rate": 9.249477951111657e-06, "loss": 0.3020903170108795, "step": 4477 }, { "epoch": 0.5433806576871739, "grad_norm": 3.1587746143341064, "learning_rate": 9.247021250460631e-06, "loss": 0.2680964767932892, "step": 4478 }, { "epoch": 0.543502002184201, "grad_norm": 2.932344675064087, "learning_rate": 9.244564549809606e-06, "loss": 0.11635663360357285, "step": 4479 }, { "epoch": 0.543623346681228, "grad_norm": 1.5658379793167114, "learning_rate": 9.24210784915858e-06, "loss": 0.3331388235092163, "step": 4480 }, { "epoch": 0.5437446911782551, "grad_norm": 3.198911428451538, "learning_rate": 9.239651148507554e-06, "loss": 0.06593173742294312, "step": 4481 }, { "epoch": 0.5438660356752821, "grad_norm": 2.9255058765411377, "learning_rate": 9.237194447856528e-06, "loss": 0.3806287348270416, "step": 4482 }, { "epoch": 0.5439873801723092, "grad_norm": 3.018001079559326, "learning_rate": 9.234737747205503e-06, "loss": 0.6326801180839539, "step": 4483 }, { "epoch": 0.5441087246693362, "grad_norm": 1.9830690622329712, "learning_rate": 9.232281046554477e-06, "loss": 0.27141863107681274, "step": 4484 }, { "epoch": 0.5442300691663633, "grad_norm": 2.603083848953247, "learning_rate": 9.229824345903453e-06, "loss": 0.18259039521217346, "step": 4485 }, { "epoch": 0.5443514136633903, "grad_norm": 3.477292060852051, "learning_rate": 9.227367645252427e-06, "loss": 0.3545914590358734, "step": 4486 }, { "epoch": 0.5444727581604174, "grad_norm": 3.1754257678985596, "learning_rate": 9.224910944601401e-06, "loss": 0.266185998916626, "step": 4487 }, { "epoch": 0.5445941026574445, "grad_norm": 2.9562489986419678, "learning_rate": 9.222454243950376e-06, "loss": 0.28897449374198914, "step": 4488 }, { "epoch": 0.5447154471544715, "grad_norm": 11.001296997070312, "learning_rate": 9.21999754329935e-06, "loss": 0.6779196858406067, "step": 4489 }, { "epoch": 0.5448367916514986, "grad_norm": 3.8062222003936768, "learning_rate": 9.217540842648324e-06, "loss": 0.7591618299484253, "step": 4490 }, { "epoch": 0.5449581361485256, "grad_norm": 2.9487602710723877, "learning_rate": 9.215084141997298e-06, "loss": 0.32513558864593506, "step": 4491 }, { "epoch": 0.5450794806455527, "grad_norm": 3.365544319152832, "learning_rate": 9.212627441346273e-06, "loss": 0.37451258301734924, "step": 4492 }, { "epoch": 0.5452008251425798, "grad_norm": 3.771261692047119, "learning_rate": 9.210170740695247e-06, "loss": 0.20522935688495636, "step": 4493 }, { "epoch": 0.5453221696396069, "grad_norm": 2.6576852798461914, "learning_rate": 9.207714040044221e-06, "loss": 0.45043930411338806, "step": 4494 }, { "epoch": 0.545443514136634, "grad_norm": 2.0722954273223877, "learning_rate": 9.205257339393195e-06, "loss": 0.04227669537067413, "step": 4495 }, { "epoch": 0.545564858633661, "grad_norm": 2.4815409183502197, "learning_rate": 9.20280063874217e-06, "loss": 0.12568433582782745, "step": 4496 }, { "epoch": 0.545686203130688, "grad_norm": 3.4653637409210205, "learning_rate": 9.200343938091144e-06, "loss": 0.35587602853775024, "step": 4497 }, { "epoch": 0.5458075476277151, "grad_norm": 3.0857656002044678, "learning_rate": 9.197887237440118e-06, "loss": 0.4217790365219116, "step": 4498 }, { "epoch": 0.5459288921247422, "grad_norm": 3.8405237197875977, "learning_rate": 9.195430536789092e-06, "loss": 0.4252901077270508, "step": 4499 }, { "epoch": 0.5460502366217692, "grad_norm": 0.9962751865386963, "learning_rate": 9.192973836138067e-06, "loss": 0.02946596033871174, "step": 4500 }, { "epoch": 0.5461715811187963, "grad_norm": 1.5757423639297485, "learning_rate": 9.190517135487041e-06, "loss": 0.05185816437005997, "step": 4501 }, { "epoch": 0.5462929256158233, "grad_norm": 4.0479865074157715, "learning_rate": 9.188060434836015e-06, "loss": 0.3740744888782501, "step": 4502 }, { "epoch": 0.5464142701128504, "grad_norm": 4.4502668380737305, "learning_rate": 9.18560373418499e-06, "loss": 0.36921781301498413, "step": 4503 }, { "epoch": 0.5465356146098774, "grad_norm": 1.648971438407898, "learning_rate": 9.183147033533964e-06, "loss": 0.21644048392772675, "step": 4504 }, { "epoch": 0.5466569591069045, "grad_norm": 3.2031352519989014, "learning_rate": 9.18069033288294e-06, "loss": 0.2160961627960205, "step": 4505 }, { "epoch": 0.5467783036039315, "grad_norm": 1.8445900678634644, "learning_rate": 9.178233632231914e-06, "loss": 0.05424124002456665, "step": 4506 }, { "epoch": 0.5468996481009586, "grad_norm": 3.3590433597564697, "learning_rate": 9.175776931580888e-06, "loss": 0.27612707018852234, "step": 4507 }, { "epoch": 0.5470209925979856, "grad_norm": 2.5159993171691895, "learning_rate": 9.173320230929862e-06, "loss": 0.10842301696538925, "step": 4508 }, { "epoch": 0.5471423370950127, "grad_norm": 2.46826171875, "learning_rate": 9.170863530278837e-06, "loss": 0.5720289945602417, "step": 4509 }, { "epoch": 0.5472636815920398, "grad_norm": 5.638071060180664, "learning_rate": 9.168406829627811e-06, "loss": 0.35024020075798035, "step": 4510 }, { "epoch": 0.5473850260890668, "grad_norm": 0.0009154945728369057, "learning_rate": 9.165950128976785e-06, "loss": 1.9427683582762256e-05, "step": 4511 }, { "epoch": 0.547506370586094, "grad_norm": 1.5281604528427124, "learning_rate": 9.16349342832576e-06, "loss": 0.14189767837524414, "step": 4512 }, { "epoch": 0.547627715083121, "grad_norm": 2.1466760635375977, "learning_rate": 9.161036727674734e-06, "loss": 0.034475717693567276, "step": 4513 }, { "epoch": 0.5477490595801481, "grad_norm": 1.0230348110198975, "learning_rate": 9.158580027023708e-06, "loss": 0.12298868596553802, "step": 4514 }, { "epoch": 0.5478704040771751, "grad_norm": 2.5712826251983643, "learning_rate": 9.156123326372682e-06, "loss": 0.37247079610824585, "step": 4515 }, { "epoch": 0.5479917485742022, "grad_norm": 2.8114864826202393, "learning_rate": 9.153666625721656e-06, "loss": 0.19029825925827026, "step": 4516 }, { "epoch": 0.5481130930712292, "grad_norm": 2.0876331329345703, "learning_rate": 9.15120992507063e-06, "loss": 0.20716963708400726, "step": 4517 }, { "epoch": 0.5482344375682563, "grad_norm": 3.4400641918182373, "learning_rate": 9.148753224419605e-06, "loss": 0.5175398588180542, "step": 4518 }, { "epoch": 0.5483557820652833, "grad_norm": 1.850892186164856, "learning_rate": 9.14629652376858e-06, "loss": 0.12127500772476196, "step": 4519 }, { "epoch": 0.5484771265623104, "grad_norm": 2.1759486198425293, "learning_rate": 9.143839823117554e-06, "loss": 0.21545371413230896, "step": 4520 }, { "epoch": 0.5485984710593375, "grad_norm": 0.9485598206520081, "learning_rate": 9.141383122466528e-06, "loss": 0.08693765103816986, "step": 4521 }, { "epoch": 0.5487198155563645, "grad_norm": 3.1117711067199707, "learning_rate": 9.138926421815502e-06, "loss": 0.5137118101119995, "step": 4522 }, { "epoch": 0.5488411600533916, "grad_norm": 3.7684214115142822, "learning_rate": 9.136469721164476e-06, "loss": 0.2704165577888489, "step": 4523 }, { "epoch": 0.5489625045504186, "grad_norm": 0.990609884262085, "learning_rate": 9.134013020513452e-06, "loss": 0.13176289200782776, "step": 4524 }, { "epoch": 0.5490838490474457, "grad_norm": 2.039346933364868, "learning_rate": 9.131556319862426e-06, "loss": 0.2368188202381134, "step": 4525 }, { "epoch": 0.5492051935444727, "grad_norm": 0.5513215661048889, "learning_rate": 9.1290996192114e-06, "loss": 0.008934629149734974, "step": 4526 }, { "epoch": 0.5493265380414998, "grad_norm": 2.988719940185547, "learning_rate": 9.126642918560375e-06, "loss": 0.21047022938728333, "step": 4527 }, { "epoch": 0.5494478825385268, "grad_norm": 3.2961339950561523, "learning_rate": 9.12418621790935e-06, "loss": 0.6444485783576965, "step": 4528 }, { "epoch": 0.5495692270355539, "grad_norm": 3.2400856018066406, "learning_rate": 9.121729517258324e-06, "loss": 0.3836045265197754, "step": 4529 }, { "epoch": 0.549690571532581, "grad_norm": 2.40461802482605, "learning_rate": 9.119272816607298e-06, "loss": 0.27850544452667236, "step": 4530 }, { "epoch": 0.5498119160296081, "grad_norm": 6.316298961639404, "learning_rate": 9.116816115956272e-06, "loss": 0.45413386821746826, "step": 4531 }, { "epoch": 0.5499332605266352, "grad_norm": 2.2767460346221924, "learning_rate": 9.114359415305246e-06, "loss": 0.5729230642318726, "step": 4532 }, { "epoch": 0.5500546050236622, "grad_norm": 3.3545124530792236, "learning_rate": 9.11190271465422e-06, "loss": 0.3226602077484131, "step": 4533 }, { "epoch": 0.5501759495206893, "grad_norm": 1.9540446996688843, "learning_rate": 9.109446014003195e-06, "loss": 0.48093292117118835, "step": 4534 }, { "epoch": 0.5502972940177163, "grad_norm": 2.7246620655059814, "learning_rate": 9.106989313352169e-06, "loss": 0.16427510976791382, "step": 4535 }, { "epoch": 0.5504186385147434, "grad_norm": 3.192305564880371, "learning_rate": 9.104532612701143e-06, "loss": 0.4048592150211334, "step": 4536 }, { "epoch": 0.5505399830117704, "grad_norm": 1.9194501638412476, "learning_rate": 9.102075912050118e-06, "loss": 0.14160402119159698, "step": 4537 }, { "epoch": 0.5506613275087975, "grad_norm": 2.597014904022217, "learning_rate": 9.099619211399092e-06, "loss": 0.12982876598834991, "step": 4538 }, { "epoch": 0.5507826720058245, "grad_norm": 1.9970993995666504, "learning_rate": 9.097162510748066e-06, "loss": 0.6876908540725708, "step": 4539 }, { "epoch": 0.5509040165028516, "grad_norm": 2.131498098373413, "learning_rate": 9.09470581009704e-06, "loss": 0.12262062728404999, "step": 4540 }, { "epoch": 0.5510253609998786, "grad_norm": 1.8072608709335327, "learning_rate": 9.092249109446015e-06, "loss": 0.06946182996034622, "step": 4541 }, { "epoch": 0.5511467054969057, "grad_norm": 3.1910746097564697, "learning_rate": 9.089792408794989e-06, "loss": 0.33112478256225586, "step": 4542 }, { "epoch": 0.5512680499939328, "grad_norm": 2.126844882965088, "learning_rate": 9.087335708143963e-06, "loss": 0.22125214338302612, "step": 4543 }, { "epoch": 0.5513893944909598, "grad_norm": 1.640899896621704, "learning_rate": 9.084879007492939e-06, "loss": 0.12188848853111267, "step": 4544 }, { "epoch": 0.5515107389879869, "grad_norm": 1.9318177700042725, "learning_rate": 9.082422306841912e-06, "loss": 0.13428768515586853, "step": 4545 }, { "epoch": 0.5516320834850139, "grad_norm": 2.5575506687164307, "learning_rate": 9.079965606190886e-06, "loss": 0.4335910975933075, "step": 4546 }, { "epoch": 0.551753427982041, "grad_norm": 2.0728278160095215, "learning_rate": 9.07750890553986e-06, "loss": 0.11995276808738708, "step": 4547 }, { "epoch": 0.551874772479068, "grad_norm": 3.6529641151428223, "learning_rate": 9.075052204888834e-06, "loss": 0.2091088742017746, "step": 4548 }, { "epoch": 0.5519961169760952, "grad_norm": 2.7953732013702393, "learning_rate": 9.072595504237809e-06, "loss": 0.17759278416633606, "step": 4549 }, { "epoch": 0.5521174614731222, "grad_norm": 2.0739352703094482, "learning_rate": 9.070138803586783e-06, "loss": 0.3068687617778778, "step": 4550 }, { "epoch": 0.5522388059701493, "grad_norm": 4.288859844207764, "learning_rate": 9.067682102935757e-06, "loss": 0.25371402502059937, "step": 4551 }, { "epoch": 0.5523601504671763, "grad_norm": 2.2860450744628906, "learning_rate": 9.065225402284731e-06, "loss": 0.2985222041606903, "step": 4552 }, { "epoch": 0.5524814949642034, "grad_norm": 4.307240009307861, "learning_rate": 9.062768701633706e-06, "loss": 0.2752586305141449, "step": 4553 }, { "epoch": 0.5526028394612305, "grad_norm": 1.3085616827011108, "learning_rate": 9.06031200098268e-06, "loss": 0.23151975870132446, "step": 4554 }, { "epoch": 0.5527241839582575, "grad_norm": 3.1817173957824707, "learning_rate": 9.057855300331654e-06, "loss": 0.1844831258058548, "step": 4555 }, { "epoch": 0.5528455284552846, "grad_norm": 2.2929646968841553, "learning_rate": 9.055398599680629e-06, "loss": 0.23864464461803436, "step": 4556 }, { "epoch": 0.5529668729523116, "grad_norm": 2.123814344406128, "learning_rate": 9.052941899029604e-06, "loss": 0.21860180795192719, "step": 4557 }, { "epoch": 0.5530882174493387, "grad_norm": 1.796294927597046, "learning_rate": 9.050485198378579e-06, "loss": 0.211173877120018, "step": 4558 }, { "epoch": 0.5532095619463657, "grad_norm": 2.8568968772888184, "learning_rate": 9.048028497727553e-06, "loss": 0.3414806127548218, "step": 4559 }, { "epoch": 0.5533309064433928, "grad_norm": 2.1389451026916504, "learning_rate": 9.045571797076527e-06, "loss": 0.2081296145915985, "step": 4560 }, { "epoch": 0.5534522509404198, "grad_norm": 1.115829348564148, "learning_rate": 9.043115096425501e-06, "loss": 0.3595654368400574, "step": 4561 }, { "epoch": 0.5535735954374469, "grad_norm": 1.3610594272613525, "learning_rate": 9.040658395774476e-06, "loss": 0.03550596162676811, "step": 4562 }, { "epoch": 0.5536949399344739, "grad_norm": 2.4596400260925293, "learning_rate": 9.03820169512345e-06, "loss": 0.14365418255329132, "step": 4563 }, { "epoch": 0.553816284431501, "grad_norm": 1.660984992980957, "learning_rate": 9.035744994472424e-06, "loss": 0.07896963506937027, "step": 4564 }, { "epoch": 0.553937628928528, "grad_norm": 2.2545220851898193, "learning_rate": 9.033288293821399e-06, "loss": 0.24589726328849792, "step": 4565 }, { "epoch": 0.5540589734255551, "grad_norm": 2.455479860305786, "learning_rate": 9.030831593170373e-06, "loss": 0.2457631528377533, "step": 4566 }, { "epoch": 0.5541803179225823, "grad_norm": 1.5269775390625, "learning_rate": 9.028374892519347e-06, "loss": 0.08918678015470505, "step": 4567 }, { "epoch": 0.5543016624196093, "grad_norm": 2.3557610511779785, "learning_rate": 9.025918191868321e-06, "loss": 0.18805299699306488, "step": 4568 }, { "epoch": 0.5544230069166364, "grad_norm": 3.466259717941284, "learning_rate": 9.023461491217296e-06, "loss": 0.24532702565193176, "step": 4569 }, { "epoch": 0.5545443514136634, "grad_norm": 1.2164995670318604, "learning_rate": 9.02100479056627e-06, "loss": 0.031904492527246475, "step": 4570 }, { "epoch": 0.5546656959106905, "grad_norm": 3.050809383392334, "learning_rate": 9.018548089915244e-06, "loss": 0.3586193919181824, "step": 4571 }, { "epoch": 0.5547870404077175, "grad_norm": 3.0135560035705566, "learning_rate": 9.016091389264218e-06, "loss": 0.10105389356613159, "step": 4572 }, { "epoch": 0.5549083849047446, "grad_norm": 2.6137521266937256, "learning_rate": 9.013634688613193e-06, "loss": 0.2575450837612152, "step": 4573 }, { "epoch": 0.5550297294017716, "grad_norm": 2.811858892440796, "learning_rate": 9.011177987962167e-06, "loss": 0.2925565838813782, "step": 4574 }, { "epoch": 0.5551510738987987, "grad_norm": 3.5789120197296143, "learning_rate": 9.008721287311141e-06, "loss": 0.27618736028671265, "step": 4575 }, { "epoch": 0.5552724183958258, "grad_norm": 2.189844846725464, "learning_rate": 9.006264586660115e-06, "loss": 0.41711121797561646, "step": 4576 }, { "epoch": 0.5553937628928528, "grad_norm": 3.073932409286499, "learning_rate": 9.003807886009091e-06, "loss": 0.5063557028770447, "step": 4577 }, { "epoch": 0.5555151073898799, "grad_norm": 4.205935001373291, "learning_rate": 9.001351185358066e-06, "loss": 0.3898845911026001, "step": 4578 }, { "epoch": 0.5556364518869069, "grad_norm": 2.6963531970977783, "learning_rate": 8.99889448470704e-06, "loss": 0.2505294978618622, "step": 4579 }, { "epoch": 0.555757796383934, "grad_norm": 2.6249184608459473, "learning_rate": 8.996437784056014e-06, "loss": 0.1429687887430191, "step": 4580 }, { "epoch": 0.555879140880961, "grad_norm": 3.435067653656006, "learning_rate": 8.993981083404988e-06, "loss": 0.6661196351051331, "step": 4581 }, { "epoch": 0.5560004853779881, "grad_norm": 1.8672665357589722, "learning_rate": 8.991524382753963e-06, "loss": 0.3908240795135498, "step": 4582 }, { "epoch": 0.5561218298750151, "grad_norm": 1.852908730506897, "learning_rate": 8.989067682102937e-06, "loss": 0.12419023364782333, "step": 4583 }, { "epoch": 0.5562431743720422, "grad_norm": 2.563528537750244, "learning_rate": 8.986610981451911e-06, "loss": 0.1437659114599228, "step": 4584 }, { "epoch": 0.5563645188690692, "grad_norm": 2.3816699981689453, "learning_rate": 8.984154280800885e-06, "loss": 0.2081899344921112, "step": 4585 }, { "epoch": 0.5564858633660964, "grad_norm": 2.505880117416382, "learning_rate": 8.98169758014986e-06, "loss": 0.1642467975616455, "step": 4586 }, { "epoch": 0.5566072078631235, "grad_norm": 3.073155641555786, "learning_rate": 8.979240879498834e-06, "loss": 0.35402369499206543, "step": 4587 }, { "epoch": 0.5567285523601505, "grad_norm": 1.1981679201126099, "learning_rate": 8.976784178847808e-06, "loss": 0.2385360598564148, "step": 4588 }, { "epoch": 0.5568498968571776, "grad_norm": 3.6582369804382324, "learning_rate": 8.974327478196782e-06, "loss": 0.28997334837913513, "step": 4589 }, { "epoch": 0.5569712413542046, "grad_norm": 3.4013192653656006, "learning_rate": 8.971870777545757e-06, "loss": 0.24837681651115417, "step": 4590 }, { "epoch": 0.5570925858512317, "grad_norm": 1.8810604810714722, "learning_rate": 8.969414076894731e-06, "loss": 0.3941044807434082, "step": 4591 }, { "epoch": 0.5572139303482587, "grad_norm": 2.366826295852661, "learning_rate": 8.966957376243705e-06, "loss": 0.3071171045303345, "step": 4592 }, { "epoch": 0.5573352748452858, "grad_norm": 3.0873939990997314, "learning_rate": 8.96450067559268e-06, "loss": 0.21738561987876892, "step": 4593 }, { "epoch": 0.5574566193423128, "grad_norm": 2.7858917713165283, "learning_rate": 8.962043974941654e-06, "loss": 0.39993590116500854, "step": 4594 }, { "epoch": 0.5575779638393399, "grad_norm": 1.1715283393859863, "learning_rate": 8.959587274290628e-06, "loss": 0.05722789093852043, "step": 4595 }, { "epoch": 0.5576993083363669, "grad_norm": 5.790470600128174, "learning_rate": 8.957130573639602e-06, "loss": 0.33138906955718994, "step": 4596 }, { "epoch": 0.557820652833394, "grad_norm": 2.7855169773101807, "learning_rate": 8.954673872988578e-06, "loss": 0.09155604988336563, "step": 4597 }, { "epoch": 0.557941997330421, "grad_norm": 2.920541286468506, "learning_rate": 8.952217172337552e-06, "loss": 0.29356199502944946, "step": 4598 }, { "epoch": 0.5580633418274481, "grad_norm": 2.1692259311676025, "learning_rate": 8.949760471686527e-06, "loss": 0.18016237020492554, "step": 4599 }, { "epoch": 0.5581846863244752, "grad_norm": 4.2535481452941895, "learning_rate": 8.947303771035501e-06, "loss": 0.22029706835746765, "step": 4600 }, { "epoch": 0.5583060308215022, "grad_norm": 3.9874050617218018, "learning_rate": 8.944847070384475e-06, "loss": 0.43803870677948, "step": 4601 }, { "epoch": 0.5584273753185293, "grad_norm": 2.4762213230133057, "learning_rate": 8.94239036973345e-06, "loss": 0.3853197991847992, "step": 4602 }, { "epoch": 0.5585487198155563, "grad_norm": 3.5158536434173584, "learning_rate": 8.939933669082424e-06, "loss": 0.22476443648338318, "step": 4603 }, { "epoch": 0.5586700643125834, "grad_norm": 1.4725539684295654, "learning_rate": 8.937476968431398e-06, "loss": 0.023397210985422134, "step": 4604 }, { "epoch": 0.5587914088096105, "grad_norm": 3.081088066101074, "learning_rate": 8.935020267780372e-06, "loss": 0.32525530457496643, "step": 4605 }, { "epoch": 0.5589127533066376, "grad_norm": 2.7123496532440186, "learning_rate": 8.932563567129346e-06, "loss": 0.12249074876308441, "step": 4606 }, { "epoch": 0.5590340978036646, "grad_norm": 2.9984469413757324, "learning_rate": 8.93010686647832e-06, "loss": 0.23081940412521362, "step": 4607 }, { "epoch": 0.5591554423006917, "grad_norm": 3.980036735534668, "learning_rate": 8.927650165827295e-06, "loss": 0.17853817343711853, "step": 4608 }, { "epoch": 0.5592767867977188, "grad_norm": 2.616176128387451, "learning_rate": 8.92519346517627e-06, "loss": 0.39459630846977234, "step": 4609 }, { "epoch": 0.5593981312947458, "grad_norm": 2.62721586227417, "learning_rate": 8.922736764525244e-06, "loss": 0.4388883709907532, "step": 4610 }, { "epoch": 0.5595194757917729, "grad_norm": 2.678649425506592, "learning_rate": 8.920280063874218e-06, "loss": 0.230331689119339, "step": 4611 }, { "epoch": 0.5596408202887999, "grad_norm": 2.6389753818511963, "learning_rate": 8.917823363223192e-06, "loss": 0.21237829327583313, "step": 4612 }, { "epoch": 0.559762164785827, "grad_norm": 3.212249517440796, "learning_rate": 8.915366662572166e-06, "loss": 0.5107381343841553, "step": 4613 }, { "epoch": 0.559883509282854, "grad_norm": 2.317833662033081, "learning_rate": 8.91290996192114e-06, "loss": 0.36912640929222107, "step": 4614 }, { "epoch": 0.5600048537798811, "grad_norm": 2.685931921005249, "learning_rate": 8.910453261270115e-06, "loss": 0.14463792741298676, "step": 4615 }, { "epoch": 0.5601261982769081, "grad_norm": 2.8833651542663574, "learning_rate": 8.907996560619089e-06, "loss": 0.2698196768760681, "step": 4616 }, { "epoch": 0.5602475427739352, "grad_norm": 0.33456164598464966, "learning_rate": 8.905539859968063e-06, "loss": 0.004454801324754953, "step": 4617 }, { "epoch": 0.5603688872709622, "grad_norm": 2.6761889457702637, "learning_rate": 8.903083159317038e-06, "loss": 0.5546298623085022, "step": 4618 }, { "epoch": 0.5604902317679893, "grad_norm": 1.162175178527832, "learning_rate": 8.900626458666012e-06, "loss": 0.01604592427611351, "step": 4619 }, { "epoch": 0.5606115762650163, "grad_norm": 2.363185167312622, "learning_rate": 8.898169758014986e-06, "loss": 0.33113154768943787, "step": 4620 }, { "epoch": 0.5607329207620434, "grad_norm": 2.300384998321533, "learning_rate": 8.89571305736396e-06, "loss": 0.2339404970407486, "step": 4621 }, { "epoch": 0.5608542652590705, "grad_norm": 3.954155445098877, "learning_rate": 8.893256356712935e-06, "loss": 0.3408513367176056, "step": 4622 }, { "epoch": 0.5609756097560976, "grad_norm": 4.011313438415527, "learning_rate": 8.890799656061909e-06, "loss": 0.2716611623764038, "step": 4623 }, { "epoch": 0.5610969542531247, "grad_norm": 5.153746128082275, "learning_rate": 8.888342955410883e-06, "loss": 0.05890808627009392, "step": 4624 }, { "epoch": 0.5612182987501517, "grad_norm": 3.5169119834899902, "learning_rate": 8.885886254759857e-06, "loss": 0.6330788135528564, "step": 4625 }, { "epoch": 0.5613396432471788, "grad_norm": 2.223536252975464, "learning_rate": 8.883429554108832e-06, "loss": 0.36279380321502686, "step": 4626 }, { "epoch": 0.5614609877442058, "grad_norm": 2.7091453075408936, "learning_rate": 8.880972853457806e-06, "loss": 0.34752100706100464, "step": 4627 }, { "epoch": 0.5615823322412329, "grad_norm": 4.571422100067139, "learning_rate": 8.87851615280678e-06, "loss": 0.15122555196285248, "step": 4628 }, { "epoch": 0.5617036767382599, "grad_norm": 1.985201358795166, "learning_rate": 8.876059452155754e-06, "loss": 0.23814445734024048, "step": 4629 }, { "epoch": 0.561825021235287, "grad_norm": 4.561100959777832, "learning_rate": 8.87360275150473e-06, "loss": 0.277901291847229, "step": 4630 }, { "epoch": 0.561946365732314, "grad_norm": 2.3758301734924316, "learning_rate": 8.871146050853705e-06, "loss": 0.43857789039611816, "step": 4631 }, { "epoch": 0.5620677102293411, "grad_norm": 1.798718810081482, "learning_rate": 8.868689350202679e-06, "loss": 0.1545332670211792, "step": 4632 }, { "epoch": 0.5621890547263682, "grad_norm": 4.274606227874756, "learning_rate": 8.866232649551653e-06, "loss": 0.5856348276138306, "step": 4633 }, { "epoch": 0.5623103992233952, "grad_norm": 2.5427372455596924, "learning_rate": 8.863775948900627e-06, "loss": 0.12918700277805328, "step": 4634 }, { "epoch": 0.5624317437204223, "grad_norm": 2.15158748626709, "learning_rate": 8.861319248249602e-06, "loss": 0.43932589888572693, "step": 4635 }, { "epoch": 0.5625530882174493, "grad_norm": 1.3702175617218018, "learning_rate": 8.858862547598576e-06, "loss": 0.07663845270872116, "step": 4636 }, { "epoch": 0.5626744327144764, "grad_norm": 4.811141014099121, "learning_rate": 8.85640584694755e-06, "loss": 0.3394564688205719, "step": 4637 }, { "epoch": 0.5627957772115034, "grad_norm": 3.5993902683258057, "learning_rate": 8.853949146296524e-06, "loss": 0.472808301448822, "step": 4638 }, { "epoch": 0.5629171217085305, "grad_norm": 6.76779842376709, "learning_rate": 8.851492445645499e-06, "loss": 0.4756602644920349, "step": 4639 }, { "epoch": 0.5630384662055575, "grad_norm": 2.4220874309539795, "learning_rate": 8.849035744994473e-06, "loss": 0.275368332862854, "step": 4640 }, { "epoch": 0.5631598107025846, "grad_norm": 2.316923141479492, "learning_rate": 8.846579044343447e-06, "loss": 0.3943760097026825, "step": 4641 }, { "epoch": 0.5632811551996117, "grad_norm": 4.4944376945495605, "learning_rate": 8.844122343692421e-06, "loss": 0.4663783311843872, "step": 4642 }, { "epoch": 0.5634024996966388, "grad_norm": 3.1661713123321533, "learning_rate": 8.841665643041396e-06, "loss": 0.23757371306419373, "step": 4643 }, { "epoch": 0.5635238441936659, "grad_norm": 3.55597186088562, "learning_rate": 8.83920894239037e-06, "loss": 0.447149395942688, "step": 4644 }, { "epoch": 0.5636451886906929, "grad_norm": 2.8339645862579346, "learning_rate": 8.836752241739344e-06, "loss": 0.14023752510547638, "step": 4645 }, { "epoch": 0.56376653318772, "grad_norm": 3.7399895191192627, "learning_rate": 8.834295541088319e-06, "loss": 0.6317920684814453, "step": 4646 }, { "epoch": 0.563887877684747, "grad_norm": 3.6672725677490234, "learning_rate": 8.831838840437293e-06, "loss": 0.3368057608604431, "step": 4647 }, { "epoch": 0.5640092221817741, "grad_norm": 2.8210198879241943, "learning_rate": 8.829382139786267e-06, "loss": 0.25458377599716187, "step": 4648 }, { "epoch": 0.5641305666788011, "grad_norm": 2.7250094413757324, "learning_rate": 8.826925439135241e-06, "loss": 0.21671104431152344, "step": 4649 }, { "epoch": 0.5642519111758282, "grad_norm": 2.8842055797576904, "learning_rate": 8.824468738484217e-06, "loss": 0.321000874042511, "step": 4650 }, { "epoch": 0.5643732556728552, "grad_norm": 2.1366565227508545, "learning_rate": 8.822012037833192e-06, "loss": 0.3256775140762329, "step": 4651 }, { "epoch": 0.5644946001698823, "grad_norm": 2.608379602432251, "learning_rate": 8.819555337182166e-06, "loss": 0.29251405596733093, "step": 4652 }, { "epoch": 0.5646159446669093, "grad_norm": 3.132045030593872, "learning_rate": 8.81709863653114e-06, "loss": 0.2334512323141098, "step": 4653 }, { "epoch": 0.5647372891639364, "grad_norm": 2.9863319396972656, "learning_rate": 8.814641935880114e-06, "loss": 0.1100335642695427, "step": 4654 }, { "epoch": 0.5648586336609634, "grad_norm": 1.8839737176895142, "learning_rate": 8.812185235229089e-06, "loss": 0.2133115530014038, "step": 4655 }, { "epoch": 0.5649799781579905, "grad_norm": 2.216392993927002, "learning_rate": 8.809728534578063e-06, "loss": 0.1971181333065033, "step": 4656 }, { "epoch": 0.5651013226550176, "grad_norm": 1.3055797815322876, "learning_rate": 8.807271833927037e-06, "loss": 0.19560663402080536, "step": 4657 }, { "epoch": 0.5652226671520446, "grad_norm": 1.881071925163269, "learning_rate": 8.804815133276011e-06, "loss": 0.27774298191070557, "step": 4658 }, { "epoch": 0.5653440116490717, "grad_norm": 1.7477121353149414, "learning_rate": 8.802358432624986e-06, "loss": 0.25512588024139404, "step": 4659 }, { "epoch": 0.5654653561460988, "grad_norm": 2.6637346744537354, "learning_rate": 8.79990173197396e-06, "loss": 0.23845238983631134, "step": 4660 }, { "epoch": 0.5655867006431259, "grad_norm": 3.0946743488311768, "learning_rate": 8.797445031322934e-06, "loss": 0.327470600605011, "step": 4661 }, { "epoch": 0.5657080451401529, "grad_norm": 3.1413981914520264, "learning_rate": 8.794988330671908e-06, "loss": 0.6006260514259338, "step": 4662 }, { "epoch": 0.56582938963718, "grad_norm": 1.579168438911438, "learning_rate": 8.792531630020883e-06, "loss": 0.05705728009343147, "step": 4663 }, { "epoch": 0.565950734134207, "grad_norm": 1.1139492988586426, "learning_rate": 8.790074929369857e-06, "loss": 0.05490889400243759, "step": 4664 }, { "epoch": 0.5660720786312341, "grad_norm": 2.7925429344177246, "learning_rate": 8.787618228718831e-06, "loss": 0.2298317551612854, "step": 4665 }, { "epoch": 0.5661934231282612, "grad_norm": 1.179821252822876, "learning_rate": 8.785161528067805e-06, "loss": 0.038065098226070404, "step": 4666 }, { "epoch": 0.5663147676252882, "grad_norm": 3.3816986083984375, "learning_rate": 8.78270482741678e-06, "loss": 0.25041159987449646, "step": 4667 }, { "epoch": 0.5664361121223153, "grad_norm": 3.742292881011963, "learning_rate": 8.780248126765754e-06, "loss": 0.2156071662902832, "step": 4668 }, { "epoch": 0.5665574566193423, "grad_norm": 2.1318435668945312, "learning_rate": 8.77779142611473e-06, "loss": 0.3526674211025238, "step": 4669 }, { "epoch": 0.5666788011163694, "grad_norm": 0.005370914004743099, "learning_rate": 8.775334725463704e-06, "loss": 7.684863521717489e-05, "step": 4670 }, { "epoch": 0.5668001456133964, "grad_norm": 1.019944429397583, "learning_rate": 8.772878024812678e-06, "loss": 0.034011360257864, "step": 4671 }, { "epoch": 0.5669214901104235, "grad_norm": 2.4717838764190674, "learning_rate": 8.770421324161653e-06, "loss": 0.2773903012275696, "step": 4672 }, { "epoch": 0.5670428346074505, "grad_norm": 2.827631950378418, "learning_rate": 8.767964623510627e-06, "loss": 0.33173704147338867, "step": 4673 }, { "epoch": 0.5671641791044776, "grad_norm": 1.9835450649261475, "learning_rate": 8.765507922859601e-06, "loss": 0.22106708586215973, "step": 4674 }, { "epoch": 0.5672855236015046, "grad_norm": 2.567112922668457, "learning_rate": 8.763051222208575e-06, "loss": 0.5502939820289612, "step": 4675 }, { "epoch": 0.5674068680985317, "grad_norm": 2.5454301834106445, "learning_rate": 8.76059452155755e-06, "loss": 0.42484840750694275, "step": 4676 }, { "epoch": 0.5675282125955587, "grad_norm": 4.547565460205078, "learning_rate": 8.758137820906524e-06, "loss": 0.12058883905410767, "step": 4677 }, { "epoch": 0.5676495570925858, "grad_norm": 0.07788243144750595, "learning_rate": 8.755681120255498e-06, "loss": 0.0008267023949883878, "step": 4678 }, { "epoch": 0.567770901589613, "grad_norm": 3.6384663581848145, "learning_rate": 8.753224419604472e-06, "loss": 0.397627592086792, "step": 4679 }, { "epoch": 0.56789224608664, "grad_norm": 1.7667748928070068, "learning_rate": 8.750767718953447e-06, "loss": 0.10125569254159927, "step": 4680 }, { "epoch": 0.5680135905836671, "grad_norm": 2.1924235820770264, "learning_rate": 8.74831101830242e-06, "loss": 0.3592543601989746, "step": 4681 }, { "epoch": 0.5681349350806941, "grad_norm": 2.507514476776123, "learning_rate": 8.745854317651395e-06, "loss": 0.5054503083229065, "step": 4682 }, { "epoch": 0.5682562795777212, "grad_norm": 1.6910021305084229, "learning_rate": 8.74339761700037e-06, "loss": 0.19515091180801392, "step": 4683 }, { "epoch": 0.5683776240747482, "grad_norm": 2.8864822387695312, "learning_rate": 8.740940916349344e-06, "loss": 0.29222238063812256, "step": 4684 }, { "epoch": 0.5684989685717753, "grad_norm": 0.3490169644355774, "learning_rate": 8.738484215698318e-06, "loss": 0.006586109753698111, "step": 4685 }, { "epoch": 0.5686203130688023, "grad_norm": 3.780137538909912, "learning_rate": 8.736027515047292e-06, "loss": 0.2850925922393799, "step": 4686 }, { "epoch": 0.5687416575658294, "grad_norm": 4.050656795501709, "learning_rate": 8.733570814396266e-06, "loss": 0.3204861879348755, "step": 4687 }, { "epoch": 0.5688630020628564, "grad_norm": 2.766467809677124, "learning_rate": 8.73111411374524e-06, "loss": 0.3583075702190399, "step": 4688 }, { "epoch": 0.5689843465598835, "grad_norm": 2.315871477127075, "learning_rate": 8.728657413094215e-06, "loss": 0.19348451495170593, "step": 4689 }, { "epoch": 0.5691056910569106, "grad_norm": 2.887486696243286, "learning_rate": 8.72620071244319e-06, "loss": 0.19641593098640442, "step": 4690 }, { "epoch": 0.5692270355539376, "grad_norm": 1.7832244634628296, "learning_rate": 8.723744011792164e-06, "loss": 0.21230794489383698, "step": 4691 }, { "epoch": 0.5693483800509647, "grad_norm": 0.978489339351654, "learning_rate": 8.721287311141138e-06, "loss": 0.010369169525802135, "step": 4692 }, { "epoch": 0.5694697245479917, "grad_norm": 4.001723289489746, "learning_rate": 8.718830610490112e-06, "loss": 0.09683437645435333, "step": 4693 }, { "epoch": 0.5695910690450188, "grad_norm": 2.968479871749878, "learning_rate": 8.716373909839086e-06, "loss": 0.19080141186714172, "step": 4694 }, { "epoch": 0.5697124135420458, "grad_norm": 2.2581820487976074, "learning_rate": 8.71391720918806e-06, "loss": 0.08418820798397064, "step": 4695 }, { "epoch": 0.5698337580390729, "grad_norm": 2.5875041484832764, "learning_rate": 8.711460508537035e-06, "loss": 0.47382867336273193, "step": 4696 }, { "epoch": 0.5699551025360999, "grad_norm": 3.0884313583374023, "learning_rate": 8.709003807886009e-06, "loss": 0.38697201013565063, "step": 4697 }, { "epoch": 0.5700764470331271, "grad_norm": 3.3458266258239746, "learning_rate": 8.706547107234983e-06, "loss": 0.3282185196876526, "step": 4698 }, { "epoch": 0.5701977915301542, "grad_norm": 3.78436279296875, "learning_rate": 8.704090406583958e-06, "loss": 0.19664761424064636, "step": 4699 }, { "epoch": 0.5703191360271812, "grad_norm": 2.6323821544647217, "learning_rate": 8.701633705932932e-06, "loss": 0.14579224586486816, "step": 4700 }, { "epoch": 0.5704404805242083, "grad_norm": 3.237558126449585, "learning_rate": 8.699177005281906e-06, "loss": 0.10967915505170822, "step": 4701 }, { "epoch": 0.5705618250212353, "grad_norm": 2.9427151679992676, "learning_rate": 8.696720304630882e-06, "loss": 0.2973884642124176, "step": 4702 }, { "epoch": 0.5706831695182624, "grad_norm": 1.0702643394470215, "learning_rate": 8.694263603979856e-06, "loss": 0.043667495250701904, "step": 4703 }, { "epoch": 0.5708045140152894, "grad_norm": 4.496472358703613, "learning_rate": 8.69180690332883e-06, "loss": 0.1632377654314041, "step": 4704 }, { "epoch": 0.5709258585123165, "grad_norm": 2.8900721073150635, "learning_rate": 8.689350202677805e-06, "loss": 0.20599009096622467, "step": 4705 }, { "epoch": 0.5710472030093435, "grad_norm": 2.561933994293213, "learning_rate": 8.686893502026779e-06, "loss": 0.10625152289867401, "step": 4706 }, { "epoch": 0.5711685475063706, "grad_norm": 1.6871941089630127, "learning_rate": 8.684436801375753e-06, "loss": 0.04246843606233597, "step": 4707 }, { "epoch": 0.5712898920033976, "grad_norm": 1.6238974332809448, "learning_rate": 8.681980100724728e-06, "loss": 0.07677508890628815, "step": 4708 }, { "epoch": 0.5714112365004247, "grad_norm": 3.803601026535034, "learning_rate": 8.679523400073702e-06, "loss": 0.3821961283683777, "step": 4709 }, { "epoch": 0.5715325809974517, "grad_norm": 2.0734896659851074, "learning_rate": 8.677066699422676e-06, "loss": 0.10442207008600235, "step": 4710 }, { "epoch": 0.5716539254944788, "grad_norm": 2.9698433876037598, "learning_rate": 8.67460999877165e-06, "loss": 0.07573885470628738, "step": 4711 }, { "epoch": 0.5717752699915059, "grad_norm": 2.4777112007141113, "learning_rate": 8.672153298120625e-06, "loss": 0.3022940456867218, "step": 4712 }, { "epoch": 0.5718966144885329, "grad_norm": 1.1152487993240356, "learning_rate": 8.669696597469599e-06, "loss": 0.06342915445566177, "step": 4713 }, { "epoch": 0.57201795898556, "grad_norm": 2.9648983478546143, "learning_rate": 8.667239896818573e-06, "loss": 0.18680709600448608, "step": 4714 }, { "epoch": 0.572139303482587, "grad_norm": 1.6772212982177734, "learning_rate": 8.664783196167547e-06, "loss": 0.09321870654821396, "step": 4715 }, { "epoch": 0.5722606479796142, "grad_norm": 2.1530203819274902, "learning_rate": 8.662326495516522e-06, "loss": 0.1939767301082611, "step": 4716 }, { "epoch": 0.5723819924766412, "grad_norm": 2.245058536529541, "learning_rate": 8.659869794865496e-06, "loss": 0.17272621393203735, "step": 4717 }, { "epoch": 0.5725033369736683, "grad_norm": 2.317760467529297, "learning_rate": 8.65741309421447e-06, "loss": 0.38135427236557007, "step": 4718 }, { "epoch": 0.5726246814706953, "grad_norm": 2.8810060024261475, "learning_rate": 8.654956393563444e-06, "loss": 0.44683998823165894, "step": 4719 }, { "epoch": 0.5727460259677224, "grad_norm": 1.0729150772094727, "learning_rate": 8.652499692912419e-06, "loss": 0.021647926419973373, "step": 4720 }, { "epoch": 0.5728673704647494, "grad_norm": 0.6269320249557495, "learning_rate": 8.650042992261393e-06, "loss": 0.02363976277410984, "step": 4721 }, { "epoch": 0.5729887149617765, "grad_norm": 4.623542785644531, "learning_rate": 8.647586291610369e-06, "loss": 0.3098624348640442, "step": 4722 }, { "epoch": 0.5731100594588036, "grad_norm": 4.064969539642334, "learning_rate": 8.645129590959343e-06, "loss": 0.17818564176559448, "step": 4723 }, { "epoch": 0.5732314039558306, "grad_norm": 2.3159632682800293, "learning_rate": 8.642672890308317e-06, "loss": 0.16215842962265015, "step": 4724 }, { "epoch": 0.5733527484528577, "grad_norm": 2.4354324340820312, "learning_rate": 8.640216189657292e-06, "loss": 0.04336971417069435, "step": 4725 }, { "epoch": 0.5734740929498847, "grad_norm": 3.473142623901367, "learning_rate": 8.637759489006266e-06, "loss": 0.3720940351486206, "step": 4726 }, { "epoch": 0.5735954374469118, "grad_norm": 3.9998550415039062, "learning_rate": 8.63530278835524e-06, "loss": 0.4635125398635864, "step": 4727 }, { "epoch": 0.5737167819439388, "grad_norm": 3.426687002182007, "learning_rate": 8.632846087704214e-06, "loss": 0.3143175542354584, "step": 4728 }, { "epoch": 0.5738381264409659, "grad_norm": 5.112907409667969, "learning_rate": 8.630389387053189e-06, "loss": 0.5484808087348938, "step": 4729 }, { "epoch": 0.5739594709379929, "grad_norm": 4.069041728973389, "learning_rate": 8.627932686402163e-06, "loss": 0.3274216651916504, "step": 4730 }, { "epoch": 0.57408081543502, "grad_norm": 3.2604873180389404, "learning_rate": 8.625475985751137e-06, "loss": 0.3226872682571411, "step": 4731 }, { "epoch": 0.574202159932047, "grad_norm": 4.08828067779541, "learning_rate": 8.623019285100111e-06, "loss": 0.30742356181144714, "step": 4732 }, { "epoch": 0.5743235044290741, "grad_norm": 2.7328379154205322, "learning_rate": 8.620562584449086e-06, "loss": 0.16814541816711426, "step": 4733 }, { "epoch": 0.5744448489261011, "grad_norm": 0.06660814583301544, "learning_rate": 8.61810588379806e-06, "loss": 0.000544509559404105, "step": 4734 }, { "epoch": 0.5745661934231283, "grad_norm": 4.549208641052246, "learning_rate": 8.615649183147034e-06, "loss": 0.1760621815919876, "step": 4735 }, { "epoch": 0.5746875379201554, "grad_norm": 3.394392728805542, "learning_rate": 8.613192482496009e-06, "loss": 0.12093991041183472, "step": 4736 }, { "epoch": 0.5748088824171824, "grad_norm": 3.0732192993164062, "learning_rate": 8.610735781844983e-06, "loss": 0.21971963346004486, "step": 4737 }, { "epoch": 0.5749302269142095, "grad_norm": 2.63718581199646, "learning_rate": 8.608279081193957e-06, "loss": 0.21228516101837158, "step": 4738 }, { "epoch": 0.5750515714112365, "grad_norm": 2.2092223167419434, "learning_rate": 8.605822380542931e-06, "loss": 0.022659119218587875, "step": 4739 }, { "epoch": 0.5751729159082636, "grad_norm": 2.897289514541626, "learning_rate": 8.603365679891906e-06, "loss": 0.2163439691066742, "step": 4740 }, { "epoch": 0.5752942604052906, "grad_norm": 2.715763807296753, "learning_rate": 8.60090897924088e-06, "loss": 0.4751744866371155, "step": 4741 }, { "epoch": 0.5754156049023177, "grad_norm": 2.5717742443084717, "learning_rate": 8.598452278589856e-06, "loss": 0.30743980407714844, "step": 4742 }, { "epoch": 0.5755369493993447, "grad_norm": 2.188279628753662, "learning_rate": 8.59599557793883e-06, "loss": 0.42055049538612366, "step": 4743 }, { "epoch": 0.5756582938963718, "grad_norm": 1.8264455795288086, "learning_rate": 8.593538877287804e-06, "loss": 0.17960487306118011, "step": 4744 }, { "epoch": 0.5757796383933989, "grad_norm": 3.1548256874084473, "learning_rate": 8.591082176636779e-06, "loss": 0.3810442090034485, "step": 4745 }, { "epoch": 0.5759009828904259, "grad_norm": 1.463153600692749, "learning_rate": 8.588625475985753e-06, "loss": 0.1261136531829834, "step": 4746 }, { "epoch": 0.576022327387453, "grad_norm": 3.5934722423553467, "learning_rate": 8.586168775334727e-06, "loss": 0.16028206050395966, "step": 4747 }, { "epoch": 0.57614367188448, "grad_norm": 2.886974811553955, "learning_rate": 8.583712074683701e-06, "loss": 0.28415611386299133, "step": 4748 }, { "epoch": 0.5762650163815071, "grad_norm": 3.1621410846710205, "learning_rate": 8.581255374032674e-06, "loss": 0.5991367101669312, "step": 4749 }, { "epoch": 0.5763863608785341, "grad_norm": 2.6561591625213623, "learning_rate": 8.578798673381648e-06, "loss": 0.15986141562461853, "step": 4750 }, { "epoch": 0.5765077053755612, "grad_norm": 2.8709747791290283, "learning_rate": 8.576341972730622e-06, "loss": 0.3234342932701111, "step": 4751 }, { "epoch": 0.5766290498725882, "grad_norm": 3.9713871479034424, "learning_rate": 8.573885272079597e-06, "loss": 0.09976892173290253, "step": 4752 }, { "epoch": 0.5767503943696154, "grad_norm": 3.3438308238983154, "learning_rate": 8.571428571428571e-06, "loss": 0.07265103608369827, "step": 4753 }, { "epoch": 0.5768717388666424, "grad_norm": 4.483652114868164, "learning_rate": 8.568971870777545e-06, "loss": 0.1635909527540207, "step": 4754 }, { "epoch": 0.5769930833636695, "grad_norm": 2.0684444904327393, "learning_rate": 8.566515170126521e-06, "loss": 0.25538957118988037, "step": 4755 }, { "epoch": 0.5771144278606966, "grad_norm": 2.286940813064575, "learning_rate": 8.564058469475495e-06, "loss": 0.1521320790052414, "step": 4756 }, { "epoch": 0.5772357723577236, "grad_norm": 2.893602132797241, "learning_rate": 8.56160176882447e-06, "loss": 0.36091557145118713, "step": 4757 }, { "epoch": 0.5773571168547507, "grad_norm": 3.3173770904541016, "learning_rate": 8.559145068173444e-06, "loss": 0.4178902804851532, "step": 4758 }, { "epoch": 0.5774784613517777, "grad_norm": 1.76053786277771, "learning_rate": 8.556688367522418e-06, "loss": 0.10738193243741989, "step": 4759 }, { "epoch": 0.5775998058488048, "grad_norm": 3.789828300476074, "learning_rate": 8.554231666871392e-06, "loss": 0.2995709180831909, "step": 4760 }, { "epoch": 0.5777211503458318, "grad_norm": 1.0471935272216797, "learning_rate": 8.551774966220367e-06, "loss": 0.0598975233733654, "step": 4761 }, { "epoch": 0.5778424948428589, "grad_norm": 2.652618408203125, "learning_rate": 8.549318265569341e-06, "loss": 0.16570216417312622, "step": 4762 }, { "epoch": 0.5779638393398859, "grad_norm": 4.153013229370117, "learning_rate": 8.546861564918315e-06, "loss": 0.3538026809692383, "step": 4763 }, { "epoch": 0.578085183836913, "grad_norm": 4.0243706703186035, "learning_rate": 8.54440486426729e-06, "loss": 0.4823606312274933, "step": 4764 }, { "epoch": 0.57820652833394, "grad_norm": 2.6824862957000732, "learning_rate": 8.541948163616264e-06, "loss": 0.2088221311569214, "step": 4765 }, { "epoch": 0.5783278728309671, "grad_norm": 2.907728672027588, "learning_rate": 8.539491462965238e-06, "loss": 0.3896994888782501, "step": 4766 }, { "epoch": 0.5784492173279941, "grad_norm": 4.708937644958496, "learning_rate": 8.537034762314212e-06, "loss": 0.3998515009880066, "step": 4767 }, { "epoch": 0.5785705618250212, "grad_norm": 2.4710025787353516, "learning_rate": 8.534578061663186e-06, "loss": 0.4317276179790497, "step": 4768 }, { "epoch": 0.5786919063220483, "grad_norm": 2.578756093978882, "learning_rate": 8.53212136101216e-06, "loss": 0.3319275975227356, "step": 4769 }, { "epoch": 0.5788132508190753, "grad_norm": 3.9417810440063477, "learning_rate": 8.529664660361135e-06, "loss": 0.29484379291534424, "step": 4770 }, { "epoch": 0.5789345953161024, "grad_norm": 2.0699777603149414, "learning_rate": 8.52720795971011e-06, "loss": 0.2751099169254303, "step": 4771 }, { "epoch": 0.5790559398131295, "grad_norm": 2.6701064109802246, "learning_rate": 8.524751259059084e-06, "loss": 0.19009806215763092, "step": 4772 }, { "epoch": 0.5791772843101566, "grad_norm": 2.6411185264587402, "learning_rate": 8.522294558408058e-06, "loss": 0.30293259024620056, "step": 4773 }, { "epoch": 0.5792986288071836, "grad_norm": 2.5449161529541016, "learning_rate": 8.519837857757032e-06, "loss": 0.3157493770122528, "step": 4774 }, { "epoch": 0.5794199733042107, "grad_norm": 3.8581368923187256, "learning_rate": 8.517381157106008e-06, "loss": 0.48576632142066956, "step": 4775 }, { "epoch": 0.5795413178012377, "grad_norm": 2.397142171859741, "learning_rate": 8.514924456454982e-06, "loss": 0.14189498126506805, "step": 4776 }, { "epoch": 0.5796626622982648, "grad_norm": 5.303322792053223, "learning_rate": 8.512467755803957e-06, "loss": 0.25372982025146484, "step": 4777 }, { "epoch": 0.5797840067952919, "grad_norm": 2.236499071121216, "learning_rate": 8.51001105515293e-06, "loss": 0.19894060492515564, "step": 4778 }, { "epoch": 0.5799053512923189, "grad_norm": 3.641697645187378, "learning_rate": 8.507554354501905e-06, "loss": 0.286632239818573, "step": 4779 }, { "epoch": 0.580026695789346, "grad_norm": 1.9835792779922485, "learning_rate": 8.50509765385088e-06, "loss": 0.30654704570770264, "step": 4780 }, { "epoch": 0.580148040286373, "grad_norm": 4.646665573120117, "learning_rate": 8.502640953199854e-06, "loss": 0.17053008079528809, "step": 4781 }, { "epoch": 0.5802693847834001, "grad_norm": 3.070521593093872, "learning_rate": 8.500184252548828e-06, "loss": 0.24634458124637604, "step": 4782 }, { "epoch": 0.5803907292804271, "grad_norm": 2.007344961166382, "learning_rate": 8.497727551897802e-06, "loss": 0.10919144004583359, "step": 4783 }, { "epoch": 0.5805120737774542, "grad_norm": 2.7126834392547607, "learning_rate": 8.495270851246776e-06, "loss": 0.1839764416217804, "step": 4784 }, { "epoch": 0.5806334182744812, "grad_norm": 3.7217347621917725, "learning_rate": 8.49281415059575e-06, "loss": 0.5151821374893188, "step": 4785 }, { "epoch": 0.5807547627715083, "grad_norm": 2.7659478187561035, "learning_rate": 8.490357449944725e-06, "loss": 0.24573221802711487, "step": 4786 }, { "epoch": 0.5808761072685353, "grad_norm": 1.9926369190216064, "learning_rate": 8.487900749293699e-06, "loss": 0.124819815158844, "step": 4787 }, { "epoch": 0.5809974517655624, "grad_norm": 2.9955086708068848, "learning_rate": 8.485444048642673e-06, "loss": 0.32017385959625244, "step": 4788 }, { "epoch": 0.5811187962625894, "grad_norm": 3.088501214981079, "learning_rate": 8.482987347991648e-06, "loss": 0.283455491065979, "step": 4789 }, { "epoch": 0.5812401407596165, "grad_norm": 2.8362207412719727, "learning_rate": 8.480530647340622e-06, "loss": 0.1286943107843399, "step": 4790 }, { "epoch": 0.5813614852566437, "grad_norm": 3.4135043621063232, "learning_rate": 8.478073946689596e-06, "loss": 0.38316118717193604, "step": 4791 }, { "epoch": 0.5814828297536707, "grad_norm": 3.243379831314087, "learning_rate": 8.47561724603857e-06, "loss": 0.23274675011634827, "step": 4792 }, { "epoch": 0.5816041742506978, "grad_norm": 2.8915443420410156, "learning_rate": 8.473160545387545e-06, "loss": 0.19641761481761932, "step": 4793 }, { "epoch": 0.5817255187477248, "grad_norm": 1.664831280708313, "learning_rate": 8.47070384473652e-06, "loss": 0.10790649056434631, "step": 4794 }, { "epoch": 0.5818468632447519, "grad_norm": 2.050881862640381, "learning_rate": 8.468247144085495e-06, "loss": 0.16045114398002625, "step": 4795 }, { "epoch": 0.5819682077417789, "grad_norm": 2.9262709617614746, "learning_rate": 8.465790443434469e-06, "loss": 0.6135281920433044, "step": 4796 }, { "epoch": 0.582089552238806, "grad_norm": 2.07201886177063, "learning_rate": 8.463333742783443e-06, "loss": 0.2009136974811554, "step": 4797 }, { "epoch": 0.582210896735833, "grad_norm": 4.733902931213379, "learning_rate": 8.460877042132418e-06, "loss": 0.18952949345111847, "step": 4798 }, { "epoch": 0.5823322412328601, "grad_norm": 2.8319785594940186, "learning_rate": 8.458420341481392e-06, "loss": 0.2759975492954254, "step": 4799 }, { "epoch": 0.5824535857298871, "grad_norm": 1.0371869802474976, "learning_rate": 8.455963640830366e-06, "loss": 0.07131935656070709, "step": 4800 }, { "epoch": 0.5825749302269142, "grad_norm": 1.4528043270111084, "learning_rate": 8.45350694017934e-06, "loss": 0.01148059032857418, "step": 4801 }, { "epoch": 0.5826962747239413, "grad_norm": 1.6123428344726562, "learning_rate": 8.451050239528315e-06, "loss": 0.041788335889577866, "step": 4802 }, { "epoch": 0.5828176192209683, "grad_norm": 2.146088123321533, "learning_rate": 8.448593538877289e-06, "loss": 0.34290069341659546, "step": 4803 }, { "epoch": 0.5829389637179954, "grad_norm": 2.6575448513031006, "learning_rate": 8.446136838226263e-06, "loss": 0.18728607892990112, "step": 4804 }, { "epoch": 0.5830603082150224, "grad_norm": 3.099613904953003, "learning_rate": 8.443680137575237e-06, "loss": 0.398285448551178, "step": 4805 }, { "epoch": 0.5831816527120495, "grad_norm": 1.349876880645752, "learning_rate": 8.441223436924212e-06, "loss": 0.07577229291200638, "step": 4806 }, { "epoch": 0.5833029972090765, "grad_norm": 3.4181690216064453, "learning_rate": 8.438766736273186e-06, "loss": 0.5053579807281494, "step": 4807 }, { "epoch": 0.5834243417061036, "grad_norm": 2.661022424697876, "learning_rate": 8.43631003562216e-06, "loss": 0.15136495232582092, "step": 4808 }, { "epoch": 0.5835456862031307, "grad_norm": 3.8564469814300537, "learning_rate": 8.433853334971134e-06, "loss": 0.38397639989852905, "step": 4809 }, { "epoch": 0.5836670307001578, "grad_norm": 2.661569833755493, "learning_rate": 8.431396634320109e-06, "loss": 0.3396529257297516, "step": 4810 }, { "epoch": 0.5837883751971849, "grad_norm": 2.1096351146698, "learning_rate": 8.428939933669083e-06, "loss": 0.18212434649467468, "step": 4811 }, { "epoch": 0.5839097196942119, "grad_norm": 2.5602078437805176, "learning_rate": 8.426483233018057e-06, "loss": 0.2641725540161133, "step": 4812 }, { "epoch": 0.584031064191239, "grad_norm": 2.9521026611328125, "learning_rate": 8.424026532367031e-06, "loss": 0.40929552912712097, "step": 4813 }, { "epoch": 0.584152408688266, "grad_norm": 3.7398362159729004, "learning_rate": 8.421569831716007e-06, "loss": 0.16453248262405396, "step": 4814 }, { "epoch": 0.5842737531852931, "grad_norm": 2.9919795989990234, "learning_rate": 8.419113131064982e-06, "loss": 0.34266138076782227, "step": 4815 }, { "epoch": 0.5843950976823201, "grad_norm": 2.4697251319885254, "learning_rate": 8.416656430413954e-06, "loss": 0.2197001576423645, "step": 4816 }, { "epoch": 0.5845164421793472, "grad_norm": 1.861261010169983, "learning_rate": 8.414199729762929e-06, "loss": 0.15283513069152832, "step": 4817 }, { "epoch": 0.5846377866763742, "grad_norm": 2.098623752593994, "learning_rate": 8.411743029111903e-06, "loss": 0.08336268365383148, "step": 4818 }, { "epoch": 0.5847591311734013, "grad_norm": 3.530374526977539, "learning_rate": 8.409286328460877e-06, "loss": 0.3948133587837219, "step": 4819 }, { "epoch": 0.5848804756704283, "grad_norm": 5.075887680053711, "learning_rate": 8.406829627809851e-06, "loss": 0.35386788845062256, "step": 4820 }, { "epoch": 0.5850018201674554, "grad_norm": 4.7112345695495605, "learning_rate": 8.404372927158826e-06, "loss": 0.26722416281700134, "step": 4821 }, { "epoch": 0.5851231646644824, "grad_norm": 3.265364646911621, "learning_rate": 8.4019162265078e-06, "loss": 0.3755974769592285, "step": 4822 }, { "epoch": 0.5852445091615095, "grad_norm": 2.1128063201904297, "learning_rate": 8.399459525856774e-06, "loss": 0.5681613683700562, "step": 4823 }, { "epoch": 0.5853658536585366, "grad_norm": 3.237454891204834, "learning_rate": 8.397002825205748e-06, "loss": 0.31261223554611206, "step": 4824 }, { "epoch": 0.5854871981555636, "grad_norm": 1.3415945768356323, "learning_rate": 8.394546124554723e-06, "loss": 0.1441393345594406, "step": 4825 }, { "epoch": 0.5856085426525907, "grad_norm": 2.925435781478882, "learning_rate": 8.392089423903697e-06, "loss": 0.3544679284095764, "step": 4826 }, { "epoch": 0.5857298871496177, "grad_norm": 3.5000503063201904, "learning_rate": 8.389632723252673e-06, "loss": 0.3691284656524658, "step": 4827 }, { "epoch": 0.5858512316466449, "grad_norm": 3.267124652862549, "learning_rate": 8.387176022601647e-06, "loss": 0.3643167018890381, "step": 4828 }, { "epoch": 0.5859725761436719, "grad_norm": 3.5892388820648193, "learning_rate": 8.384719321950621e-06, "loss": 0.19309209287166595, "step": 4829 }, { "epoch": 0.586093920640699, "grad_norm": 1.6001935005187988, "learning_rate": 8.382262621299596e-06, "loss": 0.32333236932754517, "step": 4830 }, { "epoch": 0.586215265137726, "grad_norm": 1.896188735961914, "learning_rate": 8.37980592064857e-06, "loss": 0.15367908775806427, "step": 4831 }, { "epoch": 0.5863366096347531, "grad_norm": 2.819467067718506, "learning_rate": 8.377349219997544e-06, "loss": 0.21954113245010376, "step": 4832 }, { "epoch": 0.5864579541317801, "grad_norm": 2.5720043182373047, "learning_rate": 8.374892519346518e-06, "loss": 0.12032808363437653, "step": 4833 }, { "epoch": 0.5865792986288072, "grad_norm": 3.0199387073516846, "learning_rate": 8.372435818695493e-06, "loss": 0.7560880184173584, "step": 4834 }, { "epoch": 0.5867006431258343, "grad_norm": 2.8504323959350586, "learning_rate": 8.369979118044467e-06, "loss": 0.7607696056365967, "step": 4835 }, { "epoch": 0.5868219876228613, "grad_norm": 2.2772388458251953, "learning_rate": 8.367522417393441e-06, "loss": 0.13826005160808563, "step": 4836 }, { "epoch": 0.5869433321198884, "grad_norm": 4.252085208892822, "learning_rate": 8.365065716742415e-06, "loss": 0.3288404941558838, "step": 4837 }, { "epoch": 0.5870646766169154, "grad_norm": 3.9891278743743896, "learning_rate": 8.36260901609139e-06, "loss": 0.17529702186584473, "step": 4838 }, { "epoch": 0.5871860211139425, "grad_norm": 3.4640860557556152, "learning_rate": 8.360152315440364e-06, "loss": 0.31302693486213684, "step": 4839 }, { "epoch": 0.5873073656109695, "grad_norm": 2.776254892349243, "learning_rate": 8.357695614789338e-06, "loss": 0.10631134361028671, "step": 4840 }, { "epoch": 0.5874287101079966, "grad_norm": 3.0740268230438232, "learning_rate": 8.355238914138312e-06, "loss": 0.19883981347084045, "step": 4841 }, { "epoch": 0.5875500546050236, "grad_norm": 1.7030606269836426, "learning_rate": 8.352782213487287e-06, "loss": 0.5683722496032715, "step": 4842 }, { "epoch": 0.5876713991020507, "grad_norm": 1.876086950302124, "learning_rate": 8.350325512836261e-06, "loss": 0.33288562297821045, "step": 4843 }, { "epoch": 0.5877927435990777, "grad_norm": 3.218090534210205, "learning_rate": 8.347868812185235e-06, "loss": 0.2539356052875519, "step": 4844 }, { "epoch": 0.5879140880961048, "grad_norm": 1.9674046039581299, "learning_rate": 8.34541211153421e-06, "loss": 0.13256607949733734, "step": 4845 }, { "epoch": 0.5880354325931318, "grad_norm": 2.7254817485809326, "learning_rate": 8.342955410883184e-06, "loss": 0.2237296998500824, "step": 4846 }, { "epoch": 0.588156777090159, "grad_norm": 2.3862967491149902, "learning_rate": 8.34049871023216e-06, "loss": 0.13859593868255615, "step": 4847 }, { "epoch": 0.5882781215871861, "grad_norm": 2.9677910804748535, "learning_rate": 8.338042009581134e-06, "loss": 0.38353800773620605, "step": 4848 }, { "epoch": 0.5883994660842131, "grad_norm": 2.8074543476104736, "learning_rate": 8.335585308930108e-06, "loss": 0.24441176652908325, "step": 4849 }, { "epoch": 0.5885208105812402, "grad_norm": 2.0923867225646973, "learning_rate": 8.333128608279082e-06, "loss": 0.0680294781923294, "step": 4850 }, { "epoch": 0.5886421550782672, "grad_norm": 3.017760753631592, "learning_rate": 8.330671907628057e-06, "loss": 0.39237746596336365, "step": 4851 }, { "epoch": 0.5887634995752943, "grad_norm": 3.0075600147247314, "learning_rate": 8.328215206977031e-06, "loss": 0.40487807989120483, "step": 4852 }, { "epoch": 0.5888848440723213, "grad_norm": 4.2741780281066895, "learning_rate": 8.325758506326005e-06, "loss": 0.3545905351638794, "step": 4853 }, { "epoch": 0.5890061885693484, "grad_norm": 2.6837165355682373, "learning_rate": 8.32330180567498e-06, "loss": 0.5662364959716797, "step": 4854 }, { "epoch": 0.5891275330663754, "grad_norm": 3.5712952613830566, "learning_rate": 8.320845105023954e-06, "loss": 0.2107972949743271, "step": 4855 }, { "epoch": 0.5892488775634025, "grad_norm": 2.3162617683410645, "learning_rate": 8.318388404372928e-06, "loss": 0.09335719048976898, "step": 4856 }, { "epoch": 0.5893702220604295, "grad_norm": 0.8508409261703491, "learning_rate": 8.315931703721902e-06, "loss": 0.014101793989539146, "step": 4857 }, { "epoch": 0.5894915665574566, "grad_norm": 2.4899237155914307, "learning_rate": 8.313475003070877e-06, "loss": 0.11831708252429962, "step": 4858 }, { "epoch": 0.5896129110544837, "grad_norm": 2.593254327774048, "learning_rate": 8.31101830241985e-06, "loss": 0.54585862159729, "step": 4859 }, { "epoch": 0.5897342555515107, "grad_norm": 2.626678943634033, "learning_rate": 8.308561601768825e-06, "loss": 0.4251805245876312, "step": 4860 }, { "epoch": 0.5898556000485378, "grad_norm": 2.680504322052002, "learning_rate": 8.3061049011178e-06, "loss": 0.07961871474981308, "step": 4861 }, { "epoch": 0.5899769445455648, "grad_norm": 2.0370588302612305, "learning_rate": 8.303648200466774e-06, "loss": 0.23664435744285583, "step": 4862 }, { "epoch": 0.5900982890425919, "grad_norm": 3.232109785079956, "learning_rate": 8.301191499815748e-06, "loss": 0.2291097342967987, "step": 4863 }, { "epoch": 0.5902196335396189, "grad_norm": 2.9229607582092285, "learning_rate": 8.298734799164722e-06, "loss": 0.3763829171657562, "step": 4864 }, { "epoch": 0.5903409780366461, "grad_norm": 1.2411973476409912, "learning_rate": 8.296278098513696e-06, "loss": 0.036167170852422714, "step": 4865 }, { "epoch": 0.5904623225336731, "grad_norm": 2.9300179481506348, "learning_rate": 8.29382139786267e-06, "loss": 0.11425483971834183, "step": 4866 }, { "epoch": 0.5905836670307002, "grad_norm": 3.284384250640869, "learning_rate": 8.291364697211647e-06, "loss": 0.3564888834953308, "step": 4867 }, { "epoch": 0.5907050115277273, "grad_norm": 2.3149406909942627, "learning_rate": 8.28890799656062e-06, "loss": 0.28983408212661743, "step": 4868 }, { "epoch": 0.5908263560247543, "grad_norm": 2.6460511684417725, "learning_rate": 8.286451295909595e-06, "loss": 0.24226048588752747, "step": 4869 }, { "epoch": 0.5909477005217814, "grad_norm": 0.20117826759815216, "learning_rate": 8.28399459525857e-06, "loss": 0.0024678686168044806, "step": 4870 }, { "epoch": 0.5910690450188084, "grad_norm": 3.132948637008667, "learning_rate": 8.281537894607544e-06, "loss": 0.2914450764656067, "step": 4871 }, { "epoch": 0.5911903895158355, "grad_norm": 2.6121127605438232, "learning_rate": 8.279081193956518e-06, "loss": 0.3137444853782654, "step": 4872 }, { "epoch": 0.5913117340128625, "grad_norm": 2.8972864151000977, "learning_rate": 8.276624493305492e-06, "loss": 0.18376387655735016, "step": 4873 }, { "epoch": 0.5914330785098896, "grad_norm": 3.6977860927581787, "learning_rate": 8.274167792654466e-06, "loss": 0.15052613615989685, "step": 4874 }, { "epoch": 0.5915544230069166, "grad_norm": 3.4427833557128906, "learning_rate": 8.27171109200344e-06, "loss": 0.18915200233459473, "step": 4875 }, { "epoch": 0.5916757675039437, "grad_norm": 4.190464019775391, "learning_rate": 8.269254391352415e-06, "loss": 0.274960458278656, "step": 4876 }, { "epoch": 0.5917971120009707, "grad_norm": 1.515971064567566, "learning_rate": 8.266797690701389e-06, "loss": 0.12423788011074066, "step": 4877 }, { "epoch": 0.5919184564979978, "grad_norm": 3.474355459213257, "learning_rate": 8.264340990050363e-06, "loss": 0.29963234066963196, "step": 4878 }, { "epoch": 0.5920398009950248, "grad_norm": 2.7541823387145996, "learning_rate": 8.261884289399338e-06, "loss": 0.24850957095623016, "step": 4879 }, { "epoch": 0.5921611454920519, "grad_norm": 3.5696475505828857, "learning_rate": 8.259427588748312e-06, "loss": 0.2671099007129669, "step": 4880 }, { "epoch": 0.592282489989079, "grad_norm": 3.7899556159973145, "learning_rate": 8.256970888097286e-06, "loss": 0.18115632236003876, "step": 4881 }, { "epoch": 0.592403834486106, "grad_norm": 3.302109479904175, "learning_rate": 8.25451418744626e-06, "loss": 0.2514381408691406, "step": 4882 }, { "epoch": 0.5925251789831331, "grad_norm": 2.6111788749694824, "learning_rate": 8.252057486795235e-06, "loss": 0.3491509258747101, "step": 4883 }, { "epoch": 0.5926465234801602, "grad_norm": 3.5890560150146484, "learning_rate": 8.249600786144209e-06, "loss": 0.12296824157238007, "step": 4884 }, { "epoch": 0.5927678679771873, "grad_norm": 1.5223008394241333, "learning_rate": 8.247144085493183e-06, "loss": 0.08243507891893387, "step": 4885 }, { "epoch": 0.5928892124742143, "grad_norm": 2.5468361377716064, "learning_rate": 8.244687384842157e-06, "loss": 0.12586478888988495, "step": 4886 }, { "epoch": 0.5930105569712414, "grad_norm": 3.812387228012085, "learning_rate": 8.242230684191132e-06, "loss": 0.4913281202316284, "step": 4887 }, { "epoch": 0.5931319014682684, "grad_norm": 1.89600670337677, "learning_rate": 8.239773983540106e-06, "loss": 0.520455539226532, "step": 4888 }, { "epoch": 0.5932532459652955, "grad_norm": 2.7963790893554688, "learning_rate": 8.23731728288908e-06, "loss": 0.15355242788791656, "step": 4889 }, { "epoch": 0.5933745904623225, "grad_norm": 4.67811393737793, "learning_rate": 8.234860582238054e-06, "loss": 0.21717412769794464, "step": 4890 }, { "epoch": 0.5934959349593496, "grad_norm": 2.145292282104492, "learning_rate": 8.232403881587029e-06, "loss": 0.07317359745502472, "step": 4891 }, { "epoch": 0.5936172794563767, "grad_norm": 1.8363304138183594, "learning_rate": 8.229947180936003e-06, "loss": 0.11386632919311523, "step": 4892 }, { "epoch": 0.5937386239534037, "grad_norm": 2.6897947788238525, "learning_rate": 8.227490480284977e-06, "loss": 0.1288149207830429, "step": 4893 }, { "epoch": 0.5938599684504308, "grad_norm": 3.0684075355529785, "learning_rate": 8.225033779633951e-06, "loss": 0.2625901997089386, "step": 4894 }, { "epoch": 0.5939813129474578, "grad_norm": 3.239774703979492, "learning_rate": 8.222577078982926e-06, "loss": 0.3578149080276489, "step": 4895 }, { "epoch": 0.5941026574444849, "grad_norm": 2.3085806369781494, "learning_rate": 8.2201203783319e-06, "loss": 0.16322527825832367, "step": 4896 }, { "epoch": 0.5942240019415119, "grad_norm": 1.8863133192062378, "learning_rate": 8.217663677680874e-06, "loss": 0.3666040599346161, "step": 4897 }, { "epoch": 0.594345346438539, "grad_norm": 4.101634502410889, "learning_rate": 8.215206977029849e-06, "loss": 0.12799082696437836, "step": 4898 }, { "epoch": 0.594466690935566, "grad_norm": 2.5980849266052246, "learning_rate": 8.212750276378823e-06, "loss": 0.3090270757675171, "step": 4899 }, { "epoch": 0.5945880354325931, "grad_norm": 3.175664186477661, "learning_rate": 8.210293575727799e-06, "loss": 0.5135940909385681, "step": 4900 }, { "epoch": 0.5947093799296201, "grad_norm": 3.282644510269165, "learning_rate": 8.207836875076773e-06, "loss": 0.10034079104661942, "step": 4901 }, { "epoch": 0.5948307244266473, "grad_norm": 1.4776768684387207, "learning_rate": 8.205380174425747e-06, "loss": 0.13272124528884888, "step": 4902 }, { "epoch": 0.5949520689236744, "grad_norm": 3.921097755432129, "learning_rate": 8.202923473774722e-06, "loss": 0.5469210743904114, "step": 4903 }, { "epoch": 0.5950734134207014, "grad_norm": 4.039199352264404, "learning_rate": 8.200466773123696e-06, "loss": 0.562058687210083, "step": 4904 }, { "epoch": 0.5951947579177285, "grad_norm": 3.7927656173706055, "learning_rate": 8.19801007247267e-06, "loss": 0.2531275451183319, "step": 4905 }, { "epoch": 0.5953161024147555, "grad_norm": 1.6802647113800049, "learning_rate": 8.195553371821644e-06, "loss": 0.2857990562915802, "step": 4906 }, { "epoch": 0.5954374469117826, "grad_norm": 1.7148853540420532, "learning_rate": 8.193096671170619e-06, "loss": 0.19474482536315918, "step": 4907 }, { "epoch": 0.5955587914088096, "grad_norm": 2.656442403793335, "learning_rate": 8.190639970519593e-06, "loss": 0.27169013023376465, "step": 4908 }, { "epoch": 0.5956801359058367, "grad_norm": 1.5421209335327148, "learning_rate": 8.188183269868567e-06, "loss": 0.11355651915073395, "step": 4909 }, { "epoch": 0.5958014804028637, "grad_norm": 2.569021224975586, "learning_rate": 8.185726569217541e-06, "loss": 0.2665570378303528, "step": 4910 }, { "epoch": 0.5959228248998908, "grad_norm": 6.12681770324707, "learning_rate": 8.183269868566516e-06, "loss": 0.23138393461704254, "step": 4911 }, { "epoch": 0.5960441693969178, "grad_norm": 2.869800090789795, "learning_rate": 8.18081316791549e-06, "loss": 0.4892810583114624, "step": 4912 }, { "epoch": 0.5961655138939449, "grad_norm": 1.7092455625534058, "learning_rate": 8.178356467264464e-06, "loss": 0.058704257011413574, "step": 4913 }, { "epoch": 0.596286858390972, "grad_norm": 1.66459321975708, "learning_rate": 8.175899766613438e-06, "loss": 0.13633720576763153, "step": 4914 }, { "epoch": 0.596408202887999, "grad_norm": 2.1635212898254395, "learning_rate": 8.173443065962413e-06, "loss": 0.09913753718137741, "step": 4915 }, { "epoch": 0.5965295473850261, "grad_norm": 3.4829602241516113, "learning_rate": 8.170986365311387e-06, "loss": 0.4390674829483032, "step": 4916 }, { "epoch": 0.5966508918820531, "grad_norm": 2.564565896987915, "learning_rate": 8.168529664660361e-06, "loss": 0.7062397003173828, "step": 4917 }, { "epoch": 0.5967722363790802, "grad_norm": 1.5156720876693726, "learning_rate": 8.166072964009335e-06, "loss": 0.10012664645910263, "step": 4918 }, { "epoch": 0.5968935808761072, "grad_norm": 4.316605091094971, "learning_rate": 8.16361626335831e-06, "loss": 0.31636524200439453, "step": 4919 }, { "epoch": 0.5970149253731343, "grad_norm": 2.1977221965789795, "learning_rate": 8.161159562707286e-06, "loss": 0.13649821281433105, "step": 4920 }, { "epoch": 0.5971362698701614, "grad_norm": 2.2444705963134766, "learning_rate": 8.15870286205626e-06, "loss": 0.4829500913619995, "step": 4921 }, { "epoch": 0.5972576143671885, "grad_norm": 3.449131965637207, "learning_rate": 8.156246161405234e-06, "loss": 0.30537745356559753, "step": 4922 }, { "epoch": 0.5973789588642155, "grad_norm": 1.4344327449798584, "learning_rate": 8.153789460754208e-06, "loss": 0.03976817801594734, "step": 4923 }, { "epoch": 0.5975003033612426, "grad_norm": 3.1223433017730713, "learning_rate": 8.151332760103183e-06, "loss": 0.4055136442184448, "step": 4924 }, { "epoch": 0.5976216478582697, "grad_norm": 3.8170249462127686, "learning_rate": 8.148876059452157e-06, "loss": 0.39578771591186523, "step": 4925 }, { "epoch": 0.5977429923552967, "grad_norm": 2.76593017578125, "learning_rate": 8.146419358801131e-06, "loss": 0.0751633420586586, "step": 4926 }, { "epoch": 0.5978643368523238, "grad_norm": 4.092085361480713, "learning_rate": 8.143962658150105e-06, "loss": 0.1952417641878128, "step": 4927 }, { "epoch": 0.5979856813493508, "grad_norm": 0.746239423751831, "learning_rate": 8.14150595749908e-06, "loss": 0.045013438910245895, "step": 4928 }, { "epoch": 0.5981070258463779, "grad_norm": 1.7731306552886963, "learning_rate": 8.139049256848054e-06, "loss": 0.28632959723472595, "step": 4929 }, { "epoch": 0.5982283703434049, "grad_norm": 5.450740814208984, "learning_rate": 8.136592556197028e-06, "loss": 0.280550092458725, "step": 4930 }, { "epoch": 0.598349714840432, "grad_norm": 3.1241631507873535, "learning_rate": 8.134135855546002e-06, "loss": 0.40991276502609253, "step": 4931 }, { "epoch": 0.598471059337459, "grad_norm": 0.8131852746009827, "learning_rate": 8.131679154894977e-06, "loss": 0.03431374207139015, "step": 4932 }, { "epoch": 0.5985924038344861, "grad_norm": 2.933363676071167, "learning_rate": 8.129222454243951e-06, "loss": 0.258465051651001, "step": 4933 }, { "epoch": 0.5987137483315131, "grad_norm": 2.5620806217193604, "learning_rate": 8.126765753592925e-06, "loss": 0.1636987030506134, "step": 4934 }, { "epoch": 0.5988350928285402, "grad_norm": 3.6974592208862305, "learning_rate": 8.1243090529419e-06, "loss": 0.4084409475326538, "step": 4935 }, { "epoch": 0.5989564373255672, "grad_norm": 0.839344322681427, "learning_rate": 8.121852352290874e-06, "loss": 0.03876178339123726, "step": 4936 }, { "epoch": 0.5990777818225943, "grad_norm": 1.598281741142273, "learning_rate": 8.119395651639848e-06, "loss": 0.09284627437591553, "step": 4937 }, { "epoch": 0.5991991263196214, "grad_norm": 2.860506534576416, "learning_rate": 8.116938950988822e-06, "loss": 0.2387407422065735, "step": 4938 }, { "epoch": 0.5993204708166484, "grad_norm": 4.720833778381348, "learning_rate": 8.114482250337798e-06, "loss": 0.3659774363040924, "step": 4939 }, { "epoch": 0.5994418153136756, "grad_norm": 2.1442627906799316, "learning_rate": 8.112025549686772e-06, "loss": 0.10479340702295303, "step": 4940 }, { "epoch": 0.5995631598107026, "grad_norm": 4.0736823081970215, "learning_rate": 8.109568849035747e-06, "loss": 0.1675359010696411, "step": 4941 }, { "epoch": 0.5996845043077297, "grad_norm": 3.340693712234497, "learning_rate": 8.107112148384721e-06, "loss": 0.20808559656143188, "step": 4942 }, { "epoch": 0.5998058488047567, "grad_norm": 4.53356409072876, "learning_rate": 8.104655447733695e-06, "loss": 0.4518565535545349, "step": 4943 }, { "epoch": 0.5999271933017838, "grad_norm": 2.4527876377105713, "learning_rate": 8.10219874708267e-06, "loss": 0.18983863294124603, "step": 4944 }, { "epoch": 0.6000485377988108, "grad_norm": 3.633119821548462, "learning_rate": 8.099742046431644e-06, "loss": 0.5405551195144653, "step": 4945 }, { "epoch": 0.6001698822958379, "grad_norm": 2.6303248405456543, "learning_rate": 8.097285345780618e-06, "loss": 0.22900894284248352, "step": 4946 }, { "epoch": 0.600291226792865, "grad_norm": 1.9993830919265747, "learning_rate": 8.094828645129592e-06, "loss": 0.06924360245466232, "step": 4947 }, { "epoch": 0.600412571289892, "grad_norm": 2.905698776245117, "learning_rate": 8.092371944478567e-06, "loss": 0.23253166675567627, "step": 4948 }, { "epoch": 0.6005339157869191, "grad_norm": 1.7375648021697998, "learning_rate": 8.08991524382754e-06, "loss": 0.25840383768081665, "step": 4949 }, { "epoch": 0.6006552602839461, "grad_norm": 1.8935976028442383, "learning_rate": 8.087458543176515e-06, "loss": 0.3991242051124573, "step": 4950 }, { "epoch": 0.6007766047809732, "grad_norm": 2.5214953422546387, "learning_rate": 8.08500184252549e-06, "loss": 0.29310426115989685, "step": 4951 }, { "epoch": 0.6008979492780002, "grad_norm": 2.5790224075317383, "learning_rate": 8.082545141874464e-06, "loss": 0.060612622648477554, "step": 4952 }, { "epoch": 0.6010192937750273, "grad_norm": 4.7028422355651855, "learning_rate": 8.080088441223438e-06, "loss": 0.8238290548324585, "step": 4953 }, { "epoch": 0.6011406382720543, "grad_norm": 2.918485403060913, "learning_rate": 8.077631740572412e-06, "loss": 0.16626369953155518, "step": 4954 }, { "epoch": 0.6012619827690814, "grad_norm": 3.310537099838257, "learning_rate": 8.075175039921386e-06, "loss": 0.09575515985488892, "step": 4955 }, { "epoch": 0.6013833272661084, "grad_norm": 2.850628614425659, "learning_rate": 8.07271833927036e-06, "loss": 0.2598009407520294, "step": 4956 }, { "epoch": 0.6015046717631355, "grad_norm": 3.268893241882324, "learning_rate": 8.070261638619335e-06, "loss": 0.2722715139389038, "step": 4957 }, { "epoch": 0.6016260162601627, "grad_norm": 3.6891651153564453, "learning_rate": 8.067804937968309e-06, "loss": 0.03528110682964325, "step": 4958 }, { "epoch": 0.6017473607571897, "grad_norm": 3.009472608566284, "learning_rate": 8.065348237317283e-06, "loss": 0.40958213806152344, "step": 4959 }, { "epoch": 0.6018687052542168, "grad_norm": 2.2466647624969482, "learning_rate": 8.062891536666258e-06, "loss": 0.48619869351387024, "step": 4960 }, { "epoch": 0.6019900497512438, "grad_norm": 3.892745018005371, "learning_rate": 8.060434836015232e-06, "loss": 0.12275706231594086, "step": 4961 }, { "epoch": 0.6021113942482709, "grad_norm": 3.2257652282714844, "learning_rate": 8.057978135364206e-06, "loss": 0.2827064096927643, "step": 4962 }, { "epoch": 0.6022327387452979, "grad_norm": 2.2588863372802734, "learning_rate": 8.05552143471318e-06, "loss": 0.35299065709114075, "step": 4963 }, { "epoch": 0.602354083242325, "grad_norm": 4.953440189361572, "learning_rate": 8.053064734062155e-06, "loss": 0.2678789794445038, "step": 4964 }, { "epoch": 0.602475427739352, "grad_norm": 1.701935052871704, "learning_rate": 8.050608033411129e-06, "loss": 0.04540437087416649, "step": 4965 }, { "epoch": 0.6025967722363791, "grad_norm": 4.7477898597717285, "learning_rate": 8.048151332760103e-06, "loss": 0.6357859373092651, "step": 4966 }, { "epoch": 0.6027181167334061, "grad_norm": 2.3043951988220215, "learning_rate": 8.045694632109077e-06, "loss": 0.23349282145500183, "step": 4967 }, { "epoch": 0.6028394612304332, "grad_norm": 2.0508289337158203, "learning_rate": 8.043237931458052e-06, "loss": 0.11859741061925888, "step": 4968 }, { "epoch": 0.6029608057274602, "grad_norm": 2.6492397785186768, "learning_rate": 8.040781230807026e-06, "loss": 0.24407370388507843, "step": 4969 }, { "epoch": 0.6030821502244873, "grad_norm": 3.548419237136841, "learning_rate": 8.038324530156e-06, "loss": 0.21287448704242706, "step": 4970 }, { "epoch": 0.6032034947215144, "grad_norm": 2.1876282691955566, "learning_rate": 8.035867829504974e-06, "loss": 0.08959885686635971, "step": 4971 }, { "epoch": 0.6033248392185414, "grad_norm": 1.7002156972885132, "learning_rate": 8.03341112885395e-06, "loss": 0.11170674115419388, "step": 4972 }, { "epoch": 0.6034461837155685, "grad_norm": 2.1888082027435303, "learning_rate": 8.030954428202925e-06, "loss": 1.1892191171646118, "step": 4973 }, { "epoch": 0.6035675282125955, "grad_norm": 1.943291187286377, "learning_rate": 8.028497727551899e-06, "loss": 0.10716716945171356, "step": 4974 }, { "epoch": 0.6036888727096226, "grad_norm": 3.666123628616333, "learning_rate": 8.026041026900873e-06, "loss": 0.690584123134613, "step": 4975 }, { "epoch": 0.6038102172066496, "grad_norm": 2.343515396118164, "learning_rate": 8.023584326249847e-06, "loss": 0.0430336557328701, "step": 4976 }, { "epoch": 0.6039315617036768, "grad_norm": 0.4914427697658539, "learning_rate": 8.021127625598822e-06, "loss": 0.006635440047830343, "step": 4977 }, { "epoch": 0.6040529062007038, "grad_norm": 1.3789258003234863, "learning_rate": 8.018670924947796e-06, "loss": 0.06812597066164017, "step": 4978 }, { "epoch": 0.6041742506977309, "grad_norm": 2.700730562210083, "learning_rate": 8.01621422429677e-06, "loss": 0.36649149656295776, "step": 4979 }, { "epoch": 0.604295595194758, "grad_norm": 2.317082166671753, "learning_rate": 8.013757523645744e-06, "loss": 0.3082185983657837, "step": 4980 }, { "epoch": 0.604416939691785, "grad_norm": 0.8955159783363342, "learning_rate": 8.011300822994719e-06, "loss": 0.03570184484124184, "step": 4981 }, { "epoch": 0.6045382841888121, "grad_norm": 2.685279130935669, "learning_rate": 8.008844122343693e-06, "loss": 0.10023897141218185, "step": 4982 }, { "epoch": 0.6046596286858391, "grad_norm": 2.5496881008148193, "learning_rate": 8.006387421692667e-06, "loss": 0.1904086023569107, "step": 4983 }, { "epoch": 0.6047809731828662, "grad_norm": 2.514430522918701, "learning_rate": 8.003930721041642e-06, "loss": 0.05790841206908226, "step": 4984 }, { "epoch": 0.6049023176798932, "grad_norm": 2.8593924045562744, "learning_rate": 8.001474020390616e-06, "loss": 0.10597558319568634, "step": 4985 }, { "epoch": 0.6050236621769203, "grad_norm": 3.378652334213257, "learning_rate": 7.99901731973959e-06, "loss": 0.15676720440387726, "step": 4986 }, { "epoch": 0.6051450066739473, "grad_norm": 2.2277534008026123, "learning_rate": 7.996560619088564e-06, "loss": 0.12349286675453186, "step": 4987 }, { "epoch": 0.6052663511709744, "grad_norm": 1.8536624908447266, "learning_rate": 7.994103918437539e-06, "loss": 0.25664010643959045, "step": 4988 }, { "epoch": 0.6053876956680014, "grad_norm": 2.3955867290496826, "learning_rate": 7.991647217786513e-06, "loss": 0.4799758195877075, "step": 4989 }, { "epoch": 0.6055090401650285, "grad_norm": 2.3670573234558105, "learning_rate": 7.989190517135487e-06, "loss": 0.21658958494663239, "step": 4990 }, { "epoch": 0.6056303846620555, "grad_norm": 3.5387184619903564, "learning_rate": 7.986733816484461e-06, "loss": 0.6672046780586243, "step": 4991 }, { "epoch": 0.6057517291590826, "grad_norm": 2.8642866611480713, "learning_rate": 7.984277115833437e-06, "loss": 0.31506139039993286, "step": 4992 }, { "epoch": 0.6058730736561097, "grad_norm": 2.2095093727111816, "learning_rate": 7.981820415182412e-06, "loss": 0.1362318992614746, "step": 4993 }, { "epoch": 0.6059944181531367, "grad_norm": 2.0428056716918945, "learning_rate": 7.979363714531386e-06, "loss": 0.13638094067573547, "step": 4994 }, { "epoch": 0.6061157626501639, "grad_norm": 1.7997664213180542, "learning_rate": 7.97690701388036e-06, "loss": 0.11015208065509796, "step": 4995 }, { "epoch": 0.6062371071471909, "grad_norm": 2.2334229946136475, "learning_rate": 7.974450313229334e-06, "loss": 0.4246353507041931, "step": 4996 }, { "epoch": 0.606358451644218, "grad_norm": 3.2942206859588623, "learning_rate": 7.971993612578309e-06, "loss": 0.032625336199998856, "step": 4997 }, { "epoch": 0.606479796141245, "grad_norm": 4.750143527984619, "learning_rate": 7.969536911927283e-06, "loss": 0.36150041222572327, "step": 4998 }, { "epoch": 0.6066011406382721, "grad_norm": 3.3749117851257324, "learning_rate": 7.967080211276257e-06, "loss": 0.3516336977481842, "step": 4999 }, { "epoch": 0.6067224851352991, "grad_norm": 1.5636332035064697, "learning_rate": 7.964623510625231e-06, "loss": 0.11021507531404495, "step": 5000 }, { "epoch": 0.6068438296323262, "grad_norm": 1.3467923402786255, "learning_rate": 7.962166809974206e-06, "loss": 0.10061002522706985, "step": 5001 }, { "epoch": 0.6069651741293532, "grad_norm": 3.274048328399658, "learning_rate": 7.95971010932318e-06, "loss": 0.23810824751853943, "step": 5002 }, { "epoch": 0.6070865186263803, "grad_norm": 1.8316787481307983, "learning_rate": 7.957253408672154e-06, "loss": 0.5422258377075195, "step": 5003 }, { "epoch": 0.6072078631234074, "grad_norm": 2.667977809906006, "learning_rate": 7.954796708021128e-06, "loss": 0.09888293594121933, "step": 5004 }, { "epoch": 0.6073292076204344, "grad_norm": 2.4327571392059326, "learning_rate": 7.952340007370103e-06, "loss": 0.10190269351005554, "step": 5005 }, { "epoch": 0.6074505521174615, "grad_norm": 2.737102746963501, "learning_rate": 7.949883306719077e-06, "loss": 0.3887120485305786, "step": 5006 }, { "epoch": 0.6075718966144885, "grad_norm": 3.709571123123169, "learning_rate": 7.947426606068051e-06, "loss": 0.2243715226650238, "step": 5007 }, { "epoch": 0.6076932411115156, "grad_norm": 2.4994356632232666, "learning_rate": 7.944969905417025e-06, "loss": 0.12882472574710846, "step": 5008 }, { "epoch": 0.6078145856085426, "grad_norm": 2.8674659729003906, "learning_rate": 7.942513204766e-06, "loss": 0.29750263690948486, "step": 5009 }, { "epoch": 0.6079359301055697, "grad_norm": 2.6985597610473633, "learning_rate": 7.940056504114974e-06, "loss": 0.40544816851615906, "step": 5010 }, { "epoch": 0.6080572746025967, "grad_norm": 3.4858715534210205, "learning_rate": 7.937599803463948e-06, "loss": 0.21399088203907013, "step": 5011 }, { "epoch": 0.6081786190996238, "grad_norm": 1.7172353267669678, "learning_rate": 7.935143102812924e-06, "loss": 0.11463232338428497, "step": 5012 }, { "epoch": 0.6082999635966508, "grad_norm": 2.159005880355835, "learning_rate": 7.932686402161898e-06, "loss": 0.16871798038482666, "step": 5013 }, { "epoch": 0.608421308093678, "grad_norm": 1.4525233507156372, "learning_rate": 7.930229701510873e-06, "loss": 0.2686057388782501, "step": 5014 }, { "epoch": 0.6085426525907051, "grad_norm": 2.6442062854766846, "learning_rate": 7.927773000859847e-06, "loss": 0.34124258160591125, "step": 5015 }, { "epoch": 0.6086639970877321, "grad_norm": 1.4025843143463135, "learning_rate": 7.925316300208821e-06, "loss": 0.023755650967359543, "step": 5016 }, { "epoch": 0.6087853415847592, "grad_norm": 3.0993549823760986, "learning_rate": 7.922859599557795e-06, "loss": 0.27893584966659546, "step": 5017 }, { "epoch": 0.6089066860817862, "grad_norm": 2.7398300170898438, "learning_rate": 7.92040289890677e-06, "loss": 0.5425594449043274, "step": 5018 }, { "epoch": 0.6090280305788133, "grad_norm": 2.2794017791748047, "learning_rate": 7.917946198255744e-06, "loss": 0.23282457888126373, "step": 5019 }, { "epoch": 0.6091493750758403, "grad_norm": 2.3547658920288086, "learning_rate": 7.915489497604716e-06, "loss": 0.15604516863822937, "step": 5020 }, { "epoch": 0.6092707195728674, "grad_norm": 4.0555524826049805, "learning_rate": 7.91303279695369e-06, "loss": 0.6190059781074524, "step": 5021 }, { "epoch": 0.6093920640698944, "grad_norm": 2.843796730041504, "learning_rate": 7.910576096302665e-06, "loss": 0.21953535079956055, "step": 5022 }, { "epoch": 0.6095134085669215, "grad_norm": 2.7207107543945312, "learning_rate": 7.90811939565164e-06, "loss": 0.3967467248439789, "step": 5023 }, { "epoch": 0.6096347530639485, "grad_norm": 2.566718816757202, "learning_rate": 7.905662695000614e-06, "loss": 0.5843710899353027, "step": 5024 }, { "epoch": 0.6097560975609756, "grad_norm": 2.5708975791931152, "learning_rate": 7.90320599434959e-06, "loss": 0.2545311152935028, "step": 5025 }, { "epoch": 0.6098774420580027, "grad_norm": 3.2102577686309814, "learning_rate": 7.900749293698564e-06, "loss": 0.4721425771713257, "step": 5026 }, { "epoch": 0.6099987865550297, "grad_norm": 1.5845712423324585, "learning_rate": 7.898292593047538e-06, "loss": 0.033341266214847565, "step": 5027 }, { "epoch": 0.6101201310520568, "grad_norm": 3.7597010135650635, "learning_rate": 7.895835892396512e-06, "loss": 0.5398728251457214, "step": 5028 }, { "epoch": 0.6102414755490838, "grad_norm": 1.2103767395019531, "learning_rate": 7.893379191745487e-06, "loss": 0.01152086816728115, "step": 5029 }, { "epoch": 0.6103628200461109, "grad_norm": 3.8116137981414795, "learning_rate": 7.89092249109446e-06, "loss": 0.25407129526138306, "step": 5030 }, { "epoch": 0.6104841645431379, "grad_norm": 4.166960716247559, "learning_rate": 7.888465790443435e-06, "loss": 0.43297919631004333, "step": 5031 }, { "epoch": 0.610605509040165, "grad_norm": 3.2523417472839355, "learning_rate": 7.88600908979241e-06, "loss": 0.22620409727096558, "step": 5032 }, { "epoch": 0.6107268535371921, "grad_norm": 1.6338255405426025, "learning_rate": 7.883552389141384e-06, "loss": 0.06129894778132439, "step": 5033 }, { "epoch": 0.6108481980342192, "grad_norm": 3.5130200386047363, "learning_rate": 7.881095688490358e-06, "loss": 0.3082846701145172, "step": 5034 }, { "epoch": 0.6109695425312462, "grad_norm": 4.574677467346191, "learning_rate": 7.878638987839332e-06, "loss": 0.3582816421985626, "step": 5035 }, { "epoch": 0.6110908870282733, "grad_norm": 2.2764151096343994, "learning_rate": 7.876182287188306e-06, "loss": 0.18171778321266174, "step": 5036 }, { "epoch": 0.6112122315253004, "grad_norm": 2.7231826782226562, "learning_rate": 7.87372558653728e-06, "loss": 0.2322385013103485, "step": 5037 }, { "epoch": 0.6113335760223274, "grad_norm": 2.8828442096710205, "learning_rate": 7.871268885886255e-06, "loss": 0.39386141300201416, "step": 5038 }, { "epoch": 0.6114549205193545, "grad_norm": 1.7847236394882202, "learning_rate": 7.868812185235229e-06, "loss": 0.05671222135424614, "step": 5039 }, { "epoch": 0.6115762650163815, "grad_norm": 3.4137165546417236, "learning_rate": 7.866355484584203e-06, "loss": 0.2834808826446533, "step": 5040 }, { "epoch": 0.6116976095134086, "grad_norm": 3.2117955684661865, "learning_rate": 7.863898783933178e-06, "loss": 0.24669750034809113, "step": 5041 }, { "epoch": 0.6118189540104356, "grad_norm": 2.5697784423828125, "learning_rate": 7.861442083282152e-06, "loss": 0.40070024132728577, "step": 5042 }, { "epoch": 0.6119402985074627, "grad_norm": 3.293456554412842, "learning_rate": 7.858985382631126e-06, "loss": 0.2214411050081253, "step": 5043 }, { "epoch": 0.6120616430044897, "grad_norm": 2.2387073040008545, "learning_rate": 7.8565286819801e-06, "loss": 0.034520845860242844, "step": 5044 }, { "epoch": 0.6121829875015168, "grad_norm": 3.3270695209503174, "learning_rate": 7.854071981329076e-06, "loss": 0.4730197489261627, "step": 5045 }, { "epoch": 0.6123043319985438, "grad_norm": 2.7307798862457275, "learning_rate": 7.85161528067805e-06, "loss": 0.21448566019535065, "step": 5046 }, { "epoch": 0.6124256764955709, "grad_norm": 1.0328198671340942, "learning_rate": 7.849158580027025e-06, "loss": 0.006210668478161097, "step": 5047 }, { "epoch": 0.612547020992598, "grad_norm": 1.1922792196273804, "learning_rate": 7.846701879375999e-06, "loss": 0.047189947217702866, "step": 5048 }, { "epoch": 0.612668365489625, "grad_norm": 3.2236971855163574, "learning_rate": 7.844245178724973e-06, "loss": 0.339810311794281, "step": 5049 }, { "epoch": 0.612789709986652, "grad_norm": 1.8247088193893433, "learning_rate": 7.841788478073948e-06, "loss": 0.2693372666835785, "step": 5050 }, { "epoch": 0.6129110544836792, "grad_norm": 2.738832712173462, "learning_rate": 7.839331777422922e-06, "loss": 0.5602001547813416, "step": 5051 }, { "epoch": 0.6130323989807063, "grad_norm": 1.9328089952468872, "learning_rate": 7.836875076771896e-06, "loss": 0.08254025131464005, "step": 5052 }, { "epoch": 0.6131537434777333, "grad_norm": 2.0107409954071045, "learning_rate": 7.83441837612087e-06, "loss": 0.16358467936515808, "step": 5053 }, { "epoch": 0.6132750879747604, "grad_norm": 2.35520076751709, "learning_rate": 7.831961675469845e-06, "loss": 0.11217784136533737, "step": 5054 }, { "epoch": 0.6133964324717874, "grad_norm": 2.350970983505249, "learning_rate": 7.829504974818819e-06, "loss": 0.37507393956184387, "step": 5055 }, { "epoch": 0.6135177769688145, "grad_norm": 3.7732248306274414, "learning_rate": 7.827048274167793e-06, "loss": 0.37614113092422485, "step": 5056 }, { "epoch": 0.6136391214658415, "grad_norm": 2.7831919193267822, "learning_rate": 7.824591573516767e-06, "loss": 0.3179229497909546, "step": 5057 }, { "epoch": 0.6137604659628686, "grad_norm": 4.301512718200684, "learning_rate": 7.822134872865742e-06, "loss": 0.27783235907554626, "step": 5058 }, { "epoch": 0.6138818104598956, "grad_norm": 2.7758965492248535, "learning_rate": 7.819678172214716e-06, "loss": 0.4789348542690277, "step": 5059 }, { "epoch": 0.6140031549569227, "grad_norm": 2.6012847423553467, "learning_rate": 7.81722147156369e-06, "loss": 0.2627089023590088, "step": 5060 }, { "epoch": 0.6141244994539498, "grad_norm": 2.834746837615967, "learning_rate": 7.814764770912664e-06, "loss": 0.3822893798351288, "step": 5061 }, { "epoch": 0.6142458439509768, "grad_norm": 2.2192726135253906, "learning_rate": 7.812308070261639e-06, "loss": 0.3894115090370178, "step": 5062 }, { "epoch": 0.6143671884480039, "grad_norm": 3.0342159271240234, "learning_rate": 7.809851369610613e-06, "loss": 0.4201892912387848, "step": 5063 }, { "epoch": 0.6144885329450309, "grad_norm": 3.276789426803589, "learning_rate": 7.807394668959587e-06, "loss": 0.17495159804821014, "step": 5064 }, { "epoch": 0.614609877442058, "grad_norm": 3.7130370140075684, "learning_rate": 7.804937968308563e-06, "loss": 0.41440925002098083, "step": 5065 }, { "epoch": 0.614731221939085, "grad_norm": 3.204479932785034, "learning_rate": 7.802481267657537e-06, "loss": 0.4625559151172638, "step": 5066 }, { "epoch": 0.6148525664361121, "grad_norm": 2.13676118850708, "learning_rate": 7.800024567006512e-06, "loss": 0.1474398672580719, "step": 5067 }, { "epoch": 0.6149739109331391, "grad_norm": 2.709505558013916, "learning_rate": 7.797567866355486e-06, "loss": 0.38709717988967896, "step": 5068 }, { "epoch": 0.6150952554301662, "grad_norm": 2.0195040702819824, "learning_rate": 7.79511116570446e-06, "loss": 0.17484354972839355, "step": 5069 }, { "epoch": 0.6152165999271934, "grad_norm": 2.3023064136505127, "learning_rate": 7.792654465053434e-06, "loss": 0.17981187999248505, "step": 5070 }, { "epoch": 0.6153379444242204, "grad_norm": 3.5440049171447754, "learning_rate": 7.790197764402409e-06, "loss": 0.11025988310575485, "step": 5071 }, { "epoch": 0.6154592889212475, "grad_norm": 3.2002525329589844, "learning_rate": 7.787741063751383e-06, "loss": 0.2279558777809143, "step": 5072 }, { "epoch": 0.6155806334182745, "grad_norm": 2.347578525543213, "learning_rate": 7.785284363100357e-06, "loss": 0.21285709738731384, "step": 5073 }, { "epoch": 0.6157019779153016, "grad_norm": 2.5433967113494873, "learning_rate": 7.782827662449332e-06, "loss": 0.1906622350215912, "step": 5074 }, { "epoch": 0.6158233224123286, "grad_norm": 2.1572959423065186, "learning_rate": 7.780370961798306e-06, "loss": 0.18639400601387024, "step": 5075 }, { "epoch": 0.6159446669093557, "grad_norm": 2.7287354469299316, "learning_rate": 7.77791426114728e-06, "loss": 0.31678736209869385, "step": 5076 }, { "epoch": 0.6160660114063827, "grad_norm": 2.011777400970459, "learning_rate": 7.775457560496254e-06, "loss": 0.25843048095703125, "step": 5077 }, { "epoch": 0.6161873559034098, "grad_norm": 2.2273285388946533, "learning_rate": 7.773000859845229e-06, "loss": 0.10772525519132614, "step": 5078 }, { "epoch": 0.6163087004004368, "grad_norm": 2.9453125, "learning_rate": 7.770544159194203e-06, "loss": 0.23196275532245636, "step": 5079 }, { "epoch": 0.6164300448974639, "grad_norm": 3.0370705127716064, "learning_rate": 7.768087458543177e-06, "loss": 0.19689476490020752, "step": 5080 }, { "epoch": 0.616551389394491, "grad_norm": 3.4278383255004883, "learning_rate": 7.765630757892151e-06, "loss": 0.39116185903549194, "step": 5081 }, { "epoch": 0.616672733891518, "grad_norm": 2.914891481399536, "learning_rate": 7.763174057241126e-06, "loss": 0.2652151584625244, "step": 5082 }, { "epoch": 0.616794078388545, "grad_norm": 2.2551233768463135, "learning_rate": 7.7607173565901e-06, "loss": 0.40178272128105164, "step": 5083 }, { "epoch": 0.6169154228855721, "grad_norm": 2.7544708251953125, "learning_rate": 7.758260655939076e-06, "loss": 0.335882306098938, "step": 5084 }, { "epoch": 0.6170367673825992, "grad_norm": 3.187938928604126, "learning_rate": 7.75580395528805e-06, "loss": 0.4502682089805603, "step": 5085 }, { "epoch": 0.6171581118796262, "grad_norm": 2.511051654815674, "learning_rate": 7.753347254637024e-06, "loss": 0.406748503446579, "step": 5086 }, { "epoch": 0.6172794563766533, "grad_norm": 3.4672982692718506, "learning_rate": 7.750890553985999e-06, "loss": 0.29882341623306274, "step": 5087 }, { "epoch": 0.6174008008736804, "grad_norm": 1.6818232536315918, "learning_rate": 7.748433853334971e-06, "loss": 0.2998236417770386, "step": 5088 }, { "epoch": 0.6175221453707075, "grad_norm": 1.8036460876464844, "learning_rate": 7.745977152683945e-06, "loss": 0.18128180503845215, "step": 5089 }, { "epoch": 0.6176434898677345, "grad_norm": 2.977043628692627, "learning_rate": 7.74352045203292e-06, "loss": 0.36643698811531067, "step": 5090 }, { "epoch": 0.6177648343647616, "grad_norm": 4.143869876861572, "learning_rate": 7.741063751381894e-06, "loss": 0.2840210199356079, "step": 5091 }, { "epoch": 0.6178861788617886, "grad_norm": 2.7095091342926025, "learning_rate": 7.738607050730868e-06, "loss": 0.11376570165157318, "step": 5092 }, { "epoch": 0.6180075233588157, "grad_norm": 1.9028805494308472, "learning_rate": 7.736150350079842e-06, "loss": 0.16399836540222168, "step": 5093 }, { "epoch": 0.6181288678558428, "grad_norm": 1.853744387626648, "learning_rate": 7.733693649428817e-06, "loss": 0.051789525896310806, "step": 5094 }, { "epoch": 0.6182502123528698, "grad_norm": 2.5118985176086426, "learning_rate": 7.731236948777791e-06, "loss": 0.10583311319351196, "step": 5095 }, { "epoch": 0.6183715568498969, "grad_norm": 2.062525510787964, "learning_rate": 7.728780248126765e-06, "loss": 0.12087111175060272, "step": 5096 }, { "epoch": 0.6184929013469239, "grad_norm": 2.445042610168457, "learning_rate": 7.726323547475741e-06, "loss": 0.41819244623184204, "step": 5097 }, { "epoch": 0.618614245843951, "grad_norm": 1.4598667621612549, "learning_rate": 7.723866846824715e-06, "loss": 0.017990294843912125, "step": 5098 }, { "epoch": 0.618735590340978, "grad_norm": 2.81373929977417, "learning_rate": 7.72141014617369e-06, "loss": 0.2588927149772644, "step": 5099 }, { "epoch": 0.6188569348380051, "grad_norm": 2.6385762691497803, "learning_rate": 7.718953445522664e-06, "loss": 0.314892053604126, "step": 5100 }, { "epoch": 0.6189782793350321, "grad_norm": 2.545128583908081, "learning_rate": 7.716496744871638e-06, "loss": 0.18816563487052917, "step": 5101 }, { "epoch": 0.6190996238320592, "grad_norm": 2.3310556411743164, "learning_rate": 7.714040044220612e-06, "loss": 0.28015828132629395, "step": 5102 }, { "epoch": 0.6192209683290862, "grad_norm": 2.86645770072937, "learning_rate": 7.711583343569587e-06, "loss": 0.34264254570007324, "step": 5103 }, { "epoch": 0.6193423128261133, "grad_norm": 2.480808973312378, "learning_rate": 7.709126642918561e-06, "loss": 0.17330048978328705, "step": 5104 }, { "epoch": 0.6194636573231403, "grad_norm": 3.2992103099823, "learning_rate": 7.706669942267535e-06, "loss": 0.27709606289863586, "step": 5105 }, { "epoch": 0.6195850018201674, "grad_norm": 2.3664729595184326, "learning_rate": 7.70421324161651e-06, "loss": 0.08801741153001785, "step": 5106 }, { "epoch": 0.6197063463171946, "grad_norm": 1.9595123529434204, "learning_rate": 7.701756540965484e-06, "loss": 0.272438108921051, "step": 5107 }, { "epoch": 0.6198276908142216, "grad_norm": 2.7587122917175293, "learning_rate": 7.699299840314458e-06, "loss": 0.40725988149642944, "step": 5108 }, { "epoch": 0.6199490353112487, "grad_norm": 4.024038314819336, "learning_rate": 7.696843139663432e-06, "loss": 0.4737035930156708, "step": 5109 }, { "epoch": 0.6200703798082757, "grad_norm": 1.2237366437911987, "learning_rate": 7.694386439012407e-06, "loss": 0.060056671500205994, "step": 5110 }, { "epoch": 0.6201917243053028, "grad_norm": 2.7987937927246094, "learning_rate": 7.69192973836138e-06, "loss": 0.2628626525402069, "step": 5111 }, { "epoch": 0.6203130688023298, "grad_norm": 2.0323808193206787, "learning_rate": 7.689473037710355e-06, "loss": 0.07083710283041, "step": 5112 }, { "epoch": 0.6204344132993569, "grad_norm": 2.8044517040252686, "learning_rate": 7.68701633705933e-06, "loss": 0.2521923780441284, "step": 5113 }, { "epoch": 0.6205557577963839, "grad_norm": 3.5835840702056885, "learning_rate": 7.684559636408304e-06, "loss": 0.31935545802116394, "step": 5114 }, { "epoch": 0.620677102293411, "grad_norm": 1.5363430976867676, "learning_rate": 7.682102935757278e-06, "loss": 0.04548322781920433, "step": 5115 }, { "epoch": 0.620798446790438, "grad_norm": 3.3696537017822266, "learning_rate": 7.679646235106252e-06, "loss": 0.2148149013519287, "step": 5116 }, { "epoch": 0.6209197912874651, "grad_norm": 2.003258466720581, "learning_rate": 7.677189534455228e-06, "loss": 0.16029050946235657, "step": 5117 }, { "epoch": 0.6210411357844922, "grad_norm": 1.4908440113067627, "learning_rate": 7.674732833804202e-06, "loss": 0.051762599498033524, "step": 5118 }, { "epoch": 0.6211624802815192, "grad_norm": 2.991093397140503, "learning_rate": 7.672276133153177e-06, "loss": 0.2940855026245117, "step": 5119 }, { "epoch": 0.6212838247785463, "grad_norm": 3.0953938961029053, "learning_rate": 7.66981943250215e-06, "loss": 0.31778982281684875, "step": 5120 }, { "epoch": 0.6214051692755733, "grad_norm": 2.717170238494873, "learning_rate": 7.667362731851125e-06, "loss": 0.19059117138385773, "step": 5121 }, { "epoch": 0.6215265137726004, "grad_norm": 3.5229837894439697, "learning_rate": 7.6649060312001e-06, "loss": 0.22722238302230835, "step": 5122 }, { "epoch": 0.6216478582696274, "grad_norm": 2.6029388904571533, "learning_rate": 7.662449330549074e-06, "loss": 0.2535755932331085, "step": 5123 }, { "epoch": 0.6217692027666545, "grad_norm": 2.826185464859009, "learning_rate": 7.659992629898048e-06, "loss": 0.29412201046943665, "step": 5124 }, { "epoch": 0.6218905472636815, "grad_norm": 2.28007435798645, "learning_rate": 7.657535929247022e-06, "loss": 0.12204025685787201, "step": 5125 }, { "epoch": 0.6220118917607087, "grad_norm": 3.465522527694702, "learning_rate": 7.655079228595996e-06, "loss": 0.31414011120796204, "step": 5126 }, { "epoch": 0.6221332362577358, "grad_norm": 6.281665325164795, "learning_rate": 7.65262252794497e-06, "loss": 0.5328725576400757, "step": 5127 }, { "epoch": 0.6222545807547628, "grad_norm": 2.3259201049804688, "learning_rate": 7.650165827293945e-06, "loss": 0.11064140498638153, "step": 5128 }, { "epoch": 0.6223759252517899, "grad_norm": 3.3930184841156006, "learning_rate": 7.647709126642919e-06, "loss": 0.6063504219055176, "step": 5129 }, { "epoch": 0.6224972697488169, "grad_norm": 1.279051661491394, "learning_rate": 7.645252425991893e-06, "loss": 0.21466988325119019, "step": 5130 }, { "epoch": 0.622618614245844, "grad_norm": 2.6957547664642334, "learning_rate": 7.642795725340868e-06, "loss": 0.18660517036914825, "step": 5131 }, { "epoch": 0.622739958742871, "grad_norm": 3.9897654056549072, "learning_rate": 7.640339024689842e-06, "loss": 0.2970423102378845, "step": 5132 }, { "epoch": 0.6228613032398981, "grad_norm": 2.0440804958343506, "learning_rate": 7.637882324038816e-06, "loss": 0.31867945194244385, "step": 5133 }, { "epoch": 0.6229826477369251, "grad_norm": 4.42106819152832, "learning_rate": 7.63542562338779e-06, "loss": 0.3898775279521942, "step": 5134 }, { "epoch": 0.6231039922339522, "grad_norm": 4.199008941650391, "learning_rate": 7.632968922736765e-06, "loss": 0.1245967298746109, "step": 5135 }, { "epoch": 0.6232253367309792, "grad_norm": 2.7009990215301514, "learning_rate": 7.630512222085739e-06, "loss": 0.45793604850769043, "step": 5136 }, { "epoch": 0.6233466812280063, "grad_norm": 3.275801181793213, "learning_rate": 7.628055521434714e-06, "loss": 0.4480186998844147, "step": 5137 }, { "epoch": 0.6234680257250333, "grad_norm": 3.289912223815918, "learning_rate": 7.625598820783688e-06, "loss": 0.3174231946468353, "step": 5138 }, { "epoch": 0.6235893702220604, "grad_norm": 2.8927953243255615, "learning_rate": 7.6231421201326625e-06, "loss": 0.28378209471702576, "step": 5139 }, { "epoch": 0.6237107147190875, "grad_norm": 2.7678353786468506, "learning_rate": 7.620685419481637e-06, "loss": 0.3322058320045471, "step": 5140 }, { "epoch": 0.6238320592161145, "grad_norm": 3.4492950439453125, "learning_rate": 7.618228718830611e-06, "loss": 0.222122922539711, "step": 5141 }, { "epoch": 0.6239534037131416, "grad_norm": 2.57182240486145, "learning_rate": 7.615772018179586e-06, "loss": 0.29393821954727173, "step": 5142 }, { "epoch": 0.6240747482101686, "grad_norm": 1.717133641242981, "learning_rate": 7.61331531752856e-06, "loss": 0.4776521921157837, "step": 5143 }, { "epoch": 0.6241960927071958, "grad_norm": 3.611464500427246, "learning_rate": 7.610858616877535e-06, "loss": 0.5599163174629211, "step": 5144 }, { "epoch": 0.6243174372042228, "grad_norm": 2.3300814628601074, "learning_rate": 7.608401916226509e-06, "loss": 0.31671321392059326, "step": 5145 }, { "epoch": 0.6244387817012499, "grad_norm": 0.01847938634455204, "learning_rate": 7.605945215575483e-06, "loss": 0.000190070946700871, "step": 5146 }, { "epoch": 0.6245601261982769, "grad_norm": 2.8741414546966553, "learning_rate": 7.6034885149244574e-06, "loss": 0.1481812596321106, "step": 5147 }, { "epoch": 0.624681470695304, "grad_norm": 3.164642095565796, "learning_rate": 7.601031814273432e-06, "loss": 0.4918677806854248, "step": 5148 }, { "epoch": 0.624802815192331, "grad_norm": 0.11899151653051376, "learning_rate": 7.598575113622406e-06, "loss": 0.0011966261081397533, "step": 5149 }, { "epoch": 0.6249241596893581, "grad_norm": 2.8237783908843994, "learning_rate": 7.59611841297138e-06, "loss": 0.598763644695282, "step": 5150 }, { "epoch": 0.6250455041863852, "grad_norm": 2.731285333633423, "learning_rate": 7.5936617123203545e-06, "loss": 0.5312917232513428, "step": 5151 }, { "epoch": 0.6251668486834122, "grad_norm": 2.5859949588775635, "learning_rate": 7.59120501166933e-06, "loss": 0.4938415586948395, "step": 5152 }, { "epoch": 0.6252881931804393, "grad_norm": 2.6663966178894043, "learning_rate": 7.588748311018304e-06, "loss": 0.1334144026041031, "step": 5153 }, { "epoch": 0.6254095376774663, "grad_norm": 2.52302885055542, "learning_rate": 7.586291610367278e-06, "loss": 0.2385820895433426, "step": 5154 }, { "epoch": 0.6255308821744934, "grad_norm": 2.9277803897857666, "learning_rate": 7.583834909716252e-06, "loss": 0.18325097858905792, "step": 5155 }, { "epoch": 0.6256522266715204, "grad_norm": 1.6543762683868408, "learning_rate": 7.581378209065226e-06, "loss": 0.16163496673107147, "step": 5156 }, { "epoch": 0.6257735711685475, "grad_norm": 2.546449661254883, "learning_rate": 7.5789215084142e-06, "loss": 0.16424694657325745, "step": 5157 }, { "epoch": 0.6258949156655745, "grad_norm": 1.2081456184387207, "learning_rate": 7.576464807763174e-06, "loss": 0.06001346558332443, "step": 5158 }, { "epoch": 0.6260162601626016, "grad_norm": 3.423450469970703, "learning_rate": 7.5740081071121485e-06, "loss": 0.3298032283782959, "step": 5159 }, { "epoch": 0.6261376046596286, "grad_norm": 1.6477750539779663, "learning_rate": 7.571551406461123e-06, "loss": 0.1843397617340088, "step": 5160 }, { "epoch": 0.6262589491566557, "grad_norm": 2.1941425800323486, "learning_rate": 7.569094705810097e-06, "loss": 0.5532945394515991, "step": 5161 }, { "epoch": 0.6263802936536828, "grad_norm": 3.3200879096984863, "learning_rate": 7.566638005159071e-06, "loss": 0.33111000061035156, "step": 5162 }, { "epoch": 0.6265016381507099, "grad_norm": 3.0307066440582275, "learning_rate": 7.564181304508046e-06, "loss": 0.2337704747915268, "step": 5163 }, { "epoch": 0.626622982647737, "grad_norm": 3.377408981323242, "learning_rate": 7.56172460385702e-06, "loss": 0.41515594720840454, "step": 5164 }, { "epoch": 0.626744327144764, "grad_norm": 0.472019761800766, "learning_rate": 7.559267903205995e-06, "loss": 0.004778968170285225, "step": 5165 }, { "epoch": 0.6268656716417911, "grad_norm": 3.1622719764709473, "learning_rate": 7.556811202554969e-06, "loss": 0.3565206527709961, "step": 5166 }, { "epoch": 0.6269870161388181, "grad_norm": 2.075188398361206, "learning_rate": 7.5543545019039435e-06, "loss": 0.29794755578041077, "step": 5167 }, { "epoch": 0.6271083606358452, "grad_norm": 3.496927499771118, "learning_rate": 7.551897801252918e-06, "loss": 0.17961809039115906, "step": 5168 }, { "epoch": 0.6272297051328722, "grad_norm": 2.781517267227173, "learning_rate": 7.549441100601892e-06, "loss": 0.2276153266429901, "step": 5169 }, { "epoch": 0.6273510496298993, "grad_norm": 2.715745210647583, "learning_rate": 7.546984399950866e-06, "loss": 0.4413488507270813, "step": 5170 }, { "epoch": 0.6274723941269263, "grad_norm": 3.04716420173645, "learning_rate": 7.5445276992998405e-06, "loss": 0.4481610357761383, "step": 5171 }, { "epoch": 0.6275937386239534, "grad_norm": 1.8206723928451538, "learning_rate": 7.542070998648815e-06, "loss": 0.1828715205192566, "step": 5172 }, { "epoch": 0.6277150831209805, "grad_norm": 4.010819435119629, "learning_rate": 7.539614297997789e-06, "loss": 0.275452584028244, "step": 5173 }, { "epoch": 0.6278364276180075, "grad_norm": 2.493227481842041, "learning_rate": 7.537157597346763e-06, "loss": 0.11311172693967819, "step": 5174 }, { "epoch": 0.6279577721150346, "grad_norm": 3.087218999862671, "learning_rate": 7.534700896695738e-06, "loss": 0.4291622042655945, "step": 5175 }, { "epoch": 0.6280791166120616, "grad_norm": 2.286325216293335, "learning_rate": 7.532244196044713e-06, "loss": 0.4807203412055969, "step": 5176 }, { "epoch": 0.6282004611090887, "grad_norm": 6.38590145111084, "learning_rate": 7.529787495393687e-06, "loss": 0.5786105990409851, "step": 5177 }, { "epoch": 0.6283218056061157, "grad_norm": 0.0004274118400644511, "learning_rate": 7.527330794742661e-06, "loss": 1.3660848708241247e-05, "step": 5178 }, { "epoch": 0.6284431501031428, "grad_norm": 2.387519121170044, "learning_rate": 7.524874094091635e-06, "loss": 0.09677918255329132, "step": 5179 }, { "epoch": 0.6285644946001698, "grad_norm": 4.426841735839844, "learning_rate": 7.52241739344061e-06, "loss": 0.12191581726074219, "step": 5180 }, { "epoch": 0.628685839097197, "grad_norm": 2.901137351989746, "learning_rate": 7.519960692789584e-06, "loss": 0.2523648738861084, "step": 5181 }, { "epoch": 0.628807183594224, "grad_norm": 2.490514039993286, "learning_rate": 7.517503992138558e-06, "loss": 0.2237474024295807, "step": 5182 }, { "epoch": 0.6289285280912511, "grad_norm": 2.896909475326538, "learning_rate": 7.5150472914875324e-06, "loss": 0.1317497044801712, "step": 5183 }, { "epoch": 0.6290498725882782, "grad_norm": 2.940004348754883, "learning_rate": 7.512590590836507e-06, "loss": 0.17181947827339172, "step": 5184 }, { "epoch": 0.6291712170853052, "grad_norm": 0.8982471227645874, "learning_rate": 7.510133890185482e-06, "loss": 0.022441787645220757, "step": 5185 }, { "epoch": 0.6292925615823323, "grad_norm": 2.9215705394744873, "learning_rate": 7.507677189534456e-06, "loss": 0.24060653150081635, "step": 5186 }, { "epoch": 0.6294139060793593, "grad_norm": 0.7590889930725098, "learning_rate": 7.50522048888343e-06, "loss": 0.01979641243815422, "step": 5187 }, { "epoch": 0.6295352505763864, "grad_norm": 4.2162299156188965, "learning_rate": 7.5027637882324046e-06, "loss": 0.4677298367023468, "step": 5188 }, { "epoch": 0.6296565950734134, "grad_norm": 4.183058261871338, "learning_rate": 7.500307087581379e-06, "loss": 0.31139224767684937, "step": 5189 }, { "epoch": 0.6297779395704405, "grad_norm": 4.271397590637207, "learning_rate": 7.497850386930353e-06, "loss": 0.13579994440078735, "step": 5190 }, { "epoch": 0.6298992840674675, "grad_norm": 2.090845823287964, "learning_rate": 7.495393686279327e-06, "loss": 0.028008976951241493, "step": 5191 }, { "epoch": 0.6300206285644946, "grad_norm": 1.9768749475479126, "learning_rate": 7.492936985628302e-06, "loss": 0.29657506942749023, "step": 5192 }, { "epoch": 0.6301419730615216, "grad_norm": 3.6395578384399414, "learning_rate": 7.490480284977276e-06, "loss": 0.4480901062488556, "step": 5193 }, { "epoch": 0.6302633175585487, "grad_norm": 3.1879708766937256, "learning_rate": 7.48802358432625e-06, "loss": 0.37214791774749756, "step": 5194 }, { "epoch": 0.6303846620555758, "grad_norm": 2.914806365966797, "learning_rate": 7.485566883675225e-06, "loss": 0.2183961272239685, "step": 5195 }, { "epoch": 0.6305060065526028, "grad_norm": 2.9564104080200195, "learning_rate": 7.4831101830241995e-06, "loss": 0.13028694689273834, "step": 5196 }, { "epoch": 0.6306273510496299, "grad_norm": 1.643334150314331, "learning_rate": 7.480653482373174e-06, "loss": 0.22156259417533875, "step": 5197 }, { "epoch": 0.6307486955466569, "grad_norm": 1.9726053476333618, "learning_rate": 7.478196781722148e-06, "loss": 0.23271644115447998, "step": 5198 }, { "epoch": 0.630870040043684, "grad_norm": 1.9717057943344116, "learning_rate": 7.475740081071122e-06, "loss": 0.3165634870529175, "step": 5199 }, { "epoch": 0.6309913845407111, "grad_norm": 2.803903102874756, "learning_rate": 7.4732833804200965e-06, "loss": 0.24271273612976074, "step": 5200 }, { "epoch": 0.6311127290377382, "grad_norm": 2.5464649200439453, "learning_rate": 7.470826679769071e-06, "loss": 0.24610841274261475, "step": 5201 }, { "epoch": 0.6312340735347652, "grad_norm": 0.9751248955726624, "learning_rate": 7.468369979118045e-06, "loss": 0.024487892165780067, "step": 5202 }, { "epoch": 0.6313554180317923, "grad_norm": 2.686784267425537, "learning_rate": 7.465913278467019e-06, "loss": 0.4830613136291504, "step": 5203 }, { "epoch": 0.6314767625288193, "grad_norm": 1.863593578338623, "learning_rate": 7.463456577815994e-06, "loss": 0.05123068392276764, "step": 5204 }, { "epoch": 0.6315981070258464, "grad_norm": 2.540513515472412, "learning_rate": 7.460999877164969e-06, "loss": 0.48978811502456665, "step": 5205 }, { "epoch": 0.6317194515228735, "grad_norm": 3.539367914199829, "learning_rate": 7.458543176513943e-06, "loss": 0.34770894050598145, "step": 5206 }, { "epoch": 0.6318407960199005, "grad_norm": 3.513749122619629, "learning_rate": 7.456086475862917e-06, "loss": 0.31871047616004944, "step": 5207 }, { "epoch": 0.6319621405169276, "grad_norm": 2.685826539993286, "learning_rate": 7.4536297752118914e-06, "loss": 0.5649182796478271, "step": 5208 }, { "epoch": 0.6320834850139546, "grad_norm": 3.360147476196289, "learning_rate": 7.451173074560866e-06, "loss": 0.44040125608444214, "step": 5209 }, { "epoch": 0.6322048295109817, "grad_norm": 1.8129656314849854, "learning_rate": 7.44871637390984e-06, "loss": 0.2525872588157654, "step": 5210 }, { "epoch": 0.6323261740080087, "grad_norm": 2.5779991149902344, "learning_rate": 7.446259673258814e-06, "loss": 0.24007919430732727, "step": 5211 }, { "epoch": 0.6324475185050358, "grad_norm": 3.7836520671844482, "learning_rate": 7.4438029726077885e-06, "loss": 0.1827472448348999, "step": 5212 }, { "epoch": 0.6325688630020628, "grad_norm": 1.731846570968628, "learning_rate": 7.441346271956763e-06, "loss": 0.10758538544178009, "step": 5213 }, { "epoch": 0.6326902074990899, "grad_norm": 1.3360425233840942, "learning_rate": 7.438889571305738e-06, "loss": 0.3957565128803253, "step": 5214 }, { "epoch": 0.6328115519961169, "grad_norm": 2.308990716934204, "learning_rate": 7.436432870654712e-06, "loss": 0.3529370129108429, "step": 5215 }, { "epoch": 0.632932896493144, "grad_norm": 2.1859376430511475, "learning_rate": 7.433976170003686e-06, "loss": 0.3487473726272583, "step": 5216 }, { "epoch": 0.633054240990171, "grad_norm": 2.5486912727355957, "learning_rate": 7.431519469352661e-06, "loss": 0.34937816858291626, "step": 5217 }, { "epoch": 0.6331755854871981, "grad_norm": 4.513184070587158, "learning_rate": 7.429062768701635e-06, "loss": 0.4164623022079468, "step": 5218 }, { "epoch": 0.6332969299842253, "grad_norm": 1.740838646888733, "learning_rate": 7.426606068050609e-06, "loss": 0.1240142434835434, "step": 5219 }, { "epoch": 0.6334182744812523, "grad_norm": 1.7644047737121582, "learning_rate": 7.424149367399583e-06, "loss": 0.16068820655345917, "step": 5220 }, { "epoch": 0.6335396189782794, "grad_norm": 3.6380879878997803, "learning_rate": 7.421692666748558e-06, "loss": 0.7646800875663757, "step": 5221 }, { "epoch": 0.6336609634753064, "grad_norm": 0.002228769473731518, "learning_rate": 7.419235966097532e-06, "loss": 4.162078403169289e-05, "step": 5222 }, { "epoch": 0.6337823079723335, "grad_norm": 2.5019686222076416, "learning_rate": 7.416779265446506e-06, "loss": 0.3278352916240692, "step": 5223 }, { "epoch": 0.6339036524693605, "grad_norm": 2.750730514526367, "learning_rate": 7.4143225647954796e-06, "loss": 0.3762390911579132, "step": 5224 }, { "epoch": 0.6340249969663876, "grad_norm": 2.876154899597168, "learning_rate": 7.411865864144454e-06, "loss": 0.19680559635162354, "step": 5225 }, { "epoch": 0.6341463414634146, "grad_norm": 3.1617276668548584, "learning_rate": 7.409409163493428e-06, "loss": 0.1128135472536087, "step": 5226 }, { "epoch": 0.6342676859604417, "grad_norm": 1.9750601053237915, "learning_rate": 7.406952462842403e-06, "loss": 0.3242079019546509, "step": 5227 }, { "epoch": 0.6343890304574687, "grad_norm": 1.8776732683181763, "learning_rate": 7.4044957621913774e-06, "loss": 0.456900417804718, "step": 5228 }, { "epoch": 0.6345103749544958, "grad_norm": 3.6841602325439453, "learning_rate": 7.402039061540352e-06, "loss": 0.238835409283638, "step": 5229 }, { "epoch": 0.6346317194515229, "grad_norm": 2.542550563812256, "learning_rate": 7.399582360889326e-06, "loss": 0.2142706960439682, "step": 5230 }, { "epoch": 0.6347530639485499, "grad_norm": 2.143183469772339, "learning_rate": 7.3971256602383e-06, "loss": 0.15952105820178986, "step": 5231 }, { "epoch": 0.634874408445577, "grad_norm": 2.3508293628692627, "learning_rate": 7.3946689595872745e-06, "loss": 0.2870420813560486, "step": 5232 }, { "epoch": 0.634995752942604, "grad_norm": 3.544053554534912, "learning_rate": 7.392212258936249e-06, "loss": 0.17155948281288147, "step": 5233 }, { "epoch": 0.6351170974396311, "grad_norm": 2.291283130645752, "learning_rate": 7.389755558285223e-06, "loss": 0.21039658784866333, "step": 5234 }, { "epoch": 0.6352384419366581, "grad_norm": 2.9492058753967285, "learning_rate": 7.387298857634197e-06, "loss": 0.30056238174438477, "step": 5235 }, { "epoch": 0.6353597864336852, "grad_norm": 3.49558424949646, "learning_rate": 7.3848421569831715e-06, "loss": 0.7648195028305054, "step": 5236 }, { "epoch": 0.6354811309307123, "grad_norm": 5.205968856811523, "learning_rate": 7.382385456332147e-06, "loss": 0.20507466793060303, "step": 5237 }, { "epoch": 0.6356024754277394, "grad_norm": 4.635091781616211, "learning_rate": 7.379928755681121e-06, "loss": 0.5483059883117676, "step": 5238 }, { "epoch": 0.6357238199247665, "grad_norm": 2.143866777420044, "learning_rate": 7.377472055030095e-06, "loss": 0.12470147013664246, "step": 5239 }, { "epoch": 0.6358451644217935, "grad_norm": 2.2624711990356445, "learning_rate": 7.375015354379069e-06, "loss": 0.10427339375019073, "step": 5240 }, { "epoch": 0.6359665089188206, "grad_norm": 2.461515426635742, "learning_rate": 7.372558653728044e-06, "loss": 0.2343287318944931, "step": 5241 }, { "epoch": 0.6360878534158476, "grad_norm": 2.0916168689727783, "learning_rate": 7.370101953077018e-06, "loss": 0.05995643511414528, "step": 5242 }, { "epoch": 0.6362091979128747, "grad_norm": 2.866332530975342, "learning_rate": 7.367645252425992e-06, "loss": 0.14199528098106384, "step": 5243 }, { "epoch": 0.6363305424099017, "grad_norm": 3.0678765773773193, "learning_rate": 7.365188551774966e-06, "loss": 0.33930066227912903, "step": 5244 }, { "epoch": 0.6364518869069288, "grad_norm": 3.4433538913726807, "learning_rate": 7.362731851123941e-06, "loss": 0.37763726711273193, "step": 5245 }, { "epoch": 0.6365732314039558, "grad_norm": 3.3747081756591797, "learning_rate": 7.360275150472915e-06, "loss": 0.3498091995716095, "step": 5246 }, { "epoch": 0.6366945759009829, "grad_norm": 2.254237651824951, "learning_rate": 7.35781844982189e-06, "loss": 0.38726261258125305, "step": 5247 }, { "epoch": 0.6368159203980099, "grad_norm": 3.0829031467437744, "learning_rate": 7.355361749170864e-06, "loss": 0.6538985967636108, "step": 5248 }, { "epoch": 0.636937264895037, "grad_norm": 5.062077045440674, "learning_rate": 7.3529050485198386e-06, "loss": 0.4258115291595459, "step": 5249 }, { "epoch": 0.637058609392064, "grad_norm": 1.4171710014343262, "learning_rate": 7.350448347868813e-06, "loss": 0.0836060643196106, "step": 5250 }, { "epoch": 0.6371799538890911, "grad_norm": 2.1625490188598633, "learning_rate": 7.347991647217787e-06, "loss": 0.19590319693088531, "step": 5251 }, { "epoch": 0.6373012983861182, "grad_norm": 2.2641139030456543, "learning_rate": 7.345534946566761e-06, "loss": 0.10216805338859558, "step": 5252 }, { "epoch": 0.6374226428831452, "grad_norm": 1.343229055404663, "learning_rate": 7.343078245915736e-06, "loss": 0.04747616499662399, "step": 5253 }, { "epoch": 0.6375439873801723, "grad_norm": 1.9369689226150513, "learning_rate": 7.34062154526471e-06, "loss": 0.11587318778038025, "step": 5254 }, { "epoch": 0.6376653318771993, "grad_norm": 0.06576523929834366, "learning_rate": 7.338164844613684e-06, "loss": 0.0009665371035225689, "step": 5255 }, { "epoch": 0.6377866763742265, "grad_norm": 2.2284114360809326, "learning_rate": 7.335708143962658e-06, "loss": 0.12489627301692963, "step": 5256 }, { "epoch": 0.6379080208712535, "grad_norm": 1.7144150733947754, "learning_rate": 7.3332514433116335e-06, "loss": 0.15024758875370026, "step": 5257 }, { "epoch": 0.6380293653682806, "grad_norm": 2.753194808959961, "learning_rate": 7.330794742660608e-06, "loss": 0.29353028535842896, "step": 5258 }, { "epoch": 0.6381507098653076, "grad_norm": 5.554291248321533, "learning_rate": 7.328338042009582e-06, "loss": 0.555769145488739, "step": 5259 }, { "epoch": 0.6382720543623347, "grad_norm": 2.0440244674682617, "learning_rate": 7.325881341358556e-06, "loss": 0.10882645845413208, "step": 5260 }, { "epoch": 0.6383933988593617, "grad_norm": 4.69790506362915, "learning_rate": 7.3234246407075305e-06, "loss": 0.4237305819988251, "step": 5261 }, { "epoch": 0.6385147433563888, "grad_norm": 3.8605079650878906, "learning_rate": 7.320967940056505e-06, "loss": 0.298441082239151, "step": 5262 }, { "epoch": 0.6386360878534159, "grad_norm": 2.0664751529693604, "learning_rate": 7.318511239405479e-06, "loss": 0.36174336075782776, "step": 5263 }, { "epoch": 0.6387574323504429, "grad_norm": 1.8126201629638672, "learning_rate": 7.316054538754453e-06, "loss": 0.1482890397310257, "step": 5264 }, { "epoch": 0.63887877684747, "grad_norm": 2.3962934017181396, "learning_rate": 7.3135978381034275e-06, "loss": 0.3556768596172333, "step": 5265 }, { "epoch": 0.639000121344497, "grad_norm": 2.040398120880127, "learning_rate": 7.311141137452402e-06, "loss": 0.5186150670051575, "step": 5266 }, { "epoch": 0.6391214658415241, "grad_norm": 2.392997980117798, "learning_rate": 7.308684436801377e-06, "loss": 0.22875747084617615, "step": 5267 }, { "epoch": 0.6392428103385511, "grad_norm": 2.67751145362854, "learning_rate": 7.306227736150351e-06, "loss": 0.4091866612434387, "step": 5268 }, { "epoch": 0.6393641548355782, "grad_norm": 2.475794792175293, "learning_rate": 7.303771035499325e-06, "loss": 0.2591671943664551, "step": 5269 }, { "epoch": 0.6394854993326052, "grad_norm": 2.6664907932281494, "learning_rate": 7.3013143348483e-06, "loss": 0.13285675644874573, "step": 5270 }, { "epoch": 0.6396068438296323, "grad_norm": 2.705899715423584, "learning_rate": 7.298857634197274e-06, "loss": 0.3562411069869995, "step": 5271 }, { "epoch": 0.6397281883266593, "grad_norm": 2.237299680709839, "learning_rate": 7.296400933546248e-06, "loss": 0.5312922596931458, "step": 5272 }, { "epoch": 0.6398495328236864, "grad_norm": 3.165327310562134, "learning_rate": 7.2939442328952224e-06, "loss": 0.24253319203853607, "step": 5273 }, { "epoch": 0.6399708773207136, "grad_norm": 0.827338457107544, "learning_rate": 7.291487532244197e-06, "loss": 0.02788114733994007, "step": 5274 }, { "epoch": 0.6400922218177406, "grad_norm": 2.10156512260437, "learning_rate": 7.289030831593171e-06, "loss": 0.2697005867958069, "step": 5275 }, { "epoch": 0.6402135663147677, "grad_norm": 2.693977117538452, "learning_rate": 7.286574130942145e-06, "loss": 0.08848381787538528, "step": 5276 }, { "epoch": 0.6403349108117947, "grad_norm": 2.116316318511963, "learning_rate": 7.28411743029112e-06, "loss": 0.177485853433609, "step": 5277 }, { "epoch": 0.6404562553088218, "grad_norm": 2.901185989379883, "learning_rate": 7.281660729640095e-06, "loss": 0.2909940183162689, "step": 5278 }, { "epoch": 0.6405775998058488, "grad_norm": 4.422817707061768, "learning_rate": 7.279204028989069e-06, "loss": 0.5253637433052063, "step": 5279 }, { "epoch": 0.6406989443028759, "grad_norm": 2.8226845264434814, "learning_rate": 7.276747328338043e-06, "loss": 0.3122970461845398, "step": 5280 }, { "epoch": 0.6408202887999029, "grad_norm": 2.272554874420166, "learning_rate": 7.274290627687017e-06, "loss": 0.8164306879043579, "step": 5281 }, { "epoch": 0.64094163329693, "grad_norm": 2.4905824661254883, "learning_rate": 7.271833927035992e-06, "loss": 0.33353665471076965, "step": 5282 }, { "epoch": 0.641062977793957, "grad_norm": 2.50264310836792, "learning_rate": 7.269377226384966e-06, "loss": 0.13481377065181732, "step": 5283 }, { "epoch": 0.6411843222909841, "grad_norm": 0.9713133573532104, "learning_rate": 7.26692052573394e-06, "loss": 0.12120094895362854, "step": 5284 }, { "epoch": 0.6413056667880112, "grad_norm": 2.425795078277588, "learning_rate": 7.264463825082914e-06, "loss": 0.02659408003091812, "step": 5285 }, { "epoch": 0.6414270112850382, "grad_norm": 2.448134422302246, "learning_rate": 7.262007124431889e-06, "loss": 0.06854808330535889, "step": 5286 }, { "epoch": 0.6415483557820653, "grad_norm": 3.6318368911743164, "learning_rate": 7.259550423780864e-06, "loss": 0.5136401057243347, "step": 5287 }, { "epoch": 0.6416697002790923, "grad_norm": 3.926347494125366, "learning_rate": 7.257093723129838e-06, "loss": 0.36056432127952576, "step": 5288 }, { "epoch": 0.6417910447761194, "grad_norm": 4.392789363861084, "learning_rate": 7.254637022478812e-06, "loss": 0.24080726504325867, "step": 5289 }, { "epoch": 0.6419123892731464, "grad_norm": 1.4232943058013916, "learning_rate": 7.2521803218277865e-06, "loss": 0.08554395288228989, "step": 5290 }, { "epoch": 0.6420337337701735, "grad_norm": 2.384232997894287, "learning_rate": 7.24972362117676e-06, "loss": 0.4125480651855469, "step": 5291 }, { "epoch": 0.6421550782672005, "grad_norm": 3.2314963340759277, "learning_rate": 7.247266920525734e-06, "loss": 0.36397701501846313, "step": 5292 }, { "epoch": 0.6422764227642277, "grad_norm": 3.7070834636688232, "learning_rate": 7.2448102198747085e-06, "loss": 0.09396432340145111, "step": 5293 }, { "epoch": 0.6423977672612547, "grad_norm": 2.7868475914001465, "learning_rate": 7.242353519223683e-06, "loss": 0.38682466745376587, "step": 5294 }, { "epoch": 0.6425191117582818, "grad_norm": 2.484625816345215, "learning_rate": 7.239896818572657e-06, "loss": 0.3896671533584595, "step": 5295 }, { "epoch": 0.6426404562553089, "grad_norm": 1.5819138288497925, "learning_rate": 7.237440117921631e-06, "loss": 0.07975146919488907, "step": 5296 }, { "epoch": 0.6427618007523359, "grad_norm": 3.091887950897217, "learning_rate": 7.2349834172706055e-06, "loss": 0.4194187521934509, "step": 5297 }, { "epoch": 0.642883145249363, "grad_norm": 4.634640693664551, "learning_rate": 7.23252671661958e-06, "loss": 0.7704521417617798, "step": 5298 }, { "epoch": 0.64300448974639, "grad_norm": 2.682689905166626, "learning_rate": 7.230070015968554e-06, "loss": 0.2554735839366913, "step": 5299 }, { "epoch": 0.6431258342434171, "grad_norm": 2.859452724456787, "learning_rate": 7.227613315317529e-06, "loss": 0.26469099521636963, "step": 5300 }, { "epoch": 0.6432471787404441, "grad_norm": 3.037590742111206, "learning_rate": 7.225156614666503e-06, "loss": 0.45904409885406494, "step": 5301 }, { "epoch": 0.6433685232374712, "grad_norm": 3.5770950317382812, "learning_rate": 7.222699914015478e-06, "loss": 0.3747759461402893, "step": 5302 }, { "epoch": 0.6434898677344982, "grad_norm": 2.1426126956939697, "learning_rate": 7.220243213364452e-06, "loss": 0.3390931189060211, "step": 5303 }, { "epoch": 0.6436112122315253, "grad_norm": 2.21389102935791, "learning_rate": 7.217786512713426e-06, "loss": 0.19395959377288818, "step": 5304 }, { "epoch": 0.6437325567285523, "grad_norm": 3.029841184616089, "learning_rate": 7.2153298120624e-06, "loss": 0.49713489413261414, "step": 5305 }, { "epoch": 0.6438539012255794, "grad_norm": 2.0456912517547607, "learning_rate": 7.212873111411375e-06, "loss": 0.07904763519763947, "step": 5306 }, { "epoch": 0.6439752457226064, "grad_norm": 3.6251978874206543, "learning_rate": 7.210416410760349e-06, "loss": 0.17891547083854675, "step": 5307 }, { "epoch": 0.6440965902196335, "grad_norm": 2.3782310485839844, "learning_rate": 7.207959710109323e-06, "loss": 0.6051362752914429, "step": 5308 }, { "epoch": 0.6442179347166606, "grad_norm": 2.908564805984497, "learning_rate": 7.2055030094582974e-06, "loss": 0.11423728615045547, "step": 5309 }, { "epoch": 0.6443392792136876, "grad_norm": 3.437335252761841, "learning_rate": 7.2030463088072725e-06, "loss": 0.2593180239200592, "step": 5310 }, { "epoch": 0.6444606237107147, "grad_norm": 1.5736790895462036, "learning_rate": 7.200589608156247e-06, "loss": 0.02757946588099003, "step": 5311 }, { "epoch": 0.6445819682077418, "grad_norm": 2.513566255569458, "learning_rate": 7.198132907505221e-06, "loss": 0.4500012695789337, "step": 5312 }, { "epoch": 0.6447033127047689, "grad_norm": 5.131681442260742, "learning_rate": 7.195676206854195e-06, "loss": 0.27335307002067566, "step": 5313 }, { "epoch": 0.6448246572017959, "grad_norm": 3.161888360977173, "learning_rate": 7.1932195062031696e-06, "loss": 0.13082441687583923, "step": 5314 }, { "epoch": 0.644946001698823, "grad_norm": 3.7497916221618652, "learning_rate": 7.190762805552144e-06, "loss": 0.5052933096885681, "step": 5315 }, { "epoch": 0.64506734619585, "grad_norm": 1.790724754333496, "learning_rate": 7.188306104901118e-06, "loss": 0.09411320835351944, "step": 5316 }, { "epoch": 0.6451886906928771, "grad_norm": 2.4359023571014404, "learning_rate": 7.185849404250092e-06, "loss": 0.17621558904647827, "step": 5317 }, { "epoch": 0.6453100351899042, "grad_norm": 2.291019916534424, "learning_rate": 7.183392703599067e-06, "loss": 0.08902154862880707, "step": 5318 }, { "epoch": 0.6454313796869312, "grad_norm": 2.750394105911255, "learning_rate": 7.180936002948041e-06, "loss": 0.16719895601272583, "step": 5319 }, { "epoch": 0.6455527241839583, "grad_norm": 2.9433720111846924, "learning_rate": 7.178479302297016e-06, "loss": 0.2825256586074829, "step": 5320 }, { "epoch": 0.6456740686809853, "grad_norm": 0.8500217795372009, "learning_rate": 7.17602260164599e-06, "loss": 0.016229603439569473, "step": 5321 }, { "epoch": 0.6457954131780124, "grad_norm": 2.334036350250244, "learning_rate": 7.1735659009949645e-06, "loss": 0.09401032328605652, "step": 5322 }, { "epoch": 0.6459167576750394, "grad_norm": 3.699812412261963, "learning_rate": 7.171109200343939e-06, "loss": 0.39708343148231506, "step": 5323 }, { "epoch": 0.6460381021720665, "grad_norm": 3.0876588821411133, "learning_rate": 7.168652499692913e-06, "loss": 0.09378945827484131, "step": 5324 }, { "epoch": 0.6461594466690935, "grad_norm": 3.574230432510376, "learning_rate": 7.166195799041887e-06, "loss": 0.2574303150177002, "step": 5325 }, { "epoch": 0.6462807911661206, "grad_norm": 2.0921127796173096, "learning_rate": 7.1637390983908615e-06, "loss": 0.0993313416838646, "step": 5326 }, { "epoch": 0.6464021356631476, "grad_norm": 3.4750678539276123, "learning_rate": 7.161282397739836e-06, "loss": 0.37431854009628296, "step": 5327 }, { "epoch": 0.6465234801601747, "grad_norm": 2.1825950145721436, "learning_rate": 7.15882569708881e-06, "loss": 0.17372436821460724, "step": 5328 }, { "epoch": 0.6466448246572017, "grad_norm": 1.8782719373703003, "learning_rate": 7.156368996437784e-06, "loss": 0.1309860497713089, "step": 5329 }, { "epoch": 0.6467661691542289, "grad_norm": 3.964226245880127, "learning_rate": 7.153912295786759e-06, "loss": 0.4540060758590698, "step": 5330 }, { "epoch": 0.646887513651256, "grad_norm": 3.8869893550872803, "learning_rate": 7.151455595135734e-06, "loss": 0.5192214846611023, "step": 5331 }, { "epoch": 0.647008858148283, "grad_norm": 2.0198261737823486, "learning_rate": 7.148998894484708e-06, "loss": 0.1576482504606247, "step": 5332 }, { "epoch": 0.6471302026453101, "grad_norm": 0.5401488542556763, "learning_rate": 7.146542193833682e-06, "loss": 0.0026412131264805794, "step": 5333 }, { "epoch": 0.6472515471423371, "grad_norm": 1.3097823858261108, "learning_rate": 7.1440854931826564e-06, "loss": 0.40700775384902954, "step": 5334 }, { "epoch": 0.6473728916393642, "grad_norm": 2.963634967803955, "learning_rate": 7.141628792531631e-06, "loss": 0.24457323551177979, "step": 5335 }, { "epoch": 0.6474942361363912, "grad_norm": 2.0370354652404785, "learning_rate": 7.139172091880605e-06, "loss": 0.3624710440635681, "step": 5336 }, { "epoch": 0.6476155806334183, "grad_norm": 3.4412221908569336, "learning_rate": 7.136715391229579e-06, "loss": 0.2688665986061096, "step": 5337 }, { "epoch": 0.6477369251304453, "grad_norm": 1.7715803384780884, "learning_rate": 7.1342586905785535e-06, "loss": 0.05225639045238495, "step": 5338 }, { "epoch": 0.6478582696274724, "grad_norm": 3.026961326599121, "learning_rate": 7.131801989927528e-06, "loss": 0.07413452863693237, "step": 5339 }, { "epoch": 0.6479796141244994, "grad_norm": 1.175209879875183, "learning_rate": 7.129345289276503e-06, "loss": 0.0682985857129097, "step": 5340 }, { "epoch": 0.6481009586215265, "grad_norm": 3.1357033252716064, "learning_rate": 7.126888588625477e-06, "loss": 0.11209096014499664, "step": 5341 }, { "epoch": 0.6482223031185536, "grad_norm": 3.152085781097412, "learning_rate": 7.124431887974451e-06, "loss": 0.4559754431247711, "step": 5342 }, { "epoch": 0.6483436476155806, "grad_norm": 3.4920804500579834, "learning_rate": 7.121975187323426e-06, "loss": 0.42634111642837524, "step": 5343 }, { "epoch": 0.6484649921126077, "grad_norm": 3.077864408493042, "learning_rate": 7.1195184866724e-06, "loss": 0.20550504326820374, "step": 5344 }, { "epoch": 0.6485863366096347, "grad_norm": 1.5427824258804321, "learning_rate": 7.117061786021374e-06, "loss": 0.029685180634260178, "step": 5345 }, { "epoch": 0.6487076811066618, "grad_norm": 2.1412782669067383, "learning_rate": 7.114605085370348e-06, "loss": 0.07341589033603668, "step": 5346 }, { "epoch": 0.6488290256036888, "grad_norm": 2.2625107765197754, "learning_rate": 7.112148384719323e-06, "loss": 0.1409926861524582, "step": 5347 }, { "epoch": 0.6489503701007159, "grad_norm": 2.3004796504974365, "learning_rate": 7.109691684068297e-06, "loss": 0.10340629518032074, "step": 5348 }, { "epoch": 0.649071714597743, "grad_norm": 4.027712345123291, "learning_rate": 7.107234983417272e-06, "loss": 0.3554992079734802, "step": 5349 }, { "epoch": 0.6491930590947701, "grad_norm": 2.8003439903259277, "learning_rate": 7.104778282766246e-06, "loss": 0.40208905935287476, "step": 5350 }, { "epoch": 0.6493144035917972, "grad_norm": 2.146296262741089, "learning_rate": 7.1023215821152205e-06, "loss": 0.2513790428638458, "step": 5351 }, { "epoch": 0.6494357480888242, "grad_norm": 2.7272846698760986, "learning_rate": 7.099864881464195e-06, "loss": 0.3574090301990509, "step": 5352 }, { "epoch": 0.6495570925858513, "grad_norm": 2.268134117126465, "learning_rate": 7.097408180813169e-06, "loss": 0.26707515120506287, "step": 5353 }, { "epoch": 0.6496784370828783, "grad_norm": 1.1569606065750122, "learning_rate": 7.094951480162143e-06, "loss": 0.013408780097961426, "step": 5354 }, { "epoch": 0.6497997815799054, "grad_norm": 4.786265850067139, "learning_rate": 7.0924947795111175e-06, "loss": 0.39687496423721313, "step": 5355 }, { "epoch": 0.6499211260769324, "grad_norm": 2.4867453575134277, "learning_rate": 7.090038078860092e-06, "loss": 0.1574365794658661, "step": 5356 }, { "epoch": 0.6500424705739595, "grad_norm": 2.626999855041504, "learning_rate": 7.087581378209066e-06, "loss": 0.39261120557785034, "step": 5357 }, { "epoch": 0.6501638150709865, "grad_norm": 2.048325538635254, "learning_rate": 7.08512467755804e-06, "loss": 0.14659641683101654, "step": 5358 }, { "epoch": 0.6502851595680136, "grad_norm": 2.68200945854187, "learning_rate": 7.082667976907014e-06, "loss": 0.40521812438964844, "step": 5359 }, { "epoch": 0.6504065040650406, "grad_norm": 0.5102768540382385, "learning_rate": 7.080211276255988e-06, "loss": 0.005563216749578714, "step": 5360 }, { "epoch": 0.6505278485620677, "grad_norm": 0.6556315422058105, "learning_rate": 7.077754575604962e-06, "loss": 0.01562683843076229, "step": 5361 }, { "epoch": 0.6506491930590947, "grad_norm": 3.2406671047210693, "learning_rate": 7.075297874953937e-06, "loss": 0.1356423944234848, "step": 5362 }, { "epoch": 0.6507705375561218, "grad_norm": 3.6656618118286133, "learning_rate": 7.072841174302912e-06, "loss": 0.5700106620788574, "step": 5363 }, { "epoch": 0.6508918820531489, "grad_norm": 2.406106472015381, "learning_rate": 7.070384473651886e-06, "loss": 0.4174679219722748, "step": 5364 }, { "epoch": 0.6510132265501759, "grad_norm": 1.7045365571975708, "learning_rate": 7.06792777300086e-06, "loss": 0.03032086044549942, "step": 5365 }, { "epoch": 0.651134571047203, "grad_norm": 2.795091152191162, "learning_rate": 7.065471072349834e-06, "loss": 0.18117356300354004, "step": 5366 }, { "epoch": 0.6512559155442301, "grad_norm": 3.1364381313323975, "learning_rate": 7.063014371698809e-06, "loss": 0.05605272576212883, "step": 5367 }, { "epoch": 0.6513772600412572, "grad_norm": 2.805398941040039, "learning_rate": 7.060557671047783e-06, "loss": 0.43755531311035156, "step": 5368 }, { "epoch": 0.6514986045382842, "grad_norm": 2.1558516025543213, "learning_rate": 7.058100970396757e-06, "loss": 0.3368094861507416, "step": 5369 }, { "epoch": 0.6516199490353113, "grad_norm": 4.519784450531006, "learning_rate": 7.055644269745731e-06, "loss": 0.6358950734138489, "step": 5370 }, { "epoch": 0.6517412935323383, "grad_norm": 2.322420120239258, "learning_rate": 7.053187569094706e-06, "loss": 0.30271294713020325, "step": 5371 }, { "epoch": 0.6518626380293654, "grad_norm": 2.9549715518951416, "learning_rate": 7.050730868443681e-06, "loss": 0.626772403717041, "step": 5372 }, { "epoch": 0.6519839825263924, "grad_norm": 2.0745465755462646, "learning_rate": 7.048274167792655e-06, "loss": 0.20047688484191895, "step": 5373 }, { "epoch": 0.6521053270234195, "grad_norm": 2.8682637214660645, "learning_rate": 7.045817467141629e-06, "loss": 0.11811717599630356, "step": 5374 }, { "epoch": 0.6522266715204466, "grad_norm": 3.234260320663452, "learning_rate": 7.0433607664906036e-06, "loss": 0.1817900389432907, "step": 5375 }, { "epoch": 0.6523480160174736, "grad_norm": 2.7459990978240967, "learning_rate": 7.040904065839578e-06, "loss": 0.28619179129600525, "step": 5376 }, { "epoch": 0.6524693605145007, "grad_norm": 1.4276217222213745, "learning_rate": 7.038447365188552e-06, "loss": 0.11366422474384308, "step": 5377 }, { "epoch": 0.6525907050115277, "grad_norm": 2.9763190746307373, "learning_rate": 7.035990664537526e-06, "loss": 0.17099960148334503, "step": 5378 }, { "epoch": 0.6527120495085548, "grad_norm": 3.4679367542266846, "learning_rate": 7.033533963886501e-06, "loss": 0.38957640528678894, "step": 5379 }, { "epoch": 0.6528333940055818, "grad_norm": 4.0238142013549805, "learning_rate": 7.031077263235475e-06, "loss": 0.37720787525177, "step": 5380 }, { "epoch": 0.6529547385026089, "grad_norm": 2.3461062908172607, "learning_rate": 7.028620562584449e-06, "loss": 0.30270013213157654, "step": 5381 }, { "epoch": 0.6530760829996359, "grad_norm": 3.0273773670196533, "learning_rate": 7.026163861933424e-06, "loss": 0.11384717375040054, "step": 5382 }, { "epoch": 0.653197427496663, "grad_norm": 3.616004228591919, "learning_rate": 7.0237071612823985e-06, "loss": 0.31804853677749634, "step": 5383 }, { "epoch": 0.65331877199369, "grad_norm": 3.0264105796813965, "learning_rate": 7.021250460631373e-06, "loss": 0.7139288187026978, "step": 5384 }, { "epoch": 0.6534401164907171, "grad_norm": 4.377610206604004, "learning_rate": 7.018793759980347e-06, "loss": 0.26987600326538086, "step": 5385 }, { "epoch": 0.6535614609877443, "grad_norm": 4.759424209594727, "learning_rate": 7.016337059329321e-06, "loss": 0.3777257800102234, "step": 5386 }, { "epoch": 0.6536828054847713, "grad_norm": 1.9420127868652344, "learning_rate": 7.0138803586782955e-06, "loss": 0.5316764116287231, "step": 5387 }, { "epoch": 0.6538041499817984, "grad_norm": 1.8100734949111938, "learning_rate": 7.01142365802727e-06, "loss": 0.026012804359197617, "step": 5388 }, { "epoch": 0.6539254944788254, "grad_norm": 1.1917668581008911, "learning_rate": 7.008966957376244e-06, "loss": 0.15556439757347107, "step": 5389 }, { "epoch": 0.6540468389758525, "grad_norm": 4.951122760772705, "learning_rate": 7.006510256725218e-06, "loss": 0.5964975953102112, "step": 5390 }, { "epoch": 0.6541681834728795, "grad_norm": 1.994397521018982, "learning_rate": 7.0040535560741925e-06, "loss": 0.05776876211166382, "step": 5391 }, { "epoch": 0.6542895279699066, "grad_norm": 3.280806541442871, "learning_rate": 7.001596855423168e-06, "loss": 0.18761898577213287, "step": 5392 }, { "epoch": 0.6544108724669336, "grad_norm": 2.4097137451171875, "learning_rate": 6.999140154772142e-06, "loss": 0.08639472723007202, "step": 5393 }, { "epoch": 0.6545322169639607, "grad_norm": 2.5132453441619873, "learning_rate": 6.996683454121116e-06, "loss": 0.2856960594654083, "step": 5394 }, { "epoch": 0.6546535614609877, "grad_norm": 2.309635639190674, "learning_rate": 6.99422675347009e-06, "loss": 0.17434543371200562, "step": 5395 }, { "epoch": 0.6547749059580148, "grad_norm": 3.7251594066619873, "learning_rate": 6.991770052819065e-06, "loss": 0.2689895033836365, "step": 5396 }, { "epoch": 0.6548962504550419, "grad_norm": 4.108977794647217, "learning_rate": 6.989313352168039e-06, "loss": 0.44190171360969543, "step": 5397 }, { "epoch": 0.6550175949520689, "grad_norm": 1.676553726196289, "learning_rate": 6.986856651517013e-06, "loss": 0.17863301932811737, "step": 5398 }, { "epoch": 0.655138939449096, "grad_norm": 1.9282386302947998, "learning_rate": 6.9843999508659875e-06, "loss": 0.058205049484968185, "step": 5399 }, { "epoch": 0.655260283946123, "grad_norm": 2.679506540298462, "learning_rate": 6.981943250214962e-06, "loss": 0.22700083255767822, "step": 5400 }, { "epoch": 0.6553816284431501, "grad_norm": 1.8796812295913696, "learning_rate": 6.979486549563936e-06, "loss": 0.04699080437421799, "step": 5401 }, { "epoch": 0.6555029729401771, "grad_norm": 2.1228792667388916, "learning_rate": 6.977029848912911e-06, "loss": 0.136936217546463, "step": 5402 }, { "epoch": 0.6556243174372042, "grad_norm": 0.2864266037940979, "learning_rate": 6.974573148261885e-06, "loss": 0.006578727159649134, "step": 5403 }, { "epoch": 0.6557456619342312, "grad_norm": 2.9458062648773193, "learning_rate": 6.97211644761086e-06, "loss": 0.25590750575065613, "step": 5404 }, { "epoch": 0.6558670064312584, "grad_norm": 3.090548038482666, "learning_rate": 6.969659746959834e-06, "loss": 0.46683579683303833, "step": 5405 }, { "epoch": 0.6559883509282854, "grad_norm": 2.675194263458252, "learning_rate": 6.967203046308808e-06, "loss": 0.09062185883522034, "step": 5406 }, { "epoch": 0.6561096954253125, "grad_norm": 4.150879383087158, "learning_rate": 6.964746345657782e-06, "loss": 0.1827821135520935, "step": 5407 }, { "epoch": 0.6562310399223396, "grad_norm": 4.0868377685546875, "learning_rate": 6.962289645006757e-06, "loss": 0.4718918204307556, "step": 5408 }, { "epoch": 0.6563523844193666, "grad_norm": 3.4607090950012207, "learning_rate": 6.959832944355731e-06, "loss": 0.180995911359787, "step": 5409 }, { "epoch": 0.6564737289163937, "grad_norm": 1.7607561349868774, "learning_rate": 6.957376243704705e-06, "loss": 0.13535870611667633, "step": 5410 }, { "epoch": 0.6565950734134207, "grad_norm": 2.61734938621521, "learning_rate": 6.954919543053679e-06, "loss": 0.4111171364784241, "step": 5411 }, { "epoch": 0.6567164179104478, "grad_norm": 2.2470059394836426, "learning_rate": 6.9524628424026545e-06, "loss": 0.16884523630142212, "step": 5412 }, { "epoch": 0.6568377624074748, "grad_norm": 1.4135264158248901, "learning_rate": 6.950006141751629e-06, "loss": 0.032384101301431656, "step": 5413 }, { "epoch": 0.6569591069045019, "grad_norm": 2.374598503112793, "learning_rate": 6.947549441100603e-06, "loss": 0.0675191804766655, "step": 5414 }, { "epoch": 0.6570804514015289, "grad_norm": 2.9338395595550537, "learning_rate": 6.945092740449577e-06, "loss": 0.20964652299880981, "step": 5415 }, { "epoch": 0.657201795898556, "grad_norm": 4.238141059875488, "learning_rate": 6.9426360397985515e-06, "loss": 0.5485671758651733, "step": 5416 }, { "epoch": 0.657323140395583, "grad_norm": 2.5284218788146973, "learning_rate": 6.940179339147526e-06, "loss": 0.1669018417596817, "step": 5417 }, { "epoch": 0.6574444848926101, "grad_norm": 2.090810537338257, "learning_rate": 6.9377226384965e-06, "loss": 0.39046671986579895, "step": 5418 }, { "epoch": 0.6575658293896371, "grad_norm": 2.990076780319214, "learning_rate": 6.935265937845474e-06, "loss": 0.381405770778656, "step": 5419 }, { "epoch": 0.6576871738866642, "grad_norm": 2.424238681793213, "learning_rate": 6.9328092371944486e-06, "loss": 0.23704314231872559, "step": 5420 }, { "epoch": 0.6578085183836913, "grad_norm": 1.8313288688659668, "learning_rate": 6.930352536543423e-06, "loss": 0.3616213798522949, "step": 5421 }, { "epoch": 0.6579298628807183, "grad_norm": 1.3959062099456787, "learning_rate": 6.927895835892398e-06, "loss": 0.04481736570596695, "step": 5422 }, { "epoch": 0.6580512073777455, "grad_norm": 1.8262929916381836, "learning_rate": 6.925439135241372e-06, "loss": 0.3286037743091583, "step": 5423 }, { "epoch": 0.6581725518747725, "grad_norm": 1.9123215675354004, "learning_rate": 6.9229824345903464e-06, "loss": 0.23849859833717346, "step": 5424 }, { "epoch": 0.6582938963717996, "grad_norm": 1.9951304197311401, "learning_rate": 6.920525733939321e-06, "loss": 0.10141445696353912, "step": 5425 }, { "epoch": 0.6584152408688266, "grad_norm": 2.401860475540161, "learning_rate": 6.918069033288295e-06, "loss": 0.21767881512641907, "step": 5426 }, { "epoch": 0.6585365853658537, "grad_norm": 2.903214931488037, "learning_rate": 6.915612332637268e-06, "loss": 0.025038015097379684, "step": 5427 }, { "epoch": 0.6586579298628807, "grad_norm": 6.916627883911133, "learning_rate": 6.913155631986243e-06, "loss": 0.46464529633522034, "step": 5428 }, { "epoch": 0.6587792743599078, "grad_norm": 3.615034341812134, "learning_rate": 6.910698931335217e-06, "loss": 0.14290864765644073, "step": 5429 }, { "epoch": 0.6589006188569348, "grad_norm": 2.76067852973938, "learning_rate": 6.908242230684191e-06, "loss": 0.3233334422111511, "step": 5430 }, { "epoch": 0.6590219633539619, "grad_norm": 6.736630439758301, "learning_rate": 6.905785530033165e-06, "loss": 0.2738841474056244, "step": 5431 }, { "epoch": 0.659143307850989, "grad_norm": 3.617276191711426, "learning_rate": 6.90332882938214e-06, "loss": 0.18215060234069824, "step": 5432 }, { "epoch": 0.659264652348016, "grad_norm": 2.383514642715454, "learning_rate": 6.900872128731114e-06, "loss": 0.2697574198246002, "step": 5433 }, { "epoch": 0.6593859968450431, "grad_norm": 2.671180248260498, "learning_rate": 6.898415428080088e-06, "loss": 0.15836653113365173, "step": 5434 }, { "epoch": 0.6595073413420701, "grad_norm": 2.658212661743164, "learning_rate": 6.895958727429063e-06, "loss": 0.1833317130804062, "step": 5435 }, { "epoch": 0.6596286858390972, "grad_norm": 2.6970930099487305, "learning_rate": 6.8935020267780375e-06, "loss": 0.3472805321216583, "step": 5436 }, { "epoch": 0.6597500303361242, "grad_norm": 3.192739963531494, "learning_rate": 6.891045326127012e-06, "loss": 0.18283437192440033, "step": 5437 }, { "epoch": 0.6598713748331513, "grad_norm": 2.556281089782715, "learning_rate": 6.888588625475986e-06, "loss": 0.44779181480407715, "step": 5438 }, { "epoch": 0.6599927193301783, "grad_norm": 2.187122106552124, "learning_rate": 6.88613192482496e-06, "loss": 0.06538421660661697, "step": 5439 }, { "epoch": 0.6601140638272054, "grad_norm": 4.024078845977783, "learning_rate": 6.883675224173935e-06, "loss": 0.16787391901016235, "step": 5440 }, { "epoch": 0.6602354083242324, "grad_norm": 3.425980806350708, "learning_rate": 6.881218523522909e-06, "loss": 0.14063674211502075, "step": 5441 }, { "epoch": 0.6603567528212596, "grad_norm": 2.725271701812744, "learning_rate": 6.878761822871883e-06, "loss": 0.34212785959243774, "step": 5442 }, { "epoch": 0.6604780973182867, "grad_norm": 2.529918909072876, "learning_rate": 6.876305122220857e-06, "loss": 0.11487560719251633, "step": 5443 }, { "epoch": 0.6605994418153137, "grad_norm": 2.3305420875549316, "learning_rate": 6.873848421569832e-06, "loss": 0.4991544187068939, "step": 5444 }, { "epoch": 0.6607207863123408, "grad_norm": 2.5811285972595215, "learning_rate": 6.871391720918807e-06, "loss": 0.11629299819469452, "step": 5445 }, { "epoch": 0.6608421308093678, "grad_norm": 2.741771936416626, "learning_rate": 6.868935020267781e-06, "loss": 0.13820014894008636, "step": 5446 }, { "epoch": 0.6609634753063949, "grad_norm": 3.547617197036743, "learning_rate": 6.866478319616755e-06, "loss": 0.18887688219547272, "step": 5447 }, { "epoch": 0.6610848198034219, "grad_norm": 1.885022759437561, "learning_rate": 6.8640216189657295e-06, "loss": 0.06440681219100952, "step": 5448 }, { "epoch": 0.661206164300449, "grad_norm": 1.6245648860931396, "learning_rate": 6.861564918314704e-06, "loss": 0.11675214767456055, "step": 5449 }, { "epoch": 0.661327508797476, "grad_norm": 2.192105293273926, "learning_rate": 6.859108217663678e-06, "loss": 0.24698755145072937, "step": 5450 }, { "epoch": 0.6614488532945031, "grad_norm": 2.1695117950439453, "learning_rate": 6.856651517012652e-06, "loss": 0.2551764249801636, "step": 5451 }, { "epoch": 0.6615701977915301, "grad_norm": 2.7140331268310547, "learning_rate": 6.8541948163616265e-06, "loss": 0.4169566035270691, "step": 5452 }, { "epoch": 0.6616915422885572, "grad_norm": 0.5542015433311462, "learning_rate": 6.851738115710601e-06, "loss": 0.004856256302446127, "step": 5453 }, { "epoch": 0.6618128867855843, "grad_norm": 3.277898073196411, "learning_rate": 6.849281415059575e-06, "loss": 0.29422488808631897, "step": 5454 }, { "epoch": 0.6619342312826113, "grad_norm": 2.3833491802215576, "learning_rate": 6.84682471440855e-06, "loss": 0.08606436103582382, "step": 5455 }, { "epoch": 0.6620555757796384, "grad_norm": 5.4468865394592285, "learning_rate": 6.844368013757524e-06, "loss": 0.2247561812400818, "step": 5456 }, { "epoch": 0.6621769202766654, "grad_norm": 3.1844582557678223, "learning_rate": 6.841911313106499e-06, "loss": 0.18188631534576416, "step": 5457 }, { "epoch": 0.6622982647736925, "grad_norm": 3.2049169540405273, "learning_rate": 6.839454612455473e-06, "loss": 0.26261094212532043, "step": 5458 }, { "epoch": 0.6624196092707195, "grad_norm": 1.5464452505111694, "learning_rate": 6.836997911804447e-06, "loss": 0.21007059514522552, "step": 5459 }, { "epoch": 0.6625409537677467, "grad_norm": 1.5835366249084473, "learning_rate": 6.8345412111534214e-06, "loss": 0.03176227584481239, "step": 5460 }, { "epoch": 0.6626622982647737, "grad_norm": 3.7851033210754395, "learning_rate": 6.832084510502396e-06, "loss": 0.33773383498191833, "step": 5461 }, { "epoch": 0.6627836427618008, "grad_norm": 2.603685140609741, "learning_rate": 6.82962780985137e-06, "loss": 0.14527064561843872, "step": 5462 }, { "epoch": 0.6629049872588278, "grad_norm": 2.8335654735565186, "learning_rate": 6.827171109200344e-06, "loss": 0.4494399428367615, "step": 5463 }, { "epoch": 0.6630263317558549, "grad_norm": 0.044951941817998886, "learning_rate": 6.8247144085493185e-06, "loss": 0.00038519114605151117, "step": 5464 }, { "epoch": 0.663147676252882, "grad_norm": 1.5377261638641357, "learning_rate": 6.8222577078982936e-06, "loss": 0.07600083947181702, "step": 5465 }, { "epoch": 0.663269020749909, "grad_norm": 4.09573221206665, "learning_rate": 6.819801007247268e-06, "loss": 0.3987540602684021, "step": 5466 }, { "epoch": 0.6633903652469361, "grad_norm": 3.7100517749786377, "learning_rate": 6.817344306596242e-06, "loss": 0.4876163601875305, "step": 5467 }, { "epoch": 0.6635117097439631, "grad_norm": 2.4131739139556885, "learning_rate": 6.814887605945216e-06, "loss": 0.10517054796218872, "step": 5468 }, { "epoch": 0.6636330542409902, "grad_norm": 2.850324869155884, "learning_rate": 6.812430905294191e-06, "loss": 0.3777635097503662, "step": 5469 }, { "epoch": 0.6637543987380172, "grad_norm": 2.7085962295532227, "learning_rate": 6.809974204643165e-06, "loss": 0.09308503568172455, "step": 5470 }, { "epoch": 0.6638757432350443, "grad_norm": 1.7542706727981567, "learning_rate": 6.807517503992139e-06, "loss": 0.10882838070392609, "step": 5471 }, { "epoch": 0.6639970877320713, "grad_norm": 3.0280401706695557, "learning_rate": 6.805060803341113e-06, "loss": 0.5282483100891113, "step": 5472 }, { "epoch": 0.6641184322290984, "grad_norm": 2.256420612335205, "learning_rate": 6.802604102690088e-06, "loss": 0.20184007287025452, "step": 5473 }, { "epoch": 0.6642397767261254, "grad_norm": 4.023746490478516, "learning_rate": 6.800147402039062e-06, "loss": 0.5977967381477356, "step": 5474 }, { "epoch": 0.6643611212231525, "grad_norm": 1.1739015579223633, "learning_rate": 6.797690701388037e-06, "loss": 0.034603800624608994, "step": 5475 }, { "epoch": 0.6644824657201795, "grad_norm": 2.378967046737671, "learning_rate": 6.795234000737011e-06, "loss": 0.5716724395751953, "step": 5476 }, { "epoch": 0.6646038102172066, "grad_norm": 3.461534023284912, "learning_rate": 6.7927773000859855e-06, "loss": 0.4864707589149475, "step": 5477 }, { "epoch": 0.6647251547142337, "grad_norm": 1.9932347536087036, "learning_rate": 6.79032059943496e-06, "loss": 0.04187903553247452, "step": 5478 }, { "epoch": 0.6648464992112608, "grad_norm": 2.463850259780884, "learning_rate": 6.787863898783934e-06, "loss": 0.45438358187675476, "step": 5479 }, { "epoch": 0.6649678437082879, "grad_norm": 2.6375484466552734, "learning_rate": 6.785407198132908e-06, "loss": 0.39521390199661255, "step": 5480 }, { "epoch": 0.6650891882053149, "grad_norm": 1.1503474712371826, "learning_rate": 6.7829504974818826e-06, "loss": 0.09201270341873169, "step": 5481 }, { "epoch": 0.665210532702342, "grad_norm": 2.458698272705078, "learning_rate": 6.780493796830857e-06, "loss": 0.1522335559129715, "step": 5482 }, { "epoch": 0.665331877199369, "grad_norm": 2.951310396194458, "learning_rate": 6.778037096179831e-06, "loss": 0.751285195350647, "step": 5483 }, { "epoch": 0.6654532216963961, "grad_norm": 5.303328037261963, "learning_rate": 6.775580395528806e-06, "loss": 0.6291011571884155, "step": 5484 }, { "epoch": 0.6655745661934231, "grad_norm": 2.211300849914551, "learning_rate": 6.7731236948777804e-06, "loss": 0.27722910046577454, "step": 5485 }, { "epoch": 0.6656959106904502, "grad_norm": 1.8691890239715576, "learning_rate": 6.770666994226755e-06, "loss": 0.035137277096509933, "step": 5486 }, { "epoch": 0.6658172551874773, "grad_norm": 3.8811395168304443, "learning_rate": 6.768210293575729e-06, "loss": 0.32429951429367065, "step": 5487 }, { "epoch": 0.6659385996845043, "grad_norm": 1.7560003995895386, "learning_rate": 6.765753592924703e-06, "loss": 0.216983363032341, "step": 5488 }, { "epoch": 0.6660599441815314, "grad_norm": 2.3027234077453613, "learning_rate": 6.7632968922736775e-06, "loss": 0.3121854364871979, "step": 5489 }, { "epoch": 0.6661812886785584, "grad_norm": 0.6129570603370667, "learning_rate": 6.760840191622652e-06, "loss": 0.009647605009377003, "step": 5490 }, { "epoch": 0.6663026331755855, "grad_norm": 4.5011444091796875, "learning_rate": 6.758383490971626e-06, "loss": 0.20618951320648193, "step": 5491 }, { "epoch": 0.6664239776726125, "grad_norm": 0.5530362725257874, "learning_rate": 6.7559267903206e-06, "loss": 0.019204849377274513, "step": 5492 }, { "epoch": 0.6665453221696396, "grad_norm": 2.6736371517181396, "learning_rate": 6.7534700896695745e-06, "loss": 0.026585828512907028, "step": 5493 }, { "epoch": 0.6666666666666666, "grad_norm": 2.296506404876709, "learning_rate": 6.75101338901855e-06, "loss": 0.28106608986854553, "step": 5494 }, { "epoch": 0.6667880111636937, "grad_norm": 2.441800832748413, "learning_rate": 6.748556688367522e-06, "loss": 0.43331286311149597, "step": 5495 }, { "epoch": 0.6669093556607207, "grad_norm": 5.72952127456665, "learning_rate": 6.7460999877164964e-06, "loss": 0.4107864797115326, "step": 5496 }, { "epoch": 0.6670307001577478, "grad_norm": 3.397773504257202, "learning_rate": 6.7436432870654715e-06, "loss": 0.45873361825942993, "step": 5497 }, { "epoch": 0.667152044654775, "grad_norm": 2.3726885318756104, "learning_rate": 6.741186586414446e-06, "loss": 0.17405016720294952, "step": 5498 }, { "epoch": 0.667273389151802, "grad_norm": 2.1865899562835693, "learning_rate": 6.73872988576342e-06, "loss": 0.16605165600776672, "step": 5499 }, { "epoch": 0.6673947336488291, "grad_norm": 1.8954287767410278, "learning_rate": 6.736273185112394e-06, "loss": 0.08954352140426636, "step": 5500 }, { "epoch": 0.6675160781458561, "grad_norm": 3.172576904296875, "learning_rate": 6.7338164844613686e-06, "loss": 0.5038388967514038, "step": 5501 }, { "epoch": 0.6676374226428832, "grad_norm": 4.444916725158691, "learning_rate": 6.731359783810343e-06, "loss": 0.6621733903884888, "step": 5502 }, { "epoch": 0.6677587671399102, "grad_norm": 4.196537017822266, "learning_rate": 6.728903083159317e-06, "loss": 0.15494079887866974, "step": 5503 }, { "epoch": 0.6678801116369373, "grad_norm": 2.373587131500244, "learning_rate": 6.726446382508291e-06, "loss": 0.2022574245929718, "step": 5504 }, { "epoch": 0.6680014561339643, "grad_norm": 1.895795226097107, "learning_rate": 6.723989681857266e-06, "loss": 0.09423115104436874, "step": 5505 }, { "epoch": 0.6681228006309914, "grad_norm": 3.6271400451660156, "learning_rate": 6.72153298120624e-06, "loss": 0.2506415843963623, "step": 5506 }, { "epoch": 0.6682441451280184, "grad_norm": 0.006097395438700914, "learning_rate": 6.719076280555215e-06, "loss": 6.811006460338831e-05, "step": 5507 }, { "epoch": 0.6683654896250455, "grad_norm": 2.358142614364624, "learning_rate": 6.716619579904189e-06, "loss": 0.15453283488750458, "step": 5508 }, { "epoch": 0.6684868341220725, "grad_norm": 2.70408034324646, "learning_rate": 6.7141628792531635e-06, "loss": 0.2456943243741989, "step": 5509 }, { "epoch": 0.6686081786190996, "grad_norm": 1.0259552001953125, "learning_rate": 6.711706178602138e-06, "loss": 0.04732144996523857, "step": 5510 }, { "epoch": 0.6687295231161267, "grad_norm": 2.1889679431915283, "learning_rate": 6.709249477951112e-06, "loss": 0.3691939413547516, "step": 5511 }, { "epoch": 0.6688508676131537, "grad_norm": 2.9604945182800293, "learning_rate": 6.706792777300086e-06, "loss": 0.30575239658355713, "step": 5512 }, { "epoch": 0.6689722121101808, "grad_norm": 3.0194923877716064, "learning_rate": 6.7043360766490605e-06, "loss": 0.22382302582263947, "step": 5513 }, { "epoch": 0.6690935566072078, "grad_norm": 1.8807471990585327, "learning_rate": 6.701879375998035e-06, "loss": 0.09779742360115051, "step": 5514 }, { "epoch": 0.6692149011042349, "grad_norm": 0.11813199520111084, "learning_rate": 6.699422675347009e-06, "loss": 0.0020191262010484934, "step": 5515 }, { "epoch": 0.669336245601262, "grad_norm": 1.8400187492370605, "learning_rate": 6.696965974695983e-06, "loss": 0.15742945671081543, "step": 5516 }, { "epoch": 0.6694575900982891, "grad_norm": 2.4510364532470703, "learning_rate": 6.694509274044958e-06, "loss": 0.25071439146995544, "step": 5517 }, { "epoch": 0.6695789345953161, "grad_norm": 3.9283738136291504, "learning_rate": 6.692052573393933e-06, "loss": 0.2009691298007965, "step": 5518 }, { "epoch": 0.6697002790923432, "grad_norm": 3.1237053871154785, "learning_rate": 6.689595872742907e-06, "loss": 0.4484008848667145, "step": 5519 }, { "epoch": 0.6698216235893703, "grad_norm": 1.8858933448791504, "learning_rate": 6.687139172091881e-06, "loss": 0.1727811098098755, "step": 5520 }, { "epoch": 0.6699429680863973, "grad_norm": 1.1124522686004639, "learning_rate": 6.684682471440855e-06, "loss": 0.03542888164520264, "step": 5521 }, { "epoch": 0.6700643125834244, "grad_norm": 2.213437557220459, "learning_rate": 6.68222577078983e-06, "loss": 0.3319013714790344, "step": 5522 }, { "epoch": 0.6701856570804514, "grad_norm": 3.609431743621826, "learning_rate": 6.679769070138804e-06, "loss": 0.25913262367248535, "step": 5523 }, { "epoch": 0.6703070015774785, "grad_norm": 1.4197149276733398, "learning_rate": 6.677312369487778e-06, "loss": 0.151391863822937, "step": 5524 }, { "epoch": 0.6704283460745055, "grad_norm": 2.9046998023986816, "learning_rate": 6.6748556688367525e-06, "loss": 0.3086477816104889, "step": 5525 }, { "epoch": 0.6705496905715326, "grad_norm": 2.952237367630005, "learning_rate": 6.672398968185727e-06, "loss": 0.41092440485954285, "step": 5526 }, { "epoch": 0.6706710350685596, "grad_norm": 3.583719253540039, "learning_rate": 6.669942267534702e-06, "loss": 0.14042919874191284, "step": 5527 }, { "epoch": 0.6707923795655867, "grad_norm": 3.921415328979492, "learning_rate": 6.667485566883676e-06, "loss": 0.7217460870742798, "step": 5528 }, { "epoch": 0.6709137240626137, "grad_norm": 2.5777692794799805, "learning_rate": 6.66502886623265e-06, "loss": 0.45752033591270447, "step": 5529 }, { "epoch": 0.6710350685596408, "grad_norm": 0.11003611236810684, "learning_rate": 6.662572165581625e-06, "loss": 0.0017357264878228307, "step": 5530 }, { "epoch": 0.6711564130566678, "grad_norm": 1.6429364681243896, "learning_rate": 6.660115464930599e-06, "loss": 0.36502379179000854, "step": 5531 }, { "epoch": 0.6712777575536949, "grad_norm": 3.7509469985961914, "learning_rate": 6.657658764279573e-06, "loss": 0.136823832988739, "step": 5532 }, { "epoch": 0.671399102050722, "grad_norm": 0.008937112055718899, "learning_rate": 6.655202063628547e-06, "loss": 0.00010829557140823454, "step": 5533 }, { "epoch": 0.671520446547749, "grad_norm": 2.8988256454467773, "learning_rate": 6.652745362977522e-06, "loss": 0.24391184747219086, "step": 5534 }, { "epoch": 0.6716417910447762, "grad_norm": 3.7339224815368652, "learning_rate": 6.650288662326496e-06, "loss": 0.4774061143398285, "step": 5535 }, { "epoch": 0.6717631355418032, "grad_norm": 2.7067511081695557, "learning_rate": 6.64783196167547e-06, "loss": 0.11795608699321747, "step": 5536 }, { "epoch": 0.6718844800388303, "grad_norm": 1.814666986465454, "learning_rate": 6.645375261024445e-06, "loss": 0.05710500851273537, "step": 5537 }, { "epoch": 0.6720058245358573, "grad_norm": 2.7973439693450928, "learning_rate": 6.6429185603734195e-06, "loss": 0.27974510192871094, "step": 5538 }, { "epoch": 0.6721271690328844, "grad_norm": 3.7600221633911133, "learning_rate": 6.640461859722394e-06, "loss": 0.3260442018508911, "step": 5539 }, { "epoch": 0.6722485135299114, "grad_norm": 1.2003672122955322, "learning_rate": 6.638005159071368e-06, "loss": 0.019528107717633247, "step": 5540 }, { "epoch": 0.6723698580269385, "grad_norm": 1.5203560590744019, "learning_rate": 6.635548458420342e-06, "loss": 0.09913506358861923, "step": 5541 }, { "epoch": 0.6724912025239655, "grad_norm": 2.4883971214294434, "learning_rate": 6.6330917577693165e-06, "loss": 0.24341395497322083, "step": 5542 }, { "epoch": 0.6726125470209926, "grad_norm": 0.8650296926498413, "learning_rate": 6.630635057118291e-06, "loss": 0.015706948935985565, "step": 5543 }, { "epoch": 0.6727338915180197, "grad_norm": 3.0836281776428223, "learning_rate": 6.628178356467265e-06, "loss": 0.24335551261901855, "step": 5544 }, { "epoch": 0.6728552360150467, "grad_norm": 3.15561580657959, "learning_rate": 6.625721655816239e-06, "loss": 0.5175299644470215, "step": 5545 }, { "epoch": 0.6729765805120738, "grad_norm": 2.4484927654266357, "learning_rate": 6.6232649551652136e-06, "loss": 0.12641756236553192, "step": 5546 }, { "epoch": 0.6730979250091008, "grad_norm": 3.7064051628112793, "learning_rate": 6.620808254514189e-06, "loss": 0.3600941300392151, "step": 5547 }, { "epoch": 0.6732192695061279, "grad_norm": 2.8471930027008057, "learning_rate": 6.618351553863163e-06, "loss": 0.16768944263458252, "step": 5548 }, { "epoch": 0.6733406140031549, "grad_norm": 0.0013788933865725994, "learning_rate": 6.615894853212137e-06, "loss": 1.9499275367707014e-05, "step": 5549 }, { "epoch": 0.673461958500182, "grad_norm": 4.3498358726501465, "learning_rate": 6.6134381525611114e-06, "loss": 0.35194453597068787, "step": 5550 }, { "epoch": 0.673583302997209, "grad_norm": 2.4301161766052246, "learning_rate": 6.610981451910086e-06, "loss": 0.16175785660743713, "step": 5551 }, { "epoch": 0.6737046474942361, "grad_norm": 1.4775090217590332, "learning_rate": 6.60852475125906e-06, "loss": 0.040994927287101746, "step": 5552 }, { "epoch": 0.6738259919912633, "grad_norm": 1.3354970216751099, "learning_rate": 6.606068050608034e-06, "loss": 0.03294237330555916, "step": 5553 }, { "epoch": 0.6739473364882903, "grad_norm": 2.078505516052246, "learning_rate": 6.6036113499570085e-06, "loss": 0.24495482444763184, "step": 5554 }, { "epoch": 0.6740686809853174, "grad_norm": 1.8554511070251465, "learning_rate": 6.601154649305983e-06, "loss": 0.022977245971560478, "step": 5555 }, { "epoch": 0.6741900254823444, "grad_norm": 1.7021937370300293, "learning_rate": 6.598697948654957e-06, "loss": 0.11590366065502167, "step": 5556 }, { "epoch": 0.6743113699793715, "grad_norm": 0.6264820098876953, "learning_rate": 6.596241248003932e-06, "loss": 0.005115927197039127, "step": 5557 }, { "epoch": 0.6744327144763985, "grad_norm": 3.259997606277466, "learning_rate": 6.593784547352906e-06, "loss": 0.34377309679985046, "step": 5558 }, { "epoch": 0.6745540589734256, "grad_norm": 2.4274485111236572, "learning_rate": 6.591327846701881e-06, "loss": 0.2146991491317749, "step": 5559 }, { "epoch": 0.6746754034704526, "grad_norm": 4.040982246398926, "learning_rate": 6.588871146050855e-06, "loss": 0.389179527759552, "step": 5560 }, { "epoch": 0.6747967479674797, "grad_norm": 6.564568996429443, "learning_rate": 6.586414445399829e-06, "loss": 0.19886216521263123, "step": 5561 }, { "epoch": 0.6749180924645067, "grad_norm": 4.24086332321167, "learning_rate": 6.583957744748803e-06, "loss": 0.5942610502243042, "step": 5562 }, { "epoch": 0.6750394369615338, "grad_norm": 2.9882843494415283, "learning_rate": 6.581501044097777e-06, "loss": 0.1112002432346344, "step": 5563 }, { "epoch": 0.6751607814585608, "grad_norm": 1.21688973903656, "learning_rate": 6.579044343446751e-06, "loss": 0.10873007774353027, "step": 5564 }, { "epoch": 0.6752821259555879, "grad_norm": 4.283537864685059, "learning_rate": 6.576587642795725e-06, "loss": 0.27938392758369446, "step": 5565 }, { "epoch": 0.675403470452615, "grad_norm": 2.61411714553833, "learning_rate": 6.5741309421447e-06, "loss": 0.190615713596344, "step": 5566 }, { "epoch": 0.675524814949642, "grad_norm": 4.234996795654297, "learning_rate": 6.571674241493674e-06, "loss": 0.2808380424976349, "step": 5567 }, { "epoch": 0.6756461594466691, "grad_norm": 1.9552305936813354, "learning_rate": 6.569217540842648e-06, "loss": 0.1349242478609085, "step": 5568 }, { "epoch": 0.6757675039436961, "grad_norm": 3.182126760482788, "learning_rate": 6.566760840191622e-06, "loss": 0.34604835510253906, "step": 5569 }, { "epoch": 0.6758888484407232, "grad_norm": 2.911616563796997, "learning_rate": 6.5643041395405975e-06, "loss": 0.22502827644348145, "step": 5570 }, { "epoch": 0.6760101929377502, "grad_norm": 1.239432692527771, "learning_rate": 6.561847438889572e-06, "loss": 0.03528213873505592, "step": 5571 }, { "epoch": 0.6761315374347774, "grad_norm": 2.722843885421753, "learning_rate": 6.559390738238546e-06, "loss": 0.19403153657913208, "step": 5572 }, { "epoch": 0.6762528819318044, "grad_norm": 2.0937094688415527, "learning_rate": 6.55693403758752e-06, "loss": 0.692645788192749, "step": 5573 }, { "epoch": 0.6763742264288315, "grad_norm": 2.61059308052063, "learning_rate": 6.5544773369364945e-06, "loss": 0.3859490156173706, "step": 5574 }, { "epoch": 0.6764955709258585, "grad_norm": 3.1712324619293213, "learning_rate": 6.552020636285469e-06, "loss": 0.23693794012069702, "step": 5575 }, { "epoch": 0.6766169154228856, "grad_norm": 1.8649239540100098, "learning_rate": 6.549563935634443e-06, "loss": 0.2387673556804657, "step": 5576 }, { "epoch": 0.6767382599199127, "grad_norm": 3.8509275913238525, "learning_rate": 6.547107234983417e-06, "loss": 0.2699548006057739, "step": 5577 }, { "epoch": 0.6768596044169397, "grad_norm": 2.2753689289093018, "learning_rate": 6.5446505343323915e-06, "loss": 0.26807743310928345, "step": 5578 }, { "epoch": 0.6769809489139668, "grad_norm": 3.317873954772949, "learning_rate": 6.542193833681366e-06, "loss": 0.4436611235141754, "step": 5579 }, { "epoch": 0.6771022934109938, "grad_norm": 2.4664347171783447, "learning_rate": 6.539737133030341e-06, "loss": 0.0676579400897026, "step": 5580 }, { "epoch": 0.6772236379080209, "grad_norm": 2.3233351707458496, "learning_rate": 6.537280432379315e-06, "loss": 0.08383767306804657, "step": 5581 }, { "epoch": 0.6773449824050479, "grad_norm": 0.7848469018936157, "learning_rate": 6.534823731728289e-06, "loss": 0.0077636269852519035, "step": 5582 }, { "epoch": 0.677466326902075, "grad_norm": 4.585662364959717, "learning_rate": 6.532367031077264e-06, "loss": 0.6716075539588928, "step": 5583 }, { "epoch": 0.677587671399102, "grad_norm": 2.6710691452026367, "learning_rate": 6.529910330426238e-06, "loss": 0.17611894011497498, "step": 5584 }, { "epoch": 0.6777090158961291, "grad_norm": 3.8173248767852783, "learning_rate": 6.527453629775212e-06, "loss": 0.05688289925456047, "step": 5585 }, { "epoch": 0.6778303603931561, "grad_norm": 3.489004611968994, "learning_rate": 6.5249969291241864e-06, "loss": 0.2277064025402069, "step": 5586 }, { "epoch": 0.6779517048901832, "grad_norm": 3.3463051319122314, "learning_rate": 6.522540228473161e-06, "loss": 0.3401225209236145, "step": 5587 }, { "epoch": 0.6780730493872102, "grad_norm": 2.5739355087280273, "learning_rate": 6.520083527822135e-06, "loss": 0.11237165331840515, "step": 5588 }, { "epoch": 0.6781943938842373, "grad_norm": 2.875991106033325, "learning_rate": 6.517626827171109e-06, "loss": 0.5303326845169067, "step": 5589 }, { "epoch": 0.6783157383812644, "grad_norm": 1.6259905099868774, "learning_rate": 6.515170126520084e-06, "loss": 0.14735515415668488, "step": 5590 }, { "epoch": 0.6784370828782915, "grad_norm": 3.0405800342559814, "learning_rate": 6.5127134258690586e-06, "loss": 0.10388892889022827, "step": 5591 }, { "epoch": 0.6785584273753186, "grad_norm": 3.2558200359344482, "learning_rate": 6.510256725218033e-06, "loss": 0.23588530719280243, "step": 5592 }, { "epoch": 0.6786797718723456, "grad_norm": 2.695221424102783, "learning_rate": 6.507800024567007e-06, "loss": 0.35306811332702637, "step": 5593 }, { "epoch": 0.6788011163693727, "grad_norm": 1.2592837810516357, "learning_rate": 6.505343323915981e-06, "loss": 0.04857534170150757, "step": 5594 }, { "epoch": 0.6789224608663997, "grad_norm": 0.6302855610847473, "learning_rate": 6.502886623264956e-06, "loss": 0.008068013936281204, "step": 5595 }, { "epoch": 0.6790438053634268, "grad_norm": 2.320370674133301, "learning_rate": 6.50042992261393e-06, "loss": 0.4741867184638977, "step": 5596 }, { "epoch": 0.6791651498604538, "grad_norm": 2.861515760421753, "learning_rate": 6.497973221962904e-06, "loss": 0.1916482001543045, "step": 5597 }, { "epoch": 0.6792864943574809, "grad_norm": 3.219991683959961, "learning_rate": 6.495516521311878e-06, "loss": 0.23294135928153992, "step": 5598 }, { "epoch": 0.679407838854508, "grad_norm": 2.569382429122925, "learning_rate": 6.493059820660853e-06, "loss": 0.17656588554382324, "step": 5599 }, { "epoch": 0.679529183351535, "grad_norm": 1.8248486518859863, "learning_rate": 6.490603120009828e-06, "loss": 0.04968787729740143, "step": 5600 }, { "epoch": 0.6796505278485621, "grad_norm": 4.131831169128418, "learning_rate": 6.488146419358802e-06, "loss": 0.5878446102142334, "step": 5601 }, { "epoch": 0.6797718723455891, "grad_norm": 1.087361216545105, "learning_rate": 6.485689718707776e-06, "loss": 0.016051514074206352, "step": 5602 }, { "epoch": 0.6798932168426162, "grad_norm": 5.4450554847717285, "learning_rate": 6.4832330180567505e-06, "loss": 0.5000943541526794, "step": 5603 }, { "epoch": 0.6800145613396432, "grad_norm": 5.101881504058838, "learning_rate": 6.480776317405725e-06, "loss": 0.31910964846611023, "step": 5604 }, { "epoch": 0.6801359058366703, "grad_norm": 3.234663248062134, "learning_rate": 6.478319616754699e-06, "loss": 0.35894861817359924, "step": 5605 }, { "epoch": 0.6802572503336973, "grad_norm": 1.0469131469726562, "learning_rate": 6.475862916103673e-06, "loss": 0.020409047603607178, "step": 5606 }, { "epoch": 0.6803785948307244, "grad_norm": 2.509852647781372, "learning_rate": 6.4734062154526476e-06, "loss": 0.2040126472711563, "step": 5607 }, { "epoch": 0.6804999393277514, "grad_norm": 2.720093250274658, "learning_rate": 6.470949514801622e-06, "loss": 0.33957019448280334, "step": 5608 }, { "epoch": 0.6806212838247786, "grad_norm": 2.9324123859405518, "learning_rate": 6.468492814150596e-06, "loss": 0.23904651403427124, "step": 5609 }, { "epoch": 0.6807426283218057, "grad_norm": 2.1089835166931152, "learning_rate": 6.466036113499571e-06, "loss": 0.14247776567935944, "step": 5610 }, { "epoch": 0.6808639728188327, "grad_norm": 2.764488935470581, "learning_rate": 6.4635794128485454e-06, "loss": 0.15308135747909546, "step": 5611 }, { "epoch": 0.6809853173158598, "grad_norm": 3.426349639892578, "learning_rate": 6.46112271219752e-06, "loss": 0.3017897307872772, "step": 5612 }, { "epoch": 0.6811066618128868, "grad_norm": 2.4092884063720703, "learning_rate": 6.458666011546494e-06, "loss": 0.05786924436688423, "step": 5613 }, { "epoch": 0.6812280063099139, "grad_norm": 2.733335018157959, "learning_rate": 6.456209310895468e-06, "loss": 0.08430862426757812, "step": 5614 }, { "epoch": 0.6813493508069409, "grad_norm": 3.1715805530548096, "learning_rate": 6.4537526102444425e-06, "loss": 0.218908429145813, "step": 5615 }, { "epoch": 0.681470695303968, "grad_norm": 2.517953872680664, "learning_rate": 6.451295909593417e-06, "loss": 0.22934485971927643, "step": 5616 }, { "epoch": 0.681592039800995, "grad_norm": 1.8448259830474854, "learning_rate": 6.448839208942391e-06, "loss": 0.09141306579113007, "step": 5617 }, { "epoch": 0.6817133842980221, "grad_norm": 4.632875919342041, "learning_rate": 6.446382508291365e-06, "loss": 0.16072304546833038, "step": 5618 }, { "epoch": 0.6818347287950491, "grad_norm": 2.024606227874756, "learning_rate": 6.44392580764034e-06, "loss": 0.06668491661548615, "step": 5619 }, { "epoch": 0.6819560732920762, "grad_norm": 2.749347686767578, "learning_rate": 6.441469106989315e-06, "loss": 0.37213465571403503, "step": 5620 }, { "epoch": 0.6820774177891032, "grad_norm": 3.142260789871216, "learning_rate": 6.439012406338289e-06, "loss": 0.2740361988544464, "step": 5621 }, { "epoch": 0.6821987622861303, "grad_norm": 5.434543609619141, "learning_rate": 6.436555705687263e-06, "loss": 0.3408467173576355, "step": 5622 }, { "epoch": 0.6823201067831574, "grad_norm": 1.961105465888977, "learning_rate": 6.434099005036237e-06, "loss": 0.0501723475754261, "step": 5623 }, { "epoch": 0.6824414512801844, "grad_norm": 2.3683059215545654, "learning_rate": 6.431642304385212e-06, "loss": 0.18011420965194702, "step": 5624 }, { "epoch": 0.6825627957772115, "grad_norm": 1.182308316230774, "learning_rate": 6.429185603734186e-06, "loss": 0.04546242952346802, "step": 5625 }, { "epoch": 0.6826841402742385, "grad_norm": 5.316926956176758, "learning_rate": 6.42672890308316e-06, "loss": 0.31844767928123474, "step": 5626 }, { "epoch": 0.6828054847712656, "grad_norm": 3.3798511028289795, "learning_rate": 6.424272202432134e-06, "loss": 0.21782462298870087, "step": 5627 }, { "epoch": 0.6829268292682927, "grad_norm": 0.09902840852737427, "learning_rate": 6.421815501781109e-06, "loss": 0.0005156525876373053, "step": 5628 }, { "epoch": 0.6830481737653198, "grad_norm": 2.326925277709961, "learning_rate": 6.419358801130084e-06, "loss": 0.278003990650177, "step": 5629 }, { "epoch": 0.6831695182623468, "grad_norm": 1.0316002368927002, "learning_rate": 6.416902100479058e-06, "loss": 0.018986064940690994, "step": 5630 }, { "epoch": 0.6832908627593739, "grad_norm": 7.021093368530273, "learning_rate": 6.414445399828031e-06, "loss": 0.5618723034858704, "step": 5631 }, { "epoch": 0.683412207256401, "grad_norm": 1.5250173807144165, "learning_rate": 6.411988699177006e-06, "loss": 0.221332848072052, "step": 5632 }, { "epoch": 0.683533551753428, "grad_norm": 2.791351318359375, "learning_rate": 6.40953199852598e-06, "loss": 0.24066822230815887, "step": 5633 }, { "epoch": 0.6836548962504551, "grad_norm": 2.360670566558838, "learning_rate": 6.407075297874954e-06, "loss": 0.46024349331855774, "step": 5634 }, { "epoch": 0.6837762407474821, "grad_norm": 2.6761934757232666, "learning_rate": 6.4046185972239285e-06, "loss": 0.4033425748348236, "step": 5635 }, { "epoch": 0.6838975852445092, "grad_norm": 2.178936243057251, "learning_rate": 6.402161896572903e-06, "loss": 0.34836938977241516, "step": 5636 }, { "epoch": 0.6840189297415362, "grad_norm": 3.3214011192321777, "learning_rate": 6.399705195921877e-06, "loss": 0.18093852698802948, "step": 5637 }, { "epoch": 0.6841402742385633, "grad_norm": 2.525665044784546, "learning_rate": 6.397248495270851e-06, "loss": 0.282547265291214, "step": 5638 }, { "epoch": 0.6842616187355903, "grad_norm": 5.0141377449035645, "learning_rate": 6.3947917946198255e-06, "loss": 0.18291091918945312, "step": 5639 }, { "epoch": 0.6843829632326174, "grad_norm": 3.3041646480560303, "learning_rate": 6.3923350939688e-06, "loss": 0.10522205382585526, "step": 5640 }, { "epoch": 0.6845043077296444, "grad_norm": 2.802886962890625, "learning_rate": 6.389878393317774e-06, "loss": 0.5766411423683167, "step": 5641 }, { "epoch": 0.6846256522266715, "grad_norm": 4.158419132232666, "learning_rate": 6.387421692666749e-06, "loss": 0.344346284866333, "step": 5642 }, { "epoch": 0.6847469967236985, "grad_norm": 2.5741560459136963, "learning_rate": 6.384964992015723e-06, "loss": 0.08021984249353409, "step": 5643 }, { "epoch": 0.6848683412207256, "grad_norm": 2.1082563400268555, "learning_rate": 6.382508291364698e-06, "loss": 0.25988832116127014, "step": 5644 }, { "epoch": 0.6849896857177526, "grad_norm": 2.481950044631958, "learning_rate": 6.380051590713672e-06, "loss": 0.3718109726905823, "step": 5645 }, { "epoch": 0.6851110302147798, "grad_norm": 3.174114465713501, "learning_rate": 6.377594890062646e-06, "loss": 0.0661323294043541, "step": 5646 }, { "epoch": 0.6852323747118069, "grad_norm": 4.026634693145752, "learning_rate": 6.37513818941162e-06, "loss": 0.566697895526886, "step": 5647 }, { "epoch": 0.6853537192088339, "grad_norm": 1.150229811668396, "learning_rate": 6.372681488760595e-06, "loss": 0.07161320745944977, "step": 5648 }, { "epoch": 0.685475063705861, "grad_norm": 3.598001718521118, "learning_rate": 6.370224788109569e-06, "loss": 0.5429255962371826, "step": 5649 }, { "epoch": 0.685596408202888, "grad_norm": 2.2587733268737793, "learning_rate": 6.367768087458543e-06, "loss": 0.1403500884771347, "step": 5650 }, { "epoch": 0.6857177526999151, "grad_norm": 2.2977283000946045, "learning_rate": 6.3653113868075175e-06, "loss": 0.2060234397649765, "step": 5651 }, { "epoch": 0.6858390971969421, "grad_norm": 2.2504184246063232, "learning_rate": 6.3628546861564926e-06, "loss": 0.15993283689022064, "step": 5652 }, { "epoch": 0.6859604416939692, "grad_norm": 2.112743377685547, "learning_rate": 6.360397985505467e-06, "loss": 0.16262373328208923, "step": 5653 }, { "epoch": 0.6860817861909962, "grad_norm": 3.196451187133789, "learning_rate": 6.357941284854441e-06, "loss": 0.272876113653183, "step": 5654 }, { "epoch": 0.6862031306880233, "grad_norm": 3.642841339111328, "learning_rate": 6.355484584203415e-06, "loss": 0.466037392616272, "step": 5655 }, { "epoch": 0.6863244751850504, "grad_norm": 2.944514513015747, "learning_rate": 6.35302788355239e-06, "loss": 0.10930436849594116, "step": 5656 }, { "epoch": 0.6864458196820774, "grad_norm": 3.2408714294433594, "learning_rate": 6.350571182901364e-06, "loss": 0.2012295424938202, "step": 5657 }, { "epoch": 0.6865671641791045, "grad_norm": 3.4488470554351807, "learning_rate": 6.348114482250338e-06, "loss": 0.17001235485076904, "step": 5658 }, { "epoch": 0.6866885086761315, "grad_norm": 2.6099789142608643, "learning_rate": 6.345657781599312e-06, "loss": 0.41487640142440796, "step": 5659 }, { "epoch": 0.6868098531731586, "grad_norm": 1.7128602266311646, "learning_rate": 6.343201080948287e-06, "loss": 0.12348049879074097, "step": 5660 }, { "epoch": 0.6869311976701856, "grad_norm": 2.862828016281128, "learning_rate": 6.340744380297261e-06, "loss": 0.20161563158035278, "step": 5661 }, { "epoch": 0.6870525421672127, "grad_norm": 2.41933274269104, "learning_rate": 6.338287679646236e-06, "loss": 0.45425787568092346, "step": 5662 }, { "epoch": 0.6871738866642397, "grad_norm": 0.7551639676094055, "learning_rate": 6.33583097899521e-06, "loss": 0.057786889374256134, "step": 5663 }, { "epoch": 0.6872952311612668, "grad_norm": 1.6531691551208496, "learning_rate": 6.3333742783441845e-06, "loss": 0.16558891534805298, "step": 5664 }, { "epoch": 0.687416575658294, "grad_norm": 2.880044937133789, "learning_rate": 6.330917577693159e-06, "loss": 0.17559823393821716, "step": 5665 }, { "epoch": 0.687537920155321, "grad_norm": 2.568293571472168, "learning_rate": 6.328460877042133e-06, "loss": 0.2385990023612976, "step": 5666 }, { "epoch": 0.6876592646523481, "grad_norm": 2.079524517059326, "learning_rate": 6.326004176391107e-06, "loss": 0.14678798615932465, "step": 5667 }, { "epoch": 0.6877806091493751, "grad_norm": 4.814833641052246, "learning_rate": 6.3235474757400815e-06, "loss": 0.38339805603027344, "step": 5668 }, { "epoch": 0.6879019536464022, "grad_norm": 4.654842853546143, "learning_rate": 6.321090775089056e-06, "loss": 0.3428992033004761, "step": 5669 }, { "epoch": 0.6880232981434292, "grad_norm": 0.3473941683769226, "learning_rate": 6.31863407443803e-06, "loss": 0.0034626834094524384, "step": 5670 }, { "epoch": 0.6881446426404563, "grad_norm": 3.5860822200775146, "learning_rate": 6.316177373787004e-06, "loss": 0.3996542692184448, "step": 5671 }, { "epoch": 0.6882659871374833, "grad_norm": 1.2540889978408813, "learning_rate": 6.313720673135979e-06, "loss": 0.02270342782139778, "step": 5672 }, { "epoch": 0.6883873316345104, "grad_norm": 3.4647586345672607, "learning_rate": 6.311263972484954e-06, "loss": 0.3167842924594879, "step": 5673 }, { "epoch": 0.6885086761315374, "grad_norm": 2.728961944580078, "learning_rate": 6.308807271833928e-06, "loss": 0.5077354907989502, "step": 5674 }, { "epoch": 0.6886300206285645, "grad_norm": 1.4408552646636963, "learning_rate": 6.306350571182902e-06, "loss": 0.018086910247802734, "step": 5675 }, { "epoch": 0.6887513651255915, "grad_norm": 2.1667895317077637, "learning_rate": 6.3038938705318765e-06, "loss": 0.11787963658571243, "step": 5676 }, { "epoch": 0.6888727096226186, "grad_norm": 3.927021026611328, "learning_rate": 6.301437169880851e-06, "loss": 0.3485981225967407, "step": 5677 }, { "epoch": 0.6889940541196456, "grad_norm": 3.4576404094696045, "learning_rate": 6.298980469229825e-06, "loss": 0.1780065894126892, "step": 5678 }, { "epoch": 0.6891153986166727, "grad_norm": 2.9855892658233643, "learning_rate": 6.296523768578799e-06, "loss": 0.0316493958234787, "step": 5679 }, { "epoch": 0.6892367431136998, "grad_norm": 3.1415634155273438, "learning_rate": 6.2940670679277735e-06, "loss": 0.3075777292251587, "step": 5680 }, { "epoch": 0.6893580876107268, "grad_norm": 3.3528859615325928, "learning_rate": 6.291610367276748e-06, "loss": 0.33159518241882324, "step": 5681 }, { "epoch": 0.6894794321077539, "grad_norm": 1.5671093463897705, "learning_rate": 6.289153666625723e-06, "loss": 0.12431022524833679, "step": 5682 }, { "epoch": 0.6896007766047809, "grad_norm": 1.2651323080062866, "learning_rate": 6.286696965974697e-06, "loss": 0.033919062465429306, "step": 5683 }, { "epoch": 0.6897221211018081, "grad_norm": 3.275606632232666, "learning_rate": 6.284240265323671e-06, "loss": 0.4819928705692291, "step": 5684 }, { "epoch": 0.6898434655988351, "grad_norm": 3.595076322555542, "learning_rate": 6.281783564672646e-06, "loss": 0.45713359117507935, "step": 5685 }, { "epoch": 0.6899648100958622, "grad_norm": 4.250952243804932, "learning_rate": 6.27932686402162e-06, "loss": 0.4686271548271179, "step": 5686 }, { "epoch": 0.6900861545928892, "grad_norm": 2.2172257900238037, "learning_rate": 6.276870163370594e-06, "loss": 0.290248304605484, "step": 5687 }, { "epoch": 0.6902074990899163, "grad_norm": 2.2708182334899902, "learning_rate": 6.274413462719568e-06, "loss": 0.05581101402640343, "step": 5688 }, { "epoch": 0.6903288435869434, "grad_norm": 3.0389108657836914, "learning_rate": 6.271956762068543e-06, "loss": 0.42256754636764526, "step": 5689 }, { "epoch": 0.6904501880839704, "grad_norm": 2.0080933570861816, "learning_rate": 6.269500061417517e-06, "loss": 0.061008162796497345, "step": 5690 }, { "epoch": 0.6905715325809975, "grad_norm": 5.6373748779296875, "learning_rate": 6.267043360766491e-06, "loss": 0.2220969796180725, "step": 5691 }, { "epoch": 0.6906928770780245, "grad_norm": 2.283975124359131, "learning_rate": 6.264586660115466e-06, "loss": 0.2537585496902466, "step": 5692 }, { "epoch": 0.6908142215750516, "grad_norm": 2.1502058506011963, "learning_rate": 6.2621299594644405e-06, "loss": 0.110890232026577, "step": 5693 }, { "epoch": 0.6909355660720786, "grad_norm": 3.3317623138427734, "learning_rate": 6.259673258813415e-06, "loss": 0.25203296542167664, "step": 5694 }, { "epoch": 0.6910569105691057, "grad_norm": 1.9665324687957764, "learning_rate": 6.257216558162389e-06, "loss": 0.084046371281147, "step": 5695 }, { "epoch": 0.6911782550661327, "grad_norm": 2.5005674362182617, "learning_rate": 6.254759857511363e-06, "loss": 0.1298762857913971, "step": 5696 }, { "epoch": 0.6912995995631598, "grad_norm": 2.871945858001709, "learning_rate": 6.2523031568603376e-06, "loss": 0.18528416752815247, "step": 5697 }, { "epoch": 0.6914209440601868, "grad_norm": 1.8751585483551025, "learning_rate": 6.249846456209311e-06, "loss": 0.2059035748243332, "step": 5698 }, { "epoch": 0.6915422885572139, "grad_norm": 2.9432475566864014, "learning_rate": 6.247389755558285e-06, "loss": 0.13551416993141174, "step": 5699 }, { "epoch": 0.691663633054241, "grad_norm": 2.622704029083252, "learning_rate": 6.2449330549072595e-06, "loss": 0.17497101426124573, "step": 5700 }, { "epoch": 0.691784977551268, "grad_norm": 2.648796319961548, "learning_rate": 6.242476354256234e-06, "loss": 0.06139075756072998, "step": 5701 }, { "epoch": 0.6919063220482952, "grad_norm": 2.7674098014831543, "learning_rate": 6.240019653605208e-06, "loss": 0.14100010693073273, "step": 5702 }, { "epoch": 0.6920276665453222, "grad_norm": 2.7512853145599365, "learning_rate": 6.237562952954182e-06, "loss": 0.29270368814468384, "step": 5703 }, { "epoch": 0.6921490110423493, "grad_norm": 1.9962724447250366, "learning_rate": 6.2351062523031565e-06, "loss": 0.13253211975097656, "step": 5704 }, { "epoch": 0.6922703555393763, "grad_norm": 3.526622772216797, "learning_rate": 6.232649551652132e-06, "loss": 0.30655109882354736, "step": 5705 }, { "epoch": 0.6923917000364034, "grad_norm": 3.0125510692596436, "learning_rate": 6.230192851001106e-06, "loss": 0.18688224256038666, "step": 5706 }, { "epoch": 0.6925130445334304, "grad_norm": 2.504835605621338, "learning_rate": 6.22773615035008e-06, "loss": 0.4734697639942169, "step": 5707 }, { "epoch": 0.6926343890304575, "grad_norm": 2.7178382873535156, "learning_rate": 6.225279449699054e-06, "loss": 0.33622920513153076, "step": 5708 }, { "epoch": 0.6927557335274845, "grad_norm": 3.338055372238159, "learning_rate": 6.222822749048029e-06, "loss": 0.26140522956848145, "step": 5709 }, { "epoch": 0.6928770780245116, "grad_norm": 3.0957043170928955, "learning_rate": 6.220366048397003e-06, "loss": 0.12388262152671814, "step": 5710 }, { "epoch": 0.6929984225215386, "grad_norm": 3.7117812633514404, "learning_rate": 6.217909347745977e-06, "loss": 0.4923725426197052, "step": 5711 }, { "epoch": 0.6931197670185657, "grad_norm": 3.013397216796875, "learning_rate": 6.2154526470949514e-06, "loss": 0.3229036331176758, "step": 5712 }, { "epoch": 0.6932411115155928, "grad_norm": 3.5330052375793457, "learning_rate": 6.212995946443926e-06, "loss": 0.061898380517959595, "step": 5713 }, { "epoch": 0.6933624560126198, "grad_norm": 2.187222480773926, "learning_rate": 6.2105392457929e-06, "loss": 0.12034771591424942, "step": 5714 }, { "epoch": 0.6934838005096469, "grad_norm": 2.223762035369873, "learning_rate": 6.208082545141875e-06, "loss": 0.061734408140182495, "step": 5715 }, { "epoch": 0.6936051450066739, "grad_norm": 3.9538183212280273, "learning_rate": 6.205625844490849e-06, "loss": 0.35148710012435913, "step": 5716 }, { "epoch": 0.693726489503701, "grad_norm": 2.437053680419922, "learning_rate": 6.203169143839824e-06, "loss": 0.09135996550321579, "step": 5717 }, { "epoch": 0.693847834000728, "grad_norm": 4.181443214416504, "learning_rate": 6.200712443188798e-06, "loss": 0.4284602403640747, "step": 5718 }, { "epoch": 0.6939691784977551, "grad_norm": 3.3214221000671387, "learning_rate": 6.198255742537772e-06, "loss": 0.26171714067459106, "step": 5719 }, { "epoch": 0.6940905229947821, "grad_norm": 3.8552441596984863, "learning_rate": 6.195799041886746e-06, "loss": 0.3979511559009552, "step": 5720 }, { "epoch": 0.6942118674918093, "grad_norm": 2.7416181564331055, "learning_rate": 6.193342341235721e-06, "loss": 0.09290871024131775, "step": 5721 }, { "epoch": 0.6943332119888364, "grad_norm": 1.7485246658325195, "learning_rate": 6.190885640584695e-06, "loss": 0.1633225381374359, "step": 5722 }, { "epoch": 0.6944545564858634, "grad_norm": 2.3411684036254883, "learning_rate": 6.188428939933669e-06, "loss": 0.32605451345443726, "step": 5723 }, { "epoch": 0.6945759009828905, "grad_norm": 3.574162006378174, "learning_rate": 6.185972239282643e-06, "loss": 0.3426300883293152, "step": 5724 }, { "epoch": 0.6946972454799175, "grad_norm": 2.5172603130340576, "learning_rate": 6.1835155386316185e-06, "loss": 0.3036474585533142, "step": 5725 }, { "epoch": 0.6948185899769446, "grad_norm": 4.6578779220581055, "learning_rate": 6.181058837980593e-06, "loss": 0.3849017918109894, "step": 5726 }, { "epoch": 0.6949399344739716, "grad_norm": 2.53385329246521, "learning_rate": 6.178602137329567e-06, "loss": 0.2181628942489624, "step": 5727 }, { "epoch": 0.6950612789709987, "grad_norm": 3.0251502990722656, "learning_rate": 6.176145436678541e-06, "loss": 0.3707123398780823, "step": 5728 }, { "epoch": 0.6951826234680257, "grad_norm": 3.7096409797668457, "learning_rate": 6.1736887360275155e-06, "loss": 0.3009989857673645, "step": 5729 }, { "epoch": 0.6953039679650528, "grad_norm": 2.985715627670288, "learning_rate": 6.17123203537649e-06, "loss": 0.10617246478796005, "step": 5730 }, { "epoch": 0.6954253124620798, "grad_norm": 1.5499550104141235, "learning_rate": 6.168775334725464e-06, "loss": 0.0657060369849205, "step": 5731 }, { "epoch": 0.6955466569591069, "grad_norm": 0.001683641690760851, "learning_rate": 6.166318634074438e-06, "loss": 3.6407476727617905e-05, "step": 5732 }, { "epoch": 0.6956680014561339, "grad_norm": 3.881850242614746, "learning_rate": 6.1638619334234126e-06, "loss": 0.0992632657289505, "step": 5733 }, { "epoch": 0.695789345953161, "grad_norm": 2.8861396312713623, "learning_rate": 6.161405232772387e-06, "loss": 0.25874051451683044, "step": 5734 }, { "epoch": 0.695910690450188, "grad_norm": 2.014254331588745, "learning_rate": 6.158948532121362e-06, "loss": 0.08664755523204803, "step": 5735 }, { "epoch": 0.6960320349472151, "grad_norm": 2.3388712406158447, "learning_rate": 6.156491831470336e-06, "loss": 0.12286005169153214, "step": 5736 }, { "epoch": 0.6961533794442422, "grad_norm": 2.127319097518921, "learning_rate": 6.1540351308193104e-06, "loss": 0.04887150228023529, "step": 5737 }, { "epoch": 0.6962747239412692, "grad_norm": 2.2073473930358887, "learning_rate": 6.151578430168285e-06, "loss": 0.25623658299446106, "step": 5738 }, { "epoch": 0.6963960684382964, "grad_norm": 2.480203628540039, "learning_rate": 6.149121729517259e-06, "loss": 0.15554951131343842, "step": 5739 }, { "epoch": 0.6965174129353234, "grad_norm": 3.20697021484375, "learning_rate": 6.146665028866233e-06, "loss": 0.3820798397064209, "step": 5740 }, { "epoch": 0.6966387574323505, "grad_norm": 3.016470193862915, "learning_rate": 6.1442083282152075e-06, "loss": 0.1999262273311615, "step": 5741 }, { "epoch": 0.6967601019293775, "grad_norm": 1.2635258436203003, "learning_rate": 6.141751627564182e-06, "loss": 0.0767585039138794, "step": 5742 }, { "epoch": 0.6968814464264046, "grad_norm": 0.7572587728500366, "learning_rate": 6.139294926913156e-06, "loss": 0.008015908300876617, "step": 5743 }, { "epoch": 0.6970027909234316, "grad_norm": 3.5158944129943848, "learning_rate": 6.13683822626213e-06, "loss": 0.1905626356601715, "step": 5744 }, { "epoch": 0.6971241354204587, "grad_norm": 3.9973971843719482, "learning_rate": 6.134381525611105e-06, "loss": 0.26448121666908264, "step": 5745 }, { "epoch": 0.6972454799174858, "grad_norm": 2.649580240249634, "learning_rate": 6.13192482496008e-06, "loss": 0.22253099083900452, "step": 5746 }, { "epoch": 0.6973668244145128, "grad_norm": 2.3696963787078857, "learning_rate": 6.129468124309054e-06, "loss": 0.6305501461029053, "step": 5747 }, { "epoch": 0.6974881689115399, "grad_norm": 1.0462290048599243, "learning_rate": 6.127011423658028e-06, "loss": 0.01950521022081375, "step": 5748 }, { "epoch": 0.6976095134085669, "grad_norm": 2.8957738876342773, "learning_rate": 6.124554723007002e-06, "loss": 0.3588560223579407, "step": 5749 }, { "epoch": 0.697730857905594, "grad_norm": 2.5845372676849365, "learning_rate": 6.122098022355977e-06, "loss": 0.32804858684539795, "step": 5750 }, { "epoch": 0.697852202402621, "grad_norm": 4.856935977935791, "learning_rate": 6.119641321704951e-06, "loss": 0.3024596571922302, "step": 5751 }, { "epoch": 0.6979735468996481, "grad_norm": 3.42258882522583, "learning_rate": 6.117184621053925e-06, "loss": 0.10903825610876083, "step": 5752 }, { "epoch": 0.6980948913966751, "grad_norm": 2.5965938568115234, "learning_rate": 6.114727920402899e-06, "loss": 0.07731950283050537, "step": 5753 }, { "epoch": 0.6982162358937022, "grad_norm": 0.9776615500450134, "learning_rate": 6.1122712197518745e-06, "loss": 0.011912771500647068, "step": 5754 }, { "epoch": 0.6983375803907292, "grad_norm": 1.9875417947769165, "learning_rate": 6.109814519100849e-06, "loss": 0.10416249930858612, "step": 5755 }, { "epoch": 0.6984589248877563, "grad_norm": 3.0632517337799072, "learning_rate": 6.107357818449823e-06, "loss": 0.30720001459121704, "step": 5756 }, { "epoch": 0.6985802693847833, "grad_norm": 1.4973243474960327, "learning_rate": 6.104901117798797e-06, "loss": 0.045694220811128616, "step": 5757 }, { "epoch": 0.6987016138818105, "grad_norm": 2.2301218509674072, "learning_rate": 6.1024444171477716e-06, "loss": 0.30573010444641113, "step": 5758 }, { "epoch": 0.6988229583788376, "grad_norm": 2.1186938285827637, "learning_rate": 6.099987716496746e-06, "loss": 0.11683495342731476, "step": 5759 }, { "epoch": 0.6989443028758646, "grad_norm": 2.4823174476623535, "learning_rate": 6.09753101584572e-06, "loss": 0.25088736414909363, "step": 5760 }, { "epoch": 0.6990656473728917, "grad_norm": 2.8498106002807617, "learning_rate": 6.095074315194694e-06, "loss": 0.13661500811576843, "step": 5761 }, { "epoch": 0.6991869918699187, "grad_norm": 2.425856828689575, "learning_rate": 6.092617614543669e-06, "loss": 0.19371923804283142, "step": 5762 }, { "epoch": 0.6993083363669458, "grad_norm": 1.8015364408493042, "learning_rate": 6.090160913892643e-06, "loss": 0.13249297440052032, "step": 5763 }, { "epoch": 0.6994296808639728, "grad_norm": 3.318235397338867, "learning_rate": 6.087704213241618e-06, "loss": 0.2441297173500061, "step": 5764 }, { "epoch": 0.6995510253609999, "grad_norm": 3.0751609802246094, "learning_rate": 6.085247512590592e-06, "loss": 0.2404286414384842, "step": 5765 }, { "epoch": 0.6996723698580269, "grad_norm": 3.0550596714019775, "learning_rate": 6.082790811939565e-06, "loss": 0.262876957654953, "step": 5766 }, { "epoch": 0.699793714355054, "grad_norm": 3.5686118602752686, "learning_rate": 6.08033411128854e-06, "loss": 0.1583518087863922, "step": 5767 }, { "epoch": 0.699915058852081, "grad_norm": 3.687243938446045, "learning_rate": 6.077877410637514e-06, "loss": 0.26704466342926025, "step": 5768 }, { "epoch": 0.7000364033491081, "grad_norm": 2.89251446723938, "learning_rate": 6.075420709986488e-06, "loss": 0.5565032958984375, "step": 5769 }, { "epoch": 0.7001577478461352, "grad_norm": 4.210231304168701, "learning_rate": 6.072964009335463e-06, "loss": 0.07972203195095062, "step": 5770 }, { "epoch": 0.7002790923431622, "grad_norm": 3.036869764328003, "learning_rate": 6.070507308684437e-06, "loss": 0.21406593918800354, "step": 5771 }, { "epoch": 0.7004004368401893, "grad_norm": 1.3510385751724243, "learning_rate": 6.068050608033411e-06, "loss": 0.058110691606998444, "step": 5772 }, { "epoch": 0.7005217813372163, "grad_norm": 2.858076333999634, "learning_rate": 6.0655939073823854e-06, "loss": 0.13300542533397675, "step": 5773 }, { "epoch": 0.7006431258342434, "grad_norm": 4.929359436035156, "learning_rate": 6.06313720673136e-06, "loss": 0.11950074136257172, "step": 5774 }, { "epoch": 0.7007644703312704, "grad_norm": 4.178248405456543, "learning_rate": 6.060680506080334e-06, "loss": 0.3169228732585907, "step": 5775 }, { "epoch": 0.7008858148282975, "grad_norm": 3.3864965438842773, "learning_rate": 6.058223805429308e-06, "loss": 0.20987015962600708, "step": 5776 }, { "epoch": 0.7010071593253246, "grad_norm": 3.505870819091797, "learning_rate": 6.055767104778283e-06, "loss": 0.17886711657047272, "step": 5777 }, { "epoch": 0.7011285038223517, "grad_norm": 2.3639237880706787, "learning_rate": 6.0533104041272576e-06, "loss": 0.41323283314704895, "step": 5778 }, { "epoch": 0.7012498483193788, "grad_norm": 4.364718914031982, "learning_rate": 6.050853703476232e-06, "loss": 0.2861219048500061, "step": 5779 }, { "epoch": 0.7013711928164058, "grad_norm": 5.6462178230285645, "learning_rate": 6.048397002825206e-06, "loss": 0.2149006575345993, "step": 5780 }, { "epoch": 0.7014925373134329, "grad_norm": 3.3727569580078125, "learning_rate": 6.04594030217418e-06, "loss": 0.5478169918060303, "step": 5781 }, { "epoch": 0.7016138818104599, "grad_norm": 1.6628745794296265, "learning_rate": 6.043483601523155e-06, "loss": 0.08530642092227936, "step": 5782 }, { "epoch": 0.701735226307487, "grad_norm": 0.4083932340145111, "learning_rate": 6.041026900872129e-06, "loss": 0.00365039287135005, "step": 5783 }, { "epoch": 0.701856570804514, "grad_norm": 1.9153450727462769, "learning_rate": 6.038570200221103e-06, "loss": 0.022864695638418198, "step": 5784 }, { "epoch": 0.7019779153015411, "grad_norm": 2.4661850929260254, "learning_rate": 6.036113499570077e-06, "loss": 0.4609590768814087, "step": 5785 }, { "epoch": 0.7020992597985681, "grad_norm": 2.860145092010498, "learning_rate": 6.033656798919052e-06, "loss": 0.16298139095306396, "step": 5786 }, { "epoch": 0.7022206042955952, "grad_norm": 1.2838356494903564, "learning_rate": 6.031200098268027e-06, "loss": 0.03636278212070465, "step": 5787 }, { "epoch": 0.7023419487926222, "grad_norm": 2.1400389671325684, "learning_rate": 6.028743397617001e-06, "loss": 0.29807841777801514, "step": 5788 }, { "epoch": 0.7024632932896493, "grad_norm": 3.173556089401245, "learning_rate": 6.026286696965975e-06, "loss": 0.3834129571914673, "step": 5789 }, { "epoch": 0.7025846377866763, "grad_norm": 3.4591805934906006, "learning_rate": 6.0238299963149495e-06, "loss": 0.24231453239917755, "step": 5790 }, { "epoch": 0.7027059822837034, "grad_norm": 2.295949935913086, "learning_rate": 6.021373295663924e-06, "loss": 0.1726835072040558, "step": 5791 }, { "epoch": 0.7028273267807305, "grad_norm": 2.6043148040771484, "learning_rate": 6.018916595012898e-06, "loss": 0.16435734927654266, "step": 5792 }, { "epoch": 0.7029486712777575, "grad_norm": 2.6568374633789062, "learning_rate": 6.016459894361872e-06, "loss": 0.2148205041885376, "step": 5793 }, { "epoch": 0.7030700157747846, "grad_norm": 2.0812788009643555, "learning_rate": 6.0140031937108465e-06, "loss": 0.10933089256286621, "step": 5794 }, { "epoch": 0.7031913602718117, "grad_norm": 2.8028173446655273, "learning_rate": 6.011546493059821e-06, "loss": 0.33395737409591675, "step": 5795 }, { "epoch": 0.7033127047688388, "grad_norm": 1.5399020910263062, "learning_rate": 6.009089792408795e-06, "loss": 0.2753638029098511, "step": 5796 }, { "epoch": 0.7034340492658658, "grad_norm": 2.1722145080566406, "learning_rate": 6.00663309175777e-06, "loss": 0.13258258998394012, "step": 5797 }, { "epoch": 0.7035553937628929, "grad_norm": 2.720982313156128, "learning_rate": 6.004176391106744e-06, "loss": 0.5418004393577576, "step": 5798 }, { "epoch": 0.7036767382599199, "grad_norm": 3.1868748664855957, "learning_rate": 6.001719690455719e-06, "loss": 0.4458266496658325, "step": 5799 }, { "epoch": 0.703798082756947, "grad_norm": 3.0444626808166504, "learning_rate": 5.999262989804693e-06, "loss": 0.5494015216827393, "step": 5800 }, { "epoch": 0.703919427253974, "grad_norm": 2.8883461952209473, "learning_rate": 5.996806289153667e-06, "loss": 0.5558677911758423, "step": 5801 }, { "epoch": 0.7040407717510011, "grad_norm": 2.979569911956787, "learning_rate": 5.9943495885026415e-06, "loss": 0.5888940095901489, "step": 5802 }, { "epoch": 0.7041621162480282, "grad_norm": 2.929598093032837, "learning_rate": 5.991892887851616e-06, "loss": 0.3068878650665283, "step": 5803 }, { "epoch": 0.7042834607450552, "grad_norm": 1.256536602973938, "learning_rate": 5.98943618720059e-06, "loss": 0.045782670378685, "step": 5804 }, { "epoch": 0.7044048052420823, "grad_norm": 3.47214674949646, "learning_rate": 5.986979486549564e-06, "loss": 0.35917794704437256, "step": 5805 }, { "epoch": 0.7045261497391093, "grad_norm": 2.6085317134857178, "learning_rate": 5.9845227858985385e-06, "loss": 0.13018730282783508, "step": 5806 }, { "epoch": 0.7046474942361364, "grad_norm": 2.4535932540893555, "learning_rate": 5.982066085247514e-06, "loss": 0.32858872413635254, "step": 5807 }, { "epoch": 0.7047688387331634, "grad_norm": 4.043457984924316, "learning_rate": 5.979609384596488e-06, "loss": 0.270950585603714, "step": 5808 }, { "epoch": 0.7048901832301905, "grad_norm": 2.6979687213897705, "learning_rate": 5.977152683945462e-06, "loss": 0.11231839656829834, "step": 5809 }, { "epoch": 0.7050115277272175, "grad_norm": 2.898728609085083, "learning_rate": 5.974695983294436e-06, "loss": 0.2861657738685608, "step": 5810 }, { "epoch": 0.7051328722242446, "grad_norm": 3.64288592338562, "learning_rate": 5.972239282643411e-06, "loss": 0.311164915561676, "step": 5811 }, { "epoch": 0.7052542167212716, "grad_norm": 2.2695472240448, "learning_rate": 5.969782581992385e-06, "loss": 0.28633707761764526, "step": 5812 }, { "epoch": 0.7053755612182987, "grad_norm": 2.342682361602783, "learning_rate": 5.967325881341359e-06, "loss": 0.43455225229263306, "step": 5813 }, { "epoch": 0.7054969057153259, "grad_norm": 2.894275665283203, "learning_rate": 5.964869180690333e-06, "loss": 0.2556551694869995, "step": 5814 }, { "epoch": 0.7056182502123529, "grad_norm": 2.2578351497650146, "learning_rate": 5.962412480039308e-06, "loss": 0.18596795201301575, "step": 5815 }, { "epoch": 0.70573959470938, "grad_norm": 1.8131707906723022, "learning_rate": 5.959955779388282e-06, "loss": 0.2659645080566406, "step": 5816 }, { "epoch": 0.705860939206407, "grad_norm": 3.1593222618103027, "learning_rate": 5.957499078737257e-06, "loss": 0.3369874656200409, "step": 5817 }, { "epoch": 0.7059822837034341, "grad_norm": 4.152899742126465, "learning_rate": 5.955042378086231e-06, "loss": 0.5103601217269897, "step": 5818 }, { "epoch": 0.7061036282004611, "grad_norm": 2.3949697017669678, "learning_rate": 5.9525856774352055e-06, "loss": 0.5256065726280212, "step": 5819 }, { "epoch": 0.7062249726974882, "grad_norm": 0.5283042788505554, "learning_rate": 5.95012897678418e-06, "loss": 0.009274049662053585, "step": 5820 }, { "epoch": 0.7063463171945152, "grad_norm": 2.565742015838623, "learning_rate": 5.947672276133154e-06, "loss": 0.21998131275177002, "step": 5821 }, { "epoch": 0.7064676616915423, "grad_norm": 3.2698583602905273, "learning_rate": 5.945215575482128e-06, "loss": 0.45817452669143677, "step": 5822 }, { "epoch": 0.7065890061885693, "grad_norm": 1.9504470825195312, "learning_rate": 5.9427588748311026e-06, "loss": 0.1343025118112564, "step": 5823 }, { "epoch": 0.7067103506855964, "grad_norm": 1.543136715888977, "learning_rate": 5.940302174180077e-06, "loss": 0.38056105375289917, "step": 5824 }, { "epoch": 0.7068316951826235, "grad_norm": 6.134747505187988, "learning_rate": 5.937845473529051e-06, "loss": 0.5312180519104004, "step": 5825 }, { "epoch": 0.7069530396796505, "grad_norm": 3.386713743209839, "learning_rate": 5.935388772878025e-06, "loss": 0.4652506113052368, "step": 5826 }, { "epoch": 0.7070743841766776, "grad_norm": 1.7463020086288452, "learning_rate": 5.9329320722270004e-06, "loss": 0.031084027141332626, "step": 5827 }, { "epoch": 0.7071957286737046, "grad_norm": 4.0430755615234375, "learning_rate": 5.930475371575975e-06, "loss": 0.4027259349822998, "step": 5828 }, { "epoch": 0.7073170731707317, "grad_norm": 6.218966960906982, "learning_rate": 5.928018670924949e-06, "loss": 0.2015838623046875, "step": 5829 }, { "epoch": 0.7074384176677587, "grad_norm": 3.496805191040039, "learning_rate": 5.925561970273923e-06, "loss": 0.5277990102767944, "step": 5830 }, { "epoch": 0.7075597621647858, "grad_norm": 1.84635591506958, "learning_rate": 5.9231052696228975e-06, "loss": 0.02089240960776806, "step": 5831 }, { "epoch": 0.7076811066618129, "grad_norm": 2.402447462081909, "learning_rate": 5.920648568971872e-06, "loss": 0.16119323670864105, "step": 5832 }, { "epoch": 0.70780245115884, "grad_norm": 2.6963558197021484, "learning_rate": 5.918191868320846e-06, "loss": 0.20778656005859375, "step": 5833 }, { "epoch": 0.707923795655867, "grad_norm": 2.49267578125, "learning_rate": 5.915735167669819e-06, "loss": 0.10597319155931473, "step": 5834 }, { "epoch": 0.7080451401528941, "grad_norm": 5.318297863006592, "learning_rate": 5.913278467018794e-06, "loss": 0.2169535756111145, "step": 5835 }, { "epoch": 0.7081664846499212, "grad_norm": 1.822165608406067, "learning_rate": 5.910821766367768e-06, "loss": 0.07822578400373459, "step": 5836 }, { "epoch": 0.7082878291469482, "grad_norm": 3.356064796447754, "learning_rate": 5.908365065716742e-06, "loss": 0.21508680284023285, "step": 5837 }, { "epoch": 0.7084091736439753, "grad_norm": 3.1467530727386475, "learning_rate": 5.9059083650657164e-06, "loss": 0.2059308886528015, "step": 5838 }, { "epoch": 0.7085305181410023, "grad_norm": 2.569166898727417, "learning_rate": 5.903451664414691e-06, "loss": 0.2633601427078247, "step": 5839 }, { "epoch": 0.7086518626380294, "grad_norm": 2.9538748264312744, "learning_rate": 5.900994963763666e-06, "loss": 0.24430067837238312, "step": 5840 }, { "epoch": 0.7087732071350564, "grad_norm": 4.529779434204102, "learning_rate": 5.89853826311264e-06, "loss": 0.29330888390541077, "step": 5841 }, { "epoch": 0.7088945516320835, "grad_norm": 3.719217538833618, "learning_rate": 5.896081562461614e-06, "loss": 0.17714166641235352, "step": 5842 }, { "epoch": 0.7090158961291105, "grad_norm": 2.074035406112671, "learning_rate": 5.893624861810589e-06, "loss": 0.24224841594696045, "step": 5843 }, { "epoch": 0.7091372406261376, "grad_norm": 3.35068678855896, "learning_rate": 5.891168161159563e-06, "loss": 0.21858233213424683, "step": 5844 }, { "epoch": 0.7092585851231646, "grad_norm": 2.762637138366699, "learning_rate": 5.888711460508537e-06, "loss": 0.2467726469039917, "step": 5845 }, { "epoch": 0.7093799296201917, "grad_norm": 2.573704481124878, "learning_rate": 5.886254759857511e-06, "loss": 0.46878767013549805, "step": 5846 }, { "epoch": 0.7095012741172187, "grad_norm": 2.2218055725097656, "learning_rate": 5.883798059206486e-06, "loss": 0.5630099773406982, "step": 5847 }, { "epoch": 0.7096226186142458, "grad_norm": 3.371591806411743, "learning_rate": 5.88134135855546e-06, "loss": 0.1649099737405777, "step": 5848 }, { "epoch": 0.7097439631112729, "grad_norm": 2.372459650039673, "learning_rate": 5.878884657904434e-06, "loss": 0.35459232330322266, "step": 5849 }, { "epoch": 0.7098653076082999, "grad_norm": 2.8651182651519775, "learning_rate": 5.876427957253409e-06, "loss": 0.20117178559303284, "step": 5850 }, { "epoch": 0.7099866521053271, "grad_norm": 3.184128999710083, "learning_rate": 5.8739712566023835e-06, "loss": 0.304601788520813, "step": 5851 }, { "epoch": 0.7101079966023541, "grad_norm": 1.0564324855804443, "learning_rate": 5.871514555951358e-06, "loss": 0.011079314164817333, "step": 5852 }, { "epoch": 0.7102293410993812, "grad_norm": 3.4797778129577637, "learning_rate": 5.869057855300332e-06, "loss": 0.351175457239151, "step": 5853 }, { "epoch": 0.7103506855964082, "grad_norm": 2.292452335357666, "learning_rate": 5.866601154649306e-06, "loss": 0.04045844450592995, "step": 5854 }, { "epoch": 0.7104720300934353, "grad_norm": 1.9470033645629883, "learning_rate": 5.8641444539982805e-06, "loss": 0.19889214634895325, "step": 5855 }, { "epoch": 0.7105933745904623, "grad_norm": 2.485527276992798, "learning_rate": 5.861687753347255e-06, "loss": 0.6532883048057556, "step": 5856 }, { "epoch": 0.7107147190874894, "grad_norm": 2.683828592300415, "learning_rate": 5.859231052696229e-06, "loss": 0.3495826721191406, "step": 5857 }, { "epoch": 0.7108360635845165, "grad_norm": 2.5322723388671875, "learning_rate": 5.856774352045203e-06, "loss": 0.40136682987213135, "step": 5858 }, { "epoch": 0.7109574080815435, "grad_norm": 2.8479995727539062, "learning_rate": 5.8543176513941776e-06, "loss": 0.3290691375732422, "step": 5859 }, { "epoch": 0.7110787525785706, "grad_norm": 2.706108808517456, "learning_rate": 5.851860950743153e-06, "loss": 0.2185295820236206, "step": 5860 }, { "epoch": 0.7112000970755976, "grad_norm": 2.2765278816223145, "learning_rate": 5.849404250092127e-06, "loss": 0.471883624792099, "step": 5861 }, { "epoch": 0.7113214415726247, "grad_norm": 3.1659135818481445, "learning_rate": 5.846947549441101e-06, "loss": 0.5340144634246826, "step": 5862 }, { "epoch": 0.7114427860696517, "grad_norm": 2.4277114868164062, "learning_rate": 5.8444908487900754e-06, "loss": 0.180166095495224, "step": 5863 }, { "epoch": 0.7115641305666788, "grad_norm": 0.15620772540569305, "learning_rate": 5.84203414813905e-06, "loss": 0.0012036560801789165, "step": 5864 }, { "epoch": 0.7116854750637058, "grad_norm": 4.861850738525391, "learning_rate": 5.839577447488024e-06, "loss": 0.21675100922584534, "step": 5865 }, { "epoch": 0.7118068195607329, "grad_norm": 3.338036298751831, "learning_rate": 5.837120746836998e-06, "loss": 0.3390640318393707, "step": 5866 }, { "epoch": 0.7119281640577599, "grad_norm": 2.70170521736145, "learning_rate": 5.8346640461859725e-06, "loss": 0.2396087646484375, "step": 5867 }, { "epoch": 0.712049508554787, "grad_norm": 2.8973913192749023, "learning_rate": 5.832207345534947e-06, "loss": 0.06813687831163406, "step": 5868 }, { "epoch": 0.712170853051814, "grad_norm": 2.1060984134674072, "learning_rate": 5.829750644883921e-06, "loss": 0.32629624009132385, "step": 5869 }, { "epoch": 0.7122921975488412, "grad_norm": 4.297245025634766, "learning_rate": 5.827293944232896e-06, "loss": 0.25254303216934204, "step": 5870 }, { "epoch": 0.7124135420458683, "grad_norm": 3.2823848724365234, "learning_rate": 5.82483724358187e-06, "loss": 0.35645216703414917, "step": 5871 }, { "epoch": 0.7125348865428953, "grad_norm": 2.8433432579040527, "learning_rate": 5.822380542930845e-06, "loss": 0.14685554802417755, "step": 5872 }, { "epoch": 0.7126562310399224, "grad_norm": 1.4510807991027832, "learning_rate": 5.819923842279819e-06, "loss": 0.030132941901683807, "step": 5873 }, { "epoch": 0.7127775755369494, "grad_norm": 2.079232692718506, "learning_rate": 5.817467141628793e-06, "loss": 0.11338566988706589, "step": 5874 }, { "epoch": 0.7128989200339765, "grad_norm": 1.9281219244003296, "learning_rate": 5.815010440977767e-06, "loss": 0.11117631942033768, "step": 5875 }, { "epoch": 0.7130202645310035, "grad_norm": 3.595733404159546, "learning_rate": 5.812553740326742e-06, "loss": 0.31811389327049255, "step": 5876 }, { "epoch": 0.7131416090280306, "grad_norm": 4.229249000549316, "learning_rate": 5.810097039675716e-06, "loss": 0.0986100435256958, "step": 5877 }, { "epoch": 0.7132629535250576, "grad_norm": 2.10900616645813, "learning_rate": 5.80764033902469e-06, "loss": 0.10394268482923508, "step": 5878 }, { "epoch": 0.7133842980220847, "grad_norm": 0.0010415057186037302, "learning_rate": 5.805183638373664e-06, "loss": 1.7751994164427742e-05, "step": 5879 }, { "epoch": 0.7135056425191117, "grad_norm": 2.7621231079101562, "learning_rate": 5.8027269377226395e-06, "loss": 0.3443986475467682, "step": 5880 }, { "epoch": 0.7136269870161388, "grad_norm": 2.468208074569702, "learning_rate": 5.800270237071614e-06, "loss": 0.10895691066980362, "step": 5881 }, { "epoch": 0.7137483315131659, "grad_norm": 3.6786742210388184, "learning_rate": 5.797813536420588e-06, "loss": 0.3812035322189331, "step": 5882 }, { "epoch": 0.7138696760101929, "grad_norm": 3.8164408206939697, "learning_rate": 5.795356835769562e-06, "loss": 0.10607192665338516, "step": 5883 }, { "epoch": 0.71399102050722, "grad_norm": 2.6934986114501953, "learning_rate": 5.7929001351185366e-06, "loss": 0.05964359641075134, "step": 5884 }, { "epoch": 0.714112365004247, "grad_norm": 2.9071245193481445, "learning_rate": 5.790443434467511e-06, "loss": 0.2108161747455597, "step": 5885 }, { "epoch": 0.7142337095012741, "grad_norm": 1.4385520219802856, "learning_rate": 5.787986733816485e-06, "loss": 0.07350638508796692, "step": 5886 }, { "epoch": 0.7143550539983011, "grad_norm": 5.168160915374756, "learning_rate": 5.785530033165459e-06, "loss": 0.14292918145656586, "step": 5887 }, { "epoch": 0.7144763984953283, "grad_norm": 4.47307014465332, "learning_rate": 5.783073332514434e-06, "loss": 0.45156922936439514, "step": 5888 }, { "epoch": 0.7145977429923553, "grad_norm": 2.9549779891967773, "learning_rate": 5.780616631863408e-06, "loss": 0.2380158007144928, "step": 5889 }, { "epoch": 0.7147190874893824, "grad_norm": 2.0961437225341797, "learning_rate": 5.778159931212383e-06, "loss": 0.04503151774406433, "step": 5890 }, { "epoch": 0.7148404319864095, "grad_norm": 2.883702278137207, "learning_rate": 5.775703230561357e-06, "loss": 0.15489380061626434, "step": 5891 }, { "epoch": 0.7149617764834365, "grad_norm": 2.4540624618530273, "learning_rate": 5.7732465299103315e-06, "loss": 0.17349041998386383, "step": 5892 }, { "epoch": 0.7150831209804636, "grad_norm": 3.876882553100586, "learning_rate": 5.770789829259306e-06, "loss": 0.5961467027664185, "step": 5893 }, { "epoch": 0.7152044654774906, "grad_norm": 3.614917755126953, "learning_rate": 5.76833312860828e-06, "loss": 0.1574934422969818, "step": 5894 }, { "epoch": 0.7153258099745177, "grad_norm": 5.512879848480225, "learning_rate": 5.765876427957254e-06, "loss": 0.07651883363723755, "step": 5895 }, { "epoch": 0.7154471544715447, "grad_norm": 3.294229507446289, "learning_rate": 5.7634197273062285e-06, "loss": 0.2415429949760437, "step": 5896 }, { "epoch": 0.7155684989685718, "grad_norm": 2.2713234424591064, "learning_rate": 5.760963026655203e-06, "loss": 0.17806842923164368, "step": 5897 }, { "epoch": 0.7156898434655988, "grad_norm": 5.825490474700928, "learning_rate": 5.758506326004177e-06, "loss": 0.25201576948165894, "step": 5898 }, { "epoch": 0.7158111879626259, "grad_norm": 4.063902378082275, "learning_rate": 5.756049625353152e-06, "loss": 0.2880120575428009, "step": 5899 }, { "epoch": 0.7159325324596529, "grad_norm": 2.670093297958374, "learning_rate": 5.753592924702126e-06, "loss": 0.13734492659568787, "step": 5900 }, { "epoch": 0.71605387695668, "grad_norm": 3.403818368911743, "learning_rate": 5.751136224051101e-06, "loss": 0.19644519686698914, "step": 5901 }, { "epoch": 0.716175221453707, "grad_norm": 2.308389902114868, "learning_rate": 5.748679523400073e-06, "loss": 0.18146586418151855, "step": 5902 }, { "epoch": 0.7162965659507341, "grad_norm": 3.162785530090332, "learning_rate": 5.746222822749048e-06, "loss": 0.13632625341415405, "step": 5903 }, { "epoch": 0.7164179104477612, "grad_norm": 4.374864101409912, "learning_rate": 5.7437661220980226e-06, "loss": 0.6074321269989014, "step": 5904 }, { "epoch": 0.7165392549447882, "grad_norm": 3.8082292079925537, "learning_rate": 5.741309421446997e-06, "loss": 0.2642704248428345, "step": 5905 }, { "epoch": 0.7166605994418153, "grad_norm": 4.274983882904053, "learning_rate": 5.738852720795971e-06, "loss": 0.45920705795288086, "step": 5906 }, { "epoch": 0.7167819439388424, "grad_norm": 2.98555064201355, "learning_rate": 5.736396020144945e-06, "loss": 0.2694275677204132, "step": 5907 }, { "epoch": 0.7169032884358695, "grad_norm": 1.9468590021133423, "learning_rate": 5.73393931949392e-06, "loss": 0.09660639613866806, "step": 5908 }, { "epoch": 0.7170246329328965, "grad_norm": 3.239866256713867, "learning_rate": 5.731482618842894e-06, "loss": 0.184454545378685, "step": 5909 }, { "epoch": 0.7171459774299236, "grad_norm": 4.407609462738037, "learning_rate": 5.729025918191868e-06, "loss": 0.34130609035491943, "step": 5910 }, { "epoch": 0.7172673219269506, "grad_norm": 3.951282262802124, "learning_rate": 5.726569217540842e-06, "loss": 0.4171329438686371, "step": 5911 }, { "epoch": 0.7173886664239777, "grad_norm": 3.6581690311431885, "learning_rate": 5.7241125168898175e-06, "loss": 0.4644727408885956, "step": 5912 }, { "epoch": 0.7175100109210047, "grad_norm": 1.2134668827056885, "learning_rate": 5.721655816238792e-06, "loss": 0.010828576982021332, "step": 5913 }, { "epoch": 0.7176313554180318, "grad_norm": 2.9983184337615967, "learning_rate": 5.719199115587766e-06, "loss": 0.5584735870361328, "step": 5914 }, { "epoch": 0.7177526999150589, "grad_norm": 2.840137004852295, "learning_rate": 5.71674241493674e-06, "loss": 0.16097773611545563, "step": 5915 }, { "epoch": 0.7178740444120859, "grad_norm": 2.3423359394073486, "learning_rate": 5.7142857142857145e-06, "loss": 0.20625022053718567, "step": 5916 }, { "epoch": 0.717995388909113, "grad_norm": 2.3200109004974365, "learning_rate": 5.711829013634689e-06, "loss": 0.3803074359893799, "step": 5917 }, { "epoch": 0.71811673340614, "grad_norm": 3.8149688243865967, "learning_rate": 5.709372312983663e-06, "loss": 0.2873532176017761, "step": 5918 }, { "epoch": 0.7182380779031671, "grad_norm": 3.4820616245269775, "learning_rate": 5.706915612332637e-06, "loss": 0.43753859400749207, "step": 5919 }, { "epoch": 0.7183594224001941, "grad_norm": 1.8804795742034912, "learning_rate": 5.7044589116816115e-06, "loss": 0.21988771855831146, "step": 5920 }, { "epoch": 0.7184807668972212, "grad_norm": 0.584765613079071, "learning_rate": 5.702002211030586e-06, "loss": 0.008456402458250523, "step": 5921 }, { "epoch": 0.7186021113942482, "grad_norm": 2.4656035900115967, "learning_rate": 5.699545510379561e-06, "loss": 0.16508972644805908, "step": 5922 }, { "epoch": 0.7187234558912753, "grad_norm": 2.504615068435669, "learning_rate": 5.697088809728535e-06, "loss": 0.09889542311429977, "step": 5923 }, { "epoch": 0.7188448003883023, "grad_norm": 2.375547170639038, "learning_rate": 5.6946321090775094e-06, "loss": 0.2272157073020935, "step": 5924 }, { "epoch": 0.7189661448853295, "grad_norm": 2.445003032684326, "learning_rate": 5.692175408426484e-06, "loss": 0.10069077461957932, "step": 5925 }, { "epoch": 0.7190874893823566, "grad_norm": 2.9557712078094482, "learning_rate": 5.689718707775458e-06, "loss": 0.25702035427093506, "step": 5926 }, { "epoch": 0.7192088338793836, "grad_norm": 3.2584502696990967, "learning_rate": 5.687262007124432e-06, "loss": 0.1505943238735199, "step": 5927 }, { "epoch": 0.7193301783764107, "grad_norm": 2.921945571899414, "learning_rate": 5.6848053064734065e-06, "loss": 0.22055459022521973, "step": 5928 }, { "epoch": 0.7194515228734377, "grad_norm": 2.070234775543213, "learning_rate": 5.682348605822381e-06, "loss": 0.15984347462654114, "step": 5929 }, { "epoch": 0.7195728673704648, "grad_norm": 2.4295060634613037, "learning_rate": 5.679891905171355e-06, "loss": 0.06413902342319489, "step": 5930 }, { "epoch": 0.7196942118674918, "grad_norm": 4.999510765075684, "learning_rate": 5.677435204520329e-06, "loss": 0.5779997706413269, "step": 5931 }, { "epoch": 0.7198155563645189, "grad_norm": 3.3192150592803955, "learning_rate": 5.674978503869304e-06, "loss": 0.2538946270942688, "step": 5932 }, { "epoch": 0.7199369008615459, "grad_norm": 1.8914129734039307, "learning_rate": 5.672521803218279e-06, "loss": 0.20722943544387817, "step": 5933 }, { "epoch": 0.720058245358573, "grad_norm": 3.794032096862793, "learning_rate": 5.670065102567253e-06, "loss": 0.4346970021724701, "step": 5934 }, { "epoch": 0.7201795898556, "grad_norm": 2.5513248443603516, "learning_rate": 5.667608401916227e-06, "loss": 0.28715044260025024, "step": 5935 }, { "epoch": 0.7203009343526271, "grad_norm": 3.046928882598877, "learning_rate": 5.665151701265201e-06, "loss": 0.2396654486656189, "step": 5936 }, { "epoch": 0.7204222788496542, "grad_norm": 4.679769992828369, "learning_rate": 5.662695000614176e-06, "loss": 0.9614903330802917, "step": 5937 }, { "epoch": 0.7205436233466812, "grad_norm": 2.0265700817108154, "learning_rate": 5.66023829996315e-06, "loss": 0.159714013338089, "step": 5938 }, { "epoch": 0.7206649678437083, "grad_norm": 3.405237913131714, "learning_rate": 5.657781599312124e-06, "loss": 0.45624807476997375, "step": 5939 }, { "epoch": 0.7207863123407353, "grad_norm": 3.1068108081817627, "learning_rate": 5.655324898661098e-06, "loss": 0.1589200794696808, "step": 5940 }, { "epoch": 0.7209076568377624, "grad_norm": 0.004847115837037563, "learning_rate": 5.652868198010073e-06, "loss": 6.334451609291136e-05, "step": 5941 }, { "epoch": 0.7210290013347894, "grad_norm": 1.379502534866333, "learning_rate": 5.650411497359048e-06, "loss": 0.11903268843889236, "step": 5942 }, { "epoch": 0.7211503458318165, "grad_norm": 3.634232521057129, "learning_rate": 5.647954796708022e-06, "loss": 0.4407496452331543, "step": 5943 }, { "epoch": 0.7212716903288436, "grad_norm": 3.2847445011138916, "learning_rate": 5.645498096056996e-06, "loss": 0.26056334376335144, "step": 5944 }, { "epoch": 0.7213930348258707, "grad_norm": 2.4205172061920166, "learning_rate": 5.6430413954059705e-06, "loss": 0.2062818706035614, "step": 5945 }, { "epoch": 0.7215143793228977, "grad_norm": 2.1203248500823975, "learning_rate": 5.640584694754945e-06, "loss": 0.44678664207458496, "step": 5946 }, { "epoch": 0.7216357238199248, "grad_norm": 1.9340072870254517, "learning_rate": 5.638127994103919e-06, "loss": 0.10376384109258652, "step": 5947 }, { "epoch": 0.7217570683169519, "grad_norm": 3.9504380226135254, "learning_rate": 5.635671293452893e-06, "loss": 0.15648524463176727, "step": 5948 }, { "epoch": 0.7218784128139789, "grad_norm": 3.183835506439209, "learning_rate": 5.6332145928018676e-06, "loss": 0.26863357424736023, "step": 5949 }, { "epoch": 0.721999757311006, "grad_norm": 3.355085849761963, "learning_rate": 5.630757892150842e-06, "loss": 0.22728218138217926, "step": 5950 }, { "epoch": 0.722121101808033, "grad_norm": 2.330925226211548, "learning_rate": 5.628301191499816e-06, "loss": 0.17029744386672974, "step": 5951 }, { "epoch": 0.7222424463050601, "grad_norm": 2.8520631790161133, "learning_rate": 5.625844490848791e-06, "loss": 0.4079977571964264, "step": 5952 }, { "epoch": 0.7223637908020871, "grad_norm": 3.6604084968566895, "learning_rate": 5.6233877901977655e-06, "loss": 0.2729458808898926, "step": 5953 }, { "epoch": 0.7224851352991142, "grad_norm": 2.2499887943267822, "learning_rate": 5.62093108954674e-06, "loss": 0.049778688699007034, "step": 5954 }, { "epoch": 0.7226064797961412, "grad_norm": 4.224139213562012, "learning_rate": 5.618474388895714e-06, "loss": 0.22293318808078766, "step": 5955 }, { "epoch": 0.7227278242931683, "grad_norm": 4.257871627807617, "learning_rate": 5.616017688244688e-06, "loss": 0.40677717328071594, "step": 5956 }, { "epoch": 0.7228491687901953, "grad_norm": 3.7472782135009766, "learning_rate": 5.6135609875936625e-06, "loss": 0.3548480272293091, "step": 5957 }, { "epoch": 0.7229705132872224, "grad_norm": 2.1576383113861084, "learning_rate": 5.611104286942637e-06, "loss": 0.12408121675252914, "step": 5958 }, { "epoch": 0.7230918577842494, "grad_norm": 1.967600703239441, "learning_rate": 5.608647586291611e-06, "loss": 0.18841183185577393, "step": 5959 }, { "epoch": 0.7232132022812765, "grad_norm": 2.342170000076294, "learning_rate": 5.606190885640585e-06, "loss": 0.22387444972991943, "step": 5960 }, { "epoch": 0.7233345467783036, "grad_norm": 1.798871397972107, "learning_rate": 5.6037341849895595e-06, "loss": 0.09698998928070068, "step": 5961 }, { "epoch": 0.7234558912753306, "grad_norm": 2.6641173362731934, "learning_rate": 5.601277484338535e-06, "loss": 0.19139833748340607, "step": 5962 }, { "epoch": 0.7235772357723578, "grad_norm": 2.3769190311431885, "learning_rate": 5.598820783687509e-06, "loss": 0.2687804400920868, "step": 5963 }, { "epoch": 0.7236985802693848, "grad_norm": 5.9528303146362305, "learning_rate": 5.596364083036483e-06, "loss": 1.0355541706085205, "step": 5964 }, { "epoch": 0.7238199247664119, "grad_norm": 1.5478198528289795, "learning_rate": 5.593907382385457e-06, "loss": 0.0547638013958931, "step": 5965 }, { "epoch": 0.7239412692634389, "grad_norm": 2.4786109924316406, "learning_rate": 5.591450681734432e-06, "loss": 0.2991161644458771, "step": 5966 }, { "epoch": 0.724062613760466, "grad_norm": 3.8517274856567383, "learning_rate": 5.588993981083406e-06, "loss": 0.23159676790237427, "step": 5967 }, { "epoch": 0.724183958257493, "grad_norm": 4.104390621185303, "learning_rate": 5.58653728043238e-06, "loss": 0.6096137762069702, "step": 5968 }, { "epoch": 0.7243053027545201, "grad_norm": 1.946926236152649, "learning_rate": 5.5840805797813544e-06, "loss": 0.1737823784351349, "step": 5969 }, { "epoch": 0.7244266472515472, "grad_norm": 2.9987564086914062, "learning_rate": 5.581623879130328e-06, "loss": 0.3976944386959076, "step": 5970 }, { "epoch": 0.7245479917485742, "grad_norm": 2.196988821029663, "learning_rate": 5.579167178479302e-06, "loss": 0.224501371383667, "step": 5971 }, { "epoch": 0.7246693362456013, "grad_norm": 2.5487804412841797, "learning_rate": 5.576710477828276e-06, "loss": 0.3144124150276184, "step": 5972 }, { "epoch": 0.7247906807426283, "grad_norm": 3.065716028213501, "learning_rate": 5.574253777177251e-06, "loss": 0.356366902589798, "step": 5973 }, { "epoch": 0.7249120252396554, "grad_norm": 4.144125461578369, "learning_rate": 5.571797076526225e-06, "loss": 0.35415583848953247, "step": 5974 }, { "epoch": 0.7250333697366824, "grad_norm": 1.3895941972732544, "learning_rate": 5.5693403758752e-06, "loss": 0.19808076322078705, "step": 5975 }, { "epoch": 0.7251547142337095, "grad_norm": 2.3746864795684814, "learning_rate": 5.566883675224174e-06, "loss": 0.3232831656932831, "step": 5976 }, { "epoch": 0.7252760587307365, "grad_norm": 3.3581595420837402, "learning_rate": 5.5644269745731485e-06, "loss": 0.21106132864952087, "step": 5977 }, { "epoch": 0.7253974032277636, "grad_norm": 2.635436773300171, "learning_rate": 5.561970273922123e-06, "loss": 0.15674924850463867, "step": 5978 }, { "epoch": 0.7255187477247906, "grad_norm": 3.1839826107025146, "learning_rate": 5.559513573271097e-06, "loss": 0.05467141047120094, "step": 5979 }, { "epoch": 0.7256400922218177, "grad_norm": 3.8356521129608154, "learning_rate": 5.557056872620071e-06, "loss": 0.49515900015830994, "step": 5980 }, { "epoch": 0.7257614367188449, "grad_norm": 3.5122339725494385, "learning_rate": 5.5546001719690455e-06, "loss": 0.45044994354248047, "step": 5981 }, { "epoch": 0.7258827812158719, "grad_norm": 2.7272183895111084, "learning_rate": 5.55214347131802e-06, "loss": 0.2524448335170746, "step": 5982 }, { "epoch": 0.726004125712899, "grad_norm": 4.437028408050537, "learning_rate": 5.549686770666994e-06, "loss": 0.5801103711128235, "step": 5983 }, { "epoch": 0.726125470209926, "grad_norm": 3.222682476043701, "learning_rate": 5.547230070015968e-06, "loss": 0.10494711995124817, "step": 5984 }, { "epoch": 0.7262468147069531, "grad_norm": 2.271317958831787, "learning_rate": 5.544773369364943e-06, "loss": 0.18516553938388824, "step": 5985 }, { "epoch": 0.7263681592039801, "grad_norm": 3.1065642833709717, "learning_rate": 5.542316668713918e-06, "loss": 0.27621573209762573, "step": 5986 }, { "epoch": 0.7264895037010072, "grad_norm": 1.6249407529830933, "learning_rate": 5.539859968062892e-06, "loss": 0.06884906440973282, "step": 5987 }, { "epoch": 0.7266108481980342, "grad_norm": 0.42204681038856506, "learning_rate": 5.537403267411866e-06, "loss": 0.006517244968563318, "step": 5988 }, { "epoch": 0.7267321926950613, "grad_norm": 3.288043737411499, "learning_rate": 5.5349465667608404e-06, "loss": 0.2613447606563568, "step": 5989 }, { "epoch": 0.7268535371920883, "grad_norm": 2.13373064994812, "learning_rate": 5.532489866109815e-06, "loss": 0.03126762434840202, "step": 5990 }, { "epoch": 0.7269748816891154, "grad_norm": 2.212397336959839, "learning_rate": 5.530033165458789e-06, "loss": 0.353908509016037, "step": 5991 }, { "epoch": 0.7270962261861424, "grad_norm": 1.5121809244155884, "learning_rate": 5.527576464807763e-06, "loss": 0.109235979616642, "step": 5992 }, { "epoch": 0.7272175706831695, "grad_norm": 3.444430112838745, "learning_rate": 5.5251197641567375e-06, "loss": 0.7616369128227234, "step": 5993 }, { "epoch": 0.7273389151801966, "grad_norm": 1.1829572916030884, "learning_rate": 5.522663063505712e-06, "loss": 0.03352097421884537, "step": 5994 }, { "epoch": 0.7274602596772236, "grad_norm": 3.47609806060791, "learning_rate": 5.520206362854687e-06, "loss": 0.34938204288482666, "step": 5995 }, { "epoch": 0.7275816041742507, "grad_norm": 2.175187587738037, "learning_rate": 5.517749662203661e-06, "loss": 0.31142622232437134, "step": 5996 }, { "epoch": 0.7277029486712777, "grad_norm": 2.8222572803497314, "learning_rate": 5.515292961552635e-06, "loss": 0.3094290792942047, "step": 5997 }, { "epoch": 0.7278242931683048, "grad_norm": 2.3990814685821533, "learning_rate": 5.51283626090161e-06, "loss": 0.2529733180999756, "step": 5998 }, { "epoch": 0.7279456376653318, "grad_norm": 2.8766531944274902, "learning_rate": 5.510379560250584e-06, "loss": 0.5950224995613098, "step": 5999 }, { "epoch": 0.728066982162359, "grad_norm": 0.8455985188484192, "learning_rate": 5.507922859599558e-06, "loss": 0.025821443647146225, "step": 6000 }, { "epoch": 0.728188326659386, "grad_norm": 0.7573692202568054, "learning_rate": 5.505466158948532e-06, "loss": 0.009515678510069847, "step": 6001 }, { "epoch": 0.7283096711564131, "grad_norm": 5.378837585449219, "learning_rate": 5.503009458297507e-06, "loss": 0.17016944289207458, "step": 6002 }, { "epoch": 0.7284310156534402, "grad_norm": 4.6554436683654785, "learning_rate": 5.500552757646481e-06, "loss": 0.5549437999725342, "step": 6003 }, { "epoch": 0.7285523601504672, "grad_norm": 1.9888956546783447, "learning_rate": 5.498096056995455e-06, "loss": 0.259456992149353, "step": 6004 }, { "epoch": 0.7286737046474943, "grad_norm": 3.0132572650909424, "learning_rate": 5.49563935634443e-06, "loss": 0.19398492574691772, "step": 6005 }, { "epoch": 0.7287950491445213, "grad_norm": 2.2493317127227783, "learning_rate": 5.4931826556934045e-06, "loss": 0.10584238171577454, "step": 6006 }, { "epoch": 0.7289163936415484, "grad_norm": 2.8539726734161377, "learning_rate": 5.490725955042379e-06, "loss": 0.25899869203567505, "step": 6007 }, { "epoch": 0.7290377381385754, "grad_norm": 3.6584410667419434, "learning_rate": 5.488269254391353e-06, "loss": 0.18071335554122925, "step": 6008 }, { "epoch": 0.7291590826356025, "grad_norm": 1.5585259199142456, "learning_rate": 5.485812553740327e-06, "loss": 0.17396754026412964, "step": 6009 }, { "epoch": 0.7292804271326295, "grad_norm": 2.700357675552368, "learning_rate": 5.4833558530893016e-06, "loss": 0.32653701305389404, "step": 6010 }, { "epoch": 0.7294017716296566, "grad_norm": 3.2735722064971924, "learning_rate": 5.480899152438276e-06, "loss": 0.27444612979888916, "step": 6011 }, { "epoch": 0.7295231161266836, "grad_norm": 1.2924412488937378, "learning_rate": 5.47844245178725e-06, "loss": 0.016470450907945633, "step": 6012 }, { "epoch": 0.7296444606237107, "grad_norm": 1.9909659624099731, "learning_rate": 5.475985751136224e-06, "loss": 0.09596261382102966, "step": 6013 }, { "epoch": 0.7297658051207377, "grad_norm": 0.0030258377082645893, "learning_rate": 5.473529050485199e-06, "loss": 2.707834937609732e-05, "step": 6014 }, { "epoch": 0.7298871496177648, "grad_norm": 2.2893292903900146, "learning_rate": 5.471072349834174e-06, "loss": 0.10826656967401505, "step": 6015 }, { "epoch": 0.7300084941147919, "grad_norm": 1.646889328956604, "learning_rate": 5.468615649183148e-06, "loss": 0.059941843152046204, "step": 6016 }, { "epoch": 0.7301298386118189, "grad_norm": 4.907802581787109, "learning_rate": 5.466158948532122e-06, "loss": 0.4408078193664551, "step": 6017 }, { "epoch": 0.7302511831088461, "grad_norm": 3.458259105682373, "learning_rate": 5.4637022478810965e-06, "loss": 0.3694349229335785, "step": 6018 }, { "epoch": 0.7303725276058731, "grad_norm": 2.8520514965057373, "learning_rate": 5.461245547230071e-06, "loss": 0.15374965965747833, "step": 6019 }, { "epoch": 0.7304938721029002, "grad_norm": 2.4051003456115723, "learning_rate": 5.458788846579045e-06, "loss": 0.2532127797603607, "step": 6020 }, { "epoch": 0.7306152165999272, "grad_norm": 3.480496644973755, "learning_rate": 5.456332145928019e-06, "loss": 0.238888680934906, "step": 6021 }, { "epoch": 0.7307365610969543, "grad_norm": 0.2724989652633667, "learning_rate": 5.4538754452769935e-06, "loss": 0.0015220105415210128, "step": 6022 }, { "epoch": 0.7308579055939813, "grad_norm": 2.9024786949157715, "learning_rate": 5.451418744625968e-06, "loss": 0.15935266017913818, "step": 6023 }, { "epoch": 0.7309792500910084, "grad_norm": 4.218174934387207, "learning_rate": 5.448962043974942e-06, "loss": 0.05055328831076622, "step": 6024 }, { "epoch": 0.7311005945880354, "grad_norm": 4.228209495544434, "learning_rate": 5.446505343323917e-06, "loss": 0.40679115056991577, "step": 6025 }, { "epoch": 0.7312219390850625, "grad_norm": 3.040673017501831, "learning_rate": 5.444048642672891e-06, "loss": 0.28700733184814453, "step": 6026 }, { "epoch": 0.7313432835820896, "grad_norm": 2.5385582447052, "learning_rate": 5.441591942021866e-06, "loss": 0.17877258360385895, "step": 6027 }, { "epoch": 0.7314646280791166, "grad_norm": 2.24465012550354, "learning_rate": 5.43913524137084e-06, "loss": 0.20123276114463806, "step": 6028 }, { "epoch": 0.7315859725761437, "grad_norm": 3.727886199951172, "learning_rate": 5.436678540719814e-06, "loss": 0.1437690705060959, "step": 6029 }, { "epoch": 0.7317073170731707, "grad_norm": 2.56620192527771, "learning_rate": 5.434221840068788e-06, "loss": 0.14597336947917938, "step": 6030 }, { "epoch": 0.7318286615701978, "grad_norm": 3.3217947483062744, "learning_rate": 5.431765139417763e-06, "loss": 0.4046022593975067, "step": 6031 }, { "epoch": 0.7319500060672248, "grad_norm": 2.7901532649993896, "learning_rate": 5.429308438766737e-06, "loss": 0.39260244369506836, "step": 6032 }, { "epoch": 0.7320713505642519, "grad_norm": 2.499438524246216, "learning_rate": 5.426851738115711e-06, "loss": 0.1548299640417099, "step": 6033 }, { "epoch": 0.7321926950612789, "grad_norm": 3.281625747680664, "learning_rate": 5.424395037464686e-06, "loss": 0.21199099719524384, "step": 6034 }, { "epoch": 0.732314039558306, "grad_norm": 2.519728899002075, "learning_rate": 5.4219383368136606e-06, "loss": 0.09046676009893417, "step": 6035 }, { "epoch": 0.732435384055333, "grad_norm": 2.9867186546325684, "learning_rate": 5.419481636162635e-06, "loss": 0.20599357783794403, "step": 6036 }, { "epoch": 0.7325567285523602, "grad_norm": 2.4106783866882324, "learning_rate": 5.417024935511609e-06, "loss": 0.4471447467803955, "step": 6037 }, { "epoch": 0.7326780730493873, "grad_norm": 1.176792860031128, "learning_rate": 5.4145682348605825e-06, "loss": 0.014408327639102936, "step": 6038 }, { "epoch": 0.7327994175464143, "grad_norm": 3.496919870376587, "learning_rate": 5.412111534209557e-06, "loss": 0.2942601144313812, "step": 6039 }, { "epoch": 0.7329207620434414, "grad_norm": 1.8252711296081543, "learning_rate": 5.409654833558531e-06, "loss": 0.10728634893894196, "step": 6040 }, { "epoch": 0.7330421065404684, "grad_norm": 0.7730040550231934, "learning_rate": 5.407198132907505e-06, "loss": 0.020042577758431435, "step": 6041 }, { "epoch": 0.7331634510374955, "grad_norm": 4.013019561767578, "learning_rate": 5.4047414322564795e-06, "loss": 0.3371189534664154, "step": 6042 }, { "epoch": 0.7332847955345225, "grad_norm": 1.8465662002563477, "learning_rate": 5.402284731605454e-06, "loss": 0.09652253985404968, "step": 6043 }, { "epoch": 0.7334061400315496, "grad_norm": 1.333021640777588, "learning_rate": 5.399828030954428e-06, "loss": 0.03871054947376251, "step": 6044 }, { "epoch": 0.7335274845285766, "grad_norm": 4.001061916351318, "learning_rate": 5.397371330303402e-06, "loss": 0.17190414667129517, "step": 6045 }, { "epoch": 0.7336488290256037, "grad_norm": 4.552881240844727, "learning_rate": 5.3949146296523765e-06, "loss": 0.16267529129981995, "step": 6046 }, { "epoch": 0.7337701735226307, "grad_norm": 4.4398298263549805, "learning_rate": 5.392457929001352e-06, "loss": 0.13243360817432404, "step": 6047 }, { "epoch": 0.7338915180196578, "grad_norm": 2.12166690826416, "learning_rate": 5.390001228350326e-06, "loss": 0.048664700239896774, "step": 6048 }, { "epoch": 0.7340128625166848, "grad_norm": 2.458068370819092, "learning_rate": 5.3875445276993e-06, "loss": 0.31502676010131836, "step": 6049 }, { "epoch": 0.7341342070137119, "grad_norm": 2.3748281002044678, "learning_rate": 5.3850878270482744e-06, "loss": 0.16984929144382477, "step": 6050 }, { "epoch": 0.734255551510739, "grad_norm": 5.6427130699157715, "learning_rate": 5.382631126397249e-06, "loss": 0.32173776626586914, "step": 6051 }, { "epoch": 0.734376896007766, "grad_norm": 2.303086280822754, "learning_rate": 5.380174425746223e-06, "loss": 0.09718358516693115, "step": 6052 }, { "epoch": 0.7344982405047931, "grad_norm": 2.37441086769104, "learning_rate": 5.377717725095197e-06, "loss": 0.21092109382152557, "step": 6053 }, { "epoch": 0.7346195850018201, "grad_norm": 5.583506107330322, "learning_rate": 5.3752610244441715e-06, "loss": 0.48028481006622314, "step": 6054 }, { "epoch": 0.7347409294988472, "grad_norm": 2.725193738937378, "learning_rate": 5.372804323793146e-06, "loss": 0.25264492630958557, "step": 6055 }, { "epoch": 0.7348622739958743, "grad_norm": 4.314597129821777, "learning_rate": 5.37034762314212e-06, "loss": 0.2014649212360382, "step": 6056 }, { "epoch": 0.7349836184929014, "grad_norm": 3.5569369792938232, "learning_rate": 5.367890922491095e-06, "loss": 0.08444103598594666, "step": 6057 }, { "epoch": 0.7351049629899284, "grad_norm": 2.7375552654266357, "learning_rate": 5.365434221840069e-06, "loss": 0.24064046144485474, "step": 6058 }, { "epoch": 0.7352263074869555, "grad_norm": 3.6223182678222656, "learning_rate": 5.362977521189044e-06, "loss": 0.18504835665225983, "step": 6059 }, { "epoch": 0.7353476519839826, "grad_norm": 3.1147232055664062, "learning_rate": 5.360520820538018e-06, "loss": 0.6502425074577332, "step": 6060 }, { "epoch": 0.7354689964810096, "grad_norm": 3.8904662132263184, "learning_rate": 5.358064119886992e-06, "loss": 0.1793517917394638, "step": 6061 }, { "epoch": 0.7355903409780367, "grad_norm": 3.2026498317718506, "learning_rate": 5.355607419235966e-06, "loss": 0.2693966031074524, "step": 6062 }, { "epoch": 0.7357116854750637, "grad_norm": 2.6288602352142334, "learning_rate": 5.353150718584941e-06, "loss": 0.2793180048465729, "step": 6063 }, { "epoch": 0.7358330299720908, "grad_norm": 3.391601800918579, "learning_rate": 5.350694017933915e-06, "loss": 0.13721729815006256, "step": 6064 }, { "epoch": 0.7359543744691178, "grad_norm": 0.011107925325632095, "learning_rate": 5.348237317282889e-06, "loss": 8.779510972090065e-05, "step": 6065 }, { "epoch": 0.7360757189661449, "grad_norm": 4.49339485168457, "learning_rate": 5.345780616631863e-06, "loss": 0.20418241620063782, "step": 6066 }, { "epoch": 0.7361970634631719, "grad_norm": 2.707108497619629, "learning_rate": 5.3433239159808385e-06, "loss": 0.5264400243759155, "step": 6067 }, { "epoch": 0.736318407960199, "grad_norm": 2.863833427429199, "learning_rate": 5.340867215329813e-06, "loss": 0.08749412000179291, "step": 6068 }, { "epoch": 0.736439752457226, "grad_norm": 1.0281168222427368, "learning_rate": 5.338410514678787e-06, "loss": 0.021600745618343353, "step": 6069 }, { "epoch": 0.7365610969542531, "grad_norm": 3.5002834796905518, "learning_rate": 5.335953814027761e-06, "loss": 0.22379548847675323, "step": 6070 }, { "epoch": 0.7366824414512801, "grad_norm": 2.6478257179260254, "learning_rate": 5.3334971133767355e-06, "loss": 0.27664268016815186, "step": 6071 }, { "epoch": 0.7368037859483072, "grad_norm": 4.026993751525879, "learning_rate": 5.33104041272571e-06, "loss": 0.15627796947956085, "step": 6072 }, { "epoch": 0.7369251304453343, "grad_norm": 2.988118886947632, "learning_rate": 5.328583712074684e-06, "loss": 0.482835054397583, "step": 6073 }, { "epoch": 0.7370464749423614, "grad_norm": 3.4281396865844727, "learning_rate": 5.326127011423658e-06, "loss": 0.2668412923812866, "step": 6074 }, { "epoch": 0.7371678194393885, "grad_norm": 2.3023934364318848, "learning_rate": 5.3236703107726326e-06, "loss": 0.241678386926651, "step": 6075 }, { "epoch": 0.7372891639364155, "grad_norm": 4.051460266113281, "learning_rate": 5.321213610121607e-06, "loss": 0.3297157883644104, "step": 6076 }, { "epoch": 0.7374105084334426, "grad_norm": 3.5478155612945557, "learning_rate": 5.318756909470582e-06, "loss": 0.1358185112476349, "step": 6077 }, { "epoch": 0.7375318529304696, "grad_norm": 4.829777717590332, "learning_rate": 5.316300208819556e-06, "loss": 0.3885994553565979, "step": 6078 }, { "epoch": 0.7376531974274967, "grad_norm": 3.8190550804138184, "learning_rate": 5.3138435081685305e-06, "loss": 0.3717467188835144, "step": 6079 }, { "epoch": 0.7377745419245237, "grad_norm": 2.1169159412384033, "learning_rate": 5.311386807517505e-06, "loss": 0.0654282420873642, "step": 6080 }, { "epoch": 0.7378958864215508, "grad_norm": 3.0781452655792236, "learning_rate": 5.308930106866479e-06, "loss": 0.8481940627098083, "step": 6081 }, { "epoch": 0.7380172309185778, "grad_norm": 3.5993378162384033, "learning_rate": 5.306473406215453e-06, "loss": 0.42031940817832947, "step": 6082 }, { "epoch": 0.7381385754156049, "grad_norm": 2.445352554321289, "learning_rate": 5.3040167055644275e-06, "loss": 0.23383674025535583, "step": 6083 }, { "epoch": 0.738259919912632, "grad_norm": 2.534459352493286, "learning_rate": 5.301560004913402e-06, "loss": 0.13110268115997314, "step": 6084 }, { "epoch": 0.738381264409659, "grad_norm": 1.9263595342636108, "learning_rate": 5.299103304262376e-06, "loss": 0.09313894063234329, "step": 6085 }, { "epoch": 0.7385026089066861, "grad_norm": 3.1383190155029297, "learning_rate": 5.29664660361135e-06, "loss": 0.08998968452215195, "step": 6086 }, { "epoch": 0.7386239534037131, "grad_norm": 1.0290981531143188, "learning_rate": 5.294189902960325e-06, "loss": 0.006245792843401432, "step": 6087 }, { "epoch": 0.7387452979007402, "grad_norm": 2.7595598697662354, "learning_rate": 5.2917332023093e-06, "loss": 0.19690197706222534, "step": 6088 }, { "epoch": 0.7388666423977672, "grad_norm": 2.638073682785034, "learning_rate": 5.289276501658274e-06, "loss": 0.32155919075012207, "step": 6089 }, { "epoch": 0.7389879868947943, "grad_norm": 3.8128395080566406, "learning_rate": 5.286819801007248e-06, "loss": 0.08895954489707947, "step": 6090 }, { "epoch": 0.7391093313918213, "grad_norm": 2.3826963901519775, "learning_rate": 5.284363100356222e-06, "loss": 0.2801493704319, "step": 6091 }, { "epoch": 0.7392306758888484, "grad_norm": 3.6643731594085693, "learning_rate": 5.281906399705197e-06, "loss": 0.4262004792690277, "step": 6092 }, { "epoch": 0.7393520203858756, "grad_norm": 5.505343914031982, "learning_rate": 5.279449699054171e-06, "loss": 0.4200313091278076, "step": 6093 }, { "epoch": 0.7394733648829026, "grad_norm": 3.0820748805999756, "learning_rate": 5.276992998403145e-06, "loss": 0.22633503377437592, "step": 6094 }, { "epoch": 0.7395947093799297, "grad_norm": 3.5972537994384766, "learning_rate": 5.2745362977521194e-06, "loss": 0.33024129271507263, "step": 6095 }, { "epoch": 0.7397160538769567, "grad_norm": 1.9884674549102783, "learning_rate": 5.272079597101094e-06, "loss": 0.21871161460876465, "step": 6096 }, { "epoch": 0.7398373983739838, "grad_norm": 2.371495485305786, "learning_rate": 5.269622896450069e-06, "loss": 0.0697174146771431, "step": 6097 }, { "epoch": 0.7399587428710108, "grad_norm": 3.6451632976531982, "learning_rate": 5.267166195799043e-06, "loss": 0.5686128735542297, "step": 6098 }, { "epoch": 0.7400800873680379, "grad_norm": 2.452488660812378, "learning_rate": 5.264709495148017e-06, "loss": 0.1902797818183899, "step": 6099 }, { "epoch": 0.7402014318650649, "grad_norm": 0.062154341489076614, "learning_rate": 5.2622527944969916e-06, "loss": 0.0004367259971331805, "step": 6100 }, { "epoch": 0.740322776362092, "grad_norm": 2.278038263320923, "learning_rate": 5.259796093845966e-06, "loss": 0.23871245980262756, "step": 6101 }, { "epoch": 0.740444120859119, "grad_norm": 1.9518351554870605, "learning_rate": 5.25733939319494e-06, "loss": 0.3181134760379791, "step": 6102 }, { "epoch": 0.7405654653561461, "grad_norm": 1.8060133457183838, "learning_rate": 5.254882692543914e-06, "loss": 0.11069828271865845, "step": 6103 }, { "epoch": 0.7406868098531731, "grad_norm": 3.1474545001983643, "learning_rate": 5.252425991892889e-06, "loss": 0.2772263288497925, "step": 6104 }, { "epoch": 0.7408081543502002, "grad_norm": 2.531541109085083, "learning_rate": 5.249969291241862e-06, "loss": 0.3391886055469513, "step": 6105 }, { "epoch": 0.7409294988472273, "grad_norm": 3.42336368560791, "learning_rate": 5.247512590590836e-06, "loss": 0.1531331092119217, "step": 6106 }, { "epoch": 0.7410508433442543, "grad_norm": 2.091553211212158, "learning_rate": 5.2450558899398105e-06, "loss": 0.17458342015743256, "step": 6107 }, { "epoch": 0.7411721878412814, "grad_norm": 2.9428212642669678, "learning_rate": 5.242599189288785e-06, "loss": 0.1342279016971588, "step": 6108 }, { "epoch": 0.7412935323383084, "grad_norm": 2.061671018600464, "learning_rate": 5.240142488637759e-06, "loss": 0.18146148324012756, "step": 6109 }, { "epoch": 0.7414148768353355, "grad_norm": 2.2744908332824707, "learning_rate": 5.237685787986734e-06, "loss": 0.30764180421829224, "step": 6110 }, { "epoch": 0.7415362213323626, "grad_norm": 2.3445730209350586, "learning_rate": 5.235229087335708e-06, "loss": 0.2054719775915146, "step": 6111 }, { "epoch": 0.7416575658293897, "grad_norm": 2.932183265686035, "learning_rate": 5.232772386684683e-06, "loss": 0.32985925674438477, "step": 6112 }, { "epoch": 0.7417789103264167, "grad_norm": 3.0728490352630615, "learning_rate": 5.230315686033657e-06, "loss": 0.30228087306022644, "step": 6113 }, { "epoch": 0.7419002548234438, "grad_norm": 2.396747350692749, "learning_rate": 5.227858985382631e-06, "loss": 0.03514408692717552, "step": 6114 }, { "epoch": 0.7420215993204708, "grad_norm": 2.0309367179870605, "learning_rate": 5.2254022847316054e-06, "loss": 0.09916436672210693, "step": 6115 }, { "epoch": 0.7421429438174979, "grad_norm": 3.8186354637145996, "learning_rate": 5.22294558408058e-06, "loss": 0.23127666115760803, "step": 6116 }, { "epoch": 0.742264288314525, "grad_norm": 2.2370920181274414, "learning_rate": 5.220488883429554e-06, "loss": 0.14914922416210175, "step": 6117 }, { "epoch": 0.742385632811552, "grad_norm": 4.619099140167236, "learning_rate": 5.218032182778528e-06, "loss": 0.6422855854034424, "step": 6118 }, { "epoch": 0.7425069773085791, "grad_norm": 4.122846603393555, "learning_rate": 5.2155754821275025e-06, "loss": 0.3402933180332184, "step": 6119 }, { "epoch": 0.7426283218056061, "grad_norm": 3.8320505619049072, "learning_rate": 5.213118781476478e-06, "loss": 0.26605284214019775, "step": 6120 }, { "epoch": 0.7427496663026332, "grad_norm": 1.5006953477859497, "learning_rate": 5.210662080825452e-06, "loss": 0.11039668321609497, "step": 6121 }, { "epoch": 0.7428710107996602, "grad_norm": 1.8892015218734741, "learning_rate": 5.208205380174426e-06, "loss": 0.0748990997672081, "step": 6122 }, { "epoch": 0.7429923552966873, "grad_norm": 2.6751394271850586, "learning_rate": 5.2057486795234e-06, "loss": 0.39051082730293274, "step": 6123 }, { "epoch": 0.7431136997937143, "grad_norm": 1.7238034009933472, "learning_rate": 5.203291978872375e-06, "loss": 0.09202565252780914, "step": 6124 }, { "epoch": 0.7432350442907414, "grad_norm": 3.1296885013580322, "learning_rate": 5.200835278221349e-06, "loss": 0.13738593459129333, "step": 6125 }, { "epoch": 0.7433563887877684, "grad_norm": 2.722809314727783, "learning_rate": 5.198378577570323e-06, "loss": 0.2991894781589508, "step": 6126 }, { "epoch": 0.7434777332847955, "grad_norm": 3.4512181282043457, "learning_rate": 5.195921876919297e-06, "loss": 0.4395756423473358, "step": 6127 }, { "epoch": 0.7435990777818225, "grad_norm": 1.9228408336639404, "learning_rate": 5.193465176268272e-06, "loss": 0.11191265285015106, "step": 6128 }, { "epoch": 0.7437204222788496, "grad_norm": 2.552380084991455, "learning_rate": 5.191008475617246e-06, "loss": 0.09430459141731262, "step": 6129 }, { "epoch": 0.7438417667758768, "grad_norm": 2.4581985473632812, "learning_rate": 5.188551774966221e-06, "loss": 0.19212648272514343, "step": 6130 }, { "epoch": 0.7439631112729038, "grad_norm": 2.4295077323913574, "learning_rate": 5.186095074315195e-06, "loss": 0.2281629592180252, "step": 6131 }, { "epoch": 0.7440844557699309, "grad_norm": 3.2524912357330322, "learning_rate": 5.1836383736641695e-06, "loss": 0.9003543853759766, "step": 6132 }, { "epoch": 0.7442058002669579, "grad_norm": 3.528815507888794, "learning_rate": 5.181181673013144e-06, "loss": 0.523150622844696, "step": 6133 }, { "epoch": 0.744327144763985, "grad_norm": 2.3027942180633545, "learning_rate": 5.178724972362118e-06, "loss": 0.10617703944444656, "step": 6134 }, { "epoch": 0.744448489261012, "grad_norm": 2.518292188644409, "learning_rate": 5.176268271711092e-06, "loss": 0.14154504239559174, "step": 6135 }, { "epoch": 0.7445698337580391, "grad_norm": 2.1202285289764404, "learning_rate": 5.1738115710600666e-06, "loss": 0.244090735912323, "step": 6136 }, { "epoch": 0.7446911782550661, "grad_norm": 3.6532821655273438, "learning_rate": 5.171354870409041e-06, "loss": 0.21685296297073364, "step": 6137 }, { "epoch": 0.7448125227520932, "grad_norm": 3.0243682861328125, "learning_rate": 5.168898169758015e-06, "loss": 0.08891250193119049, "step": 6138 }, { "epoch": 0.7449338672491203, "grad_norm": 3.455143451690674, "learning_rate": 5.166441469106989e-06, "loss": 0.2935527265071869, "step": 6139 }, { "epoch": 0.7450552117461473, "grad_norm": 3.185150146484375, "learning_rate": 5.1639847684559644e-06, "loss": 0.30718570947647095, "step": 6140 }, { "epoch": 0.7451765562431744, "grad_norm": 4.620425701141357, "learning_rate": 5.161528067804939e-06, "loss": 0.24378523230552673, "step": 6141 }, { "epoch": 0.7452979007402014, "grad_norm": 3.6762614250183105, "learning_rate": 5.159071367153913e-06, "loss": 0.3794040083885193, "step": 6142 }, { "epoch": 0.7454192452372285, "grad_norm": 2.746579885482788, "learning_rate": 5.156614666502887e-06, "loss": 0.19041243195533752, "step": 6143 }, { "epoch": 0.7455405897342555, "grad_norm": 2.2057271003723145, "learning_rate": 5.1541579658518615e-06, "loss": 0.45037567615509033, "step": 6144 }, { "epoch": 0.7456619342312826, "grad_norm": 1.7280893325805664, "learning_rate": 5.151701265200836e-06, "loss": 0.06480126827955246, "step": 6145 }, { "epoch": 0.7457832787283096, "grad_norm": 4.756091117858887, "learning_rate": 5.14924456454981e-06, "loss": 0.3321785032749176, "step": 6146 }, { "epoch": 0.7459046232253367, "grad_norm": 3.2189605236053467, "learning_rate": 5.146787863898784e-06, "loss": 0.4174753427505493, "step": 6147 }, { "epoch": 0.7460259677223637, "grad_norm": 4.002439975738525, "learning_rate": 5.1443311632477585e-06, "loss": 0.31413859128952026, "step": 6148 }, { "epoch": 0.7461473122193909, "grad_norm": 2.585407257080078, "learning_rate": 5.141874462596733e-06, "loss": 0.253828227519989, "step": 6149 }, { "epoch": 0.746268656716418, "grad_norm": 4.738624095916748, "learning_rate": 5.139417761945708e-06, "loss": 0.2535017728805542, "step": 6150 }, { "epoch": 0.746390001213445, "grad_norm": 3.3355610370635986, "learning_rate": 5.136961061294682e-06, "loss": 0.16348248720169067, "step": 6151 }, { "epoch": 0.7465113457104721, "grad_norm": 3.435638189315796, "learning_rate": 5.134504360643656e-06, "loss": 0.2830241024494171, "step": 6152 }, { "epoch": 0.7466326902074991, "grad_norm": 3.6183454990386963, "learning_rate": 5.132047659992631e-06, "loss": 0.22828207910060883, "step": 6153 }, { "epoch": 0.7467540347045262, "grad_norm": 3.0082411766052246, "learning_rate": 5.129590959341605e-06, "loss": 0.4223429560661316, "step": 6154 }, { "epoch": 0.7468753792015532, "grad_norm": 2.925135374069214, "learning_rate": 5.127134258690579e-06, "loss": 0.2765306830406189, "step": 6155 }, { "epoch": 0.7469967236985803, "grad_norm": 2.721219539642334, "learning_rate": 5.124677558039553e-06, "loss": 0.35968345403671265, "step": 6156 }, { "epoch": 0.7471180681956073, "grad_norm": 4.187283515930176, "learning_rate": 5.122220857388528e-06, "loss": 0.1606966108083725, "step": 6157 }, { "epoch": 0.7472394126926344, "grad_norm": 3.6889595985412598, "learning_rate": 5.119764156737502e-06, "loss": 0.1387285441160202, "step": 6158 }, { "epoch": 0.7473607571896614, "grad_norm": 3.1883678436279297, "learning_rate": 5.117307456086476e-06, "loss": 0.2624755799770355, "step": 6159 }, { "epoch": 0.7474821016866885, "grad_norm": 4.074292182922363, "learning_rate": 5.114850755435451e-06, "loss": 0.3521261513233185, "step": 6160 }, { "epoch": 0.7476034461837155, "grad_norm": 3.2648277282714844, "learning_rate": 5.1123940547844256e-06, "loss": 0.2719040811061859, "step": 6161 }, { "epoch": 0.7477247906807426, "grad_norm": 2.5433475971221924, "learning_rate": 5.1099373541334e-06, "loss": 0.22350968420505524, "step": 6162 }, { "epoch": 0.7478461351777697, "grad_norm": 2.4978525638580322, "learning_rate": 5.107480653482374e-06, "loss": 0.14676865935325623, "step": 6163 }, { "epoch": 0.7479674796747967, "grad_norm": 2.7057502269744873, "learning_rate": 5.105023952831348e-06, "loss": 0.34689924120903015, "step": 6164 }, { "epoch": 0.7480888241718238, "grad_norm": 0.0012219235068187118, "learning_rate": 5.102567252180323e-06, "loss": 3.7860947486478835e-05, "step": 6165 }, { "epoch": 0.7482101686688508, "grad_norm": 2.426791191101074, "learning_rate": 5.100110551529297e-06, "loss": 0.19633875787258148, "step": 6166 }, { "epoch": 0.748331513165878, "grad_norm": 2.3322594165802, "learning_rate": 5.097653850878271e-06, "loss": 0.14331631362438202, "step": 6167 }, { "epoch": 0.748452857662905, "grad_norm": 4.592036724090576, "learning_rate": 5.095197150227245e-06, "loss": 0.2885703444480896, "step": 6168 }, { "epoch": 0.7485742021599321, "grad_norm": 3.185380697250366, "learning_rate": 5.0927404495762205e-06, "loss": 0.24644258618354797, "step": 6169 }, { "epoch": 0.7486955466569591, "grad_norm": 1.987817406654358, "learning_rate": 5.090283748925195e-06, "loss": 0.22316516935825348, "step": 6170 }, { "epoch": 0.7488168911539862, "grad_norm": 2.721917152404785, "learning_rate": 5.087827048274169e-06, "loss": 0.11828722059726715, "step": 6171 }, { "epoch": 0.7489382356510133, "grad_norm": 2.2800559997558594, "learning_rate": 5.085370347623143e-06, "loss": 0.4969063103199005, "step": 6172 }, { "epoch": 0.7490595801480403, "grad_norm": 2.5014631748199463, "learning_rate": 5.082913646972117e-06, "loss": 0.14561735093593597, "step": 6173 }, { "epoch": 0.7491809246450674, "grad_norm": 5.154873847961426, "learning_rate": 5.080456946321091e-06, "loss": 0.3625945448875427, "step": 6174 }, { "epoch": 0.7493022691420944, "grad_norm": 3.7645256519317627, "learning_rate": 5.078000245670065e-06, "loss": 0.26764583587646484, "step": 6175 }, { "epoch": 0.7494236136391215, "grad_norm": 2.340911388397217, "learning_rate": 5.0755435450190394e-06, "loss": 0.5015676617622375, "step": 6176 }, { "epoch": 0.7495449581361485, "grad_norm": 1.5423394441604614, "learning_rate": 5.073086844368014e-06, "loss": 0.08588645607233047, "step": 6177 }, { "epoch": 0.7496663026331756, "grad_norm": 2.1467580795288086, "learning_rate": 5.070630143716988e-06, "loss": 0.20486102998256683, "step": 6178 }, { "epoch": 0.7497876471302026, "grad_norm": 2.8227269649505615, "learning_rate": 5.068173443065962e-06, "loss": 0.4013114869594574, "step": 6179 }, { "epoch": 0.7499089916272297, "grad_norm": 2.392918586730957, "learning_rate": 5.0657167424149365e-06, "loss": 0.4827364981174469, "step": 6180 }, { "epoch": 0.7500303361242567, "grad_norm": 3.182701349258423, "learning_rate": 5.063260041763911e-06, "loss": 0.2986999750137329, "step": 6181 }, { "epoch": 0.7501516806212838, "grad_norm": 3.5402045249938965, "learning_rate": 5.060803341112886e-06, "loss": 0.1260857731103897, "step": 6182 }, { "epoch": 0.7502730251183108, "grad_norm": 3.353041172027588, "learning_rate": 5.05834664046186e-06, "loss": 0.2746659815311432, "step": 6183 }, { "epoch": 0.7503943696153379, "grad_norm": 1.4612878561019897, "learning_rate": 5.055889939810834e-06, "loss": 0.0787825658917427, "step": 6184 }, { "epoch": 0.750515714112365, "grad_norm": 1.781085729598999, "learning_rate": 5.053433239159809e-06, "loss": 0.10522723942995071, "step": 6185 }, { "epoch": 0.7506370586093921, "grad_norm": 2.9119465351104736, "learning_rate": 5.050976538508783e-06, "loss": 0.2065354585647583, "step": 6186 }, { "epoch": 0.7507584031064192, "grad_norm": 3.7499608993530273, "learning_rate": 5.048519837857757e-06, "loss": 0.5196564197540283, "step": 6187 }, { "epoch": 0.7508797476034462, "grad_norm": 2.9962098598480225, "learning_rate": 5.046063137206731e-06, "loss": 0.1541701704263687, "step": 6188 }, { "epoch": 0.7510010921004733, "grad_norm": 2.366020441055298, "learning_rate": 5.043606436555706e-06, "loss": 0.3453333377838135, "step": 6189 }, { "epoch": 0.7511224365975003, "grad_norm": 2.670876979827881, "learning_rate": 5.04114973590468e-06, "loss": 0.12783971428871155, "step": 6190 }, { "epoch": 0.7512437810945274, "grad_norm": 4.420131206512451, "learning_rate": 5.038693035253654e-06, "loss": 0.31812334060668945, "step": 6191 }, { "epoch": 0.7513651255915544, "grad_norm": 2.4923925399780273, "learning_rate": 5.036236334602629e-06, "loss": 0.10306712985038757, "step": 6192 }, { "epoch": 0.7514864700885815, "grad_norm": 3.91148042678833, "learning_rate": 5.0337796339516035e-06, "loss": 0.47170981764793396, "step": 6193 }, { "epoch": 0.7516078145856085, "grad_norm": 2.6023366451263428, "learning_rate": 5.031322933300578e-06, "loss": 0.28575947880744934, "step": 6194 }, { "epoch": 0.7517291590826356, "grad_norm": 5.270868301391602, "learning_rate": 5.028866232649552e-06, "loss": 0.2974729835987091, "step": 6195 }, { "epoch": 0.7518505035796627, "grad_norm": 1.2348856925964355, "learning_rate": 5.026409531998526e-06, "loss": 0.031190041452646255, "step": 6196 }, { "epoch": 0.7519718480766897, "grad_norm": 1.2243016958236694, "learning_rate": 5.0239528313475005e-06, "loss": 0.0528661273419857, "step": 6197 }, { "epoch": 0.7520931925737168, "grad_norm": 1.1914527416229248, "learning_rate": 5.021496130696475e-06, "loss": 0.0238338690251112, "step": 6198 }, { "epoch": 0.7522145370707438, "grad_norm": 4.2159247398376465, "learning_rate": 5.019039430045449e-06, "loss": 0.3619239628314972, "step": 6199 }, { "epoch": 0.7523358815677709, "grad_norm": 5.78615140914917, "learning_rate": 5.016582729394423e-06, "loss": 0.14820732176303864, "step": 6200 }, { "epoch": 0.7524572260647979, "grad_norm": 2.7117760181427, "learning_rate": 5.014126028743398e-06, "loss": 0.1961364895105362, "step": 6201 }, { "epoch": 0.752578570561825, "grad_norm": 2.980877637863159, "learning_rate": 5.011669328092373e-06, "loss": 0.15707306563854218, "step": 6202 }, { "epoch": 0.752699915058852, "grad_norm": 2.652109384536743, "learning_rate": 5.009212627441347e-06, "loss": 0.17819511890411377, "step": 6203 }, { "epoch": 0.7528212595558791, "grad_norm": 5.495177268981934, "learning_rate": 5.006755926790321e-06, "loss": 0.2184428572654724, "step": 6204 }, { "epoch": 0.7529426040529063, "grad_norm": 4.1116862297058105, "learning_rate": 5.0042992261392955e-06, "loss": 0.27709823846817017, "step": 6205 }, { "epoch": 0.7530639485499333, "grad_norm": 3.0363829135894775, "learning_rate": 5.00184252548827e-06, "loss": 0.2257017195224762, "step": 6206 }, { "epoch": 0.7531852930469604, "grad_norm": 2.288376569747925, "learning_rate": 4.999385824837244e-06, "loss": 0.1640644669532776, "step": 6207 }, { "epoch": 0.7533066375439874, "grad_norm": 2.0336341857910156, "learning_rate": 4.996929124186218e-06, "loss": 0.13411562144756317, "step": 6208 }, { "epoch": 0.7534279820410145, "grad_norm": 2.012877941131592, "learning_rate": 4.9944724235351925e-06, "loss": 0.1161537691950798, "step": 6209 }, { "epoch": 0.7535493265380415, "grad_norm": 3.6053786277770996, "learning_rate": 4.992015722884167e-06, "loss": 0.3146477937698364, "step": 6210 }, { "epoch": 0.7536706710350686, "grad_norm": 2.7688205242156982, "learning_rate": 4.989559022233141e-06, "loss": 0.30305224657058716, "step": 6211 }, { "epoch": 0.7537920155320956, "grad_norm": 2.9132072925567627, "learning_rate": 4.987102321582116e-06, "loss": 0.16586503386497498, "step": 6212 }, { "epoch": 0.7539133600291227, "grad_norm": 3.06526255607605, "learning_rate": 4.98464562093109e-06, "loss": 0.27287396788597107, "step": 6213 }, { "epoch": 0.7540347045261497, "grad_norm": 2.819847345352173, "learning_rate": 4.982188920280065e-06, "loss": 0.2961858808994293, "step": 6214 }, { "epoch": 0.7541560490231768, "grad_norm": 2.4052021503448486, "learning_rate": 4.979732219629039e-06, "loss": 0.3016352653503418, "step": 6215 }, { "epoch": 0.7542773935202038, "grad_norm": 2.8137502670288086, "learning_rate": 4.977275518978013e-06, "loss": 0.2597748637199402, "step": 6216 }, { "epoch": 0.7543987380172309, "grad_norm": 2.0219075679779053, "learning_rate": 4.974818818326987e-06, "loss": 0.34743431210517883, "step": 6217 }, { "epoch": 0.754520082514258, "grad_norm": 2.2797374725341797, "learning_rate": 4.972362117675962e-06, "loss": 0.016978608444333076, "step": 6218 }, { "epoch": 0.754641427011285, "grad_norm": 3.772848606109619, "learning_rate": 4.969905417024936e-06, "loss": 0.4629445970058441, "step": 6219 }, { "epoch": 0.7547627715083121, "grad_norm": 1.9099055528640747, "learning_rate": 4.96744871637391e-06, "loss": 0.11615204811096191, "step": 6220 }, { "epoch": 0.7548841160053391, "grad_norm": 4.5436482429504395, "learning_rate": 4.9649920157228844e-06, "loss": 0.2678883373737335, "step": 6221 }, { "epoch": 0.7550054605023662, "grad_norm": 2.6170272827148438, "learning_rate": 4.9625353150718595e-06, "loss": 0.30527979135513306, "step": 6222 }, { "epoch": 0.7551268049993933, "grad_norm": 2.7844960689544678, "learning_rate": 4.960078614420834e-06, "loss": 0.7649198770523071, "step": 6223 }, { "epoch": 0.7552481494964204, "grad_norm": 2.5510048866271973, "learning_rate": 4.957621913769807e-06, "loss": 0.17851464450359344, "step": 6224 }, { "epoch": 0.7553694939934474, "grad_norm": 3.5941805839538574, "learning_rate": 4.9551652131187815e-06, "loss": 0.40384799242019653, "step": 6225 }, { "epoch": 0.7554908384904745, "grad_norm": 2.5451440811157227, "learning_rate": 4.952708512467756e-06, "loss": 0.1324377954006195, "step": 6226 }, { "epoch": 0.7556121829875015, "grad_norm": 3.8331265449523926, "learning_rate": 4.95025181181673e-06, "loss": 0.23887789249420166, "step": 6227 }, { "epoch": 0.7557335274845286, "grad_norm": 3.8652446269989014, "learning_rate": 4.947795111165704e-06, "loss": 0.3720439672470093, "step": 6228 }, { "epoch": 0.7558548719815557, "grad_norm": 3.836756467819214, "learning_rate": 4.945338410514679e-06, "loss": 0.24539825320243835, "step": 6229 }, { "epoch": 0.7559762164785827, "grad_norm": 2.8147051334381104, "learning_rate": 4.942881709863654e-06, "loss": 0.2799602448940277, "step": 6230 }, { "epoch": 0.7560975609756098, "grad_norm": 3.0806689262390137, "learning_rate": 4.940425009212628e-06, "loss": 0.5132972002029419, "step": 6231 }, { "epoch": 0.7562189054726368, "grad_norm": 2.6222777366638184, "learning_rate": 4.937968308561602e-06, "loss": 0.5868977904319763, "step": 6232 }, { "epoch": 0.7563402499696639, "grad_norm": 2.187312602996826, "learning_rate": 4.935511607910576e-06, "loss": 0.2645559012889862, "step": 6233 }, { "epoch": 0.7564615944666909, "grad_norm": 2.827319860458374, "learning_rate": 4.933054907259551e-06, "loss": 0.25744396448135376, "step": 6234 }, { "epoch": 0.756582938963718, "grad_norm": 3.580073356628418, "learning_rate": 4.930598206608525e-06, "loss": 0.21719695627689362, "step": 6235 }, { "epoch": 0.756704283460745, "grad_norm": 3.361360788345337, "learning_rate": 4.928141505957499e-06, "loss": 0.37333130836486816, "step": 6236 }, { "epoch": 0.7568256279577721, "grad_norm": 2.3654870986938477, "learning_rate": 4.925684805306473e-06, "loss": 0.39005333185195923, "step": 6237 }, { "epoch": 0.7569469724547991, "grad_norm": 2.8449320793151855, "learning_rate": 4.923228104655448e-06, "loss": 0.3511606454849243, "step": 6238 }, { "epoch": 0.7570683169518262, "grad_norm": 2.145744562149048, "learning_rate": 4.920771404004423e-06, "loss": 0.3534967303276062, "step": 6239 }, { "epoch": 0.7571896614488532, "grad_norm": 2.5103373527526855, "learning_rate": 4.918314703353397e-06, "loss": 0.41211745142936707, "step": 6240 }, { "epoch": 0.7573110059458803, "grad_norm": 2.1382884979248047, "learning_rate": 4.915858002702371e-06, "loss": 0.07940667867660522, "step": 6241 }, { "epoch": 0.7574323504429075, "grad_norm": 4.381336688995361, "learning_rate": 4.9134013020513456e-06, "loss": 0.39928656816482544, "step": 6242 }, { "epoch": 0.7575536949399345, "grad_norm": 3.697925090789795, "learning_rate": 4.91094460140032e-06, "loss": 0.2894873321056366, "step": 6243 }, { "epoch": 0.7576750394369616, "grad_norm": 0.5381807684898376, "learning_rate": 4.908487900749294e-06, "loss": 0.005261936690658331, "step": 6244 }, { "epoch": 0.7577963839339886, "grad_norm": 4.569410800933838, "learning_rate": 4.906031200098268e-06, "loss": 0.41820061206817627, "step": 6245 }, { "epoch": 0.7579177284310157, "grad_norm": 3.288412570953369, "learning_rate": 4.903574499447243e-06, "loss": 0.40276527404785156, "step": 6246 }, { "epoch": 0.7580390729280427, "grad_norm": 1.8613978624343872, "learning_rate": 4.901117798796217e-06, "loss": 0.4635149836540222, "step": 6247 }, { "epoch": 0.7581604174250698, "grad_norm": 1.4258508682250977, "learning_rate": 4.898661098145192e-06, "loss": 0.03760741651058197, "step": 6248 }, { "epoch": 0.7582817619220968, "grad_norm": 2.28505539894104, "learning_rate": 4.896204397494166e-06, "loss": 0.5228300094604492, "step": 6249 }, { "epoch": 0.7584031064191239, "grad_norm": 1.7584125995635986, "learning_rate": 4.8937476968431405e-06, "loss": 0.19300074875354767, "step": 6250 }, { "epoch": 0.758524450916151, "grad_norm": 2.7199137210845947, "learning_rate": 4.891290996192115e-06, "loss": 0.11461424827575684, "step": 6251 }, { "epoch": 0.758645795413178, "grad_norm": 2.128697156906128, "learning_rate": 4.888834295541089e-06, "loss": 0.22157546877861023, "step": 6252 }, { "epoch": 0.7587671399102051, "grad_norm": 3.559788703918457, "learning_rate": 4.886377594890063e-06, "loss": 0.34244662523269653, "step": 6253 }, { "epoch": 0.7588884844072321, "grad_norm": 3.134685516357422, "learning_rate": 4.8839208942390375e-06, "loss": 0.4163839519023895, "step": 6254 }, { "epoch": 0.7590098289042592, "grad_norm": 1.1957815885543823, "learning_rate": 4.881464193588012e-06, "loss": 0.017217732965946198, "step": 6255 }, { "epoch": 0.7591311734012862, "grad_norm": 3.6754605770111084, "learning_rate": 4.879007492936986e-06, "loss": 0.622282862663269, "step": 6256 }, { "epoch": 0.7592525178983133, "grad_norm": 1.7039974927902222, "learning_rate": 4.87655079228596e-06, "loss": 0.08525048196315765, "step": 6257 }, { "epoch": 0.7593738623953403, "grad_norm": 2.7281017303466797, "learning_rate": 4.8740940916349345e-06, "loss": 0.1686839759349823, "step": 6258 }, { "epoch": 0.7594952068923674, "grad_norm": 2.5803916454315186, "learning_rate": 4.871637390983909e-06, "loss": 0.4553817808628082, "step": 6259 }, { "epoch": 0.7596165513893945, "grad_norm": 3.5768139362335205, "learning_rate": 4.869180690332883e-06, "loss": 0.2363990843296051, "step": 6260 }, { "epoch": 0.7597378958864216, "grad_norm": 2.553990364074707, "learning_rate": 4.866723989681857e-06, "loss": 0.19040818512439728, "step": 6261 }, { "epoch": 0.7598592403834487, "grad_norm": 3.3872764110565186, "learning_rate": 4.8642672890308316e-06, "loss": 0.19410133361816406, "step": 6262 }, { "epoch": 0.7599805848804757, "grad_norm": 4.80523681640625, "learning_rate": 4.861810588379806e-06, "loss": 0.2133503258228302, "step": 6263 }, { "epoch": 0.7601019293775028, "grad_norm": 2.419116735458374, "learning_rate": 4.85935388772878e-06, "loss": 0.32985448837280273, "step": 6264 }, { "epoch": 0.7602232738745298, "grad_norm": 3.351750373840332, "learning_rate": 4.856897187077755e-06, "loss": 0.25633615255355835, "step": 6265 }, { "epoch": 0.7603446183715569, "grad_norm": 2.6609017848968506, "learning_rate": 4.8544404864267294e-06, "loss": 0.03340404853224754, "step": 6266 }, { "epoch": 0.7604659628685839, "grad_norm": 1.9019726514816284, "learning_rate": 4.851983785775704e-06, "loss": 0.29311856627464294, "step": 6267 }, { "epoch": 0.760587307365611, "grad_norm": 5.039964199066162, "learning_rate": 4.849527085124678e-06, "loss": 0.13519582152366638, "step": 6268 }, { "epoch": 0.760708651862638, "grad_norm": 2.70578932762146, "learning_rate": 4.847070384473652e-06, "loss": 0.37142035365104675, "step": 6269 }, { "epoch": 0.7608299963596651, "grad_norm": 2.2589988708496094, "learning_rate": 4.8446136838226265e-06, "loss": 0.12164948880672455, "step": 6270 }, { "epoch": 0.7609513408566921, "grad_norm": 1.4588427543640137, "learning_rate": 4.842156983171601e-06, "loss": 0.03697766363620758, "step": 6271 }, { "epoch": 0.7610726853537192, "grad_norm": 4.2712883949279785, "learning_rate": 4.839700282520575e-06, "loss": 0.9612004160881042, "step": 6272 }, { "epoch": 0.7611940298507462, "grad_norm": 2.005854606628418, "learning_rate": 4.837243581869549e-06, "loss": 0.15236130356788635, "step": 6273 }, { "epoch": 0.7613153743477733, "grad_norm": 2.708271026611328, "learning_rate": 4.8347868812185235e-06, "loss": 0.32045403122901917, "step": 6274 }, { "epoch": 0.7614367188448004, "grad_norm": 4.7945556640625, "learning_rate": 4.832330180567499e-06, "loss": 0.26466530561447144, "step": 6275 }, { "epoch": 0.7615580633418274, "grad_norm": 3.3037641048431396, "learning_rate": 4.829873479916473e-06, "loss": 0.38225167989730835, "step": 6276 }, { "epoch": 0.7616794078388545, "grad_norm": 2.3742892742156982, "learning_rate": 4.827416779265447e-06, "loss": 0.29324111342430115, "step": 6277 }, { "epoch": 0.7618007523358815, "grad_norm": 3.0542781352996826, "learning_rate": 4.824960078614421e-06, "loss": 0.14004333317279816, "step": 6278 }, { "epoch": 0.7619220968329087, "grad_norm": 3.281198501586914, "learning_rate": 4.822503377963396e-06, "loss": 0.531724750995636, "step": 6279 }, { "epoch": 0.7620434413299357, "grad_norm": 4.238002777099609, "learning_rate": 4.82004667731237e-06, "loss": 0.36457952857017517, "step": 6280 }, { "epoch": 0.7621647858269628, "grad_norm": 2.8041346073150635, "learning_rate": 4.817589976661344e-06, "loss": 0.11324553936719894, "step": 6281 }, { "epoch": 0.7622861303239898, "grad_norm": 4.083532333374023, "learning_rate": 4.815133276010318e-06, "loss": 0.5509198904037476, "step": 6282 }, { "epoch": 0.7624074748210169, "grad_norm": 2.5590710639953613, "learning_rate": 4.812676575359293e-06, "loss": 0.26683640480041504, "step": 6283 }, { "epoch": 0.762528819318044, "grad_norm": 6.500755310058594, "learning_rate": 4.810219874708267e-06, "loss": 0.3242247700691223, "step": 6284 }, { "epoch": 0.762650163815071, "grad_norm": 2.0653891563415527, "learning_rate": 4.807763174057242e-06, "loss": 0.21183478832244873, "step": 6285 }, { "epoch": 0.7627715083120981, "grad_norm": 3.0948431491851807, "learning_rate": 4.805306473406216e-06, "loss": 0.365980327129364, "step": 6286 }, { "epoch": 0.7628928528091251, "grad_norm": 2.2289342880249023, "learning_rate": 4.8028497727551906e-06, "loss": 0.1621999591588974, "step": 6287 }, { "epoch": 0.7630141973061522, "grad_norm": 2.0054759979248047, "learning_rate": 4.800393072104165e-06, "loss": 0.29094764590263367, "step": 6288 }, { "epoch": 0.7631355418031792, "grad_norm": 2.9220924377441406, "learning_rate": 4.797936371453139e-06, "loss": 0.1605411171913147, "step": 6289 }, { "epoch": 0.7632568863002063, "grad_norm": 2.3013696670532227, "learning_rate": 4.795479670802113e-06, "loss": 0.5218464136123657, "step": 6290 }, { "epoch": 0.7633782307972333, "grad_norm": 2.7082877159118652, "learning_rate": 4.793022970151088e-06, "loss": 0.34820646047592163, "step": 6291 }, { "epoch": 0.7634995752942604, "grad_norm": 1.520072102546692, "learning_rate": 4.790566269500062e-06, "loss": 0.03211662545800209, "step": 6292 }, { "epoch": 0.7636209197912874, "grad_norm": 2.962343454360962, "learning_rate": 4.788109568849036e-06, "loss": 0.17726458609104156, "step": 6293 }, { "epoch": 0.7637422642883145, "grad_norm": 2.8694403171539307, "learning_rate": 4.78565286819801e-06, "loss": 0.3281645178794861, "step": 6294 }, { "epoch": 0.7638636087853415, "grad_norm": 3.4579315185546875, "learning_rate": 4.783196167546985e-06, "loss": 0.5070574879646301, "step": 6295 }, { "epoch": 0.7639849532823686, "grad_norm": 2.5204885005950928, "learning_rate": 4.780739466895959e-06, "loss": 0.463503897190094, "step": 6296 }, { "epoch": 0.7641062977793956, "grad_norm": 3.199266195297241, "learning_rate": 4.778282766244933e-06, "loss": 0.27177995443344116, "step": 6297 }, { "epoch": 0.7642276422764228, "grad_norm": 1.8268396854400635, "learning_rate": 4.775826065593907e-06, "loss": 0.3064328134059906, "step": 6298 }, { "epoch": 0.7643489867734499, "grad_norm": 4.775673866271973, "learning_rate": 4.773369364942882e-06, "loss": 0.4525625705718994, "step": 6299 }, { "epoch": 0.7644703312704769, "grad_norm": 3.831678628921509, "learning_rate": 4.770912664291856e-06, "loss": 0.26737967133522034, "step": 6300 }, { "epoch": 0.764591675767504, "grad_norm": 1.9673227071762085, "learning_rate": 4.768455963640831e-06, "loss": 0.1006266325712204, "step": 6301 }, { "epoch": 0.764713020264531, "grad_norm": 4.161457061767578, "learning_rate": 4.765999262989805e-06, "loss": 0.4247751235961914, "step": 6302 }, { "epoch": 0.7648343647615581, "grad_norm": 1.5105000734329224, "learning_rate": 4.7635425623387795e-06, "loss": 0.08788177371025085, "step": 6303 }, { "epoch": 0.7649557092585851, "grad_norm": 2.967437267303467, "learning_rate": 4.761085861687754e-06, "loss": 0.14938151836395264, "step": 6304 }, { "epoch": 0.7650770537556122, "grad_norm": 4.1199188232421875, "learning_rate": 4.758629161036728e-06, "loss": 0.1035306304693222, "step": 6305 }, { "epoch": 0.7651983982526392, "grad_norm": 2.9349915981292725, "learning_rate": 4.756172460385702e-06, "loss": 0.2705841064453125, "step": 6306 }, { "epoch": 0.7653197427496663, "grad_norm": 3.3858180046081543, "learning_rate": 4.7537157597346766e-06, "loss": 0.2820214331150055, "step": 6307 }, { "epoch": 0.7654410872466934, "grad_norm": 0.11071504652500153, "learning_rate": 4.751259059083651e-06, "loss": 0.0012212601723149419, "step": 6308 }, { "epoch": 0.7655624317437204, "grad_norm": 3.565671920776367, "learning_rate": 4.748802358432625e-06, "loss": 0.338204026222229, "step": 6309 }, { "epoch": 0.7656837762407475, "grad_norm": 5.0238447189331055, "learning_rate": 4.746345657781599e-06, "loss": 0.2808016240596771, "step": 6310 }, { "epoch": 0.7658051207377745, "grad_norm": 2.7688677310943604, "learning_rate": 4.7438889571305744e-06, "loss": 0.17074747383594513, "step": 6311 }, { "epoch": 0.7659264652348016, "grad_norm": 2.3034141063690186, "learning_rate": 4.741432256479549e-06, "loss": 0.1067560613155365, "step": 6312 }, { "epoch": 0.7660478097318286, "grad_norm": 3.475233554840088, "learning_rate": 4.738975555828523e-06, "loss": 0.5670208930969238, "step": 6313 }, { "epoch": 0.7661691542288557, "grad_norm": 4.8230814933776855, "learning_rate": 4.736518855177497e-06, "loss": 0.3976907432079315, "step": 6314 }, { "epoch": 0.7662904987258827, "grad_norm": 2.661699056625366, "learning_rate": 4.7340621545264715e-06, "loss": 0.2819995880126953, "step": 6315 }, { "epoch": 0.7664118432229099, "grad_norm": 2.895176887512207, "learning_rate": 4.731605453875446e-06, "loss": 0.13895300030708313, "step": 6316 }, { "epoch": 0.766533187719937, "grad_norm": 2.911834716796875, "learning_rate": 4.72914875322442e-06, "loss": 0.10232560336589813, "step": 6317 }, { "epoch": 0.766654532216964, "grad_norm": 2.654876232147217, "learning_rate": 4.726692052573394e-06, "loss": 0.247539222240448, "step": 6318 }, { "epoch": 0.7667758767139911, "grad_norm": 3.643242597579956, "learning_rate": 4.7242353519223685e-06, "loss": 0.16921818256378174, "step": 6319 }, { "epoch": 0.7668972212110181, "grad_norm": 2.202592372894287, "learning_rate": 4.721778651271343e-06, "loss": 0.26234740018844604, "step": 6320 }, { "epoch": 0.7670185657080452, "grad_norm": 2.0325238704681396, "learning_rate": 4.719321950620318e-06, "loss": 0.38809943199157715, "step": 6321 }, { "epoch": 0.7671399102050722, "grad_norm": 1.6500788927078247, "learning_rate": 4.716865249969292e-06, "loss": 0.03305043280124664, "step": 6322 }, { "epoch": 0.7672612547020993, "grad_norm": 2.1645689010620117, "learning_rate": 4.714408549318266e-06, "loss": 0.031939469277858734, "step": 6323 }, { "epoch": 0.7673825991991263, "grad_norm": 1.51390540599823, "learning_rate": 4.711951848667241e-06, "loss": 0.024049704894423485, "step": 6324 }, { "epoch": 0.7675039436961534, "grad_norm": 3.959571599960327, "learning_rate": 4.709495148016215e-06, "loss": 0.5314072370529175, "step": 6325 }, { "epoch": 0.7676252881931804, "grad_norm": 2.4944803714752197, "learning_rate": 4.707038447365188e-06, "loss": 0.18289212882518768, "step": 6326 }, { "epoch": 0.7677466326902075, "grad_norm": 4.0031418800354, "learning_rate": 4.7045817467141634e-06, "loss": 0.4481150805950165, "step": 6327 }, { "epoch": 0.7678679771872345, "grad_norm": 2.282942771911621, "learning_rate": 4.702125046063138e-06, "loss": 0.2553749680519104, "step": 6328 }, { "epoch": 0.7679893216842616, "grad_norm": 2.946272611618042, "learning_rate": 4.699668345412112e-06, "loss": 0.3737369775772095, "step": 6329 }, { "epoch": 0.7681106661812886, "grad_norm": 1.5515050888061523, "learning_rate": 4.697211644761086e-06, "loss": 0.08575527369976044, "step": 6330 }, { "epoch": 0.7682320106783157, "grad_norm": 2.80452299118042, "learning_rate": 4.6947549441100605e-06, "loss": 0.5501646399497986, "step": 6331 }, { "epoch": 0.7683533551753428, "grad_norm": 2.7549021244049072, "learning_rate": 4.692298243459035e-06, "loss": 0.2560290992259979, "step": 6332 }, { "epoch": 0.7684746996723698, "grad_norm": 2.4808313846588135, "learning_rate": 4.689841542808009e-06, "loss": 0.21266111731529236, "step": 6333 }, { "epoch": 0.7685960441693969, "grad_norm": 3.2097270488739014, "learning_rate": 4.687384842156983e-06, "loss": 0.22478227317333221, "step": 6334 }, { "epoch": 0.768717388666424, "grad_norm": 2.2234067916870117, "learning_rate": 4.6849281415059575e-06, "loss": 0.5265548229217529, "step": 6335 }, { "epoch": 0.7688387331634511, "grad_norm": 1.9915642738342285, "learning_rate": 4.682471440854932e-06, "loss": 0.04436564818024635, "step": 6336 }, { "epoch": 0.7689600776604781, "grad_norm": 2.6071906089782715, "learning_rate": 4.680014740203907e-06, "loss": 0.41043955087661743, "step": 6337 }, { "epoch": 0.7690814221575052, "grad_norm": 2.709695816040039, "learning_rate": 4.677558039552881e-06, "loss": 0.13903136551380157, "step": 6338 }, { "epoch": 0.7692027666545322, "grad_norm": 4.254518985748291, "learning_rate": 4.675101338901855e-06, "loss": 0.5995197296142578, "step": 6339 }, { "epoch": 0.7693241111515593, "grad_norm": 3.6241259574890137, "learning_rate": 4.67264463825083e-06, "loss": 0.2822847366333008, "step": 6340 }, { "epoch": 0.7694454556485864, "grad_norm": 2.4329872131347656, "learning_rate": 4.670187937599804e-06, "loss": 0.0869186520576477, "step": 6341 }, { "epoch": 0.7695668001456134, "grad_norm": 1.8589963912963867, "learning_rate": 4.667731236948778e-06, "loss": 0.2684268057346344, "step": 6342 }, { "epoch": 0.7696881446426405, "grad_norm": 2.4690945148468018, "learning_rate": 4.665274536297752e-06, "loss": 0.46985679864883423, "step": 6343 }, { "epoch": 0.7698094891396675, "grad_norm": 2.575575590133667, "learning_rate": 4.662817835646727e-06, "loss": 0.3075098693370819, "step": 6344 }, { "epoch": 0.7699308336366946, "grad_norm": 3.188321590423584, "learning_rate": 4.660361134995701e-06, "loss": 0.2677619457244873, "step": 6345 }, { "epoch": 0.7700521781337216, "grad_norm": 2.645050525665283, "learning_rate": 4.657904434344675e-06, "loss": 0.3741461932659149, "step": 6346 }, { "epoch": 0.7701735226307487, "grad_norm": 2.0937888622283936, "learning_rate": 4.65544773369365e-06, "loss": 0.06885021924972534, "step": 6347 }, { "epoch": 0.7702948671277757, "grad_norm": 2.459610939025879, "learning_rate": 4.6529910330426245e-06, "loss": 0.18064071238040924, "step": 6348 }, { "epoch": 0.7704162116248028, "grad_norm": 2.4816629886627197, "learning_rate": 4.650534332391599e-06, "loss": 0.06170821934938431, "step": 6349 }, { "epoch": 0.7705375561218298, "grad_norm": 2.046984910964966, "learning_rate": 4.648077631740573e-06, "loss": 0.041270386427640915, "step": 6350 }, { "epoch": 0.7706589006188569, "grad_norm": 2.5267961025238037, "learning_rate": 4.645620931089547e-06, "loss": 0.2431546449661255, "step": 6351 }, { "epoch": 0.7707802451158839, "grad_norm": 2.0764780044555664, "learning_rate": 4.6431642304385216e-06, "loss": 0.10435756295919418, "step": 6352 }, { "epoch": 0.7709015896129111, "grad_norm": 2.6882548332214355, "learning_rate": 4.640707529787496e-06, "loss": 0.15032249689102173, "step": 6353 }, { "epoch": 0.7710229341099382, "grad_norm": 4.2935380935668945, "learning_rate": 4.63825082913647e-06, "loss": 0.33001211285591125, "step": 6354 }, { "epoch": 0.7711442786069652, "grad_norm": 3.7318320274353027, "learning_rate": 4.635794128485444e-06, "loss": 0.1465204358100891, "step": 6355 }, { "epoch": 0.7712656231039923, "grad_norm": 3.029622793197632, "learning_rate": 4.633337427834419e-06, "loss": 0.4043807089328766, "step": 6356 }, { "epoch": 0.7713869676010193, "grad_norm": 2.3448076248168945, "learning_rate": 4.630880727183394e-06, "loss": 0.3357170820236206, "step": 6357 }, { "epoch": 0.7715083120980464, "grad_norm": 2.083742380142212, "learning_rate": 4.628424026532368e-06, "loss": 0.07426615059375763, "step": 6358 }, { "epoch": 0.7716296565950734, "grad_norm": 3.853686571121216, "learning_rate": 4.625967325881342e-06, "loss": 0.21924316883087158, "step": 6359 }, { "epoch": 0.7717510010921005, "grad_norm": 3.0639350414276123, "learning_rate": 4.623510625230316e-06, "loss": 0.7467944622039795, "step": 6360 }, { "epoch": 0.7718723455891275, "grad_norm": 2.695655107498169, "learning_rate": 4.62105392457929e-06, "loss": 0.4106215834617615, "step": 6361 }, { "epoch": 0.7719936900861546, "grad_norm": 4.471068859100342, "learning_rate": 4.618597223928264e-06, "loss": 0.1481250673532486, "step": 6362 }, { "epoch": 0.7721150345831816, "grad_norm": 4.1774468421936035, "learning_rate": 4.616140523277238e-06, "loss": 0.24754458665847778, "step": 6363 }, { "epoch": 0.7722363790802087, "grad_norm": 2.6000709533691406, "learning_rate": 4.6136838226262135e-06, "loss": 0.280974805355072, "step": 6364 }, { "epoch": 0.7723577235772358, "grad_norm": 2.687857151031494, "learning_rate": 4.611227121975188e-06, "loss": 0.28935420513153076, "step": 6365 }, { "epoch": 0.7724790680742628, "grad_norm": 2.715841770172119, "learning_rate": 4.608770421324162e-06, "loss": 0.13105760514736176, "step": 6366 }, { "epoch": 0.7726004125712899, "grad_norm": 3.137078046798706, "learning_rate": 4.606313720673136e-06, "loss": 0.28405484557151794, "step": 6367 }, { "epoch": 0.7727217570683169, "grad_norm": 2.4169275760650635, "learning_rate": 4.6038570200221106e-06, "loss": 0.18494774401187897, "step": 6368 }, { "epoch": 0.772843101565344, "grad_norm": 2.4386234283447266, "learning_rate": 4.601400319371085e-06, "loss": 0.19060175120830536, "step": 6369 }, { "epoch": 0.772964446062371, "grad_norm": 2.709022283554077, "learning_rate": 4.598943618720059e-06, "loss": 0.10595878958702087, "step": 6370 }, { "epoch": 0.7730857905593981, "grad_norm": 2.6569178104400635, "learning_rate": 4.596486918069033e-06, "loss": 0.1430637091398239, "step": 6371 }, { "epoch": 0.7732071350564252, "grad_norm": 2.808230400085449, "learning_rate": 4.594030217418008e-06, "loss": 0.2833418548107147, "step": 6372 }, { "epoch": 0.7733284795534523, "grad_norm": 2.9210734367370605, "learning_rate": 4.591573516766982e-06, "loss": 0.31633836030960083, "step": 6373 }, { "epoch": 0.7734498240504794, "grad_norm": 1.723806381225586, "learning_rate": 4.589116816115957e-06, "loss": 0.22488446533679962, "step": 6374 }, { "epoch": 0.7735711685475064, "grad_norm": 3.214703321456909, "learning_rate": 4.586660115464931e-06, "loss": 0.18176576495170593, "step": 6375 }, { "epoch": 0.7736925130445335, "grad_norm": 3.420064687728882, "learning_rate": 4.5842034148139055e-06, "loss": 0.32026544213294983, "step": 6376 }, { "epoch": 0.7738138575415605, "grad_norm": 3.7068350315093994, "learning_rate": 4.58174671416288e-06, "loss": 0.08665711432695389, "step": 6377 }, { "epoch": 0.7739352020385876, "grad_norm": 2.6542892456054688, "learning_rate": 4.579290013511854e-06, "loss": 0.41907477378845215, "step": 6378 }, { "epoch": 0.7740565465356146, "grad_norm": 4.602875709533691, "learning_rate": 4.576833312860828e-06, "loss": 0.31964462995529175, "step": 6379 }, { "epoch": 0.7741778910326417, "grad_norm": 2.032029628753662, "learning_rate": 4.5743766122098025e-06, "loss": 0.3056650757789612, "step": 6380 }, { "epoch": 0.7742992355296687, "grad_norm": 3.251859188079834, "learning_rate": 4.571919911558777e-06, "loss": 0.3651753067970276, "step": 6381 }, { "epoch": 0.7744205800266958, "grad_norm": 3.0149381160736084, "learning_rate": 4.569463210907751e-06, "loss": 0.11063934862613678, "step": 6382 }, { "epoch": 0.7745419245237228, "grad_norm": 2.75785756111145, "learning_rate": 4.567006510256726e-06, "loss": 0.2814720869064331, "step": 6383 }, { "epoch": 0.7746632690207499, "grad_norm": 2.623645544052124, "learning_rate": 4.5645498096057e-06, "loss": 0.4151870906352997, "step": 6384 }, { "epoch": 0.7747846135177769, "grad_norm": 2.44284987449646, "learning_rate": 4.562093108954675e-06, "loss": 0.0524413175880909, "step": 6385 }, { "epoch": 0.774905958014804, "grad_norm": 2.279738187789917, "learning_rate": 4.559636408303649e-06, "loss": 0.33477315306663513, "step": 6386 }, { "epoch": 0.775027302511831, "grad_norm": 1.2471245527267456, "learning_rate": 4.557179707652623e-06, "loss": 0.029785610735416412, "step": 6387 }, { "epoch": 0.7751486470088581, "grad_norm": 1.907488465309143, "learning_rate": 4.554723007001597e-06, "loss": 0.176889106631279, "step": 6388 }, { "epoch": 0.7752699915058852, "grad_norm": 2.8199515342712402, "learning_rate": 4.552266306350572e-06, "loss": 0.4121607840061188, "step": 6389 }, { "epoch": 0.7753913360029122, "grad_norm": 3.453632116317749, "learning_rate": 4.549809605699546e-06, "loss": 0.22480365633964539, "step": 6390 }, { "epoch": 0.7755126804999394, "grad_norm": 2.111342668533325, "learning_rate": 4.54735290504852e-06, "loss": 0.23140795528888702, "step": 6391 }, { "epoch": 0.7756340249969664, "grad_norm": 2.4816272258758545, "learning_rate": 4.5448962043974944e-06, "loss": 0.21532276272773743, "step": 6392 }, { "epoch": 0.7757553694939935, "grad_norm": 2.2983977794647217, "learning_rate": 4.5424395037464695e-06, "loss": 0.47989997267723083, "step": 6393 }, { "epoch": 0.7758767139910205, "grad_norm": 3.2014997005462646, "learning_rate": 4.539982803095443e-06, "loss": 0.22381608188152313, "step": 6394 }, { "epoch": 0.7759980584880476, "grad_norm": 2.269134998321533, "learning_rate": 4.537526102444417e-06, "loss": 0.0469818152487278, "step": 6395 }, { "epoch": 0.7761194029850746, "grad_norm": 1.4303529262542725, "learning_rate": 4.5350694017933915e-06, "loss": 0.12142118066549301, "step": 6396 }, { "epoch": 0.7762407474821017, "grad_norm": 3.256178855895996, "learning_rate": 4.532612701142366e-06, "loss": 0.2415519803762436, "step": 6397 }, { "epoch": 0.7763620919791288, "grad_norm": 2.3224399089813232, "learning_rate": 4.53015600049134e-06, "loss": 0.240364670753479, "step": 6398 }, { "epoch": 0.7764834364761558, "grad_norm": 2.4115383625030518, "learning_rate": 4.527699299840314e-06, "loss": 0.6231740117073059, "step": 6399 }, { "epoch": 0.7766047809731829, "grad_norm": 2.7625975608825684, "learning_rate": 4.525242599189289e-06, "loss": 0.368480920791626, "step": 6400 }, { "epoch": 0.7767261254702099, "grad_norm": 6.9144744873046875, "learning_rate": 4.522785898538264e-06, "loss": 0.39374804496765137, "step": 6401 }, { "epoch": 0.776847469967237, "grad_norm": 2.810518264770508, "learning_rate": 4.520329197887238e-06, "loss": 0.15081170201301575, "step": 6402 }, { "epoch": 0.776968814464264, "grad_norm": 3.8127126693725586, "learning_rate": 4.517872497236212e-06, "loss": 0.17230099439620972, "step": 6403 }, { "epoch": 0.7770901589612911, "grad_norm": 3.6117820739746094, "learning_rate": 4.515415796585186e-06, "loss": 0.226241335272789, "step": 6404 }, { "epoch": 0.7772115034583181, "grad_norm": 6.431974411010742, "learning_rate": 4.512959095934161e-06, "loss": 0.1548989713191986, "step": 6405 }, { "epoch": 0.7773328479553452, "grad_norm": 6.107861042022705, "learning_rate": 4.510502395283135e-06, "loss": 0.3426174521446228, "step": 6406 }, { "epoch": 0.7774541924523722, "grad_norm": 2.7525339126586914, "learning_rate": 4.508045694632109e-06, "loss": 0.5501914620399475, "step": 6407 }, { "epoch": 0.7775755369493993, "grad_norm": 1.4359118938446045, "learning_rate": 4.505588993981083e-06, "loss": 0.03954862430691719, "step": 6408 }, { "epoch": 0.7776968814464265, "grad_norm": 3.8487396240234375, "learning_rate": 4.503132293330058e-06, "loss": 0.07578788697719574, "step": 6409 }, { "epoch": 0.7778182259434535, "grad_norm": 2.2612719535827637, "learning_rate": 4.500675592679033e-06, "loss": 0.16447754204273224, "step": 6410 }, { "epoch": 0.7779395704404806, "grad_norm": 0.9925763010978699, "learning_rate": 4.498218892028007e-06, "loss": 0.012776023708283901, "step": 6411 }, { "epoch": 0.7780609149375076, "grad_norm": 3.1363155841827393, "learning_rate": 4.495762191376981e-06, "loss": 0.24573159217834473, "step": 6412 }, { "epoch": 0.7781822594345347, "grad_norm": 2.9706130027770996, "learning_rate": 4.4933054907259556e-06, "loss": 0.07683251053094864, "step": 6413 }, { "epoch": 0.7783036039315617, "grad_norm": 3.12202525138855, "learning_rate": 4.49084879007493e-06, "loss": 0.29011330008506775, "step": 6414 }, { "epoch": 0.7784249484285888, "grad_norm": 3.222151279449463, "learning_rate": 4.488392089423904e-06, "loss": 0.602154016494751, "step": 6415 }, { "epoch": 0.7785462929256158, "grad_norm": 2.489774703979492, "learning_rate": 4.485935388772878e-06, "loss": 0.5674700736999512, "step": 6416 }, { "epoch": 0.7786676374226429, "grad_norm": 1.7968354225158691, "learning_rate": 4.483478688121853e-06, "loss": 0.1510230302810669, "step": 6417 }, { "epoch": 0.7787889819196699, "grad_norm": 2.298459768295288, "learning_rate": 4.481021987470827e-06, "loss": 0.21535147726535797, "step": 6418 }, { "epoch": 0.778910326416697, "grad_norm": 2.2820539474487305, "learning_rate": 4.478565286819801e-06, "loss": 0.1509256362915039, "step": 6419 }, { "epoch": 0.779031670913724, "grad_norm": 3.310863971710205, "learning_rate": 4.476108586168776e-06, "loss": 0.5143593549728394, "step": 6420 }, { "epoch": 0.7791530154107511, "grad_norm": 3.042276382446289, "learning_rate": 4.4736518855177505e-06, "loss": 0.25789931416511536, "step": 6421 }, { "epoch": 0.7792743599077782, "grad_norm": 3.8789684772491455, "learning_rate": 4.471195184866725e-06, "loss": 0.402145653963089, "step": 6422 }, { "epoch": 0.7793957044048052, "grad_norm": 3.716724157333374, "learning_rate": 4.468738484215699e-06, "loss": 0.297132283449173, "step": 6423 }, { "epoch": 0.7795170489018323, "grad_norm": 4.115288257598877, "learning_rate": 4.466281783564673e-06, "loss": 0.3783510625362396, "step": 6424 }, { "epoch": 0.7796383933988593, "grad_norm": 3.4507060050964355, "learning_rate": 4.4638250829136475e-06, "loss": 0.2643261253833771, "step": 6425 }, { "epoch": 0.7797597378958864, "grad_norm": 3.116774559020996, "learning_rate": 4.461368382262622e-06, "loss": 0.29760268330574036, "step": 6426 }, { "epoch": 0.7798810823929134, "grad_norm": 3.5407583713531494, "learning_rate": 4.458911681611596e-06, "loss": 0.4427409768104553, "step": 6427 }, { "epoch": 0.7800024268899406, "grad_norm": 3.666749954223633, "learning_rate": 4.45645498096057e-06, "loss": 0.155917227268219, "step": 6428 }, { "epoch": 0.7801237713869676, "grad_norm": 2.7522711753845215, "learning_rate": 4.4539982803095445e-06, "loss": 0.1973097175359726, "step": 6429 }, { "epoch": 0.7802451158839947, "grad_norm": 3.774338483810425, "learning_rate": 4.451541579658519e-06, "loss": 0.6611090898513794, "step": 6430 }, { "epoch": 0.7803664603810218, "grad_norm": 2.5514419078826904, "learning_rate": 4.449084879007493e-06, "loss": 0.12756521999835968, "step": 6431 }, { "epoch": 0.7804878048780488, "grad_norm": 2.4818921089172363, "learning_rate": 4.446628178356467e-06, "loss": 0.12032885104417801, "step": 6432 }, { "epoch": 0.7806091493750759, "grad_norm": 3.407658100128174, "learning_rate": 4.4441714777054416e-06, "loss": 0.23678940534591675, "step": 6433 }, { "epoch": 0.7807304938721029, "grad_norm": 2.232343912124634, "learning_rate": 4.441714777054416e-06, "loss": 0.1967906802892685, "step": 6434 }, { "epoch": 0.78085183836913, "grad_norm": 1.2758965492248535, "learning_rate": 4.43925807640339e-06, "loss": 0.019335933029651642, "step": 6435 }, { "epoch": 0.780973182866157, "grad_norm": 2.5265238285064697, "learning_rate": 4.436801375752365e-06, "loss": 0.22825980186462402, "step": 6436 }, { "epoch": 0.7810945273631841, "grad_norm": 4.378757953643799, "learning_rate": 4.4343446751013395e-06, "loss": 0.6585421562194824, "step": 6437 }, { "epoch": 0.7812158718602111, "grad_norm": 3.281132936477661, "learning_rate": 4.431887974450314e-06, "loss": 0.38722264766693115, "step": 6438 }, { "epoch": 0.7813372163572382, "grad_norm": 3.119901657104492, "learning_rate": 4.429431273799288e-06, "loss": 0.43782442808151245, "step": 6439 }, { "epoch": 0.7814585608542652, "grad_norm": 1.9686561822891235, "learning_rate": 4.426974573148262e-06, "loss": 0.1010037511587143, "step": 6440 }, { "epoch": 0.7815799053512923, "grad_norm": 3.366628646850586, "learning_rate": 4.4245178724972365e-06, "loss": 0.2785365879535675, "step": 6441 }, { "epoch": 0.7817012498483193, "grad_norm": 2.3017587661743164, "learning_rate": 4.422061171846211e-06, "loss": 0.12744954228401184, "step": 6442 }, { "epoch": 0.7818225943453464, "grad_norm": 1.909255862236023, "learning_rate": 4.419604471195185e-06, "loss": 0.1448153853416443, "step": 6443 }, { "epoch": 0.7819439388423735, "grad_norm": 2.247882127761841, "learning_rate": 4.417147770544159e-06, "loss": 0.3933148682117462, "step": 6444 }, { "epoch": 0.7820652833394005, "grad_norm": 4.377729415893555, "learning_rate": 4.4146910698931335e-06, "loss": 0.4097541868686676, "step": 6445 }, { "epoch": 0.7821866278364277, "grad_norm": 3.1902618408203125, "learning_rate": 4.412234369242109e-06, "loss": 0.147195503115654, "step": 6446 }, { "epoch": 0.7823079723334547, "grad_norm": 2.6920559406280518, "learning_rate": 4.409777668591083e-06, "loss": 0.5535942912101746, "step": 6447 }, { "epoch": 0.7824293168304818, "grad_norm": 2.914238452911377, "learning_rate": 4.407320967940057e-06, "loss": 0.39960941672325134, "step": 6448 }, { "epoch": 0.7825506613275088, "grad_norm": 2.2160208225250244, "learning_rate": 4.404864267289031e-06, "loss": 0.3003831207752228, "step": 6449 }, { "epoch": 0.7826720058245359, "grad_norm": 2.4591715335845947, "learning_rate": 4.402407566638006e-06, "loss": 0.24475690722465515, "step": 6450 }, { "epoch": 0.7827933503215629, "grad_norm": 1.6534250974655151, "learning_rate": 4.39995086598698e-06, "loss": 0.1438816338777542, "step": 6451 }, { "epoch": 0.78291469481859, "grad_norm": 1.74684476852417, "learning_rate": 4.397494165335954e-06, "loss": 0.1886909455060959, "step": 6452 }, { "epoch": 0.783036039315617, "grad_norm": 4.324092864990234, "learning_rate": 4.3950374646849284e-06, "loss": 0.20751282572746277, "step": 6453 }, { "epoch": 0.7831573838126441, "grad_norm": 1.834557056427002, "learning_rate": 4.392580764033903e-06, "loss": 0.18888777494430542, "step": 6454 }, { "epoch": 0.7832787283096712, "grad_norm": 2.1161882877349854, "learning_rate": 4.390124063382877e-06, "loss": 0.04564416781067848, "step": 6455 }, { "epoch": 0.7834000728066982, "grad_norm": 1.8820161819458008, "learning_rate": 4.387667362731852e-06, "loss": 0.1277010440826416, "step": 6456 }, { "epoch": 0.7835214173037253, "grad_norm": 3.432955741882324, "learning_rate": 4.385210662080826e-06, "loss": 0.3815966844558716, "step": 6457 }, { "epoch": 0.7836427618007523, "grad_norm": 3.6532607078552246, "learning_rate": 4.3827539614298006e-06, "loss": 0.6250628232955933, "step": 6458 }, { "epoch": 0.7837641062977794, "grad_norm": 4.1647772789001465, "learning_rate": 4.380297260778775e-06, "loss": 0.27042707800865173, "step": 6459 }, { "epoch": 0.7838854507948064, "grad_norm": 2.3568685054779053, "learning_rate": 4.377840560127749e-06, "loss": 0.18345856666564941, "step": 6460 }, { "epoch": 0.7840067952918335, "grad_norm": 2.6344447135925293, "learning_rate": 4.375383859476723e-06, "loss": 0.11850124597549438, "step": 6461 }, { "epoch": 0.7841281397888605, "grad_norm": 2.1554629802703857, "learning_rate": 4.372927158825698e-06, "loss": 0.13863715529441833, "step": 6462 }, { "epoch": 0.7842494842858876, "grad_norm": 3.2373669147491455, "learning_rate": 4.370470458174672e-06, "loss": 0.3511412739753723, "step": 6463 }, { "epoch": 0.7843708287829146, "grad_norm": 1.8582392930984497, "learning_rate": 4.368013757523646e-06, "loss": 0.027220167219638824, "step": 6464 }, { "epoch": 0.7844921732799418, "grad_norm": 0.18131950497627258, "learning_rate": 4.36555705687262e-06, "loss": 0.0016064320225268602, "step": 6465 }, { "epoch": 0.7846135177769689, "grad_norm": 2.867327928543091, "learning_rate": 4.363100356221595e-06, "loss": 0.06312599778175354, "step": 6466 }, { "epoch": 0.7847348622739959, "grad_norm": 4.367501735687256, "learning_rate": 4.360643655570569e-06, "loss": 0.159521222114563, "step": 6467 }, { "epoch": 0.784856206771023, "grad_norm": 2.039729118347168, "learning_rate": 4.358186954919543e-06, "loss": 0.09699828922748566, "step": 6468 }, { "epoch": 0.78497755126805, "grad_norm": 2.3016397953033447, "learning_rate": 4.355730254268517e-06, "loss": 0.3210980296134949, "step": 6469 }, { "epoch": 0.7850988957650771, "grad_norm": 3.2037105560302734, "learning_rate": 4.353273553617492e-06, "loss": 0.38871103525161743, "step": 6470 }, { "epoch": 0.7852202402621041, "grad_norm": 2.2801473140716553, "learning_rate": 4.350816852966466e-06, "loss": 0.5368230938911438, "step": 6471 }, { "epoch": 0.7853415847591312, "grad_norm": 3.4872708320617676, "learning_rate": 4.348360152315441e-06, "loss": 0.2184293419122696, "step": 6472 }, { "epoch": 0.7854629292561582, "grad_norm": 2.6147866249084473, "learning_rate": 4.345903451664415e-06, "loss": 0.22878122329711914, "step": 6473 }, { "epoch": 0.7855842737531853, "grad_norm": 0.02401047945022583, "learning_rate": 4.3434467510133895e-06, "loss": 0.0003122301131952554, "step": 6474 }, { "epoch": 0.7857056182502123, "grad_norm": 3.5741004943847656, "learning_rate": 4.340990050362364e-06, "loss": 0.258160799741745, "step": 6475 }, { "epoch": 0.7858269627472394, "grad_norm": 2.3386569023132324, "learning_rate": 4.338533349711338e-06, "loss": 0.43861427903175354, "step": 6476 }, { "epoch": 0.7859483072442665, "grad_norm": 3.5124711990356445, "learning_rate": 4.336076649060312e-06, "loss": 0.23719018697738647, "step": 6477 }, { "epoch": 0.7860696517412935, "grad_norm": 2.767235040664673, "learning_rate": 4.333619948409287e-06, "loss": 0.14562003314495087, "step": 6478 }, { "epoch": 0.7861909962383206, "grad_norm": 6.545829772949219, "learning_rate": 4.331163247758261e-06, "loss": 0.3260343074798584, "step": 6479 }, { "epoch": 0.7863123407353476, "grad_norm": 3.2027957439422607, "learning_rate": 4.328706547107235e-06, "loss": 0.30140048265457153, "step": 6480 }, { "epoch": 0.7864336852323747, "grad_norm": 2.2336976528167725, "learning_rate": 4.326249846456209e-06, "loss": 0.27698373794555664, "step": 6481 }, { "epoch": 0.7865550297294017, "grad_norm": 1.818571925163269, "learning_rate": 4.3237931458051845e-06, "loss": 0.034028589725494385, "step": 6482 }, { "epoch": 0.7866763742264288, "grad_norm": 5.775863170623779, "learning_rate": 4.321336445154159e-06, "loss": 0.4969641864299774, "step": 6483 }, { "epoch": 0.7867977187234559, "grad_norm": 2.0215463638305664, "learning_rate": 4.318879744503133e-06, "loss": 0.09396806359291077, "step": 6484 }, { "epoch": 0.786919063220483, "grad_norm": 3.610365629196167, "learning_rate": 4.316423043852107e-06, "loss": 0.1636180430650711, "step": 6485 }, { "epoch": 0.78704040771751, "grad_norm": 2.7774477005004883, "learning_rate": 4.3139663432010815e-06, "loss": 0.32300931215286255, "step": 6486 }, { "epoch": 0.7871617522145371, "grad_norm": 4.054925918579102, "learning_rate": 4.311509642550056e-06, "loss": 0.3204984664916992, "step": 6487 }, { "epoch": 0.7872830967115642, "grad_norm": 2.3974671363830566, "learning_rate": 4.30905294189903e-06, "loss": 0.043223824352025986, "step": 6488 }, { "epoch": 0.7874044412085912, "grad_norm": 1.3574353456497192, "learning_rate": 4.306596241248004e-06, "loss": 0.11758372187614441, "step": 6489 }, { "epoch": 0.7875257857056183, "grad_norm": 3.1383309364318848, "learning_rate": 4.3041395405969785e-06, "loss": 0.2852936089038849, "step": 6490 }, { "epoch": 0.7876471302026453, "grad_norm": 3.736408233642578, "learning_rate": 4.301682839945953e-06, "loss": 0.2789285182952881, "step": 6491 }, { "epoch": 0.7877684746996724, "grad_norm": 3.101489305496216, "learning_rate": 4.299226139294928e-06, "loss": 0.21540771424770355, "step": 6492 }, { "epoch": 0.7878898191966994, "grad_norm": 2.926675796508789, "learning_rate": 4.296769438643902e-06, "loss": 0.24694085121154785, "step": 6493 }, { "epoch": 0.7880111636937265, "grad_norm": 2.558529853820801, "learning_rate": 4.294312737992876e-06, "loss": 0.2887660562992096, "step": 6494 }, { "epoch": 0.7881325081907535, "grad_norm": 1.5882494449615479, "learning_rate": 4.291856037341851e-06, "loss": 0.08104268461465836, "step": 6495 }, { "epoch": 0.7882538526877806, "grad_norm": 3.0380935668945312, "learning_rate": 4.289399336690824e-06, "loss": 0.06264898180961609, "step": 6496 }, { "epoch": 0.7883751971848076, "grad_norm": 3.3185455799102783, "learning_rate": 4.286942636039798e-06, "loss": 0.32205671072006226, "step": 6497 }, { "epoch": 0.7884965416818347, "grad_norm": 1.497285008430481, "learning_rate": 4.284485935388773e-06, "loss": 0.23415257036685944, "step": 6498 }, { "epoch": 0.7886178861788617, "grad_norm": 2.515693187713623, "learning_rate": 4.282029234737748e-06, "loss": 0.1302194595336914, "step": 6499 }, { "epoch": 0.7887392306758888, "grad_norm": 2.94808292388916, "learning_rate": 4.279572534086722e-06, "loss": 0.24327217042446136, "step": 6500 }, { "epoch": 0.7888605751729159, "grad_norm": 3.796133041381836, "learning_rate": 4.277115833435696e-06, "loss": 0.39262670278549194, "step": 6501 }, { "epoch": 0.788981919669943, "grad_norm": 2.049665689468384, "learning_rate": 4.2746591327846705e-06, "loss": 0.05918196961283684, "step": 6502 }, { "epoch": 0.7891032641669701, "grad_norm": 2.8294785022735596, "learning_rate": 4.272202432133645e-06, "loss": 0.4330283999443054, "step": 6503 }, { "epoch": 0.7892246086639971, "grad_norm": 5.363167762756348, "learning_rate": 4.269745731482619e-06, "loss": 0.2413649708032608, "step": 6504 }, { "epoch": 0.7893459531610242, "grad_norm": 2.153827667236328, "learning_rate": 4.267289030831593e-06, "loss": 0.07489977777004242, "step": 6505 }, { "epoch": 0.7894672976580512, "grad_norm": 2.361706256866455, "learning_rate": 4.2648323301805675e-06, "loss": 0.14649678766727448, "step": 6506 }, { "epoch": 0.7895886421550783, "grad_norm": 0.3576081097126007, "learning_rate": 4.262375629529542e-06, "loss": 0.002649220870807767, "step": 6507 }, { "epoch": 0.7897099866521053, "grad_norm": 2.6648709774017334, "learning_rate": 4.259918928878516e-06, "loss": 0.20227254927158356, "step": 6508 }, { "epoch": 0.7898313311491324, "grad_norm": 2.972090005874634, "learning_rate": 4.257462228227491e-06, "loss": 0.28047892451286316, "step": 6509 }, { "epoch": 0.7899526756461595, "grad_norm": 3.274681329727173, "learning_rate": 4.255005527576465e-06, "loss": 0.3691864311695099, "step": 6510 }, { "epoch": 0.7900740201431865, "grad_norm": 0.520077645778656, "learning_rate": 4.25254882692544e-06, "loss": 0.010266249999403954, "step": 6511 }, { "epoch": 0.7901953646402136, "grad_norm": 2.475417137145996, "learning_rate": 4.250092126274414e-06, "loss": 0.24465353786945343, "step": 6512 }, { "epoch": 0.7903167091372406, "grad_norm": 1.7395836114883423, "learning_rate": 4.247635425623388e-06, "loss": 0.10792295634746552, "step": 6513 }, { "epoch": 0.7904380536342677, "grad_norm": 1.0599745512008667, "learning_rate": 4.245178724972362e-06, "loss": 0.01674012281000614, "step": 6514 }, { "epoch": 0.7905593981312947, "grad_norm": 1.9243594408035278, "learning_rate": 4.242722024321337e-06, "loss": 0.15234003961086273, "step": 6515 }, { "epoch": 0.7906807426283218, "grad_norm": 3.3193655014038086, "learning_rate": 4.240265323670311e-06, "loss": 0.31402426958084106, "step": 6516 }, { "epoch": 0.7908020871253488, "grad_norm": 1.8793197870254517, "learning_rate": 4.237808623019285e-06, "loss": 0.12571895122528076, "step": 6517 }, { "epoch": 0.7909234316223759, "grad_norm": 2.8956828117370605, "learning_rate": 4.23535192236826e-06, "loss": 0.11954250186681747, "step": 6518 }, { "epoch": 0.7910447761194029, "grad_norm": 1.9595601558685303, "learning_rate": 4.2328952217172346e-06, "loss": 0.09080285578966141, "step": 6519 }, { "epoch": 0.79116612061643, "grad_norm": 1.3676323890686035, "learning_rate": 4.230438521066209e-06, "loss": 0.04919954389333725, "step": 6520 }, { "epoch": 0.7912874651134572, "grad_norm": 3.5600767135620117, "learning_rate": 4.227981820415183e-06, "loss": 0.2768237590789795, "step": 6521 }, { "epoch": 0.7914088096104842, "grad_norm": 1.9124046564102173, "learning_rate": 4.225525119764157e-06, "loss": 0.3597565293312073, "step": 6522 }, { "epoch": 0.7915301541075113, "grad_norm": 3.8310134410858154, "learning_rate": 4.223068419113132e-06, "loss": 0.274907648563385, "step": 6523 }, { "epoch": 0.7916514986045383, "grad_norm": 3.0598886013031006, "learning_rate": 4.220611718462106e-06, "loss": 0.27484413981437683, "step": 6524 }, { "epoch": 0.7917728431015654, "grad_norm": 2.0933125019073486, "learning_rate": 4.21815501781108e-06, "loss": 0.29507148265838623, "step": 6525 }, { "epoch": 0.7918941875985924, "grad_norm": 3.1035170555114746, "learning_rate": 4.215698317160054e-06, "loss": 0.35656148195266724, "step": 6526 }, { "epoch": 0.7920155320956195, "grad_norm": 2.4212398529052734, "learning_rate": 4.213241616509029e-06, "loss": 0.08589452505111694, "step": 6527 }, { "epoch": 0.7921368765926465, "grad_norm": 1.9937949180603027, "learning_rate": 4.210784915858004e-06, "loss": 0.18196922540664673, "step": 6528 }, { "epoch": 0.7922582210896736, "grad_norm": 3.013063430786133, "learning_rate": 4.208328215206977e-06, "loss": 0.21820466220378876, "step": 6529 }, { "epoch": 0.7923795655867006, "grad_norm": 1.7275670766830444, "learning_rate": 4.205871514555951e-06, "loss": 0.1533723920583725, "step": 6530 }, { "epoch": 0.7925009100837277, "grad_norm": 1.9667290449142456, "learning_rate": 4.203414813904926e-06, "loss": 0.07888812571763992, "step": 6531 }, { "epoch": 0.7926222545807547, "grad_norm": 2.965832471847534, "learning_rate": 4.2009581132539e-06, "loss": 0.3979637622833252, "step": 6532 }, { "epoch": 0.7927435990777818, "grad_norm": 3.166045665740967, "learning_rate": 4.198501412602874e-06, "loss": 0.2521720230579376, "step": 6533 }, { "epoch": 0.7928649435748089, "grad_norm": 2.2923505306243896, "learning_rate": 4.1960447119518484e-06, "loss": 0.2617327868938446, "step": 6534 }, { "epoch": 0.7929862880718359, "grad_norm": 2.497920513153076, "learning_rate": 4.1935880113008235e-06, "loss": 0.10758870095014572, "step": 6535 }, { "epoch": 0.793107632568863, "grad_norm": 4.368521213531494, "learning_rate": 4.191131310649798e-06, "loss": 0.6334824562072754, "step": 6536 }, { "epoch": 0.79322897706589, "grad_norm": 2.7320990562438965, "learning_rate": 4.188674609998772e-06, "loss": 0.14693842828273773, "step": 6537 }, { "epoch": 0.7933503215629171, "grad_norm": 3.197514772415161, "learning_rate": 4.186217909347746e-06, "loss": 0.4030400216579437, "step": 6538 }, { "epoch": 0.7934716660599442, "grad_norm": 3.243173599243164, "learning_rate": 4.1837612086967206e-06, "loss": 0.27274659276008606, "step": 6539 }, { "epoch": 0.7935930105569713, "grad_norm": 1.5146145820617676, "learning_rate": 4.181304508045695e-06, "loss": 0.004353052005171776, "step": 6540 }, { "epoch": 0.7937143550539983, "grad_norm": 2.3552823066711426, "learning_rate": 4.178847807394669e-06, "loss": 0.1369583159685135, "step": 6541 }, { "epoch": 0.7938356995510254, "grad_norm": 2.861161708831787, "learning_rate": 4.176391106743643e-06, "loss": 0.17817439138889313, "step": 6542 }, { "epoch": 0.7939570440480525, "grad_norm": 6.095719814300537, "learning_rate": 4.173934406092618e-06, "loss": 0.18237227201461792, "step": 6543 }, { "epoch": 0.7940783885450795, "grad_norm": 3.0768039226531982, "learning_rate": 4.171477705441592e-06, "loss": 0.23579031229019165, "step": 6544 }, { "epoch": 0.7941997330421066, "grad_norm": 3.3174545764923096, "learning_rate": 4.169021004790567e-06, "loss": 0.2707357704639435, "step": 6545 }, { "epoch": 0.7943210775391336, "grad_norm": 3.117645740509033, "learning_rate": 4.166564304139541e-06, "loss": 0.5517486333847046, "step": 6546 }, { "epoch": 0.7944424220361607, "grad_norm": 2.8909337520599365, "learning_rate": 4.1641076034885155e-06, "loss": 0.1638697385787964, "step": 6547 }, { "epoch": 0.7945637665331877, "grad_norm": 3.41280460357666, "learning_rate": 4.16165090283749e-06, "loss": 0.3926564157009125, "step": 6548 }, { "epoch": 0.7946851110302148, "grad_norm": 2.3228983879089355, "learning_rate": 4.159194202186464e-06, "loss": 0.13678133487701416, "step": 6549 }, { "epoch": 0.7948064555272418, "grad_norm": 3.6135988235473633, "learning_rate": 4.156737501535438e-06, "loss": 0.36513572931289673, "step": 6550 }, { "epoch": 0.7949278000242689, "grad_norm": 0.8045632839202881, "learning_rate": 4.1542808008844125e-06, "loss": 0.017242571339011192, "step": 6551 }, { "epoch": 0.7950491445212959, "grad_norm": 3.0983500480651855, "learning_rate": 4.151824100233387e-06, "loss": 0.27598631381988525, "step": 6552 }, { "epoch": 0.795170489018323, "grad_norm": 2.038543939590454, "learning_rate": 4.149367399582361e-06, "loss": 0.2141578048467636, "step": 6553 }, { "epoch": 0.79529183351535, "grad_norm": 3.480186700820923, "learning_rate": 4.146910698931335e-06, "loss": 0.74944669008255, "step": 6554 }, { "epoch": 0.7954131780123771, "grad_norm": 2.039120674133301, "learning_rate": 4.14445399828031e-06, "loss": 0.2185424119234085, "step": 6555 }, { "epoch": 0.7955345225094042, "grad_norm": 2.8920652866363525, "learning_rate": 4.141997297629285e-06, "loss": 0.3972042202949524, "step": 6556 }, { "epoch": 0.7956558670064312, "grad_norm": 3.2139415740966797, "learning_rate": 4.139540596978259e-06, "loss": 0.09744603931903839, "step": 6557 }, { "epoch": 0.7957772115034584, "grad_norm": 3.994015693664551, "learning_rate": 4.137083896327233e-06, "loss": 0.2610197961330414, "step": 6558 }, { "epoch": 0.7958985560004854, "grad_norm": 6.436521053314209, "learning_rate": 4.134627195676207e-06, "loss": 0.4498376250267029, "step": 6559 }, { "epoch": 0.7960199004975125, "grad_norm": 2.878349781036377, "learning_rate": 4.132170495025182e-06, "loss": 0.27656057476997375, "step": 6560 }, { "epoch": 0.7961412449945395, "grad_norm": 4.326077938079834, "learning_rate": 4.129713794374156e-06, "loss": 0.2992628216743469, "step": 6561 }, { "epoch": 0.7962625894915666, "grad_norm": 2.7669126987457275, "learning_rate": 4.12725709372313e-06, "loss": 0.14854852855205536, "step": 6562 }, { "epoch": 0.7963839339885936, "grad_norm": 2.4812045097351074, "learning_rate": 4.1248003930721045e-06, "loss": 0.45607486367225647, "step": 6563 }, { "epoch": 0.7965052784856207, "grad_norm": 3.5426836013793945, "learning_rate": 4.122343692421079e-06, "loss": 0.166934534907341, "step": 6564 }, { "epoch": 0.7966266229826477, "grad_norm": 3.294287919998169, "learning_rate": 4.119886991770053e-06, "loss": 0.1658996045589447, "step": 6565 }, { "epoch": 0.7967479674796748, "grad_norm": 2.937004566192627, "learning_rate": 4.117430291119027e-06, "loss": 0.14486131072044373, "step": 6566 }, { "epoch": 0.7968693119767019, "grad_norm": 4.61464786529541, "learning_rate": 4.1149735904680015e-06, "loss": 0.32321280241012573, "step": 6567 }, { "epoch": 0.7969906564737289, "grad_norm": 1.410923719406128, "learning_rate": 4.112516889816976e-06, "loss": 0.04748472571372986, "step": 6568 }, { "epoch": 0.797112000970756, "grad_norm": 3.4198832511901855, "learning_rate": 4.11006018916595e-06, "loss": 0.07758872210979462, "step": 6569 }, { "epoch": 0.797233345467783, "grad_norm": 2.980977773666382, "learning_rate": 4.107603488514924e-06, "loss": 0.21952246129512787, "step": 6570 }, { "epoch": 0.7973546899648101, "grad_norm": 2.6943843364715576, "learning_rate": 4.105146787863899e-06, "loss": 0.21312905848026276, "step": 6571 }, { "epoch": 0.7974760344618371, "grad_norm": 3.4871673583984375, "learning_rate": 4.102690087212874e-06, "loss": 0.570946991443634, "step": 6572 }, { "epoch": 0.7975973789588642, "grad_norm": 2.8352138996124268, "learning_rate": 4.100233386561848e-06, "loss": 0.3428955674171448, "step": 6573 }, { "epoch": 0.7977187234558912, "grad_norm": 3.3398468494415283, "learning_rate": 4.097776685910822e-06, "loss": 0.7798470258712769, "step": 6574 }, { "epoch": 0.7978400679529183, "grad_norm": 3.0020010471343994, "learning_rate": 4.095319985259796e-06, "loss": 0.3725772798061371, "step": 6575 }, { "epoch": 0.7979614124499453, "grad_norm": 3.6366734504699707, "learning_rate": 4.092863284608771e-06, "loss": 0.48107102513313293, "step": 6576 }, { "epoch": 0.7980827569469725, "grad_norm": 3.3437814712524414, "learning_rate": 4.090406583957745e-06, "loss": 0.5270974636077881, "step": 6577 }, { "epoch": 0.7982041014439996, "grad_norm": 1.6223424673080444, "learning_rate": 4.087949883306719e-06, "loss": 0.030853355303406715, "step": 6578 }, { "epoch": 0.7983254459410266, "grad_norm": 2.7995445728302, "learning_rate": 4.0854931826556934e-06, "loss": 0.3250923752784729, "step": 6579 }, { "epoch": 0.7984467904380537, "grad_norm": 3.901015281677246, "learning_rate": 4.083036482004668e-06, "loss": 0.26171764731407166, "step": 6580 }, { "epoch": 0.7985681349350807, "grad_norm": 2.5968594551086426, "learning_rate": 4.080579781353643e-06, "loss": 0.20191214978694916, "step": 6581 }, { "epoch": 0.7986894794321078, "grad_norm": 2.2633583545684814, "learning_rate": 4.078123080702617e-06, "loss": 0.199258491396904, "step": 6582 }, { "epoch": 0.7988108239291348, "grad_norm": 1.6461598873138428, "learning_rate": 4.075666380051591e-06, "loss": 0.18728569149971008, "step": 6583 }, { "epoch": 0.7989321684261619, "grad_norm": 1.783345341682434, "learning_rate": 4.0732096794005656e-06, "loss": 0.3982468545436859, "step": 6584 }, { "epoch": 0.7990535129231889, "grad_norm": 3.13999342918396, "learning_rate": 4.07075297874954e-06, "loss": 0.6492754220962524, "step": 6585 }, { "epoch": 0.799174857420216, "grad_norm": 3.020235061645508, "learning_rate": 4.068296278098514e-06, "loss": 0.19440317153930664, "step": 6586 }, { "epoch": 0.799296201917243, "grad_norm": 3.0852584838867188, "learning_rate": 4.065839577447488e-06, "loss": 0.19910934567451477, "step": 6587 }, { "epoch": 0.7994175464142701, "grad_norm": 2.4017696380615234, "learning_rate": 4.063382876796463e-06, "loss": 0.27799099683761597, "step": 6588 }, { "epoch": 0.7995388909112972, "grad_norm": 2.7164597511291504, "learning_rate": 4.060926176145437e-06, "loss": 0.4979640543460846, "step": 6589 }, { "epoch": 0.7996602354083242, "grad_norm": 1.026125431060791, "learning_rate": 4.058469475494411e-06, "loss": 0.011569774709641933, "step": 6590 }, { "epoch": 0.7997815799053513, "grad_norm": 3.7451164722442627, "learning_rate": 4.056012774843386e-06, "loss": 0.5459825992584229, "step": 6591 }, { "epoch": 0.7999029244023783, "grad_norm": 0.9381096959114075, "learning_rate": 4.0535560741923605e-06, "loss": 0.0311014112085104, "step": 6592 }, { "epoch": 0.8000242688994054, "grad_norm": 2.708676338195801, "learning_rate": 4.051099373541335e-06, "loss": 0.11515533924102783, "step": 6593 }, { "epoch": 0.8001456133964324, "grad_norm": 1.5754221677780151, "learning_rate": 4.048642672890309e-06, "loss": 0.03958265110850334, "step": 6594 }, { "epoch": 0.8002669578934596, "grad_norm": 4.521170139312744, "learning_rate": 4.046185972239283e-06, "loss": 0.18150576949119568, "step": 6595 }, { "epoch": 0.8003883023904866, "grad_norm": 1.71464204788208, "learning_rate": 4.0437292715882575e-06, "loss": 0.22202780842781067, "step": 6596 }, { "epoch": 0.8005096468875137, "grad_norm": 1.8283807039260864, "learning_rate": 4.041272570937232e-06, "loss": 0.03156605735421181, "step": 6597 }, { "epoch": 0.8006309913845407, "grad_norm": 2.180387496948242, "learning_rate": 4.038815870286206e-06, "loss": 0.21059465408325195, "step": 6598 }, { "epoch": 0.8007523358815678, "grad_norm": 2.6369519233703613, "learning_rate": 4.03635916963518e-06, "loss": 0.3531011939048767, "step": 6599 }, { "epoch": 0.8008736803785949, "grad_norm": 2.6418352127075195, "learning_rate": 4.0339024689841545e-06, "loss": 0.3543723523616791, "step": 6600 }, { "epoch": 0.8009950248756219, "grad_norm": 3.0131137371063232, "learning_rate": 4.031445768333129e-06, "loss": 0.5975422859191895, "step": 6601 }, { "epoch": 0.801116369372649, "grad_norm": 1.9909241199493408, "learning_rate": 4.028989067682103e-06, "loss": 0.049935679882764816, "step": 6602 }, { "epoch": 0.801237713869676, "grad_norm": 2.8487932682037354, "learning_rate": 4.026532367031077e-06, "loss": 0.06786294281482697, "step": 6603 }, { "epoch": 0.8013590583667031, "grad_norm": 2.6494598388671875, "learning_rate": 4.024075666380052e-06, "loss": 0.46533340215682983, "step": 6604 }, { "epoch": 0.8014804028637301, "grad_norm": 0.005126302130520344, "learning_rate": 4.021618965729026e-06, "loss": 5.521670755115338e-05, "step": 6605 }, { "epoch": 0.8016017473607572, "grad_norm": 4.548635959625244, "learning_rate": 4.019162265078e-06, "loss": 0.4114702343940735, "step": 6606 }, { "epoch": 0.8017230918577842, "grad_norm": 1.4289395809173584, "learning_rate": 4.016705564426975e-06, "loss": 0.019071076065301895, "step": 6607 }, { "epoch": 0.8018444363548113, "grad_norm": 3.109151601791382, "learning_rate": 4.0142488637759495e-06, "loss": 0.24891160428524017, "step": 6608 }, { "epoch": 0.8019657808518383, "grad_norm": 4.29478645324707, "learning_rate": 4.011792163124924e-06, "loss": 0.1579393595457077, "step": 6609 }, { "epoch": 0.8020871253488654, "grad_norm": 3.1532704830169678, "learning_rate": 4.009335462473898e-06, "loss": 0.30874794721603394, "step": 6610 }, { "epoch": 0.8022084698458924, "grad_norm": 3.2752685546875, "learning_rate": 4.006878761822872e-06, "loss": 0.5039411783218384, "step": 6611 }, { "epoch": 0.8023298143429195, "grad_norm": 3.102611541748047, "learning_rate": 4.0044220611718465e-06, "loss": 0.2438671588897705, "step": 6612 }, { "epoch": 0.8024511588399466, "grad_norm": 1.083940029144287, "learning_rate": 4.001965360520821e-06, "loss": 0.06349487602710724, "step": 6613 }, { "epoch": 0.8025725033369737, "grad_norm": 2.1552391052246094, "learning_rate": 3.999508659869795e-06, "loss": 0.07114855945110321, "step": 6614 }, { "epoch": 0.8026938478340008, "grad_norm": 1.7562923431396484, "learning_rate": 3.997051959218769e-06, "loss": 0.18720519542694092, "step": 6615 }, { "epoch": 0.8028151923310278, "grad_norm": 3.799426555633545, "learning_rate": 3.9945952585677435e-06, "loss": 0.22640950977802277, "step": 6616 }, { "epoch": 0.8029365368280549, "grad_norm": 1.239338755607605, "learning_rate": 3.992138557916719e-06, "loss": 0.03580031543970108, "step": 6617 }, { "epoch": 0.8030578813250819, "grad_norm": 3.7889976501464844, "learning_rate": 3.989681857265693e-06, "loss": 0.24766330420970917, "step": 6618 }, { "epoch": 0.803179225822109, "grad_norm": 1.6296659708023071, "learning_rate": 3.987225156614667e-06, "loss": 0.07303915172815323, "step": 6619 }, { "epoch": 0.803300570319136, "grad_norm": 2.0196661949157715, "learning_rate": 3.984768455963641e-06, "loss": 0.06441037356853485, "step": 6620 }, { "epoch": 0.8034219148161631, "grad_norm": 3.695604085922241, "learning_rate": 3.982311755312616e-06, "loss": 0.26378822326660156, "step": 6621 }, { "epoch": 0.8035432593131902, "grad_norm": 3.2157745361328125, "learning_rate": 3.97985505466159e-06, "loss": 0.35270118713378906, "step": 6622 }, { "epoch": 0.8036646038102172, "grad_norm": 2.7108592987060547, "learning_rate": 3.977398354010564e-06, "loss": 0.07957126945257187, "step": 6623 }, { "epoch": 0.8037859483072443, "grad_norm": 0.08923215419054031, "learning_rate": 3.9749416533595384e-06, "loss": 0.0010781866731122136, "step": 6624 }, { "epoch": 0.8039072928042713, "grad_norm": 3.511784791946411, "learning_rate": 3.972484952708513e-06, "loss": 0.4331079423427582, "step": 6625 }, { "epoch": 0.8040286373012984, "grad_norm": 2.711080551147461, "learning_rate": 3.970028252057487e-06, "loss": 0.21829015016555786, "step": 6626 }, { "epoch": 0.8041499817983254, "grad_norm": 2.5576412677764893, "learning_rate": 3.967571551406462e-06, "loss": 0.12248795479536057, "step": 6627 }, { "epoch": 0.8042713262953525, "grad_norm": 3.0151665210723877, "learning_rate": 3.965114850755436e-06, "loss": 0.19137531518936157, "step": 6628 }, { "epoch": 0.8043926707923795, "grad_norm": 6.75675630569458, "learning_rate": 3.9626581501044106e-06, "loss": 0.48259443044662476, "step": 6629 }, { "epoch": 0.8045140152894066, "grad_norm": 2.458219528198242, "learning_rate": 3.960201449453385e-06, "loss": 0.04076368361711502, "step": 6630 }, { "epoch": 0.8046353597864336, "grad_norm": 3.9970200061798096, "learning_rate": 3.957744748802358e-06, "loss": 0.39954903721809387, "step": 6631 }, { "epoch": 0.8047567042834608, "grad_norm": 2.065253734588623, "learning_rate": 3.9552880481513325e-06, "loss": 0.05017009377479553, "step": 6632 }, { "epoch": 0.8048780487804879, "grad_norm": 3.1867516040802, "learning_rate": 3.952831347500307e-06, "loss": 0.3620324730873108, "step": 6633 }, { "epoch": 0.8049993932775149, "grad_norm": 2.897542953491211, "learning_rate": 3.950374646849282e-06, "loss": 0.44972336292266846, "step": 6634 }, { "epoch": 0.805120737774542, "grad_norm": 2.6464767456054688, "learning_rate": 3.947917946198256e-06, "loss": 0.23840764164924622, "step": 6635 }, { "epoch": 0.805242082271569, "grad_norm": 2.5002012252807617, "learning_rate": 3.94546124554723e-06, "loss": 0.39583227038383484, "step": 6636 }, { "epoch": 0.8053634267685961, "grad_norm": 3.959690570831299, "learning_rate": 3.943004544896205e-06, "loss": 0.2653675079345703, "step": 6637 }, { "epoch": 0.8054847712656231, "grad_norm": 2.53692364692688, "learning_rate": 3.940547844245179e-06, "loss": 0.30943697690963745, "step": 6638 }, { "epoch": 0.8056061157626502, "grad_norm": 2.519892454147339, "learning_rate": 3.938091143594153e-06, "loss": 0.15365438163280487, "step": 6639 }, { "epoch": 0.8057274602596772, "grad_norm": 1.9748144149780273, "learning_rate": 3.935634442943127e-06, "loss": 0.5129886865615845, "step": 6640 }, { "epoch": 0.8058488047567043, "grad_norm": 3.1452157497406006, "learning_rate": 3.933177742292102e-06, "loss": 0.11659903079271317, "step": 6641 }, { "epoch": 0.8059701492537313, "grad_norm": 3.482516050338745, "learning_rate": 3.930721041641076e-06, "loss": 0.4053746461868286, "step": 6642 }, { "epoch": 0.8060914937507584, "grad_norm": 2.991159677505493, "learning_rate": 3.92826434099005e-06, "loss": 0.32650983333587646, "step": 6643 }, { "epoch": 0.8062128382477854, "grad_norm": 3.7795679569244385, "learning_rate": 3.925807640339025e-06, "loss": 0.24658876657485962, "step": 6644 }, { "epoch": 0.8063341827448125, "grad_norm": 1.4431885480880737, "learning_rate": 3.9233509396879996e-06, "loss": 0.12061242014169693, "step": 6645 }, { "epoch": 0.8064555272418396, "grad_norm": 1.620798110961914, "learning_rate": 3.920894239036974e-06, "loss": 0.071258045732975, "step": 6646 }, { "epoch": 0.8065768717388666, "grad_norm": 3.617227554321289, "learning_rate": 3.918437538385948e-06, "loss": 0.2783767282962799, "step": 6647 }, { "epoch": 0.8066982162358937, "grad_norm": 3.0398104190826416, "learning_rate": 3.915980837734922e-06, "loss": 0.2965163588523865, "step": 6648 }, { "epoch": 0.8068195607329207, "grad_norm": 2.797853469848633, "learning_rate": 3.913524137083897e-06, "loss": 0.5412133932113647, "step": 6649 }, { "epoch": 0.8069409052299478, "grad_norm": 2.9355576038360596, "learning_rate": 3.911067436432871e-06, "loss": 0.1913897842168808, "step": 6650 }, { "epoch": 0.8070622497269749, "grad_norm": 4.244832992553711, "learning_rate": 3.908610735781845e-06, "loss": 0.2390647977590561, "step": 6651 }, { "epoch": 0.807183594224002, "grad_norm": 3.9284796714782715, "learning_rate": 3.906154035130819e-06, "loss": 0.2966839373111725, "step": 6652 }, { "epoch": 0.807304938721029, "grad_norm": 4.396958827972412, "learning_rate": 3.903697334479794e-06, "loss": 0.16656361520290375, "step": 6653 }, { "epoch": 0.8074262832180561, "grad_norm": 1.7677090167999268, "learning_rate": 3.901240633828769e-06, "loss": 0.16527658700942993, "step": 6654 }, { "epoch": 0.8075476277150831, "grad_norm": 2.0427474975585938, "learning_rate": 3.898783933177743e-06, "loss": 0.2587355077266693, "step": 6655 }, { "epoch": 0.8076689722121102, "grad_norm": 2.893012285232544, "learning_rate": 3.896327232526717e-06, "loss": 0.24935519695281982, "step": 6656 }, { "epoch": 0.8077903167091373, "grad_norm": 1.672229290008545, "learning_rate": 3.8938705318756915e-06, "loss": 0.24609805643558502, "step": 6657 }, { "epoch": 0.8079116612061643, "grad_norm": 2.08367919921875, "learning_rate": 3.891413831224666e-06, "loss": 0.1021939292550087, "step": 6658 }, { "epoch": 0.8080330057031914, "grad_norm": 1.7799001932144165, "learning_rate": 3.88895713057364e-06, "loss": 0.10724438726902008, "step": 6659 }, { "epoch": 0.8081543502002184, "grad_norm": 3.1048190593719482, "learning_rate": 3.886500429922614e-06, "loss": 0.3057997226715088, "step": 6660 }, { "epoch": 0.8082756946972455, "grad_norm": 2.503143787384033, "learning_rate": 3.8840437292715885e-06, "loss": 0.195095956325531, "step": 6661 }, { "epoch": 0.8083970391942725, "grad_norm": 2.3299179077148438, "learning_rate": 3.881587028620563e-06, "loss": 0.4845414161682129, "step": 6662 }, { "epoch": 0.8085183836912996, "grad_norm": 2.2140963077545166, "learning_rate": 3.879130327969538e-06, "loss": 0.3068188428878784, "step": 6663 }, { "epoch": 0.8086397281883266, "grad_norm": 3.078150510787964, "learning_rate": 3.876673627318512e-06, "loss": 0.16094480454921722, "step": 6664 }, { "epoch": 0.8087610726853537, "grad_norm": 3.013787031173706, "learning_rate": 3.8742169266674856e-06, "loss": 0.17939543724060059, "step": 6665 }, { "epoch": 0.8088824171823807, "grad_norm": 1.24863600730896, "learning_rate": 3.87176022601646e-06, "loss": 0.03918027505278587, "step": 6666 }, { "epoch": 0.8090037616794078, "grad_norm": 2.7702269554138184, "learning_rate": 3.869303525365434e-06, "loss": 0.5451526641845703, "step": 6667 }, { "epoch": 0.8091251061764348, "grad_norm": 2.4714043140411377, "learning_rate": 3.866846824714408e-06, "loss": 0.23342032730579376, "step": 6668 }, { "epoch": 0.8092464506734619, "grad_norm": 3.105299472808838, "learning_rate": 3.864390124063383e-06, "loss": 0.019350716844201088, "step": 6669 }, { "epoch": 0.8093677951704891, "grad_norm": 4.187367916107178, "learning_rate": 3.861933423412358e-06, "loss": 0.3013722598552704, "step": 6670 }, { "epoch": 0.8094891396675161, "grad_norm": 2.6615848541259766, "learning_rate": 3.859476722761332e-06, "loss": 0.5773792266845703, "step": 6671 }, { "epoch": 0.8096104841645432, "grad_norm": 3.884686231613159, "learning_rate": 3.857020022110306e-06, "loss": 0.39107030630111694, "step": 6672 }, { "epoch": 0.8097318286615702, "grad_norm": 3.5008022785186768, "learning_rate": 3.8545633214592805e-06, "loss": 0.1874467432498932, "step": 6673 }, { "epoch": 0.8098531731585973, "grad_norm": 1.9166048765182495, "learning_rate": 3.852106620808255e-06, "loss": 0.29444965720176697, "step": 6674 }, { "epoch": 0.8099745176556243, "grad_norm": 3.0065436363220215, "learning_rate": 3.849649920157229e-06, "loss": 0.3920338451862335, "step": 6675 }, { "epoch": 0.8100958621526514, "grad_norm": 2.0884344577789307, "learning_rate": 3.847193219506203e-06, "loss": 0.13678330183029175, "step": 6676 }, { "epoch": 0.8102172066496784, "grad_norm": 3.901662826538086, "learning_rate": 3.8447365188551775e-06, "loss": 0.13689109683036804, "step": 6677 }, { "epoch": 0.8103385511467055, "grad_norm": 2.3899385929107666, "learning_rate": 3.842279818204152e-06, "loss": 0.0520562045276165, "step": 6678 }, { "epoch": 0.8104598956437326, "grad_norm": 2.826720952987671, "learning_rate": 3.839823117553126e-06, "loss": 0.18041086196899414, "step": 6679 }, { "epoch": 0.8105812401407596, "grad_norm": 2.4474194049835205, "learning_rate": 3.837366416902101e-06, "loss": 0.33639171719551086, "step": 6680 }, { "epoch": 0.8107025846377867, "grad_norm": 4.378361701965332, "learning_rate": 3.834909716251075e-06, "loss": 0.14323371648788452, "step": 6681 }, { "epoch": 0.8108239291348137, "grad_norm": 4.671327590942383, "learning_rate": 3.83245301560005e-06, "loss": 0.2726319432258606, "step": 6682 }, { "epoch": 0.8109452736318408, "grad_norm": 2.3815016746520996, "learning_rate": 3.829996314949024e-06, "loss": 0.10937896370887756, "step": 6683 }, { "epoch": 0.8110666181288678, "grad_norm": 2.8184664249420166, "learning_rate": 3.827539614297998e-06, "loss": 0.5277265906333923, "step": 6684 }, { "epoch": 0.8111879626258949, "grad_norm": 3.0475993156433105, "learning_rate": 3.825082913646972e-06, "loss": 0.18517693877220154, "step": 6685 }, { "epoch": 0.8113093071229219, "grad_norm": 3.0039260387420654, "learning_rate": 3.822626212995947e-06, "loss": 0.25552740693092346, "step": 6686 }, { "epoch": 0.811430651619949, "grad_norm": 3.028026819229126, "learning_rate": 3.820169512344921e-06, "loss": 0.4642429053783417, "step": 6687 }, { "epoch": 0.8115519961169761, "grad_norm": 1.7808955907821655, "learning_rate": 3.817712811693895e-06, "loss": 0.4249172806739807, "step": 6688 }, { "epoch": 0.8116733406140032, "grad_norm": 3.8394598960876465, "learning_rate": 3.8152561110428695e-06, "loss": 0.20517447590827942, "step": 6689 }, { "epoch": 0.8117946851110303, "grad_norm": 3.0762851238250732, "learning_rate": 3.812799410391844e-06, "loss": 0.26011645793914795, "step": 6690 }, { "epoch": 0.8119160296080573, "grad_norm": 0.36471477150917053, "learning_rate": 3.8103427097408184e-06, "loss": 0.0017904721898958087, "step": 6691 }, { "epoch": 0.8120373741050844, "grad_norm": 3.759127140045166, "learning_rate": 3.807886009089793e-06, "loss": 0.3421536684036255, "step": 6692 }, { "epoch": 0.8121587186021114, "grad_norm": 5.05056619644165, "learning_rate": 3.8054293084387673e-06, "loss": 0.1940101832151413, "step": 6693 }, { "epoch": 0.8122800630991385, "grad_norm": 2.6995625495910645, "learning_rate": 3.8029726077877416e-06, "loss": 0.49819812178611755, "step": 6694 }, { "epoch": 0.8124014075961655, "grad_norm": 0.9911708235740662, "learning_rate": 3.800515907136716e-06, "loss": 0.028568070381879807, "step": 6695 }, { "epoch": 0.8125227520931926, "grad_norm": 2.044843912124634, "learning_rate": 3.79805920648569e-06, "loss": 0.1470371037721634, "step": 6696 }, { "epoch": 0.8126440965902196, "grad_norm": 4.1062912940979, "learning_rate": 3.795602505834665e-06, "loss": 0.27860140800476074, "step": 6697 }, { "epoch": 0.8127654410872467, "grad_norm": 1.8229626417160034, "learning_rate": 3.793145805183639e-06, "loss": 0.17779235541820526, "step": 6698 }, { "epoch": 0.8128867855842737, "grad_norm": 2.2865500450134277, "learning_rate": 3.790689104532613e-06, "loss": 0.09890875965356827, "step": 6699 }, { "epoch": 0.8130081300813008, "grad_norm": 0.11759572476148605, "learning_rate": 3.788232403881587e-06, "loss": 0.0008665010100230575, "step": 6700 }, { "epoch": 0.8131294745783278, "grad_norm": 2.4693143367767334, "learning_rate": 3.7857757032305614e-06, "loss": 0.4542531967163086, "step": 6701 }, { "epoch": 0.8132508190753549, "grad_norm": 4.179076671600342, "learning_rate": 3.7833190025795357e-06, "loss": 0.38996270298957825, "step": 6702 }, { "epoch": 0.813372163572382, "grad_norm": 2.2068464756011963, "learning_rate": 3.78086230192851e-06, "loss": 0.5312044620513916, "step": 6703 }, { "epoch": 0.813493508069409, "grad_norm": 3.893051862716675, "learning_rate": 3.7784056012774846e-06, "loss": 0.16980214416980743, "step": 6704 }, { "epoch": 0.8136148525664361, "grad_norm": 2.596898317337036, "learning_rate": 3.775948900626459e-06, "loss": 0.05190807208418846, "step": 6705 }, { "epoch": 0.8137361970634631, "grad_norm": 2.301811933517456, "learning_rate": 3.773492199975433e-06, "loss": 0.10089710354804993, "step": 6706 }, { "epoch": 0.8138575415604903, "grad_norm": 3.1873576641082764, "learning_rate": 3.7710354993244074e-06, "loss": 0.1626112014055252, "step": 6707 }, { "epoch": 0.8139788860575173, "grad_norm": 3.3977346420288086, "learning_rate": 3.7685787986733816e-06, "loss": 0.2582404315471649, "step": 6708 }, { "epoch": 0.8141002305545444, "grad_norm": 2.014662981033325, "learning_rate": 3.7661220980223563e-06, "loss": 0.21209846436977386, "step": 6709 }, { "epoch": 0.8142215750515714, "grad_norm": 4.884772300720215, "learning_rate": 3.7636653973713306e-06, "loss": 0.1928073614835739, "step": 6710 }, { "epoch": 0.8143429195485985, "grad_norm": 1.948598861694336, "learning_rate": 3.761208696720305e-06, "loss": 0.21422305703163147, "step": 6711 }, { "epoch": 0.8144642640456256, "grad_norm": 3.9440293312072754, "learning_rate": 3.758751996069279e-06, "loss": 0.3065508008003235, "step": 6712 }, { "epoch": 0.8145856085426526, "grad_norm": 1.7317429780960083, "learning_rate": 3.7562952954182533e-06, "loss": 0.1341513991355896, "step": 6713 }, { "epoch": 0.8147069530396797, "grad_norm": 3.069157361984253, "learning_rate": 3.753838594767228e-06, "loss": 0.1525770127773285, "step": 6714 }, { "epoch": 0.8148282975367067, "grad_norm": 2.9268157482147217, "learning_rate": 3.7513818941162023e-06, "loss": 0.6065709590911865, "step": 6715 }, { "epoch": 0.8149496420337338, "grad_norm": 2.6515984535217285, "learning_rate": 3.7489251934651765e-06, "loss": 0.5626264810562134, "step": 6716 }, { "epoch": 0.8150709865307608, "grad_norm": 2.255336046218872, "learning_rate": 3.746468492814151e-06, "loss": 0.448883056640625, "step": 6717 }, { "epoch": 0.8151923310277879, "grad_norm": 2.54229474067688, "learning_rate": 3.744011792163125e-06, "loss": 0.3871064782142639, "step": 6718 }, { "epoch": 0.8153136755248149, "grad_norm": 1.604027271270752, "learning_rate": 3.7415550915120997e-06, "loss": 0.26578041911125183, "step": 6719 }, { "epoch": 0.815435020021842, "grad_norm": 3.225337505340576, "learning_rate": 3.739098390861074e-06, "loss": 0.289085328578949, "step": 6720 }, { "epoch": 0.815556364518869, "grad_norm": 0.6007100939750671, "learning_rate": 3.7366416902100483e-06, "loss": 0.009111521765589714, "step": 6721 }, { "epoch": 0.8156777090158961, "grad_norm": 3.8475897312164307, "learning_rate": 3.7341849895590225e-06, "loss": 0.3504578173160553, "step": 6722 }, { "epoch": 0.8157990535129231, "grad_norm": 4.793224334716797, "learning_rate": 3.731728288907997e-06, "loss": 0.1213584765791893, "step": 6723 }, { "epoch": 0.8159203980099502, "grad_norm": 1.6105690002441406, "learning_rate": 3.7292715882569715e-06, "loss": 0.09882637858390808, "step": 6724 }, { "epoch": 0.8160417425069774, "grad_norm": 0.8898985981941223, "learning_rate": 3.7268148876059457e-06, "loss": 0.014464862644672394, "step": 6725 }, { "epoch": 0.8161630870040044, "grad_norm": 3.4661402702331543, "learning_rate": 3.72435818695492e-06, "loss": 0.1814114898443222, "step": 6726 }, { "epoch": 0.8162844315010315, "grad_norm": 3.9169623851776123, "learning_rate": 3.7219014863038942e-06, "loss": 0.25299271941185, "step": 6727 }, { "epoch": 0.8164057759980585, "grad_norm": 2.64855694770813, "learning_rate": 3.719444785652869e-06, "loss": 0.5279485583305359, "step": 6728 }, { "epoch": 0.8165271204950856, "grad_norm": 4.274377346038818, "learning_rate": 3.716988085001843e-06, "loss": 0.30675750970840454, "step": 6729 }, { "epoch": 0.8166484649921126, "grad_norm": 3.577768325805664, "learning_rate": 3.7145313843508174e-06, "loss": 0.24699963629245758, "step": 6730 }, { "epoch": 0.8167698094891397, "grad_norm": 1.0805702209472656, "learning_rate": 3.7120746836997917e-06, "loss": 0.15606819093227386, "step": 6731 }, { "epoch": 0.8168911539861667, "grad_norm": 1.8441777229309082, "learning_rate": 3.709617983048766e-06, "loss": 0.07125689089298248, "step": 6732 }, { "epoch": 0.8170124984831938, "grad_norm": 3.129384756088257, "learning_rate": 3.7071612823977398e-06, "loss": 0.22119547426700592, "step": 6733 }, { "epoch": 0.8171338429802208, "grad_norm": 5.085041522979736, "learning_rate": 3.704704581746714e-06, "loss": 0.44871485233306885, "step": 6734 }, { "epoch": 0.8172551874772479, "grad_norm": 2.358431339263916, "learning_rate": 3.7022478810956887e-06, "loss": 0.273497998714447, "step": 6735 }, { "epoch": 0.817376531974275, "grad_norm": 2.9331090450286865, "learning_rate": 3.699791180444663e-06, "loss": 0.35886138677597046, "step": 6736 }, { "epoch": 0.817497876471302, "grad_norm": 4.742720603942871, "learning_rate": 3.6973344797936372e-06, "loss": 0.4396566152572632, "step": 6737 }, { "epoch": 0.8176192209683291, "grad_norm": 1.5712121725082397, "learning_rate": 3.6948777791426115e-06, "loss": 0.08995253592729568, "step": 6738 }, { "epoch": 0.8177405654653561, "grad_norm": 6.304478645324707, "learning_rate": 3.6924210784915858e-06, "loss": 0.2879182696342468, "step": 6739 }, { "epoch": 0.8178619099623832, "grad_norm": 2.503936529159546, "learning_rate": 3.6899643778405604e-06, "loss": 0.3863844871520996, "step": 6740 }, { "epoch": 0.8179832544594102, "grad_norm": 4.0037713050842285, "learning_rate": 3.6875076771895347e-06, "loss": 0.6038287878036499, "step": 6741 }, { "epoch": 0.8181045989564373, "grad_norm": 4.071784496307373, "learning_rate": 3.685050976538509e-06, "loss": 0.2076113373041153, "step": 6742 }, { "epoch": 0.8182259434534643, "grad_norm": 2.986686944961548, "learning_rate": 3.682594275887483e-06, "loss": 0.22197431325912476, "step": 6743 }, { "epoch": 0.8183472879504915, "grad_norm": 2.678746461868286, "learning_rate": 3.6801375752364575e-06, "loss": 0.4616473913192749, "step": 6744 }, { "epoch": 0.8184686324475186, "grad_norm": 0.481780081987381, "learning_rate": 3.677680874585432e-06, "loss": 0.0064141107723116875, "step": 6745 }, { "epoch": 0.8185899769445456, "grad_norm": 3.31144118309021, "learning_rate": 3.6752241739344064e-06, "loss": 0.16807107627391815, "step": 6746 }, { "epoch": 0.8187113214415727, "grad_norm": 4.9298319816589355, "learning_rate": 3.6727674732833807e-06, "loss": 0.24832284450531006, "step": 6747 }, { "epoch": 0.8188326659385997, "grad_norm": 3.1086180210113525, "learning_rate": 3.670310772632355e-06, "loss": 0.3832709491252899, "step": 6748 }, { "epoch": 0.8189540104356268, "grad_norm": 3.851874589920044, "learning_rate": 3.667854071981329e-06, "loss": 0.2971126437187195, "step": 6749 }, { "epoch": 0.8190753549326538, "grad_norm": 1.743041753768921, "learning_rate": 3.665397371330304e-06, "loss": 0.1614815592765808, "step": 6750 }, { "epoch": 0.8191966994296809, "grad_norm": 2.700718402862549, "learning_rate": 3.662940670679278e-06, "loss": 0.10668056458234787, "step": 6751 }, { "epoch": 0.8193180439267079, "grad_norm": 1.9353426694869995, "learning_rate": 3.6604839700282524e-06, "loss": 0.1393553614616394, "step": 6752 }, { "epoch": 0.819439388423735, "grad_norm": 2.778040647506714, "learning_rate": 3.6580272693772266e-06, "loss": 0.42829546332359314, "step": 6753 }, { "epoch": 0.819560732920762, "grad_norm": 4.314340114593506, "learning_rate": 3.655570568726201e-06, "loss": 0.3616076707839966, "step": 6754 }, { "epoch": 0.8196820774177891, "grad_norm": 4.088403224945068, "learning_rate": 3.6531138680751756e-06, "loss": 0.4304305613040924, "step": 6755 }, { "epoch": 0.8198034219148161, "grad_norm": 3.429335832595825, "learning_rate": 3.65065716742415e-06, "loss": 0.20053096115589142, "step": 6756 }, { "epoch": 0.8199247664118432, "grad_norm": 2.5037689208984375, "learning_rate": 3.648200466773124e-06, "loss": 0.34238749742507935, "step": 6757 }, { "epoch": 0.8200461109088703, "grad_norm": 3.290123224258423, "learning_rate": 3.6457437661220984e-06, "loss": 0.565638542175293, "step": 6758 }, { "epoch": 0.8201674554058973, "grad_norm": 3.689878463745117, "learning_rate": 3.6432870654710726e-06, "loss": 0.39724528789520264, "step": 6759 }, { "epoch": 0.8202887999029244, "grad_norm": 2.9510858058929443, "learning_rate": 3.6408303648200473e-06, "loss": 0.13038933277130127, "step": 6760 }, { "epoch": 0.8204101443999514, "grad_norm": 3.384977102279663, "learning_rate": 3.6383736641690216e-06, "loss": 0.08723199367523193, "step": 6761 }, { "epoch": 0.8205314888969785, "grad_norm": 2.8317391872406006, "learning_rate": 3.635916963517996e-06, "loss": 0.6884158849716187, "step": 6762 }, { "epoch": 0.8206528333940056, "grad_norm": 3.6133947372436523, "learning_rate": 3.63346026286697e-06, "loss": 0.4518013894557953, "step": 6763 }, { "epoch": 0.8207741778910327, "grad_norm": 3.9152801036834717, "learning_rate": 3.6310035622159443e-06, "loss": 0.786054253578186, "step": 6764 }, { "epoch": 0.8208955223880597, "grad_norm": 4.052156448364258, "learning_rate": 3.628546861564919e-06, "loss": 0.5258886218070984, "step": 6765 }, { "epoch": 0.8210168668850868, "grad_norm": 3.024718999862671, "learning_rate": 3.6260901609138933e-06, "loss": 0.41166749596595764, "step": 6766 }, { "epoch": 0.8211382113821138, "grad_norm": 3.168370008468628, "learning_rate": 3.623633460262867e-06, "loss": 0.12763360142707825, "step": 6767 }, { "epoch": 0.8212595558791409, "grad_norm": 3.279024839401245, "learning_rate": 3.6211767596118414e-06, "loss": 0.46948593854904175, "step": 6768 }, { "epoch": 0.821380900376168, "grad_norm": 3.74454402923584, "learning_rate": 3.6187200589608156e-06, "loss": 0.39905619621276855, "step": 6769 }, { "epoch": 0.821502244873195, "grad_norm": 2.0140058994293213, "learning_rate": 3.61626335830979e-06, "loss": 0.21369946002960205, "step": 6770 }, { "epoch": 0.8216235893702221, "grad_norm": 1.4842931032180786, "learning_rate": 3.6138066576587646e-06, "loss": 0.0913446918129921, "step": 6771 }, { "epoch": 0.8217449338672491, "grad_norm": 2.055582046508789, "learning_rate": 3.611349957007739e-06, "loss": 0.0780344307422638, "step": 6772 }, { "epoch": 0.8218662783642762, "grad_norm": 2.851762056350708, "learning_rate": 3.608893256356713e-06, "loss": 0.14409537613391876, "step": 6773 }, { "epoch": 0.8219876228613032, "grad_norm": 2.8877456188201904, "learning_rate": 3.6064365557056873e-06, "loss": 0.3314208686351776, "step": 6774 }, { "epoch": 0.8221089673583303, "grad_norm": 3.1609134674072266, "learning_rate": 3.6039798550546616e-06, "loss": 0.4208483397960663, "step": 6775 }, { "epoch": 0.8222303118553573, "grad_norm": 3.334987163543701, "learning_rate": 3.6015231544036363e-06, "loss": 0.30299055576324463, "step": 6776 }, { "epoch": 0.8223516563523844, "grad_norm": 2.2902650833129883, "learning_rate": 3.5990664537526105e-06, "loss": 0.18830594420433044, "step": 6777 }, { "epoch": 0.8224730008494114, "grad_norm": 1.8845689296722412, "learning_rate": 3.5966097531015848e-06, "loss": 0.11689156293869019, "step": 6778 }, { "epoch": 0.8225943453464385, "grad_norm": 3.333507776260376, "learning_rate": 3.594153052450559e-06, "loss": 0.3059668242931366, "step": 6779 }, { "epoch": 0.8227156898434655, "grad_norm": 2.58060359954834, "learning_rate": 3.5916963517995333e-06, "loss": 0.27494269609451294, "step": 6780 }, { "epoch": 0.8228370343404927, "grad_norm": 3.9025275707244873, "learning_rate": 3.589239651148508e-06, "loss": 0.3437436819076538, "step": 6781 }, { "epoch": 0.8229583788375198, "grad_norm": 2.6767919063568115, "learning_rate": 3.5867829504974822e-06, "loss": 0.5569031238555908, "step": 6782 }, { "epoch": 0.8230797233345468, "grad_norm": 2.056861162185669, "learning_rate": 3.5843262498464565e-06, "loss": 0.1050429567694664, "step": 6783 }, { "epoch": 0.8232010678315739, "grad_norm": 1.8521912097930908, "learning_rate": 3.5818695491954308e-06, "loss": 0.07997500151395798, "step": 6784 }, { "epoch": 0.8233224123286009, "grad_norm": 5.538821697235107, "learning_rate": 3.579412848544405e-06, "loss": 0.41199004650115967, "step": 6785 }, { "epoch": 0.823443756825628, "grad_norm": 2.943445920944214, "learning_rate": 3.5769561478933797e-06, "loss": 0.32280415296554565, "step": 6786 }, { "epoch": 0.823565101322655, "grad_norm": 3.3855884075164795, "learning_rate": 3.574499447242354e-06, "loss": 0.2151934951543808, "step": 6787 }, { "epoch": 0.8236864458196821, "grad_norm": 2.185356378555298, "learning_rate": 3.5720427465913282e-06, "loss": 0.12587378919124603, "step": 6788 }, { "epoch": 0.8238077903167091, "grad_norm": 2.7918384075164795, "learning_rate": 3.5695860459403025e-06, "loss": 0.23025935888290405, "step": 6789 }, { "epoch": 0.8239291348137362, "grad_norm": 2.424013614654541, "learning_rate": 3.5671293452892767e-06, "loss": 0.07525413483381271, "step": 6790 }, { "epoch": 0.8240504793107633, "grad_norm": 2.929352283477783, "learning_rate": 3.5646726446382514e-06, "loss": 0.4053027033805847, "step": 6791 }, { "epoch": 0.8241718238077903, "grad_norm": 3.7398786544799805, "learning_rate": 3.5622159439872257e-06, "loss": 0.11230237782001495, "step": 6792 }, { "epoch": 0.8242931683048174, "grad_norm": 2.8606534004211426, "learning_rate": 3.5597592433362e-06, "loss": 0.18288648128509521, "step": 6793 }, { "epoch": 0.8244145128018444, "grad_norm": 3.1940064430236816, "learning_rate": 3.557302542685174e-06, "loss": 0.4204731583595276, "step": 6794 }, { "epoch": 0.8245358572988715, "grad_norm": 2.931840419769287, "learning_rate": 3.5548458420341484e-06, "loss": 0.2163475751876831, "step": 6795 }, { "epoch": 0.8246572017958985, "grad_norm": 2.093036413192749, "learning_rate": 3.552389141383123e-06, "loss": 0.07137944549322128, "step": 6796 }, { "epoch": 0.8247785462929256, "grad_norm": 3.9743685722351074, "learning_rate": 3.5499324407320974e-06, "loss": 0.5070067644119263, "step": 6797 }, { "epoch": 0.8248998907899526, "grad_norm": 2.109325885772705, "learning_rate": 3.5474757400810716e-06, "loss": 0.23668597638607025, "step": 6798 }, { "epoch": 0.8250212352869797, "grad_norm": 3.4567558765411377, "learning_rate": 3.545019039430046e-06, "loss": 0.3939383327960968, "step": 6799 }, { "epoch": 0.8251425797840068, "grad_norm": 3.913071870803833, "learning_rate": 3.54256233877902e-06, "loss": 0.21311461925506592, "step": 6800 }, { "epoch": 0.8252639242810339, "grad_norm": 3.61267352104187, "learning_rate": 3.540105638127994e-06, "loss": 0.2959325611591339, "step": 6801 }, { "epoch": 0.825385268778061, "grad_norm": 2.8759641647338867, "learning_rate": 3.5376489374769687e-06, "loss": 0.21242651343345642, "step": 6802 }, { "epoch": 0.825506613275088, "grad_norm": 2.0843024253845215, "learning_rate": 3.535192236825943e-06, "loss": 0.07963552325963974, "step": 6803 }, { "epoch": 0.8256279577721151, "grad_norm": 1.8972768783569336, "learning_rate": 3.532735536174917e-06, "loss": 0.03358915448188782, "step": 6804 }, { "epoch": 0.8257493022691421, "grad_norm": 4.073275089263916, "learning_rate": 3.5302788355238915e-06, "loss": 0.17286907136440277, "step": 6805 }, { "epoch": 0.8258706467661692, "grad_norm": 3.4637153148651123, "learning_rate": 3.5278221348728657e-06, "loss": 0.21072585880756378, "step": 6806 }, { "epoch": 0.8259919912631962, "grad_norm": 3.1427841186523438, "learning_rate": 3.5253654342218404e-06, "loss": 0.7577795386314392, "step": 6807 }, { "epoch": 0.8261133357602233, "grad_norm": 3.6251673698425293, "learning_rate": 3.5229087335708147e-06, "loss": 0.4603130519390106, "step": 6808 }, { "epoch": 0.8262346802572503, "grad_norm": 3.153904914855957, "learning_rate": 3.520452032919789e-06, "loss": 0.2193812131881714, "step": 6809 }, { "epoch": 0.8263560247542774, "grad_norm": 2.5841803550720215, "learning_rate": 3.517995332268763e-06, "loss": 0.20880109071731567, "step": 6810 }, { "epoch": 0.8264773692513044, "grad_norm": 5.037141799926758, "learning_rate": 3.5155386316177374e-06, "loss": 0.6874407529830933, "step": 6811 }, { "epoch": 0.8265987137483315, "grad_norm": 2.8509349822998047, "learning_rate": 3.513081930966712e-06, "loss": 0.5869269967079163, "step": 6812 }, { "epoch": 0.8267200582453585, "grad_norm": 0.8769944310188293, "learning_rate": 3.5106252303156864e-06, "loss": 0.013514043763279915, "step": 6813 }, { "epoch": 0.8268414027423856, "grad_norm": 3.4467477798461914, "learning_rate": 3.5081685296646606e-06, "loss": 0.24369296431541443, "step": 6814 }, { "epoch": 0.8269627472394127, "grad_norm": 1.6407837867736816, "learning_rate": 3.505711829013635e-06, "loss": 0.0849594846367836, "step": 6815 }, { "epoch": 0.8270840917364397, "grad_norm": 1.9391635656356812, "learning_rate": 3.503255128362609e-06, "loss": 0.05148938670754433, "step": 6816 }, { "epoch": 0.8272054362334668, "grad_norm": 4.975749969482422, "learning_rate": 3.500798427711584e-06, "loss": 0.05556638538837433, "step": 6817 }, { "epoch": 0.8273267807304939, "grad_norm": 3.056779384613037, "learning_rate": 3.498341727060558e-06, "loss": 0.2528456449508667, "step": 6818 }, { "epoch": 0.827448125227521, "grad_norm": 2.6551737785339355, "learning_rate": 3.4958850264095323e-06, "loss": 0.12487128376960754, "step": 6819 }, { "epoch": 0.827569469724548, "grad_norm": 2.599277973175049, "learning_rate": 3.4934283257585066e-06, "loss": 0.2700466215610504, "step": 6820 }, { "epoch": 0.8276908142215751, "grad_norm": 1.5839006900787354, "learning_rate": 3.490971625107481e-06, "loss": 0.09825972467660904, "step": 6821 }, { "epoch": 0.8278121587186021, "grad_norm": 4.6725006103515625, "learning_rate": 3.4885149244564555e-06, "loss": 0.39630934596061707, "step": 6822 }, { "epoch": 0.8279335032156292, "grad_norm": 1.8797847032546997, "learning_rate": 3.48605822380543e-06, "loss": 0.023944133892655373, "step": 6823 }, { "epoch": 0.8280548477126563, "grad_norm": 2.582404375076294, "learning_rate": 3.483601523154404e-06, "loss": 0.4406931698322296, "step": 6824 }, { "epoch": 0.8281761922096833, "grad_norm": 2.11747670173645, "learning_rate": 3.4811448225033783e-06, "loss": 0.12363192439079285, "step": 6825 }, { "epoch": 0.8282975367067104, "grad_norm": 4.396477699279785, "learning_rate": 3.4786881218523526e-06, "loss": 0.20247876644134521, "step": 6826 }, { "epoch": 0.8284188812037374, "grad_norm": 3.0489273071289062, "learning_rate": 3.4762314212013273e-06, "loss": 0.24223044514656067, "step": 6827 }, { "epoch": 0.8285402257007645, "grad_norm": 0.8217211365699768, "learning_rate": 3.4737747205503015e-06, "loss": 0.020067425444722176, "step": 6828 }, { "epoch": 0.8286615701977915, "grad_norm": 3.627739906311035, "learning_rate": 3.4713180198992758e-06, "loss": 0.41334742307662964, "step": 6829 }, { "epoch": 0.8287829146948186, "grad_norm": 4.345524787902832, "learning_rate": 3.46886131924825e-06, "loss": 0.2067423164844513, "step": 6830 }, { "epoch": 0.8289042591918456, "grad_norm": 2.5111241340637207, "learning_rate": 3.4664046185972243e-06, "loss": 0.23007822036743164, "step": 6831 }, { "epoch": 0.8290256036888727, "grad_norm": 3.37785267829895, "learning_rate": 3.463947917946199e-06, "loss": 0.35011154413223267, "step": 6832 }, { "epoch": 0.8291469481858997, "grad_norm": 1.5547535419464111, "learning_rate": 3.4614912172951732e-06, "loss": 0.08940161019563675, "step": 6833 }, { "epoch": 0.8292682926829268, "grad_norm": 3.557382106781006, "learning_rate": 3.4590345166441475e-06, "loss": 0.5060527324676514, "step": 6834 }, { "epoch": 0.8293896371799538, "grad_norm": 3.557947874069214, "learning_rate": 3.4565778159931213e-06, "loss": 0.36648252606391907, "step": 6835 }, { "epoch": 0.8295109816769809, "grad_norm": 2.047501564025879, "learning_rate": 3.4541211153420956e-06, "loss": 0.12090091407299042, "step": 6836 }, { "epoch": 0.8296323261740081, "grad_norm": 2.9178578853607178, "learning_rate": 3.45166441469107e-06, "loss": 0.12622840702533722, "step": 6837 }, { "epoch": 0.8297536706710351, "grad_norm": 2.9971184730529785, "learning_rate": 3.449207714040044e-06, "loss": 0.1552826166152954, "step": 6838 }, { "epoch": 0.8298750151680622, "grad_norm": 2.084660768508911, "learning_rate": 3.4467510133890188e-06, "loss": 0.06152136251330376, "step": 6839 }, { "epoch": 0.8299963596650892, "grad_norm": 2.2172141075134277, "learning_rate": 3.444294312737993e-06, "loss": 0.6652753949165344, "step": 6840 }, { "epoch": 0.8301177041621163, "grad_norm": 3.848055124282837, "learning_rate": 3.4418376120869673e-06, "loss": 0.20330071449279785, "step": 6841 }, { "epoch": 0.8302390486591433, "grad_norm": 3.6585676670074463, "learning_rate": 3.4393809114359415e-06, "loss": 0.4291767477989197, "step": 6842 }, { "epoch": 0.8303603931561704, "grad_norm": 3.7044172286987305, "learning_rate": 3.436924210784916e-06, "loss": 0.332914799451828, "step": 6843 }, { "epoch": 0.8304817376531974, "grad_norm": 3.1939730644226074, "learning_rate": 3.4344675101338905e-06, "loss": 0.250424861907959, "step": 6844 }, { "epoch": 0.8306030821502245, "grad_norm": 2.3102211952209473, "learning_rate": 3.4320108094828647e-06, "loss": 0.17594367265701294, "step": 6845 }, { "epoch": 0.8307244266472515, "grad_norm": 1.9560959339141846, "learning_rate": 3.429554108831839e-06, "loss": 0.08633501827716827, "step": 6846 }, { "epoch": 0.8308457711442786, "grad_norm": 3.92397141456604, "learning_rate": 3.4270974081808133e-06, "loss": 0.45120692253112793, "step": 6847 }, { "epoch": 0.8309671156413057, "grad_norm": 3.174454689025879, "learning_rate": 3.4246407075297875e-06, "loss": 0.06616245210170746, "step": 6848 }, { "epoch": 0.8310884601383327, "grad_norm": 2.3291804790496826, "learning_rate": 3.422184006878762e-06, "loss": 0.8134903907775879, "step": 6849 }, { "epoch": 0.8312098046353598, "grad_norm": 2.712312698364258, "learning_rate": 3.4197273062277365e-06, "loss": 0.217189759016037, "step": 6850 }, { "epoch": 0.8313311491323868, "grad_norm": 4.0668721199035645, "learning_rate": 3.4172706055767107e-06, "loss": 0.15618684887886047, "step": 6851 }, { "epoch": 0.8314524936294139, "grad_norm": 3.117466926574707, "learning_rate": 3.414813904925685e-06, "loss": 0.2538308799266815, "step": 6852 }, { "epoch": 0.8315738381264409, "grad_norm": 3.172910690307617, "learning_rate": 3.4123572042746592e-06, "loss": 0.2975618243217468, "step": 6853 }, { "epoch": 0.831695182623468, "grad_norm": 1.8531725406646729, "learning_rate": 3.409900503623634e-06, "loss": 0.11580969393253326, "step": 6854 }, { "epoch": 0.831816527120495, "grad_norm": 1.7458871603012085, "learning_rate": 3.407443802972608e-06, "loss": 0.028265465050935745, "step": 6855 }, { "epoch": 0.8319378716175222, "grad_norm": 2.18174147605896, "learning_rate": 3.4049871023215824e-06, "loss": 0.6139706969261169, "step": 6856 }, { "epoch": 0.8320592161145492, "grad_norm": 2.37796950340271, "learning_rate": 3.4025304016705567e-06, "loss": 0.1167609840631485, "step": 6857 }, { "epoch": 0.8321805606115763, "grad_norm": 1.959659218788147, "learning_rate": 3.400073701019531e-06, "loss": 0.17627853155136108, "step": 6858 }, { "epoch": 0.8323019051086034, "grad_norm": 3.0840392112731934, "learning_rate": 3.3976170003685056e-06, "loss": 0.2246231585741043, "step": 6859 }, { "epoch": 0.8324232496056304, "grad_norm": 2.3034398555755615, "learning_rate": 3.39516029971748e-06, "loss": 0.052022263407707214, "step": 6860 }, { "epoch": 0.8325445941026575, "grad_norm": 5.5387115478515625, "learning_rate": 3.392703599066454e-06, "loss": 0.5688335299491882, "step": 6861 }, { "epoch": 0.8326659385996845, "grad_norm": 2.5914297103881836, "learning_rate": 3.3902468984154284e-06, "loss": 0.2809569835662842, "step": 6862 }, { "epoch": 0.8327872830967116, "grad_norm": 2.2908918857574463, "learning_rate": 3.387790197764403e-06, "loss": 0.17095370590686798, "step": 6863 }, { "epoch": 0.8329086275937386, "grad_norm": 2.1114633083343506, "learning_rate": 3.3853334971133773e-06, "loss": 0.10393644869327545, "step": 6864 }, { "epoch": 0.8330299720907657, "grad_norm": 3.112346649169922, "learning_rate": 3.3828767964623516e-06, "loss": 0.2850360870361328, "step": 6865 }, { "epoch": 0.8331513165877927, "grad_norm": 2.635087013244629, "learning_rate": 3.380420095811326e-06, "loss": 0.15219621360301971, "step": 6866 }, { "epoch": 0.8332726610848198, "grad_norm": 3.986943244934082, "learning_rate": 3.3779633951603e-06, "loss": 0.49320510029792786, "step": 6867 }, { "epoch": 0.8333940055818468, "grad_norm": 4.542457103729248, "learning_rate": 3.375506694509275e-06, "loss": 0.19827598333358765, "step": 6868 }, { "epoch": 0.8335153500788739, "grad_norm": 2.1342854499816895, "learning_rate": 3.3730499938582482e-06, "loss": 0.1310568004846573, "step": 6869 }, { "epoch": 0.833636694575901, "grad_norm": 3.1129865646362305, "learning_rate": 3.370593293207223e-06, "loss": 0.7989140152931213, "step": 6870 }, { "epoch": 0.833758039072928, "grad_norm": 3.7294504642486572, "learning_rate": 3.368136592556197e-06, "loss": 0.5086326003074646, "step": 6871 }, { "epoch": 0.8338793835699551, "grad_norm": 2.9754269123077393, "learning_rate": 3.3656798919051714e-06, "loss": 0.1442694365978241, "step": 6872 }, { "epoch": 0.8340007280669821, "grad_norm": 4.514766216278076, "learning_rate": 3.3632231912541457e-06, "loss": 0.4973801374435425, "step": 6873 }, { "epoch": 0.8341220725640093, "grad_norm": 2.78999924659729, "learning_rate": 3.36076649060312e-06, "loss": 0.0574166402220726, "step": 6874 }, { "epoch": 0.8342434170610363, "grad_norm": 3.6680967807769775, "learning_rate": 3.3583097899520946e-06, "loss": 0.140468031167984, "step": 6875 }, { "epoch": 0.8343647615580634, "grad_norm": 3.8663129806518555, "learning_rate": 3.355853089301069e-06, "loss": 0.1941162347793579, "step": 6876 }, { "epoch": 0.8344861060550904, "grad_norm": 2.5639312267303467, "learning_rate": 3.353396388650043e-06, "loss": 0.34298932552337646, "step": 6877 }, { "epoch": 0.8346074505521175, "grad_norm": 3.3421082496643066, "learning_rate": 3.3509396879990174e-06, "loss": 0.25687387585639954, "step": 6878 }, { "epoch": 0.8347287950491445, "grad_norm": 4.559319972991943, "learning_rate": 3.3484829873479916e-06, "loss": 0.9453887939453125, "step": 6879 }, { "epoch": 0.8348501395461716, "grad_norm": 2.476771593093872, "learning_rate": 3.3460262866969663e-06, "loss": 0.14670430123806, "step": 6880 }, { "epoch": 0.8349714840431987, "grad_norm": 4.480035781860352, "learning_rate": 3.3435695860459406e-06, "loss": 0.23352235555648804, "step": 6881 }, { "epoch": 0.8350928285402257, "grad_norm": 3.138627767562866, "learning_rate": 3.341112885394915e-06, "loss": 0.2470666766166687, "step": 6882 }, { "epoch": 0.8352141730372528, "grad_norm": 2.7379207611083984, "learning_rate": 3.338656184743889e-06, "loss": 0.10671674460172653, "step": 6883 }, { "epoch": 0.8353355175342798, "grad_norm": 3.6516220569610596, "learning_rate": 3.3361994840928634e-06, "loss": 0.29818716645240784, "step": 6884 }, { "epoch": 0.8354568620313069, "grad_norm": 2.991102457046509, "learning_rate": 3.333742783441838e-06, "loss": 0.32640331983566284, "step": 6885 }, { "epoch": 0.8355782065283339, "grad_norm": 3.1945176124572754, "learning_rate": 3.3312860827908123e-06, "loss": 0.5794629454612732, "step": 6886 }, { "epoch": 0.835699551025361, "grad_norm": 3.1439077854156494, "learning_rate": 3.3288293821397866e-06, "loss": 0.15370886027812958, "step": 6887 }, { "epoch": 0.835820895522388, "grad_norm": 3.0695621967315674, "learning_rate": 3.326372681488761e-06, "loss": 0.14216814935207367, "step": 6888 }, { "epoch": 0.8359422400194151, "grad_norm": 2.44069766998291, "learning_rate": 3.323915980837735e-06, "loss": 0.12188559025526047, "step": 6889 }, { "epoch": 0.8360635845164421, "grad_norm": 4.41136360168457, "learning_rate": 3.3214592801867098e-06, "loss": 0.38775038719177246, "step": 6890 }, { "epoch": 0.8361849290134692, "grad_norm": 0.4444239139556885, "learning_rate": 3.319002579535684e-06, "loss": 0.011417511850595474, "step": 6891 }, { "epoch": 0.8363062735104962, "grad_norm": 2.1226723194122314, "learning_rate": 3.3165458788846583e-06, "loss": 0.1501627266407013, "step": 6892 }, { "epoch": 0.8364276180075234, "grad_norm": 4.472182273864746, "learning_rate": 3.3140891782336325e-06, "loss": 0.14066681265830994, "step": 6893 }, { "epoch": 0.8365489625045505, "grad_norm": 1.6639797687530518, "learning_rate": 3.3116324775826068e-06, "loss": 0.033426955342292786, "step": 6894 }, { "epoch": 0.8366703070015775, "grad_norm": 3.6559207439422607, "learning_rate": 3.3091757769315815e-06, "loss": 0.23195600509643555, "step": 6895 }, { "epoch": 0.8367916514986046, "grad_norm": 2.478848934173584, "learning_rate": 3.3067190762805557e-06, "loss": 0.10762138664722443, "step": 6896 }, { "epoch": 0.8369129959956316, "grad_norm": 2.454444169998169, "learning_rate": 3.30426237562953e-06, "loss": 0.10892121493816376, "step": 6897 }, { "epoch": 0.8370343404926587, "grad_norm": 3.3657851219177246, "learning_rate": 3.3018056749785042e-06, "loss": 0.4438430666923523, "step": 6898 }, { "epoch": 0.8371556849896857, "grad_norm": 5.692933082580566, "learning_rate": 3.2993489743274785e-06, "loss": 0.45371416211128235, "step": 6899 }, { "epoch": 0.8372770294867128, "grad_norm": 0.758480966091156, "learning_rate": 3.296892273676453e-06, "loss": 0.00901311170309782, "step": 6900 }, { "epoch": 0.8373983739837398, "grad_norm": 2.1392502784729004, "learning_rate": 3.2944355730254274e-06, "loss": 0.07757799327373505, "step": 6901 }, { "epoch": 0.8375197184807669, "grad_norm": 2.8749027252197266, "learning_rate": 3.2919788723744017e-06, "loss": 0.42415302991867065, "step": 6902 }, { "epoch": 0.837641062977794, "grad_norm": 3.14534330368042, "learning_rate": 3.2895221717233755e-06, "loss": 0.311821848154068, "step": 6903 }, { "epoch": 0.837762407474821, "grad_norm": 2.2982680797576904, "learning_rate": 3.28706547107235e-06, "loss": 0.1840910166501999, "step": 6904 }, { "epoch": 0.8378837519718481, "grad_norm": 3.828580856323242, "learning_rate": 3.284608770421324e-06, "loss": 0.5593740940093994, "step": 6905 }, { "epoch": 0.8380050964688751, "grad_norm": 2.5447475910186768, "learning_rate": 3.2821520697702987e-06, "loss": 0.3468548655509949, "step": 6906 }, { "epoch": 0.8381264409659022, "grad_norm": 2.726973295211792, "learning_rate": 3.279695369119273e-06, "loss": 0.15887433290481567, "step": 6907 }, { "epoch": 0.8382477854629292, "grad_norm": 3.5735280513763428, "learning_rate": 3.2772386684682472e-06, "loss": 0.27131223678588867, "step": 6908 }, { "epoch": 0.8383691299599563, "grad_norm": 4.12821626663208, "learning_rate": 3.2747819678172215e-06, "loss": 0.5308485627174377, "step": 6909 }, { "epoch": 0.8384904744569833, "grad_norm": 3.4795799255371094, "learning_rate": 3.2723252671661958e-06, "loss": 0.23644676804542542, "step": 6910 }, { "epoch": 0.8386118189540105, "grad_norm": 3.1768124103546143, "learning_rate": 3.2698685665151704e-06, "loss": 0.27877575159072876, "step": 6911 }, { "epoch": 0.8387331634510375, "grad_norm": 3.365161895751953, "learning_rate": 3.2674118658641447e-06, "loss": 0.44212260842323303, "step": 6912 }, { "epoch": 0.8388545079480646, "grad_norm": 2.0970654487609863, "learning_rate": 3.264955165213119e-06, "loss": 0.24569711089134216, "step": 6913 }, { "epoch": 0.8389758524450917, "grad_norm": 3.7470173835754395, "learning_rate": 3.2624984645620932e-06, "loss": 0.24200408160686493, "step": 6914 }, { "epoch": 0.8390971969421187, "grad_norm": 3.3245131969451904, "learning_rate": 3.2600417639110675e-06, "loss": 0.5474309921264648, "step": 6915 }, { "epoch": 0.8392185414391458, "grad_norm": 2.0636677742004395, "learning_rate": 3.257585063260042e-06, "loss": 0.24118487536907196, "step": 6916 }, { "epoch": 0.8393398859361728, "grad_norm": 4.253865718841553, "learning_rate": 3.2551283626090164e-06, "loss": 0.2964988946914673, "step": 6917 }, { "epoch": 0.8394612304331999, "grad_norm": 3.7565863132476807, "learning_rate": 3.2526716619579907e-06, "loss": 0.2549467384815216, "step": 6918 }, { "epoch": 0.8395825749302269, "grad_norm": 2.241302967071533, "learning_rate": 3.250214961306965e-06, "loss": 0.29352012276649475, "step": 6919 }, { "epoch": 0.839703919427254, "grad_norm": 3.108471632003784, "learning_rate": 3.247758260655939e-06, "loss": 0.37880799174308777, "step": 6920 }, { "epoch": 0.839825263924281, "grad_norm": 2.5510976314544678, "learning_rate": 3.245301560004914e-06, "loss": 0.3438318371772766, "step": 6921 }, { "epoch": 0.8399466084213081, "grad_norm": 3.776893377304077, "learning_rate": 3.242844859353888e-06, "loss": 0.4803541898727417, "step": 6922 }, { "epoch": 0.8400679529183351, "grad_norm": 3.642200469970703, "learning_rate": 3.2403881587028624e-06, "loss": 0.9076679944992065, "step": 6923 }, { "epoch": 0.8401892974153622, "grad_norm": 3.3507513999938965, "learning_rate": 3.2379314580518366e-06, "loss": 0.32394278049468994, "step": 6924 }, { "epoch": 0.8403106419123892, "grad_norm": 3.0851638317108154, "learning_rate": 3.235474757400811e-06, "loss": 0.32902824878692627, "step": 6925 }, { "epoch": 0.8404319864094163, "grad_norm": 3.341080904006958, "learning_rate": 3.2330180567497856e-06, "loss": 0.7036463022232056, "step": 6926 }, { "epoch": 0.8405533309064434, "grad_norm": 5.262468338012695, "learning_rate": 3.23056135609876e-06, "loss": 0.1510286033153534, "step": 6927 }, { "epoch": 0.8406746754034704, "grad_norm": 2.0184450149536133, "learning_rate": 3.228104655447734e-06, "loss": 0.30196115374565125, "step": 6928 }, { "epoch": 0.8407960199004975, "grad_norm": 3.439990520477295, "learning_rate": 3.2256479547967084e-06, "loss": 0.30372095108032227, "step": 6929 }, { "epoch": 0.8409173643975246, "grad_norm": 3.3977556228637695, "learning_rate": 3.2231912541456826e-06, "loss": 0.4240829348564148, "step": 6930 }, { "epoch": 0.8410387088945517, "grad_norm": 1.438707947731018, "learning_rate": 3.2207345534946573e-06, "loss": 0.04350636526942253, "step": 6931 }, { "epoch": 0.8411600533915787, "grad_norm": 3.7273783683776855, "learning_rate": 3.2182778528436316e-06, "loss": 0.3346658945083618, "step": 6932 }, { "epoch": 0.8412813978886058, "grad_norm": 3.917146921157837, "learning_rate": 3.215821152192606e-06, "loss": 0.284939169883728, "step": 6933 }, { "epoch": 0.8414027423856328, "grad_norm": 3.047935962677002, "learning_rate": 3.21336445154158e-06, "loss": 0.29623159766197205, "step": 6934 }, { "epoch": 0.8415240868826599, "grad_norm": 2.509671211242676, "learning_rate": 3.2109077508905543e-06, "loss": 0.1958446353673935, "step": 6935 }, { "epoch": 0.841645431379687, "grad_norm": 4.57324743270874, "learning_rate": 3.208451050239529e-06, "loss": 0.26713207364082336, "step": 6936 }, { "epoch": 0.841766775876714, "grad_norm": 2.8205790519714355, "learning_rate": 3.205994349588503e-06, "loss": 0.27474963665008545, "step": 6937 }, { "epoch": 0.8418881203737411, "grad_norm": 2.99729061126709, "learning_rate": 3.203537648937477e-06, "loss": 0.2710411548614502, "step": 6938 }, { "epoch": 0.8420094648707681, "grad_norm": 2.475558280944824, "learning_rate": 3.2010809482864514e-06, "loss": 0.3628034293651581, "step": 6939 }, { "epoch": 0.8421308093677952, "grad_norm": 2.745260000228882, "learning_rate": 3.1986242476354256e-06, "loss": 0.627814769744873, "step": 6940 }, { "epoch": 0.8422521538648222, "grad_norm": 1.8091062307357788, "learning_rate": 3.1961675469844e-06, "loss": 0.02080518566071987, "step": 6941 }, { "epoch": 0.8423734983618493, "grad_norm": 3.111811399459839, "learning_rate": 3.1937108463333746e-06, "loss": 0.49202537536621094, "step": 6942 }, { "epoch": 0.8424948428588763, "grad_norm": 1.34796142578125, "learning_rate": 3.191254145682349e-06, "loss": 0.008436929434537888, "step": 6943 }, { "epoch": 0.8426161873559034, "grad_norm": 1.9575963020324707, "learning_rate": 3.188797445031323e-06, "loss": 0.14711707830429077, "step": 6944 }, { "epoch": 0.8427375318529304, "grad_norm": 2.463390350341797, "learning_rate": 3.1863407443802973e-06, "loss": 0.3435473144054413, "step": 6945 }, { "epoch": 0.8428588763499575, "grad_norm": 3.201693296432495, "learning_rate": 3.1838840437292716e-06, "loss": 0.32881832122802734, "step": 6946 }, { "epoch": 0.8429802208469845, "grad_norm": 2.8188202381134033, "learning_rate": 3.1814273430782463e-06, "loss": 0.26789554953575134, "step": 6947 }, { "epoch": 0.8431015653440116, "grad_norm": 2.2286438941955566, "learning_rate": 3.1789706424272205e-06, "loss": 0.18095393478870392, "step": 6948 }, { "epoch": 0.8432229098410388, "grad_norm": 3.319666862487793, "learning_rate": 3.176513941776195e-06, "loss": 0.47127020359039307, "step": 6949 }, { "epoch": 0.8433442543380658, "grad_norm": 2.9960155487060547, "learning_rate": 3.174057241125169e-06, "loss": 0.22836250066757202, "step": 6950 }, { "epoch": 0.8434655988350929, "grad_norm": 3.0551769733428955, "learning_rate": 3.1716005404741433e-06, "loss": 0.4529463052749634, "step": 6951 }, { "epoch": 0.8435869433321199, "grad_norm": 3.8939425945281982, "learning_rate": 3.169143839823118e-06, "loss": 0.08402471244335175, "step": 6952 }, { "epoch": 0.843708287829147, "grad_norm": 2.644606351852417, "learning_rate": 3.1666871391720923e-06, "loss": 0.24211370944976807, "step": 6953 }, { "epoch": 0.843829632326174, "grad_norm": 2.4403512477874756, "learning_rate": 3.1642304385210665e-06, "loss": 0.9809610247612, "step": 6954 }, { "epoch": 0.8439509768232011, "grad_norm": 2.710603713989258, "learning_rate": 3.1617737378700408e-06, "loss": 0.2144758552312851, "step": 6955 }, { "epoch": 0.8440723213202281, "grad_norm": 2.556513547897339, "learning_rate": 3.159317037219015e-06, "loss": 0.2785454988479614, "step": 6956 }, { "epoch": 0.8441936658172552, "grad_norm": 2.6532576084136963, "learning_rate": 3.1568603365679897e-06, "loss": 0.16588816046714783, "step": 6957 }, { "epoch": 0.8443150103142822, "grad_norm": 5.003971576690674, "learning_rate": 3.154403635916964e-06, "loss": 0.5207638144493103, "step": 6958 }, { "epoch": 0.8444363548113093, "grad_norm": 2.731966495513916, "learning_rate": 3.1519469352659382e-06, "loss": 0.5808115005493164, "step": 6959 }, { "epoch": 0.8445576993083364, "grad_norm": 2.638315439224243, "learning_rate": 3.1494902346149125e-06, "loss": 0.12964996695518494, "step": 6960 }, { "epoch": 0.8446790438053634, "grad_norm": 2.3210289478302, "learning_rate": 3.1470335339638867e-06, "loss": 0.34859225153923035, "step": 6961 }, { "epoch": 0.8448003883023905, "grad_norm": 3.4336025714874268, "learning_rate": 3.1445768333128614e-06, "loss": 0.35792455077171326, "step": 6962 }, { "epoch": 0.8449217327994175, "grad_norm": 3.6950907707214355, "learning_rate": 3.1421201326618357e-06, "loss": 0.14269599318504333, "step": 6963 }, { "epoch": 0.8450430772964446, "grad_norm": 9.83622932434082, "learning_rate": 3.13966343201081e-06, "loss": 0.23878054320812225, "step": 6964 }, { "epoch": 0.8451644217934716, "grad_norm": 3.2411391735076904, "learning_rate": 3.137206731359784e-06, "loss": 0.5225003957748413, "step": 6965 }, { "epoch": 0.8452857662904987, "grad_norm": 1.951334834098816, "learning_rate": 3.1347500307087585e-06, "loss": 0.1600467413663864, "step": 6966 }, { "epoch": 0.8454071107875258, "grad_norm": 4.768952369689941, "learning_rate": 3.132293330057733e-06, "loss": 0.2989200949668884, "step": 6967 }, { "epoch": 0.8455284552845529, "grad_norm": 3.6509668827056885, "learning_rate": 3.1298366294067074e-06, "loss": 0.07277454435825348, "step": 6968 }, { "epoch": 0.84564979978158, "grad_norm": 1.9770021438598633, "learning_rate": 3.1273799287556817e-06, "loss": 0.1035429984331131, "step": 6969 }, { "epoch": 0.845771144278607, "grad_norm": 3.268134832382202, "learning_rate": 3.1249232281046555e-06, "loss": 0.13667438924312592, "step": 6970 }, { "epoch": 0.845892488775634, "grad_norm": 2.877018690109253, "learning_rate": 3.1224665274536297e-06, "loss": 0.274125337600708, "step": 6971 }, { "epoch": 0.8460138332726611, "grad_norm": 3.49861216545105, "learning_rate": 3.120009826802604e-06, "loss": 0.4536648690700531, "step": 6972 }, { "epoch": 0.8461351777696882, "grad_norm": 1.4402945041656494, "learning_rate": 3.1175531261515783e-06, "loss": 0.044798653572797775, "step": 6973 }, { "epoch": 0.8462565222667152, "grad_norm": 2.4259963035583496, "learning_rate": 3.115096425500553e-06, "loss": 0.13824260234832764, "step": 6974 }, { "epoch": 0.8463778667637423, "grad_norm": 1.7666187286376953, "learning_rate": 3.112639724849527e-06, "loss": 0.02869933471083641, "step": 6975 }, { "epoch": 0.8464992112607693, "grad_norm": 3.6917741298675537, "learning_rate": 3.1101830241985015e-06, "loss": 0.49447521567344666, "step": 6976 }, { "epoch": 0.8466205557577964, "grad_norm": 2.890197277069092, "learning_rate": 3.1077263235474757e-06, "loss": 0.6599770784378052, "step": 6977 }, { "epoch": 0.8467419002548234, "grad_norm": 3.119922399520874, "learning_rate": 3.10526962289645e-06, "loss": 0.07581758499145508, "step": 6978 }, { "epoch": 0.8468632447518505, "grad_norm": 2.3820254802703857, "learning_rate": 3.1028129222454247e-06, "loss": 0.20934540033340454, "step": 6979 }, { "epoch": 0.8469845892488775, "grad_norm": 3.2602460384368896, "learning_rate": 3.100356221594399e-06, "loss": 0.15573839843273163, "step": 6980 }, { "epoch": 0.8471059337459046, "grad_norm": 4.165925979614258, "learning_rate": 3.097899520943373e-06, "loss": 0.4676305651664734, "step": 6981 }, { "epoch": 0.8472272782429316, "grad_norm": 3.1588294506073, "learning_rate": 3.0954428202923474e-06, "loss": 0.3364924192428589, "step": 6982 }, { "epoch": 0.8473486227399587, "grad_norm": 4.618885040283203, "learning_rate": 3.0929861196413217e-06, "loss": 0.306081622838974, "step": 6983 }, { "epoch": 0.8474699672369858, "grad_norm": 2.2341275215148926, "learning_rate": 3.0905294189902964e-06, "loss": 0.23107625544071198, "step": 6984 }, { "epoch": 0.8475913117340128, "grad_norm": 5.061868667602539, "learning_rate": 3.0880727183392706e-06, "loss": 0.1108107939362526, "step": 6985 }, { "epoch": 0.84771265623104, "grad_norm": 2.459580898284912, "learning_rate": 3.085616017688245e-06, "loss": 0.4878939986228943, "step": 6986 }, { "epoch": 0.847834000728067, "grad_norm": 2.6996302604675293, "learning_rate": 3.083159317037219e-06, "loss": 0.29952630400657654, "step": 6987 }, { "epoch": 0.8479553452250941, "grad_norm": 6.550355911254883, "learning_rate": 3.0807026163861934e-06, "loss": 0.31304818391799927, "step": 6988 }, { "epoch": 0.8480766897221211, "grad_norm": 2.9638619422912598, "learning_rate": 3.078245915735168e-06, "loss": 0.44451746344566345, "step": 6989 }, { "epoch": 0.8481980342191482, "grad_norm": 2.197995185852051, "learning_rate": 3.0757892150841423e-06, "loss": 0.11127583682537079, "step": 6990 }, { "epoch": 0.8483193787161752, "grad_norm": 5.024190902709961, "learning_rate": 3.0733325144331166e-06, "loss": 0.19716215133666992, "step": 6991 }, { "epoch": 0.8484407232132023, "grad_norm": 2.8712639808654785, "learning_rate": 3.070875813782091e-06, "loss": 0.14844805002212524, "step": 6992 }, { "epoch": 0.8485620677102294, "grad_norm": 2.8406758308410645, "learning_rate": 3.068419113131065e-06, "loss": 0.3424641191959381, "step": 6993 }, { "epoch": 0.8486834122072564, "grad_norm": 1.6260130405426025, "learning_rate": 3.06596241248004e-06, "loss": 0.21992406249046326, "step": 6994 }, { "epoch": 0.8488047567042835, "grad_norm": 2.5255558490753174, "learning_rate": 3.063505711829014e-06, "loss": 0.11587707698345184, "step": 6995 }, { "epoch": 0.8489261012013105, "grad_norm": 1.9023125171661377, "learning_rate": 3.0610490111779883e-06, "loss": 0.07392515987157822, "step": 6996 }, { "epoch": 0.8490474456983376, "grad_norm": 2.6726009845733643, "learning_rate": 3.0585923105269626e-06, "loss": 0.11547443270683289, "step": 6997 }, { "epoch": 0.8491687901953646, "grad_norm": 3.734428882598877, "learning_rate": 3.0561356098759373e-06, "loss": 0.41031309962272644, "step": 6998 }, { "epoch": 0.8492901346923917, "grad_norm": 3.3383021354675293, "learning_rate": 3.0536789092249115e-06, "loss": 0.31253552436828613, "step": 6999 }, { "epoch": 0.8494114791894187, "grad_norm": 2.654759645462036, "learning_rate": 3.0512222085738858e-06, "loss": 0.371398389339447, "step": 7000 }, { "epoch": 0.8495328236864458, "grad_norm": 1.6910996437072754, "learning_rate": 3.04876550792286e-06, "loss": 0.02691303938627243, "step": 7001 }, { "epoch": 0.8496541681834728, "grad_norm": 2.7468392848968506, "learning_rate": 3.0463088072718343e-06, "loss": 0.12048020958900452, "step": 7002 }, { "epoch": 0.8497755126804999, "grad_norm": 1.5235919952392578, "learning_rate": 3.043852106620809e-06, "loss": 0.2509908378124237, "step": 7003 }, { "epoch": 0.849896857177527, "grad_norm": 2.8356335163116455, "learning_rate": 3.0413954059697824e-06, "loss": 0.22163629531860352, "step": 7004 }, { "epoch": 0.8500182016745541, "grad_norm": 3.3203046321868896, "learning_rate": 3.038938705318757e-06, "loss": 0.07819177210330963, "step": 7005 }, { "epoch": 0.8501395461715812, "grad_norm": 2.584587574005127, "learning_rate": 3.0364820046677313e-06, "loss": 0.2889801859855652, "step": 7006 }, { "epoch": 0.8502608906686082, "grad_norm": 3.367771625518799, "learning_rate": 3.0340253040167056e-06, "loss": 0.269736111164093, "step": 7007 }, { "epoch": 0.8503822351656353, "grad_norm": 2.7965128421783447, "learning_rate": 3.03156860336568e-06, "loss": 0.342124342918396, "step": 7008 }, { "epoch": 0.8505035796626623, "grad_norm": 2.3814570903778076, "learning_rate": 3.029111902714654e-06, "loss": 0.3101362884044647, "step": 7009 }, { "epoch": 0.8506249241596894, "grad_norm": 3.514681100845337, "learning_rate": 3.0266552020636288e-06, "loss": 0.2990638017654419, "step": 7010 }, { "epoch": 0.8507462686567164, "grad_norm": 3.3331496715545654, "learning_rate": 3.024198501412603e-06, "loss": 0.3963875472545624, "step": 7011 }, { "epoch": 0.8508676131537435, "grad_norm": 4.082784652709961, "learning_rate": 3.0217418007615773e-06, "loss": 0.41762393712997437, "step": 7012 }, { "epoch": 0.8509889576507705, "grad_norm": 1.512162446975708, "learning_rate": 3.0192851001105516e-06, "loss": 0.10029443353414536, "step": 7013 }, { "epoch": 0.8511103021477976, "grad_norm": 3.4991228580474854, "learning_rate": 3.016828399459526e-06, "loss": 0.39079177379608154, "step": 7014 }, { "epoch": 0.8512316466448246, "grad_norm": 3.102940797805786, "learning_rate": 3.0143716988085005e-06, "loss": 0.5344756245613098, "step": 7015 }, { "epoch": 0.8513529911418517, "grad_norm": 1.2168452739715576, "learning_rate": 3.0119149981574748e-06, "loss": 0.04794701188802719, "step": 7016 }, { "epoch": 0.8514743356388788, "grad_norm": 1.6426223516464233, "learning_rate": 3.009458297506449e-06, "loss": 0.051560211926698685, "step": 7017 }, { "epoch": 0.8515956801359058, "grad_norm": 4.13529634475708, "learning_rate": 3.0070015968554233e-06, "loss": 0.580233633518219, "step": 7018 }, { "epoch": 0.8517170246329329, "grad_norm": 7.022589683532715, "learning_rate": 3.0045448962043975e-06, "loss": 0.23432393372058868, "step": 7019 }, { "epoch": 0.8518383691299599, "grad_norm": 1.2914435863494873, "learning_rate": 3.002088195553372e-06, "loss": 0.11873156577348709, "step": 7020 }, { "epoch": 0.851959713626987, "grad_norm": 2.664066791534424, "learning_rate": 2.9996314949023465e-06, "loss": 0.07818897068500519, "step": 7021 }, { "epoch": 0.852081058124014, "grad_norm": 1.9257783889770508, "learning_rate": 2.9971747942513207e-06, "loss": 0.11939866840839386, "step": 7022 }, { "epoch": 0.8522024026210412, "grad_norm": 4.344173908233643, "learning_rate": 2.994718093600295e-06, "loss": 0.21064193546772003, "step": 7023 }, { "epoch": 0.8523237471180682, "grad_norm": 0.4381919801235199, "learning_rate": 2.9922613929492692e-06, "loss": 0.008110599592328072, "step": 7024 }, { "epoch": 0.8524450916150953, "grad_norm": 3.920785665512085, "learning_rate": 2.989804692298244e-06, "loss": 0.3203832507133484, "step": 7025 }, { "epoch": 0.8525664361121224, "grad_norm": 3.3634207248687744, "learning_rate": 2.987347991647218e-06, "loss": 0.1917008012533188, "step": 7026 }, { "epoch": 0.8526877806091494, "grad_norm": 2.6357431411743164, "learning_rate": 2.9848912909961924e-06, "loss": 0.32172542810440063, "step": 7027 }, { "epoch": 0.8528091251061765, "grad_norm": 0.9338294267654419, "learning_rate": 2.9824345903451667e-06, "loss": 0.02059715799987316, "step": 7028 }, { "epoch": 0.8529304696032035, "grad_norm": 2.4373443126678467, "learning_rate": 2.979977889694141e-06, "loss": 0.35857200622558594, "step": 7029 }, { "epoch": 0.8530518141002306, "grad_norm": 2.0851542949676514, "learning_rate": 2.9775211890431156e-06, "loss": 0.037901174277067184, "step": 7030 }, { "epoch": 0.8531731585972576, "grad_norm": 2.705993413925171, "learning_rate": 2.97506448839209e-06, "loss": 0.3027760088443756, "step": 7031 }, { "epoch": 0.8532945030942847, "grad_norm": 5.604785919189453, "learning_rate": 2.972607787741064e-06, "loss": 0.5876014232635498, "step": 7032 }, { "epoch": 0.8534158475913117, "grad_norm": 3.0596961975097656, "learning_rate": 2.9701510870900384e-06, "loss": 0.28145304322242737, "step": 7033 }, { "epoch": 0.8535371920883388, "grad_norm": 3.0776116847991943, "learning_rate": 2.9676943864390127e-06, "loss": 0.2644558548927307, "step": 7034 }, { "epoch": 0.8536585365853658, "grad_norm": 2.528351068496704, "learning_rate": 2.9652376857879874e-06, "loss": 0.20479048788547516, "step": 7035 }, { "epoch": 0.8537798810823929, "grad_norm": 0.2610277235507965, "learning_rate": 2.9627809851369616e-06, "loss": 0.0016505885869264603, "step": 7036 }, { "epoch": 0.8539012255794199, "grad_norm": 1.899753451347351, "learning_rate": 2.960324284485936e-06, "loss": 0.0858369916677475, "step": 7037 }, { "epoch": 0.854022570076447, "grad_norm": 2.2260661125183105, "learning_rate": 2.9578675838349097e-06, "loss": 0.0841599851846695, "step": 7038 }, { "epoch": 0.854143914573474, "grad_norm": 2.2857778072357178, "learning_rate": 2.955410883183884e-06, "loss": 0.3642984926700592, "step": 7039 }, { "epoch": 0.8542652590705011, "grad_norm": 3.277188539505005, "learning_rate": 2.9529541825328582e-06, "loss": 0.389038622379303, "step": 7040 }, { "epoch": 0.8543866035675282, "grad_norm": 3.034886360168457, "learning_rate": 2.950497481881833e-06, "loss": 0.4886973798274994, "step": 7041 }, { "epoch": 0.8545079480645553, "grad_norm": 2.2855474948883057, "learning_rate": 2.948040781230807e-06, "loss": 0.4565061926841736, "step": 7042 }, { "epoch": 0.8546292925615824, "grad_norm": 2.857999324798584, "learning_rate": 2.9455840805797814e-06, "loss": 0.24616867303848267, "step": 7043 }, { "epoch": 0.8547506370586094, "grad_norm": 1.3700459003448486, "learning_rate": 2.9431273799287557e-06, "loss": 0.08694622665643692, "step": 7044 }, { "epoch": 0.8548719815556365, "grad_norm": 3.356945753097534, "learning_rate": 2.94067067927773e-06, "loss": 0.37518659234046936, "step": 7045 }, { "epoch": 0.8549933260526635, "grad_norm": 3.661881923675537, "learning_rate": 2.9382139786267046e-06, "loss": 0.829643964767456, "step": 7046 }, { "epoch": 0.8551146705496906, "grad_norm": 2.47060227394104, "learning_rate": 2.935757277975679e-06, "loss": 0.24145224690437317, "step": 7047 }, { "epoch": 0.8552360150467176, "grad_norm": 9.078522682189941, "learning_rate": 2.933300577324653e-06, "loss": 0.5664098858833313, "step": 7048 }, { "epoch": 0.8553573595437447, "grad_norm": 1.5566760301589966, "learning_rate": 2.9308438766736274e-06, "loss": 0.10271979868412018, "step": 7049 }, { "epoch": 0.8554787040407718, "grad_norm": 2.8408308029174805, "learning_rate": 2.9283871760226017e-06, "loss": 0.4013898968696594, "step": 7050 }, { "epoch": 0.8556000485377988, "grad_norm": 4.113753318786621, "learning_rate": 2.9259304753715763e-06, "loss": 0.024133479222655296, "step": 7051 }, { "epoch": 0.8557213930348259, "grad_norm": 1.789657473564148, "learning_rate": 2.9234737747205506e-06, "loss": 0.4821186065673828, "step": 7052 }, { "epoch": 0.8558427375318529, "grad_norm": 0.9499931931495667, "learning_rate": 2.921017074069525e-06, "loss": 0.007942973636090755, "step": 7053 }, { "epoch": 0.85596408202888, "grad_norm": 2.5588266849517822, "learning_rate": 2.918560373418499e-06, "loss": 0.5921562910079956, "step": 7054 }, { "epoch": 0.856085426525907, "grad_norm": 2.585763454437256, "learning_rate": 2.9161036727674734e-06, "loss": 0.23923486471176147, "step": 7055 }, { "epoch": 0.8562067710229341, "grad_norm": 2.9194765090942383, "learning_rate": 2.913646972116448e-06, "loss": 0.22150197625160217, "step": 7056 }, { "epoch": 0.8563281155199611, "grad_norm": 1.6685653924942017, "learning_rate": 2.9111902714654223e-06, "loss": 0.13064536452293396, "step": 7057 }, { "epoch": 0.8564494600169882, "grad_norm": 2.979707956314087, "learning_rate": 2.9087335708143966e-06, "loss": 0.33076491951942444, "step": 7058 }, { "epoch": 0.8565708045140152, "grad_norm": 1.997219204902649, "learning_rate": 2.906276870163371e-06, "loss": 0.15862685441970825, "step": 7059 }, { "epoch": 0.8566921490110424, "grad_norm": 0.7270478010177612, "learning_rate": 2.903820169512345e-06, "loss": 0.010279606096446514, "step": 7060 }, { "epoch": 0.8568134935080695, "grad_norm": 2.8859310150146484, "learning_rate": 2.9013634688613198e-06, "loss": 0.12126273661851883, "step": 7061 }, { "epoch": 0.8569348380050965, "grad_norm": 0.7463700175285339, "learning_rate": 2.898906768210294e-06, "loss": 0.0066599538549780846, "step": 7062 }, { "epoch": 0.8570561825021236, "grad_norm": 1.9388129711151123, "learning_rate": 2.8964500675592683e-06, "loss": 0.10856817662715912, "step": 7063 }, { "epoch": 0.8571775269991506, "grad_norm": 1.8254261016845703, "learning_rate": 2.8939933669082425e-06, "loss": 0.05702649801969528, "step": 7064 }, { "epoch": 0.8572988714961777, "grad_norm": 3.8804454803466797, "learning_rate": 2.891536666257217e-06, "loss": 0.5377075672149658, "step": 7065 }, { "epoch": 0.8574202159932047, "grad_norm": 3.1177656650543213, "learning_rate": 2.8890799656061915e-06, "loss": 0.05281057208776474, "step": 7066 }, { "epoch": 0.8575415604902318, "grad_norm": 2.8817882537841797, "learning_rate": 2.8866232649551657e-06, "loss": 0.1622104048728943, "step": 7067 }, { "epoch": 0.8576629049872588, "grad_norm": 1.9447433948516846, "learning_rate": 2.88416656430414e-06, "loss": 0.19133144617080688, "step": 7068 }, { "epoch": 0.8577842494842859, "grad_norm": 4.5952863693237305, "learning_rate": 2.8817098636531143e-06, "loss": 0.27357247471809387, "step": 7069 }, { "epoch": 0.8579055939813129, "grad_norm": 2.105983018875122, "learning_rate": 2.8792531630020885e-06, "loss": 0.0819065049290657, "step": 7070 }, { "epoch": 0.85802693847834, "grad_norm": 2.3149921894073486, "learning_rate": 2.876796462351063e-06, "loss": 0.03273585066199303, "step": 7071 }, { "epoch": 0.858148282975367, "grad_norm": 1.9907177686691284, "learning_rate": 2.8743397617000366e-06, "loss": 0.35368597507476807, "step": 7072 }, { "epoch": 0.8582696274723941, "grad_norm": 3.588503837585449, "learning_rate": 2.8718830610490113e-06, "loss": 0.21179752051830292, "step": 7073 }, { "epoch": 0.8583909719694212, "grad_norm": 4.482846260070801, "learning_rate": 2.8694263603979855e-06, "loss": 0.34688422083854675, "step": 7074 }, { "epoch": 0.8585123164664482, "grad_norm": 3.830953598022461, "learning_rate": 2.86696965974696e-06, "loss": 0.5369146466255188, "step": 7075 }, { "epoch": 0.8586336609634753, "grad_norm": 3.5696070194244385, "learning_rate": 2.864512959095934e-06, "loss": 0.4569949805736542, "step": 7076 }, { "epoch": 0.8587550054605023, "grad_norm": 3.8767800331115723, "learning_rate": 2.8620562584449087e-06, "loss": 0.4168549180030823, "step": 7077 }, { "epoch": 0.8588763499575294, "grad_norm": 3.625641107559204, "learning_rate": 2.859599557793883e-06, "loss": 0.2185884267091751, "step": 7078 }, { "epoch": 0.8589976944545565, "grad_norm": 2.705265998840332, "learning_rate": 2.8571428571428573e-06, "loss": 0.20235607028007507, "step": 7079 }, { "epoch": 0.8591190389515836, "grad_norm": 3.8683249950408936, "learning_rate": 2.8546861564918315e-06, "loss": 0.1019795835018158, "step": 7080 }, { "epoch": 0.8592403834486106, "grad_norm": 2.407533884048462, "learning_rate": 2.8522294558408058e-06, "loss": 0.14181159436702728, "step": 7081 }, { "epoch": 0.8593617279456377, "grad_norm": 3.3294122219085693, "learning_rate": 2.8497727551897805e-06, "loss": 0.1594405323266983, "step": 7082 }, { "epoch": 0.8594830724426648, "grad_norm": 2.5036275386810303, "learning_rate": 2.8473160545387547e-06, "loss": 0.09559286385774612, "step": 7083 }, { "epoch": 0.8596044169396918, "grad_norm": 2.876209259033203, "learning_rate": 2.844859353887729e-06, "loss": 0.20865465700626373, "step": 7084 }, { "epoch": 0.8597257614367189, "grad_norm": 4.647447109222412, "learning_rate": 2.8424026532367032e-06, "loss": 0.10300012677907944, "step": 7085 }, { "epoch": 0.8598471059337459, "grad_norm": 2.2687220573425293, "learning_rate": 2.8399459525856775e-06, "loss": 0.09592045843601227, "step": 7086 }, { "epoch": 0.859968450430773, "grad_norm": 2.4750635623931885, "learning_rate": 2.837489251934652e-06, "loss": 0.3558158874511719, "step": 7087 }, { "epoch": 0.8600897949278, "grad_norm": 2.3530735969543457, "learning_rate": 2.8350325512836264e-06, "loss": 0.19091293215751648, "step": 7088 }, { "epoch": 0.8602111394248271, "grad_norm": 2.2834043502807617, "learning_rate": 2.8325758506326007e-06, "loss": 0.21620085835456848, "step": 7089 }, { "epoch": 0.8603324839218541, "grad_norm": 3.6632115840911865, "learning_rate": 2.830119149981575e-06, "loss": 0.25205421447753906, "step": 7090 }, { "epoch": 0.8604538284188812, "grad_norm": 3.3758699893951416, "learning_rate": 2.827662449330549e-06, "loss": 0.3692517876625061, "step": 7091 }, { "epoch": 0.8605751729159082, "grad_norm": 4.025643348693848, "learning_rate": 2.825205748679524e-06, "loss": 0.5240206122398376, "step": 7092 }, { "epoch": 0.8606965174129353, "grad_norm": 2.696481704711914, "learning_rate": 2.822749048028498e-06, "loss": 0.456709086894989, "step": 7093 }, { "epoch": 0.8608178619099623, "grad_norm": 2.495738983154297, "learning_rate": 2.8202923473774724e-06, "loss": 0.11835456639528275, "step": 7094 }, { "epoch": 0.8609392064069894, "grad_norm": 3.6897764205932617, "learning_rate": 2.8178356467264467e-06, "loss": 0.28914886713027954, "step": 7095 }, { "epoch": 0.8610605509040165, "grad_norm": 2.64186954498291, "learning_rate": 2.815378946075421e-06, "loss": 0.18223711848258972, "step": 7096 }, { "epoch": 0.8611818954010436, "grad_norm": 2.0327646732330322, "learning_rate": 2.8129222454243956e-06, "loss": 0.21361568570137024, "step": 7097 }, { "epoch": 0.8613032398980707, "grad_norm": 2.719067096710205, "learning_rate": 2.81046554477337e-06, "loss": 0.26090946793556213, "step": 7098 }, { "epoch": 0.8614245843950977, "grad_norm": 1.9690985679626465, "learning_rate": 2.808008844122344e-06, "loss": 0.17356324195861816, "step": 7099 }, { "epoch": 0.8615459288921248, "grad_norm": 2.230926275253296, "learning_rate": 2.8055521434713184e-06, "loss": 0.07068397849798203, "step": 7100 }, { "epoch": 0.8616672733891518, "grad_norm": 2.1213841438293457, "learning_rate": 2.8030954428202926e-06, "loss": 0.20684444904327393, "step": 7101 }, { "epoch": 0.8617886178861789, "grad_norm": 2.7514796257019043, "learning_rate": 2.8006387421692673e-06, "loss": 0.45711928606033325, "step": 7102 }, { "epoch": 0.8619099623832059, "grad_norm": 1.9763429164886475, "learning_rate": 2.7981820415182416e-06, "loss": 0.3582599461078644, "step": 7103 }, { "epoch": 0.862031306880233, "grad_norm": 4.48066520690918, "learning_rate": 2.795725340867216e-06, "loss": 0.1828390657901764, "step": 7104 }, { "epoch": 0.86215265137726, "grad_norm": 1.9051178693771362, "learning_rate": 2.79326864021619e-06, "loss": 0.21242202818393707, "step": 7105 }, { "epoch": 0.8622739958742871, "grad_norm": 4.259411811828613, "learning_rate": 2.790811939565164e-06, "loss": 0.3332081437110901, "step": 7106 }, { "epoch": 0.8623953403713142, "grad_norm": 3.2528862953186035, "learning_rate": 2.788355238914138e-06, "loss": 0.5307045578956604, "step": 7107 }, { "epoch": 0.8625166848683412, "grad_norm": 4.3195977210998535, "learning_rate": 2.7858985382631124e-06, "loss": 0.27847328782081604, "step": 7108 }, { "epoch": 0.8626380293653683, "grad_norm": 2.5798733234405518, "learning_rate": 2.783441837612087e-06, "loss": 0.19586151838302612, "step": 7109 }, { "epoch": 0.8627593738623953, "grad_norm": 2.59307599067688, "learning_rate": 2.7809851369610614e-06, "loss": 0.05180683732032776, "step": 7110 }, { "epoch": 0.8628807183594224, "grad_norm": 3.1613094806671143, "learning_rate": 2.7785284363100356e-06, "loss": 0.4816007614135742, "step": 7111 }, { "epoch": 0.8630020628564494, "grad_norm": 4.004734516143799, "learning_rate": 2.77607173565901e-06, "loss": 0.450778603553772, "step": 7112 }, { "epoch": 0.8631234073534765, "grad_norm": 5.115476608276367, "learning_rate": 2.773615035007984e-06, "loss": 0.32343369722366333, "step": 7113 }, { "epoch": 0.8632447518505035, "grad_norm": 2.875621795654297, "learning_rate": 2.771158334356959e-06, "loss": 0.34808868169784546, "step": 7114 }, { "epoch": 0.8633660963475306, "grad_norm": 4.0104169845581055, "learning_rate": 2.768701633705933e-06, "loss": 0.16032248735427856, "step": 7115 }, { "epoch": 0.8634874408445578, "grad_norm": 3.4452435970306396, "learning_rate": 2.7662449330549073e-06, "loss": 0.23121321201324463, "step": 7116 }, { "epoch": 0.8636087853415848, "grad_norm": 0.046863678842782974, "learning_rate": 2.7637882324038816e-06, "loss": 0.00034937585587613285, "step": 7117 }, { "epoch": 0.8637301298386119, "grad_norm": 3.7444040775299072, "learning_rate": 2.761331531752856e-06, "loss": 0.263185977935791, "step": 7118 }, { "epoch": 0.8638514743356389, "grad_norm": 2.488030195236206, "learning_rate": 2.7588748311018305e-06, "loss": 0.04321906715631485, "step": 7119 }, { "epoch": 0.863972818832666, "grad_norm": 3.2163314819335938, "learning_rate": 2.756418130450805e-06, "loss": 0.4688873291015625, "step": 7120 }, { "epoch": 0.864094163329693, "grad_norm": 2.115067720413208, "learning_rate": 2.753961429799779e-06, "loss": 0.10651101171970367, "step": 7121 }, { "epoch": 0.8642155078267201, "grad_norm": 1.5948396921157837, "learning_rate": 2.7515047291487533e-06, "loss": 0.03486703336238861, "step": 7122 }, { "epoch": 0.8643368523237471, "grad_norm": 4.831717014312744, "learning_rate": 2.7490480284977276e-06, "loss": 0.3072448670864105, "step": 7123 }, { "epoch": 0.8644581968207742, "grad_norm": 2.841844081878662, "learning_rate": 2.7465913278467023e-06, "loss": 0.04939646273851395, "step": 7124 }, { "epoch": 0.8645795413178012, "grad_norm": 2.407266855239868, "learning_rate": 2.7441346271956765e-06, "loss": 0.1447884887456894, "step": 7125 }, { "epoch": 0.8647008858148283, "grad_norm": 2.4702091217041016, "learning_rate": 2.7416779265446508e-06, "loss": 0.4155519902706146, "step": 7126 }, { "epoch": 0.8648222303118553, "grad_norm": 1.9849815368652344, "learning_rate": 2.739221225893625e-06, "loss": 0.10303430259227753, "step": 7127 }, { "epoch": 0.8649435748088824, "grad_norm": 3.1493022441864014, "learning_rate": 2.7367645252425993e-06, "loss": 0.2294466644525528, "step": 7128 }, { "epoch": 0.8650649193059095, "grad_norm": 3.9424824714660645, "learning_rate": 2.734307824591574e-06, "loss": 0.2790471911430359, "step": 7129 }, { "epoch": 0.8651862638029365, "grad_norm": 2.780045747756958, "learning_rate": 2.7318511239405482e-06, "loss": 0.22282983362674713, "step": 7130 }, { "epoch": 0.8653076082999636, "grad_norm": 1.0326614379882812, "learning_rate": 2.7293944232895225e-06, "loss": 0.023292383179068565, "step": 7131 }, { "epoch": 0.8654289527969906, "grad_norm": 3.3221399784088135, "learning_rate": 2.7269377226384968e-06, "loss": 0.22073718905448914, "step": 7132 }, { "epoch": 0.8655502972940177, "grad_norm": 3.544674873352051, "learning_rate": 2.724481021987471e-06, "loss": 0.3137612044811249, "step": 7133 }, { "epoch": 0.8656716417910447, "grad_norm": 2.724566698074341, "learning_rate": 2.7220243213364457e-06, "loss": 0.4036012291908264, "step": 7134 }, { "epoch": 0.8657929862880719, "grad_norm": 2.6269707679748535, "learning_rate": 2.71956762068542e-06, "loss": 0.2432948350906372, "step": 7135 }, { "epoch": 0.8659143307850989, "grad_norm": 2.325350522994995, "learning_rate": 2.717110920034394e-06, "loss": 0.1763550341129303, "step": 7136 }, { "epoch": 0.866035675282126, "grad_norm": 2.9463956356048584, "learning_rate": 2.7146542193833685e-06, "loss": 0.14132045209407806, "step": 7137 }, { "epoch": 0.866157019779153, "grad_norm": 3.3343312740325928, "learning_rate": 2.712197518732343e-06, "loss": 0.21295496821403503, "step": 7138 }, { "epoch": 0.8662783642761801, "grad_norm": 2.4285316467285156, "learning_rate": 2.7097408180813174e-06, "loss": 0.3640367388725281, "step": 7139 }, { "epoch": 0.8663997087732072, "grad_norm": 2.2766284942626953, "learning_rate": 2.7072841174302912e-06, "loss": 0.3269546627998352, "step": 7140 }, { "epoch": 0.8665210532702342, "grad_norm": 2.1505331993103027, "learning_rate": 2.7048274167792655e-06, "loss": 0.06415476649999619, "step": 7141 }, { "epoch": 0.8666423977672613, "grad_norm": 2.5312998294830322, "learning_rate": 2.7023707161282398e-06, "loss": 0.18168886005878448, "step": 7142 }, { "epoch": 0.8667637422642883, "grad_norm": 2.7288007736206055, "learning_rate": 2.699914015477214e-06, "loss": 0.05534587427973747, "step": 7143 }, { "epoch": 0.8668850867613154, "grad_norm": 3.191704511642456, "learning_rate": 2.6974573148261883e-06, "loss": 0.10829851776361465, "step": 7144 }, { "epoch": 0.8670064312583424, "grad_norm": 2.577655076980591, "learning_rate": 2.695000614175163e-06, "loss": 0.3686380386352539, "step": 7145 }, { "epoch": 0.8671277757553695, "grad_norm": 3.830982208251953, "learning_rate": 2.6925439135241372e-06, "loss": 0.33678144216537476, "step": 7146 }, { "epoch": 0.8672491202523965, "grad_norm": 3.421431064605713, "learning_rate": 2.6900872128731115e-06, "loss": 0.15217962861061096, "step": 7147 }, { "epoch": 0.8673704647494236, "grad_norm": 4.527274131774902, "learning_rate": 2.6876305122220857e-06, "loss": 0.4409812092781067, "step": 7148 }, { "epoch": 0.8674918092464506, "grad_norm": 3.528879165649414, "learning_rate": 2.68517381157106e-06, "loss": 0.308748334646225, "step": 7149 }, { "epoch": 0.8676131537434777, "grad_norm": 2.3364856243133545, "learning_rate": 2.6827171109200347e-06, "loss": 0.2512088418006897, "step": 7150 }, { "epoch": 0.8677344982405047, "grad_norm": 2.7550840377807617, "learning_rate": 2.680260410269009e-06, "loss": 0.2519649863243103, "step": 7151 }, { "epoch": 0.8678558427375318, "grad_norm": 3.6409289836883545, "learning_rate": 2.677803709617983e-06, "loss": 0.24921074509620667, "step": 7152 }, { "epoch": 0.867977187234559, "grad_norm": 7.678958892822266, "learning_rate": 2.6753470089669574e-06, "loss": 0.280142605304718, "step": 7153 }, { "epoch": 0.868098531731586, "grad_norm": 2.580979824066162, "learning_rate": 2.6728903083159317e-06, "loss": 0.1728818118572235, "step": 7154 }, { "epoch": 0.8682198762286131, "grad_norm": 3.392606496810913, "learning_rate": 2.6704336076649064e-06, "loss": 0.26491957902908325, "step": 7155 }, { "epoch": 0.8683412207256401, "grad_norm": 2.1183362007141113, "learning_rate": 2.6679769070138806e-06, "loss": 0.5120954513549805, "step": 7156 }, { "epoch": 0.8684625652226672, "grad_norm": 3.032524585723877, "learning_rate": 2.665520206362855e-06, "loss": 0.2381318211555481, "step": 7157 }, { "epoch": 0.8685839097196942, "grad_norm": 3.982102155685425, "learning_rate": 2.663063505711829e-06, "loss": 0.318340003490448, "step": 7158 }, { "epoch": 0.8687052542167213, "grad_norm": 4.076990127563477, "learning_rate": 2.6606068050608034e-06, "loss": 0.3346697688102722, "step": 7159 }, { "epoch": 0.8688265987137483, "grad_norm": 2.558311939239502, "learning_rate": 2.658150104409778e-06, "loss": 0.20165614783763885, "step": 7160 }, { "epoch": 0.8689479432107754, "grad_norm": 4.396156311035156, "learning_rate": 2.6556934037587524e-06, "loss": 0.1303500235080719, "step": 7161 }, { "epoch": 0.8690692877078025, "grad_norm": 2.608095407485962, "learning_rate": 2.6532367031077266e-06, "loss": 0.22743166983127594, "step": 7162 }, { "epoch": 0.8691906322048295, "grad_norm": 2.9021799564361572, "learning_rate": 2.650780002456701e-06, "loss": 0.13229672610759735, "step": 7163 }, { "epoch": 0.8693119767018566, "grad_norm": 3.394623279571533, "learning_rate": 2.648323301805675e-06, "loss": 0.26138249039649963, "step": 7164 }, { "epoch": 0.8694333211988836, "grad_norm": 4.23438024520874, "learning_rate": 2.64586660115465e-06, "loss": 0.34015390276908875, "step": 7165 }, { "epoch": 0.8695546656959107, "grad_norm": 1.9342962503433228, "learning_rate": 2.643409900503624e-06, "loss": 0.10184457153081894, "step": 7166 }, { "epoch": 0.8696760101929377, "grad_norm": 2.0839011669158936, "learning_rate": 2.6409531998525983e-06, "loss": 0.0730535164475441, "step": 7167 }, { "epoch": 0.8697973546899648, "grad_norm": 2.576525926589966, "learning_rate": 2.6384964992015726e-06, "loss": 0.2988077700138092, "step": 7168 }, { "epoch": 0.8699186991869918, "grad_norm": 1.9121394157409668, "learning_rate": 2.636039798550547e-06, "loss": 0.09420738369226456, "step": 7169 }, { "epoch": 0.8700400436840189, "grad_norm": 2.4156837463378906, "learning_rate": 2.6335830978995215e-06, "loss": 0.47344744205474854, "step": 7170 }, { "epoch": 0.8701613881810459, "grad_norm": 2.564854621887207, "learning_rate": 2.6311263972484958e-06, "loss": 0.22413146495819092, "step": 7171 }, { "epoch": 0.8702827326780731, "grad_norm": 0.6509429812431335, "learning_rate": 2.62866969659747e-06, "loss": 0.01984437182545662, "step": 7172 }, { "epoch": 0.8704040771751002, "grad_norm": 3.0104944705963135, "learning_rate": 2.6262129959464443e-06, "loss": 0.24897661805152893, "step": 7173 }, { "epoch": 0.8705254216721272, "grad_norm": 4.974316120147705, "learning_rate": 2.623756295295418e-06, "loss": 0.49699342250823975, "step": 7174 }, { "epoch": 0.8706467661691543, "grad_norm": 3.752211332321167, "learning_rate": 2.6212995946443924e-06, "loss": 0.31734195351600647, "step": 7175 }, { "epoch": 0.8707681106661813, "grad_norm": 3.9235215187072754, "learning_rate": 2.618842893993367e-06, "loss": 0.4012463688850403, "step": 7176 }, { "epoch": 0.8708894551632084, "grad_norm": 5.576437950134277, "learning_rate": 2.6163861933423413e-06, "loss": 0.1708105355501175, "step": 7177 }, { "epoch": 0.8710107996602354, "grad_norm": 3.679490327835083, "learning_rate": 2.6139294926913156e-06, "loss": 0.4382195770740509, "step": 7178 }, { "epoch": 0.8711321441572625, "grad_norm": 5.257097244262695, "learning_rate": 2.61147279204029e-06, "loss": 0.23268331587314606, "step": 7179 }, { "epoch": 0.8712534886542895, "grad_norm": 2.1600453853607178, "learning_rate": 2.609016091389264e-06, "loss": 0.03982246667146683, "step": 7180 }, { "epoch": 0.8713748331513166, "grad_norm": 3.4499950408935547, "learning_rate": 2.606559390738239e-06, "loss": 0.4340933561325073, "step": 7181 }, { "epoch": 0.8714961776483436, "grad_norm": 4.456201553344727, "learning_rate": 2.604102690087213e-06, "loss": 0.31439143419265747, "step": 7182 }, { "epoch": 0.8716175221453707, "grad_norm": 4.347347736358643, "learning_rate": 2.6016459894361873e-06, "loss": 0.7838672399520874, "step": 7183 }, { "epoch": 0.8717388666423977, "grad_norm": 3.8891732692718506, "learning_rate": 2.5991892887851616e-06, "loss": 0.14060290157794952, "step": 7184 }, { "epoch": 0.8718602111394248, "grad_norm": 4.11174201965332, "learning_rate": 2.596732588134136e-06, "loss": 0.5083247423171997, "step": 7185 }, { "epoch": 0.8719815556364519, "grad_norm": 2.438664197921753, "learning_rate": 2.5942758874831105e-06, "loss": 0.06622228771448135, "step": 7186 }, { "epoch": 0.8721029001334789, "grad_norm": 2.319875478744507, "learning_rate": 2.5918191868320848e-06, "loss": 0.610319972038269, "step": 7187 }, { "epoch": 0.872224244630506, "grad_norm": 1.8762333393096924, "learning_rate": 2.589362486181059e-06, "loss": 0.0952775850892067, "step": 7188 }, { "epoch": 0.872345589127533, "grad_norm": 2.3073575496673584, "learning_rate": 2.5869057855300333e-06, "loss": 0.22657442092895508, "step": 7189 }, { "epoch": 0.8724669336245602, "grad_norm": 2.1056783199310303, "learning_rate": 2.5844490848790075e-06, "loss": 0.304258793592453, "step": 7190 }, { "epoch": 0.8725882781215872, "grad_norm": 2.9154739379882812, "learning_rate": 2.5819923842279822e-06, "loss": 0.06888644397258759, "step": 7191 }, { "epoch": 0.8727096226186143, "grad_norm": 2.7751946449279785, "learning_rate": 2.5795356835769565e-06, "loss": 0.14578725397586823, "step": 7192 }, { "epoch": 0.8728309671156413, "grad_norm": 0.8267895579338074, "learning_rate": 2.5770789829259307e-06, "loss": 0.017587192356586456, "step": 7193 }, { "epoch": 0.8729523116126684, "grad_norm": 4.077385902404785, "learning_rate": 2.574622282274905e-06, "loss": 0.3297792077064514, "step": 7194 }, { "epoch": 0.8730736561096955, "grad_norm": 2.6292121410369873, "learning_rate": 2.5721655816238793e-06, "loss": 0.1940331906080246, "step": 7195 }, { "epoch": 0.8731950006067225, "grad_norm": 3.625981330871582, "learning_rate": 2.569708880972854e-06, "loss": 0.2088521271944046, "step": 7196 }, { "epoch": 0.8733163451037496, "grad_norm": 2.815002679824829, "learning_rate": 2.567252180321828e-06, "loss": 0.576909065246582, "step": 7197 }, { "epoch": 0.8734376896007766, "grad_norm": 3.721240758895874, "learning_rate": 2.5647954796708025e-06, "loss": 0.309369295835495, "step": 7198 }, { "epoch": 0.8735590340978037, "grad_norm": 2.2218291759490967, "learning_rate": 2.5623387790197767e-06, "loss": 0.24099940061569214, "step": 7199 }, { "epoch": 0.8736803785948307, "grad_norm": 1.8637815713882446, "learning_rate": 2.559882078368751e-06, "loss": 0.2856712341308594, "step": 7200 }, { "epoch": 0.8738017230918578, "grad_norm": 2.5239310264587402, "learning_rate": 2.5574253777177256e-06, "loss": 0.4822431206703186, "step": 7201 }, { "epoch": 0.8739230675888848, "grad_norm": 3.1314783096313477, "learning_rate": 2.5549686770667e-06, "loss": 0.3668977618217468, "step": 7202 }, { "epoch": 0.8740444120859119, "grad_norm": 4.325174331665039, "learning_rate": 2.552511976415674e-06, "loss": 0.9459158182144165, "step": 7203 }, { "epoch": 0.8741657565829389, "grad_norm": 3.116748809814453, "learning_rate": 2.5500552757646484e-06, "loss": 0.4748590588569641, "step": 7204 }, { "epoch": 0.874287101079966, "grad_norm": 2.7933132648468018, "learning_rate": 2.5475985751136227e-06, "loss": 0.09355674684047699, "step": 7205 }, { "epoch": 0.874408445576993, "grad_norm": 2.865896224975586, "learning_rate": 2.5451418744625974e-06, "loss": 0.21055704355239868, "step": 7206 }, { "epoch": 0.8745297900740201, "grad_norm": 1.6679754257202148, "learning_rate": 2.5426851738115716e-06, "loss": 0.12651997804641724, "step": 7207 }, { "epoch": 0.8746511345710472, "grad_norm": 4.246089935302734, "learning_rate": 2.5402284731605455e-06, "loss": 0.24046117067337036, "step": 7208 }, { "epoch": 0.8747724790680743, "grad_norm": 3.025325059890747, "learning_rate": 2.5377717725095197e-06, "loss": 0.490458220243454, "step": 7209 }, { "epoch": 0.8748938235651014, "grad_norm": 1.6394269466400146, "learning_rate": 2.535315071858494e-06, "loss": 0.12201985716819763, "step": 7210 }, { "epoch": 0.8750151680621284, "grad_norm": 2.6317739486694336, "learning_rate": 2.5328583712074682e-06, "loss": 0.3419342041015625, "step": 7211 }, { "epoch": 0.8751365125591555, "grad_norm": 2.8906447887420654, "learning_rate": 2.530401670556443e-06, "loss": 0.4393903613090515, "step": 7212 }, { "epoch": 0.8752578570561825, "grad_norm": 3.5665674209594727, "learning_rate": 2.527944969905417e-06, "loss": 0.5320695638656616, "step": 7213 }, { "epoch": 0.8753792015532096, "grad_norm": 2.9178922176361084, "learning_rate": 2.5254882692543914e-06, "loss": 0.41213804483413696, "step": 7214 }, { "epoch": 0.8755005460502366, "grad_norm": 3.6075122356414795, "learning_rate": 2.5230315686033657e-06, "loss": 0.2531580924987793, "step": 7215 }, { "epoch": 0.8756218905472637, "grad_norm": 2.9588119983673096, "learning_rate": 2.52057486795234e-06, "loss": 0.4362886846065521, "step": 7216 }, { "epoch": 0.8757432350442907, "grad_norm": 4.757589340209961, "learning_rate": 2.5181181673013146e-06, "loss": 0.6258336305618286, "step": 7217 }, { "epoch": 0.8758645795413178, "grad_norm": 2.9549386501312256, "learning_rate": 2.515661466650289e-06, "loss": 0.31738391518592834, "step": 7218 }, { "epoch": 0.8759859240383449, "grad_norm": 3.611607074737549, "learning_rate": 2.513204765999263e-06, "loss": 0.5804440975189209, "step": 7219 }, { "epoch": 0.8761072685353719, "grad_norm": 2.2248311042785645, "learning_rate": 2.5107480653482374e-06, "loss": 0.4223018288612366, "step": 7220 }, { "epoch": 0.876228613032399, "grad_norm": 1.3377211093902588, "learning_rate": 2.5082913646972117e-06, "loss": 0.02766583301126957, "step": 7221 }, { "epoch": 0.876349957529426, "grad_norm": 3.478156089782715, "learning_rate": 2.5058346640461863e-06, "loss": 0.4316959083080292, "step": 7222 }, { "epoch": 0.8764713020264531, "grad_norm": 3.1435372829437256, "learning_rate": 2.5033779633951606e-06, "loss": 0.5785991549491882, "step": 7223 }, { "epoch": 0.8765926465234801, "grad_norm": 2.8768720626831055, "learning_rate": 2.500921262744135e-06, "loss": 0.1657986044883728, "step": 7224 }, { "epoch": 0.8767139910205072, "grad_norm": 2.585378885269165, "learning_rate": 2.498464562093109e-06, "loss": 0.06901342421770096, "step": 7225 }, { "epoch": 0.8768353355175342, "grad_norm": 3.6780240535736084, "learning_rate": 2.4960078614420834e-06, "loss": 0.256317675113678, "step": 7226 }, { "epoch": 0.8769566800145613, "grad_norm": 2.7045629024505615, "learning_rate": 2.493551160791058e-06, "loss": 0.5429891347885132, "step": 7227 }, { "epoch": 0.8770780245115884, "grad_norm": 0.005706280469894409, "learning_rate": 2.4910944601400323e-06, "loss": 6.110382673796266e-05, "step": 7228 }, { "epoch": 0.8771993690086155, "grad_norm": 2.9794600009918213, "learning_rate": 2.4886377594890066e-06, "loss": 0.41533488035202026, "step": 7229 }, { "epoch": 0.8773207135056426, "grad_norm": 4.213878631591797, "learning_rate": 2.486181058837981e-06, "loss": 0.15909931063652039, "step": 7230 }, { "epoch": 0.8774420580026696, "grad_norm": 3.3542065620422363, "learning_rate": 2.483724358186955e-06, "loss": 0.20584966242313385, "step": 7231 }, { "epoch": 0.8775634024996967, "grad_norm": 3.307152032852173, "learning_rate": 2.4812676575359298e-06, "loss": 0.18035940825939178, "step": 7232 }, { "epoch": 0.8776847469967237, "grad_norm": 2.5734260082244873, "learning_rate": 2.4788109568849036e-06, "loss": 0.20854441821575165, "step": 7233 }, { "epoch": 0.8778060914937508, "grad_norm": 2.2857789993286133, "learning_rate": 2.476354256233878e-06, "loss": 0.1527138352394104, "step": 7234 }, { "epoch": 0.8779274359907778, "grad_norm": 2.3170769214630127, "learning_rate": 2.473897555582852e-06, "loss": 0.27472570538520813, "step": 7235 }, { "epoch": 0.8780487804878049, "grad_norm": 1.1254174709320068, "learning_rate": 2.471440854931827e-06, "loss": 0.2133559137582779, "step": 7236 }, { "epoch": 0.8781701249848319, "grad_norm": 2.722909927368164, "learning_rate": 2.468984154280801e-06, "loss": 0.14660637080669403, "step": 7237 }, { "epoch": 0.878291469481859, "grad_norm": 0.759692907333374, "learning_rate": 2.4665274536297753e-06, "loss": 0.011479537934064865, "step": 7238 }, { "epoch": 0.878412813978886, "grad_norm": 4.913758754730225, "learning_rate": 2.4640707529787496e-06, "loss": 0.1934860497713089, "step": 7239 }, { "epoch": 0.8785341584759131, "grad_norm": 16.62023162841797, "learning_rate": 2.461614052327724e-06, "loss": 0.1819230169057846, "step": 7240 }, { "epoch": 0.8786555029729402, "grad_norm": 2.2639732360839844, "learning_rate": 2.4591573516766985e-06, "loss": 0.14490516483783722, "step": 7241 }, { "epoch": 0.8787768474699672, "grad_norm": 2.4609289169311523, "learning_rate": 2.4567006510256728e-06, "loss": 0.07475931197404861, "step": 7242 }, { "epoch": 0.8788981919669943, "grad_norm": 2.25115966796875, "learning_rate": 2.454243950374647e-06, "loss": 0.19221821427345276, "step": 7243 }, { "epoch": 0.8790195364640213, "grad_norm": 3.247354745864868, "learning_rate": 2.4517872497236213e-06, "loss": 0.49833476543426514, "step": 7244 }, { "epoch": 0.8791408809610484, "grad_norm": 3.4707889556884766, "learning_rate": 2.449330549072596e-06, "loss": 0.46887364983558655, "step": 7245 }, { "epoch": 0.8792622254580755, "grad_norm": 2.2384603023529053, "learning_rate": 2.4468738484215702e-06, "loss": 0.13914352655410767, "step": 7246 }, { "epoch": 0.8793835699551026, "grad_norm": 1.8582179546356201, "learning_rate": 2.4444171477705445e-06, "loss": 0.16124430298805237, "step": 7247 }, { "epoch": 0.8795049144521296, "grad_norm": 3.444772481918335, "learning_rate": 2.4419604471195187e-06, "loss": 0.36175933480262756, "step": 7248 }, { "epoch": 0.8796262589491567, "grad_norm": 2.2737019062042236, "learning_rate": 2.439503746468493e-06, "loss": 0.0932854637503624, "step": 7249 }, { "epoch": 0.8797476034461837, "grad_norm": 2.5586299896240234, "learning_rate": 2.4370470458174673e-06, "loss": 0.2689233720302582, "step": 7250 }, { "epoch": 0.8798689479432108, "grad_norm": 2.22524094581604, "learning_rate": 2.4345903451664415e-06, "loss": 0.19555819034576416, "step": 7251 }, { "epoch": 0.8799902924402379, "grad_norm": 2.465029239654541, "learning_rate": 2.4321336445154158e-06, "loss": 0.21250879764556885, "step": 7252 }, { "epoch": 0.8801116369372649, "grad_norm": 4.265073299407959, "learning_rate": 2.42967694386439e-06, "loss": 0.5684558153152466, "step": 7253 }, { "epoch": 0.880232981434292, "grad_norm": 4.206843852996826, "learning_rate": 2.4272202432133647e-06, "loss": 0.37537893652915955, "step": 7254 }, { "epoch": 0.880354325931319, "grad_norm": 1.6062086820602417, "learning_rate": 2.424763542562339e-06, "loss": 0.03002196177840233, "step": 7255 }, { "epoch": 0.8804756704283461, "grad_norm": 4.058706760406494, "learning_rate": 2.4223068419113132e-06, "loss": 0.32264262437820435, "step": 7256 }, { "epoch": 0.8805970149253731, "grad_norm": 1.9792404174804688, "learning_rate": 2.4198501412602875e-06, "loss": 0.18087688088417053, "step": 7257 }, { "epoch": 0.8807183594224002, "grad_norm": 2.281331777572632, "learning_rate": 2.4173934406092618e-06, "loss": 0.1051100566983223, "step": 7258 }, { "epoch": 0.8808397039194272, "grad_norm": 2.904029369354248, "learning_rate": 2.4149367399582364e-06, "loss": 0.3299245536327362, "step": 7259 }, { "epoch": 0.8809610484164543, "grad_norm": 4.280839443206787, "learning_rate": 2.4124800393072107e-06, "loss": 0.35770630836486816, "step": 7260 }, { "epoch": 0.8810823929134813, "grad_norm": 0.06119786947965622, "learning_rate": 2.410023338656185e-06, "loss": 0.0004747446801047772, "step": 7261 }, { "epoch": 0.8812037374105084, "grad_norm": 2.639040231704712, "learning_rate": 2.407566638005159e-06, "loss": 0.1917620599269867, "step": 7262 }, { "epoch": 0.8813250819075354, "grad_norm": 3.2761173248291016, "learning_rate": 2.4051099373541335e-06, "loss": 0.3220812976360321, "step": 7263 }, { "epoch": 0.8814464264045625, "grad_norm": 4.127908706665039, "learning_rate": 2.402653236703108e-06, "loss": 0.3020712733268738, "step": 7264 }, { "epoch": 0.8815677709015897, "grad_norm": 4.082529544830322, "learning_rate": 2.4001965360520824e-06, "loss": 0.16915155947208405, "step": 7265 }, { "epoch": 0.8816891153986167, "grad_norm": 2.3538827896118164, "learning_rate": 2.3977398354010567e-06, "loss": 0.19317658245563507, "step": 7266 }, { "epoch": 0.8818104598956438, "grad_norm": 3.917081117630005, "learning_rate": 2.395283134750031e-06, "loss": 0.26034319400787354, "step": 7267 }, { "epoch": 0.8819318043926708, "grad_norm": 3.0980799198150635, "learning_rate": 2.392826434099005e-06, "loss": 0.1721857339143753, "step": 7268 }, { "epoch": 0.8820531488896979, "grad_norm": 4.392425060272217, "learning_rate": 2.3903697334479794e-06, "loss": 0.22158734500408173, "step": 7269 }, { "epoch": 0.8821744933867249, "grad_norm": 3.9363925457000732, "learning_rate": 2.3879130327969537e-06, "loss": 0.33650079369544983, "step": 7270 }, { "epoch": 0.882295837883752, "grad_norm": 2.449134349822998, "learning_rate": 2.385456332145928e-06, "loss": 0.2904933989048004, "step": 7271 }, { "epoch": 0.882417182380779, "grad_norm": 2.1101419925689697, "learning_rate": 2.3829996314949026e-06, "loss": 0.20620116591453552, "step": 7272 }, { "epoch": 0.8825385268778061, "grad_norm": 1.0900321006774902, "learning_rate": 2.380542930843877e-06, "loss": 0.04004345461726189, "step": 7273 }, { "epoch": 0.8826598713748331, "grad_norm": 2.6900761127471924, "learning_rate": 2.378086230192851e-06, "loss": 0.16099628806114197, "step": 7274 }, { "epoch": 0.8827812158718602, "grad_norm": 3.4988174438476562, "learning_rate": 2.3756295295418254e-06, "loss": 0.23735542595386505, "step": 7275 }, { "epoch": 0.8829025603688873, "grad_norm": 4.298156261444092, "learning_rate": 2.3731728288907997e-06, "loss": 0.20018303394317627, "step": 7276 }, { "epoch": 0.8830239048659143, "grad_norm": 2.930922508239746, "learning_rate": 2.3707161282397744e-06, "loss": 0.1587323546409607, "step": 7277 }, { "epoch": 0.8831452493629414, "grad_norm": 3.449187755584717, "learning_rate": 2.3682594275887486e-06, "loss": 0.3879536986351013, "step": 7278 }, { "epoch": 0.8832665938599684, "grad_norm": 2.834610939025879, "learning_rate": 2.365802726937723e-06, "loss": 0.22692027688026428, "step": 7279 }, { "epoch": 0.8833879383569955, "grad_norm": 1.4396802186965942, "learning_rate": 2.363346026286697e-06, "loss": 0.01570996642112732, "step": 7280 }, { "epoch": 0.8835092828540225, "grad_norm": 2.7699172496795654, "learning_rate": 2.3608893256356714e-06, "loss": 0.3094848394393921, "step": 7281 }, { "epoch": 0.8836306273510496, "grad_norm": 3.460564613342285, "learning_rate": 2.358432624984646e-06, "loss": 0.49858933687210083, "step": 7282 }, { "epoch": 0.8837519718480767, "grad_norm": 3.340667486190796, "learning_rate": 2.3559759243336203e-06, "loss": 0.18169380724430084, "step": 7283 }, { "epoch": 0.8838733163451038, "grad_norm": 3.133686065673828, "learning_rate": 2.353519223682594e-06, "loss": 0.2808037996292114, "step": 7284 }, { "epoch": 0.8839946608421309, "grad_norm": 2.8561999797821045, "learning_rate": 2.351062523031569e-06, "loss": 0.262689471244812, "step": 7285 }, { "epoch": 0.8841160053391579, "grad_norm": 2.9865832328796387, "learning_rate": 2.348605822380543e-06, "loss": 0.2521791160106659, "step": 7286 }, { "epoch": 0.884237349836185, "grad_norm": 2.0802948474884033, "learning_rate": 2.3461491217295174e-06, "loss": 0.31153398752212524, "step": 7287 }, { "epoch": 0.884358694333212, "grad_norm": 2.8857109546661377, "learning_rate": 2.3436924210784916e-06, "loss": 0.18441811203956604, "step": 7288 }, { "epoch": 0.8844800388302391, "grad_norm": 3.8758232593536377, "learning_rate": 2.341235720427466e-06, "loss": 0.2313961386680603, "step": 7289 }, { "epoch": 0.8846013833272661, "grad_norm": 1.9324100017547607, "learning_rate": 2.3387790197764406e-06, "loss": 0.20936143398284912, "step": 7290 }, { "epoch": 0.8847227278242932, "grad_norm": 3.4938673973083496, "learning_rate": 2.336322319125415e-06, "loss": 0.13372638821601868, "step": 7291 }, { "epoch": 0.8848440723213202, "grad_norm": 3.023977518081665, "learning_rate": 2.333865618474389e-06, "loss": 0.18577519059181213, "step": 7292 }, { "epoch": 0.8849654168183473, "grad_norm": 2.559607982635498, "learning_rate": 2.3314089178233633e-06, "loss": 0.20482182502746582, "step": 7293 }, { "epoch": 0.8850867613153743, "grad_norm": 0.9489441514015198, "learning_rate": 2.3289522171723376e-06, "loss": 0.011272236704826355, "step": 7294 }, { "epoch": 0.8852081058124014, "grad_norm": 3.4267635345458984, "learning_rate": 2.3264955165213123e-06, "loss": 0.1779641956090927, "step": 7295 }, { "epoch": 0.8853294503094284, "grad_norm": 1.8881423473358154, "learning_rate": 2.3240388158702865e-06, "loss": 0.05340428650379181, "step": 7296 }, { "epoch": 0.8854507948064555, "grad_norm": 5.468567371368408, "learning_rate": 2.3215821152192608e-06, "loss": 0.2699938714504242, "step": 7297 }, { "epoch": 0.8855721393034826, "grad_norm": 3.266860008239746, "learning_rate": 2.319125414568235e-06, "loss": 0.18706607818603516, "step": 7298 }, { "epoch": 0.8856934838005096, "grad_norm": 2.616980791091919, "learning_rate": 2.3166687139172093e-06, "loss": 0.16837313771247864, "step": 7299 }, { "epoch": 0.8858148282975367, "grad_norm": 1.8267781734466553, "learning_rate": 2.314212013266184e-06, "loss": 0.03985995054244995, "step": 7300 }, { "epoch": 0.8859361727945637, "grad_norm": 2.379559278488159, "learning_rate": 2.311755312615158e-06, "loss": 0.15115372836589813, "step": 7301 }, { "epoch": 0.8860575172915909, "grad_norm": 1.4731677770614624, "learning_rate": 2.309298611964132e-06, "loss": 0.018749015405774117, "step": 7302 }, { "epoch": 0.8861788617886179, "grad_norm": 2.0960049629211426, "learning_rate": 2.3068419113131068e-06, "loss": 0.2749252915382385, "step": 7303 }, { "epoch": 0.886300206285645, "grad_norm": 2.5193257331848145, "learning_rate": 2.304385210662081e-06, "loss": 0.1866881549358368, "step": 7304 }, { "epoch": 0.886421550782672, "grad_norm": 2.6523261070251465, "learning_rate": 2.3019285100110553e-06, "loss": 0.11079314351081848, "step": 7305 }, { "epoch": 0.8865428952796991, "grad_norm": 1.8730148077011108, "learning_rate": 2.2994718093600295e-06, "loss": 0.3616553246974945, "step": 7306 }, { "epoch": 0.8866642397767261, "grad_norm": 3.8815550804138184, "learning_rate": 2.297015108709004e-06, "loss": 0.06467530876398087, "step": 7307 }, { "epoch": 0.8867855842737532, "grad_norm": 2.878570556640625, "learning_rate": 2.2945584080579785e-06, "loss": 0.10755035281181335, "step": 7308 }, { "epoch": 0.8869069287707803, "grad_norm": 3.6043365001678467, "learning_rate": 2.2921017074069527e-06, "loss": 0.5451908111572266, "step": 7309 }, { "epoch": 0.8870282732678073, "grad_norm": 1.158017635345459, "learning_rate": 2.289645006755927e-06, "loss": 0.03668788820505142, "step": 7310 }, { "epoch": 0.8871496177648344, "grad_norm": 2.8820438385009766, "learning_rate": 2.2871883061049012e-06, "loss": 0.2614104747772217, "step": 7311 }, { "epoch": 0.8872709622618614, "grad_norm": 1.952326774597168, "learning_rate": 2.2847316054538755e-06, "loss": 0.41317111253738403, "step": 7312 }, { "epoch": 0.8873923067588885, "grad_norm": 2.78066349029541, "learning_rate": 2.28227490480285e-06, "loss": 0.22066840529441833, "step": 7313 }, { "epoch": 0.8875136512559155, "grad_norm": 1.8187859058380127, "learning_rate": 2.2798182041518244e-06, "loss": 0.13779865205287933, "step": 7314 }, { "epoch": 0.8876349957529426, "grad_norm": 2.814129590988159, "learning_rate": 2.2773615035007987e-06, "loss": 0.6092060804367065, "step": 7315 }, { "epoch": 0.8877563402499696, "grad_norm": 3.0045409202575684, "learning_rate": 2.274904802849773e-06, "loss": 0.2748780846595764, "step": 7316 }, { "epoch": 0.8878776847469967, "grad_norm": 1.9781297445297241, "learning_rate": 2.2724481021987472e-06, "loss": 0.1207587867975235, "step": 7317 }, { "epoch": 0.8879990292440237, "grad_norm": 4.760767459869385, "learning_rate": 2.2699914015477215e-06, "loss": 0.5091161727905273, "step": 7318 }, { "epoch": 0.8881203737410508, "grad_norm": 3.005652904510498, "learning_rate": 2.2675347008966957e-06, "loss": 0.17301541566848755, "step": 7319 }, { "epoch": 0.8882417182380778, "grad_norm": 2.6124937534332275, "learning_rate": 2.26507800024567e-06, "loss": 0.31198567152023315, "step": 7320 }, { "epoch": 0.888363062735105, "grad_norm": 2.9585866928100586, "learning_rate": 2.2626212995946447e-06, "loss": 0.6131227612495422, "step": 7321 }, { "epoch": 0.8884844072321321, "grad_norm": 2.6112563610076904, "learning_rate": 2.260164598943619e-06, "loss": 0.48277920484542847, "step": 7322 }, { "epoch": 0.8886057517291591, "grad_norm": 1.46724534034729, "learning_rate": 2.257707898292593e-06, "loss": 0.021986987441778183, "step": 7323 }, { "epoch": 0.8887270962261862, "grad_norm": 2.4757473468780518, "learning_rate": 2.2552511976415675e-06, "loss": 0.09765561670064926, "step": 7324 }, { "epoch": 0.8888484407232132, "grad_norm": 3.3800413608551025, "learning_rate": 2.2527944969905417e-06, "loss": 0.49142158031463623, "step": 7325 }, { "epoch": 0.8889697852202403, "grad_norm": 5.046632289886475, "learning_rate": 2.2503377963395164e-06, "loss": 0.3861925005912781, "step": 7326 }, { "epoch": 0.8890911297172673, "grad_norm": 3.4906094074249268, "learning_rate": 2.2478810956884907e-06, "loss": 0.0907096341252327, "step": 7327 }, { "epoch": 0.8892124742142944, "grad_norm": 2.4767980575561523, "learning_rate": 2.245424395037465e-06, "loss": 0.39836227893829346, "step": 7328 }, { "epoch": 0.8893338187113214, "grad_norm": 4.176862716674805, "learning_rate": 2.242967694386439e-06, "loss": 0.11980126053094864, "step": 7329 }, { "epoch": 0.8894551632083485, "grad_norm": 4.120904922485352, "learning_rate": 2.2405109937354134e-06, "loss": 0.3578144907951355, "step": 7330 }, { "epoch": 0.8895765077053756, "grad_norm": 2.3334808349609375, "learning_rate": 2.238054293084388e-06, "loss": 0.10864166915416718, "step": 7331 }, { "epoch": 0.8896978522024026, "grad_norm": 0.6748518347740173, "learning_rate": 2.2355975924333624e-06, "loss": 0.010982434265315533, "step": 7332 }, { "epoch": 0.8898191966994297, "grad_norm": 0.7746337652206421, "learning_rate": 2.2331408917823366e-06, "loss": 0.012732215225696564, "step": 7333 }, { "epoch": 0.8899405411964567, "grad_norm": 4.661366939544678, "learning_rate": 2.230684191131311e-06, "loss": 0.3516770899295807, "step": 7334 }, { "epoch": 0.8900618856934838, "grad_norm": 4.711503982543945, "learning_rate": 2.228227490480285e-06, "loss": 0.20862603187561035, "step": 7335 }, { "epoch": 0.8901832301905108, "grad_norm": 1.111460566520691, "learning_rate": 2.2257707898292594e-06, "loss": 0.0464380644261837, "step": 7336 }, { "epoch": 0.8903045746875379, "grad_norm": 4.295136451721191, "learning_rate": 2.2233140891782337e-06, "loss": 0.588898777961731, "step": 7337 }, { "epoch": 0.8904259191845649, "grad_norm": 4.444903373718262, "learning_rate": 2.220857388527208e-06, "loss": 0.4099515676498413, "step": 7338 }, { "epoch": 0.8905472636815921, "grad_norm": 2.8411715030670166, "learning_rate": 2.2184006878761826e-06, "loss": 0.08895280212163925, "step": 7339 }, { "epoch": 0.8906686081786191, "grad_norm": 3.589730978012085, "learning_rate": 2.215943987225157e-06, "loss": 0.42792820930480957, "step": 7340 }, { "epoch": 0.8907899526756462, "grad_norm": 2.5385637283325195, "learning_rate": 2.213487286574131e-06, "loss": 0.5615171194076538, "step": 7341 }, { "epoch": 0.8909112971726733, "grad_norm": 3.2804677486419678, "learning_rate": 2.2110305859231054e-06, "loss": 0.18427158892154694, "step": 7342 }, { "epoch": 0.8910326416697003, "grad_norm": 4.738703727722168, "learning_rate": 2.2085738852720796e-06, "loss": 0.22713854908943176, "step": 7343 }, { "epoch": 0.8911539861667274, "grad_norm": 2.0193862915039062, "learning_rate": 2.2061171846210543e-06, "loss": 0.12107071280479431, "step": 7344 }, { "epoch": 0.8912753306637544, "grad_norm": 2.3513896465301514, "learning_rate": 2.2036604839700286e-06, "loss": 0.12796138226985931, "step": 7345 }, { "epoch": 0.8913966751607815, "grad_norm": 3.276506185531616, "learning_rate": 2.201203783319003e-06, "loss": 0.0930899828672409, "step": 7346 }, { "epoch": 0.8915180196578085, "grad_norm": 2.787973642349243, "learning_rate": 2.198747082667977e-06, "loss": 0.35933148860931396, "step": 7347 }, { "epoch": 0.8916393641548356, "grad_norm": 2.8930647373199463, "learning_rate": 2.1962903820169513e-06, "loss": 0.2165001630783081, "step": 7348 }, { "epoch": 0.8917607086518626, "grad_norm": 5.778714179992676, "learning_rate": 2.193833681365926e-06, "loss": 0.15689614415168762, "step": 7349 }, { "epoch": 0.8918820531488897, "grad_norm": 2.2789816856384277, "learning_rate": 2.1913769807149003e-06, "loss": 0.14808917045593262, "step": 7350 }, { "epoch": 0.8920033976459167, "grad_norm": 1.6797430515289307, "learning_rate": 2.1889202800638745e-06, "loss": 0.2401178777217865, "step": 7351 }, { "epoch": 0.8921247421429438, "grad_norm": 1.3246757984161377, "learning_rate": 2.186463579412849e-06, "loss": 0.025255030021071434, "step": 7352 }, { "epoch": 0.8922460866399708, "grad_norm": 2.862485408782959, "learning_rate": 2.184006878761823e-06, "loss": 0.34429752826690674, "step": 7353 }, { "epoch": 0.8923674311369979, "grad_norm": 2.033928394317627, "learning_rate": 2.1815501781107973e-06, "loss": 0.10566048324108124, "step": 7354 }, { "epoch": 0.892488775634025, "grad_norm": 2.1872165203094482, "learning_rate": 2.1790934774597716e-06, "loss": 0.16154050827026367, "step": 7355 }, { "epoch": 0.892610120131052, "grad_norm": 1.829770803451538, "learning_rate": 2.176636776808746e-06, "loss": 0.057375259697437286, "step": 7356 }, { "epoch": 0.8927314646280791, "grad_norm": 2.3001973628997803, "learning_rate": 2.1741800761577205e-06, "loss": 0.11615393310785294, "step": 7357 }, { "epoch": 0.8928528091251062, "grad_norm": 4.109961032867432, "learning_rate": 2.1717233755066948e-06, "loss": 0.3616872727870941, "step": 7358 }, { "epoch": 0.8929741536221333, "grad_norm": 4.410674571990967, "learning_rate": 2.169266674855669e-06, "loss": 0.4620034396648407, "step": 7359 }, { "epoch": 0.8930954981191603, "grad_norm": 4.68530797958374, "learning_rate": 2.1668099742046433e-06, "loss": 0.48962199687957764, "step": 7360 }, { "epoch": 0.8932168426161874, "grad_norm": 2.6346750259399414, "learning_rate": 2.1643532735536175e-06, "loss": 0.08795701712369919, "step": 7361 }, { "epoch": 0.8933381871132144, "grad_norm": 1.8426774740219116, "learning_rate": 2.1618965729025922e-06, "loss": 0.2903626561164856, "step": 7362 }, { "epoch": 0.8934595316102415, "grad_norm": 3.3150839805603027, "learning_rate": 2.1594398722515665e-06, "loss": 0.4045177698135376, "step": 7363 }, { "epoch": 0.8935808761072686, "grad_norm": 3.593903064727783, "learning_rate": 2.1569831716005407e-06, "loss": 0.29470449686050415, "step": 7364 }, { "epoch": 0.8937022206042956, "grad_norm": 2.2171714305877686, "learning_rate": 2.154526470949515e-06, "loss": 0.19586271047592163, "step": 7365 }, { "epoch": 0.8938235651013227, "grad_norm": 3.127655029296875, "learning_rate": 2.1520697702984893e-06, "loss": 0.21248342096805573, "step": 7366 }, { "epoch": 0.8939449095983497, "grad_norm": 2.3655169010162354, "learning_rate": 2.149613069647464e-06, "loss": 0.17027202248573303, "step": 7367 }, { "epoch": 0.8940662540953768, "grad_norm": 3.119476556777954, "learning_rate": 2.147156368996438e-06, "loss": 0.28729382157325745, "step": 7368 }, { "epoch": 0.8941875985924038, "grad_norm": 1.5625767707824707, "learning_rate": 2.144699668345412e-06, "loss": 0.15961848199367523, "step": 7369 }, { "epoch": 0.8943089430894309, "grad_norm": 2.196375608444214, "learning_rate": 2.1422429676943863e-06, "loss": 0.08865858614444733, "step": 7370 }, { "epoch": 0.8944302875864579, "grad_norm": 2.410477638244629, "learning_rate": 2.139786267043361e-06, "loss": 0.12414608150720596, "step": 7371 }, { "epoch": 0.894551632083485, "grad_norm": 3.295868158340454, "learning_rate": 2.1373295663923352e-06, "loss": 0.22907426953315735, "step": 7372 }, { "epoch": 0.894672976580512, "grad_norm": 3.142878532409668, "learning_rate": 2.1348728657413095e-06, "loss": 0.31906574964523315, "step": 7373 }, { "epoch": 0.8947943210775391, "grad_norm": 3.5751004219055176, "learning_rate": 2.1324161650902838e-06, "loss": 0.3048775792121887, "step": 7374 }, { "epoch": 0.8949156655745661, "grad_norm": 2.9480321407318115, "learning_rate": 2.129959464439258e-06, "loss": 0.10639877617359161, "step": 7375 }, { "epoch": 0.8950370100715933, "grad_norm": 2.5159683227539062, "learning_rate": 2.1275027637882327e-06, "loss": 0.6303149461746216, "step": 7376 }, { "epoch": 0.8951583545686204, "grad_norm": 4.623355388641357, "learning_rate": 2.125046063137207e-06, "loss": 0.2866925597190857, "step": 7377 }, { "epoch": 0.8952796990656474, "grad_norm": 0.7050560116767883, "learning_rate": 2.122589362486181e-06, "loss": 0.01007622666656971, "step": 7378 }, { "epoch": 0.8954010435626745, "grad_norm": 3.3988585472106934, "learning_rate": 2.1201326618351555e-06, "loss": 0.5734153985977173, "step": 7379 }, { "epoch": 0.8955223880597015, "grad_norm": 1.9510142803192139, "learning_rate": 2.11767596118413e-06, "loss": 0.06315430253744125, "step": 7380 }, { "epoch": 0.8956437325567286, "grad_norm": 1.8870779275894165, "learning_rate": 2.1152192605331044e-06, "loss": 0.36645373702049255, "step": 7381 }, { "epoch": 0.8957650770537556, "grad_norm": 5.672052383422852, "learning_rate": 2.1127625598820787e-06, "loss": 0.2625335454940796, "step": 7382 }, { "epoch": 0.8958864215507827, "grad_norm": 3.0958752632141113, "learning_rate": 2.110305859231053e-06, "loss": 0.16049502789974213, "step": 7383 }, { "epoch": 0.8960077660478097, "grad_norm": 2.0242135524749756, "learning_rate": 2.107849158580027e-06, "loss": 0.1419283002614975, "step": 7384 }, { "epoch": 0.8961291105448368, "grad_norm": 1.4867055416107178, "learning_rate": 2.105392457929002e-06, "loss": 0.017841370776295662, "step": 7385 }, { "epoch": 0.8962504550418638, "grad_norm": 2.397122383117676, "learning_rate": 2.1029357572779757e-06, "loss": 0.3061310648918152, "step": 7386 }, { "epoch": 0.8963717995388909, "grad_norm": 3.2423348426818848, "learning_rate": 2.10047905662695e-06, "loss": 0.07060565054416656, "step": 7387 }, { "epoch": 0.896493144035918, "grad_norm": 3.3881418704986572, "learning_rate": 2.0980223559759242e-06, "loss": 0.19000673294067383, "step": 7388 }, { "epoch": 0.896614488532945, "grad_norm": 3.4170455932617188, "learning_rate": 2.095565655324899e-06, "loss": 0.1008756011724472, "step": 7389 }, { "epoch": 0.8967358330299721, "grad_norm": 3.6398050785064697, "learning_rate": 2.093108954673873e-06, "loss": 0.05616134777665138, "step": 7390 }, { "epoch": 0.8968571775269991, "grad_norm": 0.9692047834396362, "learning_rate": 2.0906522540228474e-06, "loss": 0.043182067573070526, "step": 7391 }, { "epoch": 0.8969785220240262, "grad_norm": 2.8210971355438232, "learning_rate": 2.0881955533718217e-06, "loss": 0.3575339615345001, "step": 7392 }, { "epoch": 0.8970998665210532, "grad_norm": 3.9880330562591553, "learning_rate": 2.085738852720796e-06, "loss": 0.11465466022491455, "step": 7393 }, { "epoch": 0.8972212110180803, "grad_norm": 2.7480459213256836, "learning_rate": 2.0832821520697706e-06, "loss": 0.12300153821706772, "step": 7394 }, { "epoch": 0.8973425555151074, "grad_norm": 2.212963104248047, "learning_rate": 2.080825451418745e-06, "loss": 0.3678915798664093, "step": 7395 }, { "epoch": 0.8974639000121345, "grad_norm": 3.0686490535736084, "learning_rate": 2.078368750767719e-06, "loss": 0.12072588503360748, "step": 7396 }, { "epoch": 0.8975852445091616, "grad_norm": 4.69550895690918, "learning_rate": 2.0759120501166934e-06, "loss": 0.3488573431968689, "step": 7397 }, { "epoch": 0.8977065890061886, "grad_norm": 7.124651908874512, "learning_rate": 2.0734553494656676e-06, "loss": 0.5677511692047119, "step": 7398 }, { "epoch": 0.8978279335032157, "grad_norm": 5.276618480682373, "learning_rate": 2.0709986488146423e-06, "loss": 0.33057287335395813, "step": 7399 }, { "epoch": 0.8979492780002427, "grad_norm": 4.955247402191162, "learning_rate": 2.0685419481636166e-06, "loss": 0.10799411684274673, "step": 7400 }, { "epoch": 0.8980706224972698, "grad_norm": 1.6192156076431274, "learning_rate": 2.066085247512591e-06, "loss": 0.1268284171819687, "step": 7401 }, { "epoch": 0.8981919669942968, "grad_norm": 2.3719849586486816, "learning_rate": 2.063628546861565e-06, "loss": 0.22636553645133972, "step": 7402 }, { "epoch": 0.8983133114913239, "grad_norm": 0.8024221062660217, "learning_rate": 2.0611718462105394e-06, "loss": 0.011440525762736797, "step": 7403 }, { "epoch": 0.8984346559883509, "grad_norm": 3.6468541622161865, "learning_rate": 2.0587151455595136e-06, "loss": 0.36010831594467163, "step": 7404 }, { "epoch": 0.898556000485378, "grad_norm": 1.5671072006225586, "learning_rate": 2.056258444908488e-06, "loss": 0.0496683195233345, "step": 7405 }, { "epoch": 0.898677344982405, "grad_norm": 2.1089842319488525, "learning_rate": 2.053801744257462e-06, "loss": 0.15190784633159637, "step": 7406 }, { "epoch": 0.8987986894794321, "grad_norm": 4.577912330627441, "learning_rate": 2.051345043606437e-06, "loss": 0.0959743857383728, "step": 7407 }, { "epoch": 0.8989200339764591, "grad_norm": 2.406871795654297, "learning_rate": 2.048888342955411e-06, "loss": 0.06723768264055252, "step": 7408 }, { "epoch": 0.8990413784734862, "grad_norm": 2.8904898166656494, "learning_rate": 2.0464316423043853e-06, "loss": 0.5867793560028076, "step": 7409 }, { "epoch": 0.8991627229705133, "grad_norm": 4.1484761238098145, "learning_rate": 2.0439749416533596e-06, "loss": 0.1372481882572174, "step": 7410 }, { "epoch": 0.8992840674675403, "grad_norm": 2.071909189224243, "learning_rate": 2.041518241002334e-06, "loss": 0.3026125133037567, "step": 7411 }, { "epoch": 0.8994054119645674, "grad_norm": 3.0064752101898193, "learning_rate": 2.0390615403513085e-06, "loss": 0.19599057734012604, "step": 7412 }, { "epoch": 0.8995267564615944, "grad_norm": 4.363378524780273, "learning_rate": 2.0366048397002828e-06, "loss": 0.13841788470745087, "step": 7413 }, { "epoch": 0.8996481009586216, "grad_norm": 2.1906862258911133, "learning_rate": 2.034148139049257e-06, "loss": 0.3581477999687195, "step": 7414 }, { "epoch": 0.8997694454556486, "grad_norm": 4.55972957611084, "learning_rate": 2.0316914383982313e-06, "loss": 0.2185634970664978, "step": 7415 }, { "epoch": 0.8998907899526757, "grad_norm": 3.103508234024048, "learning_rate": 2.0292347377472056e-06, "loss": 0.296328604221344, "step": 7416 }, { "epoch": 0.9000121344497027, "grad_norm": 4.930680751800537, "learning_rate": 2.0267780370961802e-06, "loss": 0.21130648255348206, "step": 7417 }, { "epoch": 0.9001334789467298, "grad_norm": 2.330505132675171, "learning_rate": 2.0243213364451545e-06, "loss": 0.11915557831525803, "step": 7418 }, { "epoch": 0.9002548234437568, "grad_norm": 3.190603017807007, "learning_rate": 2.0218646357941288e-06, "loss": 0.08767152577638626, "step": 7419 }, { "epoch": 0.9003761679407839, "grad_norm": 2.2957651615142822, "learning_rate": 2.019407935143103e-06, "loss": 0.052407294511795044, "step": 7420 }, { "epoch": 0.900497512437811, "grad_norm": 3.075167179107666, "learning_rate": 2.0169512344920773e-06, "loss": 0.359375, "step": 7421 }, { "epoch": 0.900618856934838, "grad_norm": 7.659646511077881, "learning_rate": 2.0144945338410515e-06, "loss": 0.08534778654575348, "step": 7422 }, { "epoch": 0.9007402014318651, "grad_norm": 2.713837146759033, "learning_rate": 2.012037833190026e-06, "loss": 0.34643298387527466, "step": 7423 }, { "epoch": 0.9008615459288921, "grad_norm": 11.049539566040039, "learning_rate": 2.009581132539e-06, "loss": 0.20055465400218964, "step": 7424 }, { "epoch": 0.9009828904259192, "grad_norm": 3.2106688022613525, "learning_rate": 2.0071244318879747e-06, "loss": 0.4559691846370697, "step": 7425 }, { "epoch": 0.9011042349229462, "grad_norm": 2.495382070541382, "learning_rate": 2.004667731236949e-06, "loss": 0.3569727838039398, "step": 7426 }, { "epoch": 0.9012255794199733, "grad_norm": 1.7505056858062744, "learning_rate": 2.0022110305859232e-06, "loss": 0.13278140127658844, "step": 7427 }, { "epoch": 0.9013469239170003, "grad_norm": 1.5528112649917603, "learning_rate": 1.9997543299348975e-06, "loss": 0.19569289684295654, "step": 7428 }, { "epoch": 0.9014682684140274, "grad_norm": 3.3927977085113525, "learning_rate": 1.9972976292838718e-06, "loss": 0.11010460555553436, "step": 7429 }, { "epoch": 0.9015896129110544, "grad_norm": 2.8908958435058594, "learning_rate": 1.9948409286328464e-06, "loss": 0.37166133522987366, "step": 7430 }, { "epoch": 0.9017109574080815, "grad_norm": 1.2622119188308716, "learning_rate": 1.9923842279818207e-06, "loss": 0.030880846083164215, "step": 7431 }, { "epoch": 0.9018323019051087, "grad_norm": 2.134920120239258, "learning_rate": 1.989927527330795e-06, "loss": 0.3936614990234375, "step": 7432 }, { "epoch": 0.9019536464021357, "grad_norm": 2.017226457595825, "learning_rate": 1.9874708266797692e-06, "loss": 0.10396426916122437, "step": 7433 }, { "epoch": 0.9020749908991628, "grad_norm": 3.6607940196990967, "learning_rate": 1.9850141260287435e-06, "loss": 0.7534957528114319, "step": 7434 }, { "epoch": 0.9021963353961898, "grad_norm": 2.9066672325134277, "learning_rate": 1.982557425377718e-06, "loss": 0.23474059998989105, "step": 7435 }, { "epoch": 0.9023176798932169, "grad_norm": 1.8730189800262451, "learning_rate": 1.9801007247266924e-06, "loss": 0.14778026938438416, "step": 7436 }, { "epoch": 0.9024390243902439, "grad_norm": 3.9658565521240234, "learning_rate": 1.9776440240756663e-06, "loss": 0.12145949900150299, "step": 7437 }, { "epoch": 0.902560368887271, "grad_norm": 4.9231390953063965, "learning_rate": 1.975187323424641e-06, "loss": 0.35269099473953247, "step": 7438 }, { "epoch": 0.902681713384298, "grad_norm": 1.8997570276260376, "learning_rate": 1.972730622773615e-06, "loss": 0.11863474547863007, "step": 7439 }, { "epoch": 0.9028030578813251, "grad_norm": 3.3502988815307617, "learning_rate": 1.9702739221225895e-06, "loss": 0.37591052055358887, "step": 7440 }, { "epoch": 0.9029244023783521, "grad_norm": 2.145277500152588, "learning_rate": 1.9678172214715637e-06, "loss": 0.27159857749938965, "step": 7441 }, { "epoch": 0.9030457468753792, "grad_norm": 2.9786314964294434, "learning_rate": 1.965360520820538e-06, "loss": 0.3800956606864929, "step": 7442 }, { "epoch": 0.9031670913724062, "grad_norm": 3.2319674491882324, "learning_rate": 1.9629038201695126e-06, "loss": 0.24009039998054504, "step": 7443 }, { "epoch": 0.9032884358694333, "grad_norm": 3.9278883934020996, "learning_rate": 1.960447119518487e-06, "loss": 0.11150793731212616, "step": 7444 }, { "epoch": 0.9034097803664604, "grad_norm": 2.3229260444641113, "learning_rate": 1.957990418867461e-06, "loss": 0.09369368106126785, "step": 7445 }, { "epoch": 0.9035311248634874, "grad_norm": 3.162471294403076, "learning_rate": 1.9555337182164354e-06, "loss": 0.3369750380516052, "step": 7446 }, { "epoch": 0.9036524693605145, "grad_norm": 1.5041249990463257, "learning_rate": 1.9530770175654097e-06, "loss": 0.0574989840388298, "step": 7447 }, { "epoch": 0.9037738138575415, "grad_norm": 1.9272373914718628, "learning_rate": 1.9506203169143844e-06, "loss": 0.08676691353321075, "step": 7448 }, { "epoch": 0.9038951583545686, "grad_norm": 2.736392021179199, "learning_rate": 1.9481636162633586e-06, "loss": 0.19345402717590332, "step": 7449 }, { "epoch": 0.9040165028515956, "grad_norm": 2.0525641441345215, "learning_rate": 1.945706915612333e-06, "loss": 0.23823395371437073, "step": 7450 }, { "epoch": 0.9041378473486228, "grad_norm": 3.215465784072876, "learning_rate": 1.943250214961307e-06, "loss": 0.1869097203016281, "step": 7451 }, { "epoch": 0.9042591918456498, "grad_norm": 2.216021776199341, "learning_rate": 1.9407935143102814e-06, "loss": 0.14476510882377625, "step": 7452 }, { "epoch": 0.9043805363426769, "grad_norm": 4.351615905761719, "learning_rate": 1.938336813659256e-06, "loss": 0.42431074380874634, "step": 7453 }, { "epoch": 0.904501880839704, "grad_norm": 1.9846943616867065, "learning_rate": 1.93588011300823e-06, "loss": 0.10923407971858978, "step": 7454 }, { "epoch": 0.904623225336731, "grad_norm": 3.5587844848632812, "learning_rate": 1.933423412357204e-06, "loss": 0.3372516334056854, "step": 7455 }, { "epoch": 0.9047445698337581, "grad_norm": 2.557978868484497, "learning_rate": 1.930966711706179e-06, "loss": 0.11217866837978363, "step": 7456 }, { "epoch": 0.9048659143307851, "grad_norm": 3.111910820007324, "learning_rate": 1.928510011055153e-06, "loss": 0.37539708614349365, "step": 7457 }, { "epoch": 0.9049872588278122, "grad_norm": 2.6046721935272217, "learning_rate": 1.9260533104041274e-06, "loss": 0.10763710737228394, "step": 7458 }, { "epoch": 0.9051086033248392, "grad_norm": 4.322963237762451, "learning_rate": 1.9235966097531016e-06, "loss": 0.2844175696372986, "step": 7459 }, { "epoch": 0.9052299478218663, "grad_norm": 1.9635112285614014, "learning_rate": 1.921139909102076e-06, "loss": 0.1964401751756668, "step": 7460 }, { "epoch": 0.9053512923188933, "grad_norm": 2.835508108139038, "learning_rate": 1.9186832084510506e-06, "loss": 0.0860644206404686, "step": 7461 }, { "epoch": 0.9054726368159204, "grad_norm": 2.124131441116333, "learning_rate": 1.916226507800025e-06, "loss": 0.20888873934745789, "step": 7462 }, { "epoch": 0.9055939813129474, "grad_norm": 2.7853617668151855, "learning_rate": 1.913769807148999e-06, "loss": 0.0889788344502449, "step": 7463 }, { "epoch": 0.9057153258099745, "grad_norm": 3.7252917289733887, "learning_rate": 1.9113131064979733e-06, "loss": 0.10484079271554947, "step": 7464 }, { "epoch": 0.9058366703070015, "grad_norm": 2.6950154304504395, "learning_rate": 1.9088564058469476e-06, "loss": 0.39662379026412964, "step": 7465 }, { "epoch": 0.9059580148040286, "grad_norm": 2.2061471939086914, "learning_rate": 1.906399705195922e-06, "loss": 0.1987730860710144, "step": 7466 }, { "epoch": 0.9060793593010557, "grad_norm": 2.927544355392456, "learning_rate": 1.9039430045448965e-06, "loss": 0.2588297724723816, "step": 7467 }, { "epoch": 0.9062007037980827, "grad_norm": 2.3907759189605713, "learning_rate": 1.9014863038938708e-06, "loss": 0.24698996543884277, "step": 7468 }, { "epoch": 0.9063220482951099, "grad_norm": 2.8663182258605957, "learning_rate": 1.899029603242845e-06, "loss": 0.2035178393125534, "step": 7469 }, { "epoch": 0.9064433927921369, "grad_norm": 3.228379726409912, "learning_rate": 1.8965729025918195e-06, "loss": 0.05339556559920311, "step": 7470 }, { "epoch": 0.906564737289164, "grad_norm": 2.377063751220703, "learning_rate": 1.8941162019407936e-06, "loss": 0.19178226590156555, "step": 7471 }, { "epoch": 0.906686081786191, "grad_norm": 1.7288857698440552, "learning_rate": 1.8916595012897678e-06, "loss": 0.01746448501944542, "step": 7472 }, { "epoch": 0.9068074262832181, "grad_norm": 2.283479928970337, "learning_rate": 1.8892028006387423e-06, "loss": 0.1446288824081421, "step": 7473 }, { "epoch": 0.9069287707802451, "grad_norm": 2.3545918464660645, "learning_rate": 1.8867460999877166e-06, "loss": 0.16515220701694489, "step": 7474 }, { "epoch": 0.9070501152772722, "grad_norm": 2.9457449913024902, "learning_rate": 1.8842893993366908e-06, "loss": 0.2361689805984497, "step": 7475 }, { "epoch": 0.9071714597742992, "grad_norm": 4.155648708343506, "learning_rate": 1.8818326986856653e-06, "loss": 0.2442786991596222, "step": 7476 }, { "epoch": 0.9072928042713263, "grad_norm": 4.0758280754089355, "learning_rate": 1.8793759980346395e-06, "loss": 0.20407623052597046, "step": 7477 }, { "epoch": 0.9074141487683534, "grad_norm": 2.2536520957946777, "learning_rate": 1.876919297383614e-06, "loss": 0.13455994427204132, "step": 7478 }, { "epoch": 0.9075354932653804, "grad_norm": 2.606672763824463, "learning_rate": 1.8744625967325883e-06, "loss": 0.21091125905513763, "step": 7479 }, { "epoch": 0.9076568377624075, "grad_norm": 0.9121490120887756, "learning_rate": 1.8720058960815625e-06, "loss": 0.016326220706105232, "step": 7480 }, { "epoch": 0.9077781822594345, "grad_norm": 1.8820711374282837, "learning_rate": 1.869549195430537e-06, "loss": 0.1294446885585785, "step": 7481 }, { "epoch": 0.9078995267564616, "grad_norm": 0.7890458106994629, "learning_rate": 1.8670924947795113e-06, "loss": 0.003942539449781179, "step": 7482 }, { "epoch": 0.9080208712534886, "grad_norm": 2.441418170928955, "learning_rate": 1.8646357941284857e-06, "loss": 0.21965008974075317, "step": 7483 }, { "epoch": 0.9081422157505157, "grad_norm": 3.021550178527832, "learning_rate": 1.86217909347746e-06, "loss": 0.25183913111686707, "step": 7484 }, { "epoch": 0.9082635602475427, "grad_norm": 2.1423757076263428, "learning_rate": 1.8597223928264345e-06, "loss": 0.17834988236427307, "step": 7485 }, { "epoch": 0.9083849047445698, "grad_norm": 2.7812860012054443, "learning_rate": 1.8572656921754087e-06, "loss": 0.47860974073410034, "step": 7486 }, { "epoch": 0.9085062492415968, "grad_norm": 0.4166809916496277, "learning_rate": 1.854808991524383e-06, "loss": 0.0033094538375735283, "step": 7487 }, { "epoch": 0.908627593738624, "grad_norm": 5.607845306396484, "learning_rate": 1.852352290873357e-06, "loss": 0.30408239364624023, "step": 7488 }, { "epoch": 0.9087489382356511, "grad_norm": 2.632187604904175, "learning_rate": 1.8498955902223315e-06, "loss": 0.2503441572189331, "step": 7489 }, { "epoch": 0.9088702827326781, "grad_norm": 3.6022424697875977, "learning_rate": 1.8474388895713057e-06, "loss": 0.32474979758262634, "step": 7490 }, { "epoch": 0.9089916272297052, "grad_norm": 3.711111068725586, "learning_rate": 1.8449821889202802e-06, "loss": 0.14662274718284607, "step": 7491 }, { "epoch": 0.9091129717267322, "grad_norm": 2.470646619796753, "learning_rate": 1.8425254882692545e-06, "loss": 0.0887284204363823, "step": 7492 }, { "epoch": 0.9092343162237593, "grad_norm": 3.7523088455200195, "learning_rate": 1.8400687876182287e-06, "loss": 0.26335951685905457, "step": 7493 }, { "epoch": 0.9093556607207863, "grad_norm": 1.9296585321426392, "learning_rate": 1.8376120869672032e-06, "loss": 0.04782257229089737, "step": 7494 }, { "epoch": 0.9094770052178134, "grad_norm": 2.7970430850982666, "learning_rate": 1.8351553863161775e-06, "loss": 0.19831950962543488, "step": 7495 }, { "epoch": 0.9095983497148404, "grad_norm": 3.171095609664917, "learning_rate": 1.832698685665152e-06, "loss": 0.3936327397823334, "step": 7496 }, { "epoch": 0.9097196942118675, "grad_norm": 4.295749187469482, "learning_rate": 1.8302419850141262e-06, "loss": 0.5307641625404358, "step": 7497 }, { "epoch": 0.9098410387088945, "grad_norm": 3.872589111328125, "learning_rate": 1.8277852843631004e-06, "loss": 0.2568395733833313, "step": 7498 }, { "epoch": 0.9099623832059216, "grad_norm": 2.176837682723999, "learning_rate": 1.825328583712075e-06, "loss": 0.3798977732658386, "step": 7499 }, { "epoch": 0.9100837277029487, "grad_norm": 2.7664482593536377, "learning_rate": 1.8228718830610492e-06, "loss": 0.43864595890045166, "step": 7500 }, { "epoch": 0.9102050721999757, "grad_norm": 3.5727176666259766, "learning_rate": 1.8204151824100236e-06, "loss": 0.28170347213745117, "step": 7501 }, { "epoch": 0.9103264166970028, "grad_norm": 3.7870001792907715, "learning_rate": 1.817958481758998e-06, "loss": 0.18589770793914795, "step": 7502 }, { "epoch": 0.9104477611940298, "grad_norm": 2.786297559738159, "learning_rate": 1.8155017811079722e-06, "loss": 0.3667725920677185, "step": 7503 }, { "epoch": 0.9105691056910569, "grad_norm": 4.740426540374756, "learning_rate": 1.8130450804569466e-06, "loss": 0.5046045780181885, "step": 7504 }, { "epoch": 0.9106904501880839, "grad_norm": 0.15774966776371002, "learning_rate": 1.8105883798059207e-06, "loss": 0.0010225607547909021, "step": 7505 }, { "epoch": 0.910811794685111, "grad_norm": 1.0974762439727783, "learning_rate": 1.808131679154895e-06, "loss": 0.09092831611633301, "step": 7506 }, { "epoch": 0.9109331391821381, "grad_norm": 2.5774683952331543, "learning_rate": 1.8056749785038694e-06, "loss": 0.10180414468050003, "step": 7507 }, { "epoch": 0.9110544836791652, "grad_norm": 3.3084161281585693, "learning_rate": 1.8032182778528437e-06, "loss": 0.26475611329078674, "step": 7508 }, { "epoch": 0.9111758281761922, "grad_norm": 3.4202897548675537, "learning_rate": 1.8007615772018181e-06, "loss": 0.15361031889915466, "step": 7509 }, { "epoch": 0.9112971726732193, "grad_norm": 2.8122458457946777, "learning_rate": 1.7983048765507924e-06, "loss": 0.047721099108457565, "step": 7510 }, { "epoch": 0.9114185171702464, "grad_norm": 3.903679132461548, "learning_rate": 1.7958481758997667e-06, "loss": 0.20062564313411713, "step": 7511 }, { "epoch": 0.9115398616672734, "grad_norm": 2.239053726196289, "learning_rate": 1.7933914752487411e-06, "loss": 0.16219298541545868, "step": 7512 }, { "epoch": 0.9116612061643005, "grad_norm": 2.9836928844451904, "learning_rate": 1.7909347745977154e-06, "loss": 0.6457222700119019, "step": 7513 }, { "epoch": 0.9117825506613275, "grad_norm": 2.241115093231201, "learning_rate": 1.7884780739466899e-06, "loss": 0.417328804731369, "step": 7514 }, { "epoch": 0.9119038951583546, "grad_norm": 2.1815357208251953, "learning_rate": 1.7860213732956641e-06, "loss": 0.1678621768951416, "step": 7515 }, { "epoch": 0.9120252396553816, "grad_norm": 1.6675710678100586, "learning_rate": 1.7835646726446384e-06, "loss": 0.08807383477687836, "step": 7516 }, { "epoch": 0.9121465841524087, "grad_norm": 4.204915523529053, "learning_rate": 1.7811079719936128e-06, "loss": 0.5145528316497803, "step": 7517 }, { "epoch": 0.9122679286494357, "grad_norm": 2.342357873916626, "learning_rate": 1.778651271342587e-06, "loss": 0.31135350465774536, "step": 7518 }, { "epoch": 0.9123892731464628, "grad_norm": 2.2402260303497314, "learning_rate": 1.7761945706915616e-06, "loss": 0.22782722115516663, "step": 7519 }, { "epoch": 0.9125106176434898, "grad_norm": 1.0926158428192139, "learning_rate": 1.7737378700405358e-06, "loss": 0.028440076857805252, "step": 7520 }, { "epoch": 0.9126319621405169, "grad_norm": 2.6789932250976562, "learning_rate": 1.77128116938951e-06, "loss": 0.20205746591091156, "step": 7521 }, { "epoch": 0.912753306637544, "grad_norm": 3.812530040740967, "learning_rate": 1.7688244687384843e-06, "loss": 0.4499821662902832, "step": 7522 }, { "epoch": 0.912874651134571, "grad_norm": 3.7137694358825684, "learning_rate": 1.7663677680874586e-06, "loss": 0.5422126650810242, "step": 7523 }, { "epoch": 0.9129959956315981, "grad_norm": 3.561210870742798, "learning_rate": 1.7639110674364329e-06, "loss": 0.11602824181318283, "step": 7524 }, { "epoch": 0.9131173401286252, "grad_norm": 3.2919998168945312, "learning_rate": 1.7614543667854073e-06, "loss": 0.38832855224609375, "step": 7525 }, { "epoch": 0.9132386846256523, "grad_norm": 2.9205236434936523, "learning_rate": 1.7589976661343816e-06, "loss": 0.12150450795888901, "step": 7526 }, { "epoch": 0.9133600291226793, "grad_norm": 3.807326316833496, "learning_rate": 1.756540965483356e-06, "loss": 0.33380383253097534, "step": 7527 }, { "epoch": 0.9134813736197064, "grad_norm": 2.4677345752716064, "learning_rate": 1.7540842648323303e-06, "loss": 0.06848317384719849, "step": 7528 }, { "epoch": 0.9136027181167334, "grad_norm": 1.2071479558944702, "learning_rate": 1.7516275641813046e-06, "loss": 0.04495592415332794, "step": 7529 }, { "epoch": 0.9137240626137605, "grad_norm": 5.849497318267822, "learning_rate": 1.749170863530279e-06, "loss": 0.2006947249174118, "step": 7530 }, { "epoch": 0.9138454071107875, "grad_norm": 1.1550135612487793, "learning_rate": 1.7467141628792533e-06, "loss": 0.03469691425561905, "step": 7531 }, { "epoch": 0.9139667516078146, "grad_norm": 2.703401565551758, "learning_rate": 1.7442574622282278e-06, "loss": 0.22835633158683777, "step": 7532 }, { "epoch": 0.9140880961048417, "grad_norm": 1.9235637187957764, "learning_rate": 1.741800761577202e-06, "loss": 0.14990800619125366, "step": 7533 }, { "epoch": 0.9142094406018687, "grad_norm": 3.359044313430786, "learning_rate": 1.7393440609261763e-06, "loss": 0.23429545760154724, "step": 7534 }, { "epoch": 0.9143307850988958, "grad_norm": 3.8214263916015625, "learning_rate": 1.7368873602751508e-06, "loss": 0.26239895820617676, "step": 7535 }, { "epoch": 0.9144521295959228, "grad_norm": 3.6190099716186523, "learning_rate": 1.734430659624125e-06, "loss": 0.2074119597673416, "step": 7536 }, { "epoch": 0.9145734740929499, "grad_norm": 1.998765468597412, "learning_rate": 1.7319739589730995e-06, "loss": 0.16956986486911774, "step": 7537 }, { "epoch": 0.9146948185899769, "grad_norm": 4.853466987609863, "learning_rate": 1.7295172583220737e-06, "loss": 0.1034846305847168, "step": 7538 }, { "epoch": 0.914816163087004, "grad_norm": 2.4809370040893555, "learning_rate": 1.7270605576710478e-06, "loss": 0.22278955578804016, "step": 7539 }, { "epoch": 0.914937507584031, "grad_norm": 3.0840606689453125, "learning_rate": 1.724603857020022e-06, "loss": 0.43750569224357605, "step": 7540 }, { "epoch": 0.9150588520810581, "grad_norm": 2.728461265563965, "learning_rate": 1.7221471563689965e-06, "loss": 0.41385066509246826, "step": 7541 }, { "epoch": 0.9151801965780851, "grad_norm": 2.4124996662139893, "learning_rate": 1.7196904557179708e-06, "loss": 0.20769189298152924, "step": 7542 }, { "epoch": 0.9153015410751122, "grad_norm": 3.131361722946167, "learning_rate": 1.7172337550669452e-06, "loss": 0.2054785192012787, "step": 7543 }, { "epoch": 0.9154228855721394, "grad_norm": 3.2738847732543945, "learning_rate": 1.7147770544159195e-06, "loss": 0.6165505051612854, "step": 7544 }, { "epoch": 0.9155442300691664, "grad_norm": 3.837169647216797, "learning_rate": 1.7123203537648938e-06, "loss": 0.20592233538627625, "step": 7545 }, { "epoch": 0.9156655745661935, "grad_norm": 2.771010398864746, "learning_rate": 1.7098636531138682e-06, "loss": 0.22152075171470642, "step": 7546 }, { "epoch": 0.9157869190632205, "grad_norm": 2.675323486328125, "learning_rate": 1.7074069524628425e-06, "loss": 0.1993785798549652, "step": 7547 }, { "epoch": 0.9159082635602476, "grad_norm": 1.4481064081192017, "learning_rate": 1.704950251811817e-06, "loss": 0.028583435341715813, "step": 7548 }, { "epoch": 0.9160296080572746, "grad_norm": 3.4130771160125732, "learning_rate": 1.7024935511607912e-06, "loss": 0.09175242483615875, "step": 7549 }, { "epoch": 0.9161509525543017, "grad_norm": 2.603886842727661, "learning_rate": 1.7000368505097655e-06, "loss": 0.09837143868207932, "step": 7550 }, { "epoch": 0.9162722970513287, "grad_norm": 1.7299238443374634, "learning_rate": 1.69758014985874e-06, "loss": 0.3974321484565735, "step": 7551 }, { "epoch": 0.9163936415483558, "grad_norm": 3.2703120708465576, "learning_rate": 1.6951234492077142e-06, "loss": 0.21745122969150543, "step": 7552 }, { "epoch": 0.9165149860453828, "grad_norm": 4.754316329956055, "learning_rate": 1.6926667485566887e-06, "loss": 0.09385549277067184, "step": 7553 }, { "epoch": 0.9166363305424099, "grad_norm": 2.999983310699463, "learning_rate": 1.690210047905663e-06, "loss": 0.5028836727142334, "step": 7554 }, { "epoch": 0.916757675039437, "grad_norm": 2.2782232761383057, "learning_rate": 1.6877533472546374e-06, "loss": 0.1277066022157669, "step": 7555 }, { "epoch": 0.916879019536464, "grad_norm": 2.1583948135375977, "learning_rate": 1.6852966466036114e-06, "loss": 0.3073510527610779, "step": 7556 }, { "epoch": 0.9170003640334911, "grad_norm": 5.3561248779296875, "learning_rate": 1.6828399459525857e-06, "loss": 0.4705343246459961, "step": 7557 }, { "epoch": 0.9171217085305181, "grad_norm": 2.383962631225586, "learning_rate": 1.68038324530156e-06, "loss": 0.04234011471271515, "step": 7558 }, { "epoch": 0.9172430530275452, "grad_norm": 2.430588722229004, "learning_rate": 1.6779265446505344e-06, "loss": 0.02911709062755108, "step": 7559 }, { "epoch": 0.9173643975245722, "grad_norm": 4.252199172973633, "learning_rate": 1.6754698439995087e-06, "loss": 0.4706071615219116, "step": 7560 }, { "epoch": 0.9174857420215993, "grad_norm": 2.3053929805755615, "learning_rate": 1.6730131433484832e-06, "loss": 0.03571076691150665, "step": 7561 }, { "epoch": 0.9176070865186263, "grad_norm": 3.401657819747925, "learning_rate": 1.6705564426974574e-06, "loss": 0.6334425210952759, "step": 7562 }, { "epoch": 0.9177284310156535, "grad_norm": 1.8322160243988037, "learning_rate": 1.6680997420464317e-06, "loss": 0.038127169013023376, "step": 7563 }, { "epoch": 0.9178497755126805, "grad_norm": 1.3694559335708618, "learning_rate": 1.6656430413954061e-06, "loss": 0.04083748161792755, "step": 7564 }, { "epoch": 0.9179711200097076, "grad_norm": 3.317769765853882, "learning_rate": 1.6631863407443804e-06, "loss": 0.49107688665390015, "step": 7565 }, { "epoch": 0.9180924645067347, "grad_norm": 2.8166568279266357, "learning_rate": 1.6607296400933549e-06, "loss": 0.14916600286960602, "step": 7566 }, { "epoch": 0.9182138090037617, "grad_norm": 1.4980144500732422, "learning_rate": 1.6582729394423291e-06, "loss": 0.11245954036712646, "step": 7567 }, { "epoch": 0.9183351535007888, "grad_norm": 2.1674089431762695, "learning_rate": 1.6558162387913034e-06, "loss": 0.2225176841020584, "step": 7568 }, { "epoch": 0.9184564979978158, "grad_norm": 0.0004718915733974427, "learning_rate": 1.6533595381402779e-06, "loss": 5.294471520755906e-06, "step": 7569 }, { "epoch": 0.9185778424948429, "grad_norm": 2.375760555267334, "learning_rate": 1.6509028374892521e-06, "loss": 0.07288525998592377, "step": 7570 }, { "epoch": 0.9186991869918699, "grad_norm": 2.4699392318725586, "learning_rate": 1.6484461368382266e-06, "loss": 0.05991046130657196, "step": 7571 }, { "epoch": 0.918820531488897, "grad_norm": 2.6527345180511475, "learning_rate": 1.6459894361872008e-06, "loss": 0.1387142539024353, "step": 7572 }, { "epoch": 0.918941875985924, "grad_norm": 3.4235682487487793, "learning_rate": 1.643532735536175e-06, "loss": 0.18901677429676056, "step": 7573 }, { "epoch": 0.9190632204829511, "grad_norm": 3.2505950927734375, "learning_rate": 1.6410760348851494e-06, "loss": 0.33302611112594604, "step": 7574 }, { "epoch": 0.9191845649799781, "grad_norm": 3.645282030105591, "learning_rate": 1.6386193342341236e-06, "loss": 0.40069323778152466, "step": 7575 }, { "epoch": 0.9193059094770052, "grad_norm": 2.0404906272888184, "learning_rate": 1.6361626335830979e-06, "loss": 0.12753736972808838, "step": 7576 }, { "epoch": 0.9194272539740322, "grad_norm": 2.842745780944824, "learning_rate": 1.6337059329320724e-06, "loss": 0.3615136444568634, "step": 7577 }, { "epoch": 0.9195485984710593, "grad_norm": 1.300636649131775, "learning_rate": 1.6312492322810466e-06, "loss": 0.06108599528670311, "step": 7578 }, { "epoch": 0.9196699429680864, "grad_norm": 3.905754327774048, "learning_rate": 1.628792531630021e-06, "loss": 0.19545507431030273, "step": 7579 }, { "epoch": 0.9197912874651134, "grad_norm": 2.8498713970184326, "learning_rate": 1.6263358309789953e-06, "loss": 0.08190570771694183, "step": 7580 }, { "epoch": 0.9199126319621406, "grad_norm": 3.2004201412200928, "learning_rate": 1.6238791303279696e-06, "loss": 0.300342857837677, "step": 7581 }, { "epoch": 0.9200339764591676, "grad_norm": 6.547180652618408, "learning_rate": 1.621422429676944e-06, "loss": 0.4675617516040802, "step": 7582 }, { "epoch": 0.9201553209561947, "grad_norm": 2.887190341949463, "learning_rate": 1.6189657290259183e-06, "loss": 0.28483060002326965, "step": 7583 }, { "epoch": 0.9202766654532217, "grad_norm": 2.7878620624542236, "learning_rate": 1.6165090283748928e-06, "loss": 0.12105894833803177, "step": 7584 }, { "epoch": 0.9203980099502488, "grad_norm": 2.5115127563476562, "learning_rate": 1.614052327723867e-06, "loss": 0.13412846624851227, "step": 7585 }, { "epoch": 0.9205193544472758, "grad_norm": 3.2601370811462402, "learning_rate": 1.6115956270728413e-06, "loss": 0.1704588085412979, "step": 7586 }, { "epoch": 0.9206406989443029, "grad_norm": 2.3462302684783936, "learning_rate": 1.6091389264218158e-06, "loss": 0.7366325855255127, "step": 7587 }, { "epoch": 0.92076204344133, "grad_norm": 3.7472238540649414, "learning_rate": 1.60668222577079e-06, "loss": 0.7007284164428711, "step": 7588 }, { "epoch": 0.920883387938357, "grad_norm": 2.7839455604553223, "learning_rate": 1.6042255251197645e-06, "loss": 0.3716367483139038, "step": 7589 }, { "epoch": 0.921004732435384, "grad_norm": 3.770204544067383, "learning_rate": 1.6017688244687386e-06, "loss": 0.3682631254196167, "step": 7590 }, { "epoch": 0.9211260769324111, "grad_norm": 0.008622940629720688, "learning_rate": 1.5993121238177128e-06, "loss": 6.435848627006635e-05, "step": 7591 }, { "epoch": 0.9212474214294382, "grad_norm": 1.7897554636001587, "learning_rate": 1.5968554231666873e-06, "loss": 0.16758069396018982, "step": 7592 }, { "epoch": 0.9213687659264652, "grad_norm": 4.155150890350342, "learning_rate": 1.5943987225156615e-06, "loss": 0.3724444508552551, "step": 7593 }, { "epoch": 0.9214901104234923, "grad_norm": 2.229081869125366, "learning_rate": 1.5919420218646358e-06, "loss": 0.16655631363391876, "step": 7594 }, { "epoch": 0.9216114549205193, "grad_norm": 5.016583442687988, "learning_rate": 1.5894853212136103e-06, "loss": 0.18958589434623718, "step": 7595 }, { "epoch": 0.9217327994175464, "grad_norm": 1.8144944906234741, "learning_rate": 1.5870286205625845e-06, "loss": 0.1904633641242981, "step": 7596 }, { "epoch": 0.9218541439145734, "grad_norm": 2.3484699726104736, "learning_rate": 1.584571919911559e-06, "loss": 0.10929018259048462, "step": 7597 }, { "epoch": 0.9219754884116005, "grad_norm": 4.495340824127197, "learning_rate": 1.5821152192605333e-06, "loss": 0.13412052392959595, "step": 7598 }, { "epoch": 0.9220968329086275, "grad_norm": 18.988906860351562, "learning_rate": 1.5796585186095075e-06, "loss": 0.24815183877944946, "step": 7599 }, { "epoch": 0.9222181774056547, "grad_norm": 2.896266460418701, "learning_rate": 1.577201817958482e-06, "loss": 0.2419583797454834, "step": 7600 }, { "epoch": 0.9223395219026818, "grad_norm": 3.3901069164276123, "learning_rate": 1.5747451173074562e-06, "loss": 0.415423184633255, "step": 7601 }, { "epoch": 0.9224608663997088, "grad_norm": 2.6932108402252197, "learning_rate": 1.5722884166564307e-06, "loss": 0.10442853718996048, "step": 7602 }, { "epoch": 0.9225822108967359, "grad_norm": 4.323896408081055, "learning_rate": 1.569831716005405e-06, "loss": 0.5410029292106628, "step": 7603 }, { "epoch": 0.9227035553937629, "grad_norm": 2.5239245891571045, "learning_rate": 1.5673750153543792e-06, "loss": 0.22779493033885956, "step": 7604 }, { "epoch": 0.92282489989079, "grad_norm": 2.7307255268096924, "learning_rate": 1.5649183147033537e-06, "loss": 0.24216026067733765, "step": 7605 }, { "epoch": 0.922946244387817, "grad_norm": 2.952867269515991, "learning_rate": 1.5624616140523277e-06, "loss": 0.0866016075015068, "step": 7606 }, { "epoch": 0.9230675888848441, "grad_norm": 1.5647088289260864, "learning_rate": 1.560004913401302e-06, "loss": 0.22735784947872162, "step": 7607 }, { "epoch": 0.9231889333818711, "grad_norm": 1.8668208122253418, "learning_rate": 1.5575482127502765e-06, "loss": 0.07372453063726425, "step": 7608 }, { "epoch": 0.9233102778788982, "grad_norm": 5.340537071228027, "learning_rate": 1.5550915120992507e-06, "loss": 0.27618148922920227, "step": 7609 }, { "epoch": 0.9234316223759252, "grad_norm": 3.0771965980529785, "learning_rate": 1.552634811448225e-06, "loss": 0.3175598978996277, "step": 7610 }, { "epoch": 0.9235529668729523, "grad_norm": 2.191600799560547, "learning_rate": 1.5501781107971995e-06, "loss": 0.07629764080047607, "step": 7611 }, { "epoch": 0.9236743113699794, "grad_norm": 5.438360214233398, "learning_rate": 1.5477214101461737e-06, "loss": 0.20655547082424164, "step": 7612 }, { "epoch": 0.9237956558670064, "grad_norm": 3.094555616378784, "learning_rate": 1.5452647094951482e-06, "loss": 0.2614663243293762, "step": 7613 }, { "epoch": 0.9239170003640335, "grad_norm": 3.4840619564056396, "learning_rate": 1.5428080088441224e-06, "loss": 0.36846640706062317, "step": 7614 }, { "epoch": 0.9240383448610605, "grad_norm": 2.478020668029785, "learning_rate": 1.5403513081930967e-06, "loss": 0.24794909358024597, "step": 7615 }, { "epoch": 0.9241596893580876, "grad_norm": 3.6334385871887207, "learning_rate": 1.5378946075420712e-06, "loss": 0.7339620590209961, "step": 7616 }, { "epoch": 0.9242810338551146, "grad_norm": 3.7096314430236816, "learning_rate": 1.5354379068910454e-06, "loss": 0.0455160029232502, "step": 7617 }, { "epoch": 0.9244023783521418, "grad_norm": 2.9305880069732666, "learning_rate": 1.53298120624002e-06, "loss": 0.19100435078144073, "step": 7618 }, { "epoch": 0.9245237228491688, "grad_norm": 2.9143829345703125, "learning_rate": 1.5305245055889942e-06, "loss": 0.3383450210094452, "step": 7619 }, { "epoch": 0.9246450673461959, "grad_norm": 2.120588541030884, "learning_rate": 1.5280678049379686e-06, "loss": 0.09537140280008316, "step": 7620 }, { "epoch": 0.924766411843223, "grad_norm": 2.0068564414978027, "learning_rate": 1.5256111042869429e-06, "loss": 0.06670057028532028, "step": 7621 }, { "epoch": 0.92488775634025, "grad_norm": 4.721917152404785, "learning_rate": 1.5231544036359171e-06, "loss": 0.1728273183107376, "step": 7622 }, { "epoch": 0.925009100837277, "grad_norm": 2.6429178714752197, "learning_rate": 1.5206977029848912e-06, "loss": 0.029139723628759384, "step": 7623 }, { "epoch": 0.9251304453343041, "grad_norm": 3.4830024242401123, "learning_rate": 1.5182410023338657e-06, "loss": 0.2335418313741684, "step": 7624 }, { "epoch": 0.9252517898313312, "grad_norm": 2.939126491546631, "learning_rate": 1.51578430168284e-06, "loss": 0.17729414999485016, "step": 7625 }, { "epoch": 0.9253731343283582, "grad_norm": 2.6331913471221924, "learning_rate": 1.5133276010318144e-06, "loss": 0.25026172399520874, "step": 7626 }, { "epoch": 0.9254944788253853, "grad_norm": 2.622654438018799, "learning_rate": 1.5108709003807886e-06, "loss": 0.07376431673765182, "step": 7627 }, { "epoch": 0.9256158233224123, "grad_norm": 3.4457552433013916, "learning_rate": 1.508414199729763e-06, "loss": 0.5623354911804199, "step": 7628 }, { "epoch": 0.9257371678194394, "grad_norm": 2.7280516624450684, "learning_rate": 1.5059574990787374e-06, "loss": 0.5605316758155823, "step": 7629 }, { "epoch": 0.9258585123164664, "grad_norm": 2.794553518295288, "learning_rate": 1.5035007984277116e-06, "loss": 0.4472280442714691, "step": 7630 }, { "epoch": 0.9259798568134935, "grad_norm": 2.469909906387329, "learning_rate": 1.501044097776686e-06, "loss": 0.14444369077682495, "step": 7631 }, { "epoch": 0.9261012013105205, "grad_norm": 4.197169780731201, "learning_rate": 1.4985873971256604e-06, "loss": 0.19597876071929932, "step": 7632 }, { "epoch": 0.9262225458075476, "grad_norm": 1.2870328426361084, "learning_rate": 1.4961306964746346e-06, "loss": 0.04011332616209984, "step": 7633 }, { "epoch": 0.9263438903045746, "grad_norm": 4.1891984939575195, "learning_rate": 1.493673995823609e-06, "loss": 0.560319185256958, "step": 7634 }, { "epoch": 0.9264652348016017, "grad_norm": 3.38000226020813, "learning_rate": 1.4912172951725833e-06, "loss": 0.5146498680114746, "step": 7635 }, { "epoch": 0.9265865792986288, "grad_norm": 2.807605743408203, "learning_rate": 1.4887605945215578e-06, "loss": 0.34891635179519653, "step": 7636 }, { "epoch": 0.9267079237956559, "grad_norm": 1.0424308776855469, "learning_rate": 1.486303893870532e-06, "loss": 0.020707691088318825, "step": 7637 }, { "epoch": 0.926829268292683, "grad_norm": 3.2826123237609863, "learning_rate": 1.4838471932195063e-06, "loss": 0.08949390798807144, "step": 7638 }, { "epoch": 0.92695061278971, "grad_norm": 2.506828784942627, "learning_rate": 1.4813904925684808e-06, "loss": 0.19058573246002197, "step": 7639 }, { "epoch": 0.9270719572867371, "grad_norm": 5.349743366241455, "learning_rate": 1.4789337919174549e-06, "loss": 0.10892535746097565, "step": 7640 }, { "epoch": 0.9271933017837641, "grad_norm": 3.9996731281280518, "learning_rate": 1.4764770912664291e-06, "loss": 0.1436176300048828, "step": 7641 }, { "epoch": 0.9273146462807912, "grad_norm": 2.016751527786255, "learning_rate": 1.4740203906154036e-06, "loss": 0.04902449622750282, "step": 7642 }, { "epoch": 0.9274359907778182, "grad_norm": 3.0935513973236084, "learning_rate": 1.4715636899643778e-06, "loss": 0.29166674613952637, "step": 7643 }, { "epoch": 0.9275573352748453, "grad_norm": 3.0676534175872803, "learning_rate": 1.4691069893133523e-06, "loss": 0.38801994919776917, "step": 7644 }, { "epoch": 0.9276786797718723, "grad_norm": 3.8045084476470947, "learning_rate": 1.4666502886623266e-06, "loss": 0.38111570477485657, "step": 7645 }, { "epoch": 0.9278000242688994, "grad_norm": 1.8905631303787231, "learning_rate": 1.4641935880113008e-06, "loss": 0.10301118344068527, "step": 7646 }, { "epoch": 0.9279213687659265, "grad_norm": 3.6041972637176514, "learning_rate": 1.4617368873602753e-06, "loss": 0.5503351092338562, "step": 7647 }, { "epoch": 0.9280427132629535, "grad_norm": 2.096755266189575, "learning_rate": 1.4592801867092496e-06, "loss": 0.12855984270572662, "step": 7648 }, { "epoch": 0.9281640577599806, "grad_norm": 3.5611672401428223, "learning_rate": 1.456823486058224e-06, "loss": 0.45901089906692505, "step": 7649 }, { "epoch": 0.9282854022570076, "grad_norm": 2.4018454551696777, "learning_rate": 1.4543667854071983e-06, "loss": 0.25219959020614624, "step": 7650 }, { "epoch": 0.9284067467540347, "grad_norm": 5.4819655418396, "learning_rate": 1.4519100847561725e-06, "loss": 0.20533382892608643, "step": 7651 }, { "epoch": 0.9285280912510617, "grad_norm": 2.736849069595337, "learning_rate": 1.449453384105147e-06, "loss": 0.42228320240974426, "step": 7652 }, { "epoch": 0.9286494357480888, "grad_norm": 2.2973861694335938, "learning_rate": 1.4469966834541213e-06, "loss": 0.17400118708610535, "step": 7653 }, { "epoch": 0.9287707802451158, "grad_norm": 3.0624725818634033, "learning_rate": 1.4445399828030957e-06, "loss": 0.2191160023212433, "step": 7654 }, { "epoch": 0.9288921247421429, "grad_norm": 4.068859100341797, "learning_rate": 1.44208328215207e-06, "loss": 0.37234461307525635, "step": 7655 }, { "epoch": 0.92901346923917, "grad_norm": 4.694348335266113, "learning_rate": 1.4396265815010443e-06, "loss": 0.344944030046463, "step": 7656 }, { "epoch": 0.9291348137361971, "grad_norm": 2.452763557434082, "learning_rate": 1.4371698808500183e-06, "loss": 0.2248394787311554, "step": 7657 }, { "epoch": 0.9292561582332242, "grad_norm": 2.888495922088623, "learning_rate": 1.4347131801989928e-06, "loss": 0.3595215380191803, "step": 7658 }, { "epoch": 0.9293775027302512, "grad_norm": 3.6977341175079346, "learning_rate": 1.432256479547967e-06, "loss": 0.12399230897426605, "step": 7659 }, { "epoch": 0.9294988472272783, "grad_norm": 0.972385585308075, "learning_rate": 1.4297997788969415e-06, "loss": 0.022361135110259056, "step": 7660 }, { "epoch": 0.9296201917243053, "grad_norm": 1.781026005744934, "learning_rate": 1.4273430782459158e-06, "loss": 0.08221207559108734, "step": 7661 }, { "epoch": 0.9297415362213324, "grad_norm": 1.9083151817321777, "learning_rate": 1.4248863775948902e-06, "loss": 0.06955470889806747, "step": 7662 }, { "epoch": 0.9298628807183594, "grad_norm": 2.349482774734497, "learning_rate": 1.4224296769438645e-06, "loss": 0.09595946967601776, "step": 7663 }, { "epoch": 0.9299842252153865, "grad_norm": 2.368788719177246, "learning_rate": 1.4199729762928387e-06, "loss": 0.13406451046466827, "step": 7664 }, { "epoch": 0.9301055697124135, "grad_norm": 3.645068883895874, "learning_rate": 1.4175162756418132e-06, "loss": 0.520393431186676, "step": 7665 }, { "epoch": 0.9302269142094406, "grad_norm": 2.6033923625946045, "learning_rate": 1.4150595749907875e-06, "loss": 0.018178243190050125, "step": 7666 }, { "epoch": 0.9303482587064676, "grad_norm": 0.368269681930542, "learning_rate": 1.412602874339762e-06, "loss": 0.002248176606371999, "step": 7667 }, { "epoch": 0.9304696032034947, "grad_norm": 3.8257598876953125, "learning_rate": 1.4101461736887362e-06, "loss": 0.11010874807834625, "step": 7668 }, { "epoch": 0.9305909477005218, "grad_norm": 2.167874574661255, "learning_rate": 1.4076894730377105e-06, "loss": 0.07785952836275101, "step": 7669 }, { "epoch": 0.9307122921975488, "grad_norm": 2.9954841136932373, "learning_rate": 1.405232772386685e-06, "loss": 0.4038749039173126, "step": 7670 }, { "epoch": 0.9308336366945759, "grad_norm": 1.7670494318008423, "learning_rate": 1.4027760717356592e-06, "loss": 0.06286994367837906, "step": 7671 }, { "epoch": 0.9309549811916029, "grad_norm": 3.0101826190948486, "learning_rate": 1.4003193710846337e-06, "loss": 0.15305793285369873, "step": 7672 }, { "epoch": 0.93107632568863, "grad_norm": 1.7151803970336914, "learning_rate": 1.397862670433608e-06, "loss": 0.02074068784713745, "step": 7673 }, { "epoch": 0.9311976701856571, "grad_norm": 3.820417881011963, "learning_rate": 1.395405969782582e-06, "loss": 0.21407903730869293, "step": 7674 }, { "epoch": 0.9313190146826842, "grad_norm": 4.551059722900391, "learning_rate": 1.3929492691315562e-06, "loss": 0.4330679476261139, "step": 7675 }, { "epoch": 0.9314403591797112, "grad_norm": 2.0285117626190186, "learning_rate": 1.3904925684805307e-06, "loss": 0.4227559268474579, "step": 7676 }, { "epoch": 0.9315617036767383, "grad_norm": 3.459960699081421, "learning_rate": 1.388035867829505e-06, "loss": 0.4030398726463318, "step": 7677 }, { "epoch": 0.9316830481737653, "grad_norm": 2.6113929748535156, "learning_rate": 1.3855791671784794e-06, "loss": 0.17132079601287842, "step": 7678 }, { "epoch": 0.9318043926707924, "grad_norm": 1.42075777053833, "learning_rate": 1.3831224665274537e-06, "loss": 0.3693407475948334, "step": 7679 }, { "epoch": 0.9319257371678195, "grad_norm": 2.0272347927093506, "learning_rate": 1.380665765876428e-06, "loss": 0.13482022285461426, "step": 7680 }, { "epoch": 0.9320470816648465, "grad_norm": 2.642117977142334, "learning_rate": 1.3782090652254024e-06, "loss": 0.21288365125656128, "step": 7681 }, { "epoch": 0.9321684261618736, "grad_norm": 5.104677200317383, "learning_rate": 1.3757523645743767e-06, "loss": 0.34931325912475586, "step": 7682 }, { "epoch": 0.9322897706589006, "grad_norm": 1.921242356300354, "learning_rate": 1.3732956639233511e-06, "loss": 0.24462847411632538, "step": 7683 }, { "epoch": 0.9324111151559277, "grad_norm": 3.152461290359497, "learning_rate": 1.3708389632723254e-06, "loss": 0.15765883028507233, "step": 7684 }, { "epoch": 0.9325324596529547, "grad_norm": 2.6830146312713623, "learning_rate": 1.3683822626212996e-06, "loss": 0.11059755831956863, "step": 7685 }, { "epoch": 0.9326538041499818, "grad_norm": 4.161698341369629, "learning_rate": 1.3659255619702741e-06, "loss": 0.2672032415866852, "step": 7686 }, { "epoch": 0.9327751486470088, "grad_norm": 1.4508382081985474, "learning_rate": 1.3634688613192484e-06, "loss": 0.03998139873147011, "step": 7687 }, { "epoch": 0.9328964931440359, "grad_norm": 0.521662712097168, "learning_rate": 1.3610121606682228e-06, "loss": 0.006077821832150221, "step": 7688 }, { "epoch": 0.9330178376410629, "grad_norm": 5.481769561767578, "learning_rate": 1.358555460017197e-06, "loss": 0.26734668016433716, "step": 7689 }, { "epoch": 0.93313918213809, "grad_norm": 3.713395357131958, "learning_rate": 1.3560987593661716e-06, "loss": 0.37445175647735596, "step": 7690 }, { "epoch": 0.933260526635117, "grad_norm": 0.3066747486591339, "learning_rate": 1.3536420587151456e-06, "loss": 0.003615202382206917, "step": 7691 }, { "epoch": 0.9333818711321441, "grad_norm": 2.3961291313171387, "learning_rate": 1.3511853580641199e-06, "loss": 0.1711491495370865, "step": 7692 }, { "epoch": 0.9335032156291713, "grad_norm": 4.164459705352783, "learning_rate": 1.3487286574130941e-06, "loss": 0.4079133868217468, "step": 7693 }, { "epoch": 0.9336245601261983, "grad_norm": 3.1475412845611572, "learning_rate": 1.3462719567620686e-06, "loss": 0.4258159399032593, "step": 7694 }, { "epoch": 0.9337459046232254, "grad_norm": 3.065511703491211, "learning_rate": 1.3438152561110429e-06, "loss": 0.20897693932056427, "step": 7695 }, { "epoch": 0.9338672491202524, "grad_norm": 1.3652220964431763, "learning_rate": 1.3413585554600173e-06, "loss": 0.031913962215185165, "step": 7696 }, { "epoch": 0.9339885936172795, "grad_norm": 3.253568649291992, "learning_rate": 1.3389018548089916e-06, "loss": 0.42451316118240356, "step": 7697 }, { "epoch": 0.9341099381143065, "grad_norm": 1.9813202619552612, "learning_rate": 1.3364451541579659e-06, "loss": 0.0604761466383934, "step": 7698 }, { "epoch": 0.9342312826113336, "grad_norm": 1.2787697315216064, "learning_rate": 1.3339884535069403e-06, "loss": 0.0512201227247715, "step": 7699 }, { "epoch": 0.9343526271083606, "grad_norm": 0.029261037707328796, "learning_rate": 1.3315317528559146e-06, "loss": 0.0002072262141155079, "step": 7700 }, { "epoch": 0.9344739716053877, "grad_norm": 3.70450758934021, "learning_rate": 1.329075052204889e-06, "loss": 0.4397331178188324, "step": 7701 }, { "epoch": 0.9345953161024148, "grad_norm": 3.2678442001342773, "learning_rate": 1.3266183515538633e-06, "loss": 0.18177111446857452, "step": 7702 }, { "epoch": 0.9347166605994418, "grad_norm": 2.476982831954956, "learning_rate": 1.3241616509028376e-06, "loss": 0.0725698322057724, "step": 7703 }, { "epoch": 0.9348380050964689, "grad_norm": 2.715791940689087, "learning_rate": 1.321704950251812e-06, "loss": 0.4649902582168579, "step": 7704 }, { "epoch": 0.9349593495934959, "grad_norm": 2.3348348140716553, "learning_rate": 1.3192482496007863e-06, "loss": 0.11110152304172516, "step": 7705 }, { "epoch": 0.935080694090523, "grad_norm": 3.84224534034729, "learning_rate": 1.3167915489497608e-06, "loss": 0.2247028350830078, "step": 7706 }, { "epoch": 0.93520203858755, "grad_norm": 2.862316131591797, "learning_rate": 1.314334848298735e-06, "loss": 0.19974932074546814, "step": 7707 }, { "epoch": 0.9353233830845771, "grad_norm": 3.424909830093384, "learning_rate": 1.311878147647709e-06, "loss": 0.22040243446826935, "step": 7708 }, { "epoch": 0.9354447275816041, "grad_norm": 2.357590675354004, "learning_rate": 1.3094214469966835e-06, "loss": 0.18117155134677887, "step": 7709 }, { "epoch": 0.9355660720786312, "grad_norm": 2.96187162399292, "learning_rate": 1.3069647463456578e-06, "loss": 0.21953000128269196, "step": 7710 }, { "epoch": 0.9356874165756583, "grad_norm": 3.1687397956848145, "learning_rate": 1.304508045694632e-06, "loss": 0.8575660586357117, "step": 7711 }, { "epoch": 0.9358087610726854, "grad_norm": 2.7650389671325684, "learning_rate": 1.3020513450436065e-06, "loss": 0.6545658111572266, "step": 7712 }, { "epoch": 0.9359301055697125, "grad_norm": 3.05652117729187, "learning_rate": 1.2995946443925808e-06, "loss": 0.15357980132102966, "step": 7713 }, { "epoch": 0.9360514500667395, "grad_norm": 2.196955919265747, "learning_rate": 1.2971379437415553e-06, "loss": 0.2827216386795044, "step": 7714 }, { "epoch": 0.9361727945637666, "grad_norm": 2.8086791038513184, "learning_rate": 1.2946812430905295e-06, "loss": 0.25356945395469666, "step": 7715 }, { "epoch": 0.9362941390607936, "grad_norm": 2.706453323364258, "learning_rate": 1.2922245424395038e-06, "loss": 0.5356978178024292, "step": 7716 }, { "epoch": 0.9364154835578207, "grad_norm": 4.313547134399414, "learning_rate": 1.2897678417884782e-06, "loss": 0.3409487009048462, "step": 7717 }, { "epoch": 0.9365368280548477, "grad_norm": 3.009143352508545, "learning_rate": 1.2873111411374525e-06, "loss": 0.24399058520793915, "step": 7718 }, { "epoch": 0.9366581725518748, "grad_norm": 0.7059973478317261, "learning_rate": 1.284854440486427e-06, "loss": 0.01626204326748848, "step": 7719 }, { "epoch": 0.9367795170489018, "grad_norm": 2.7011427879333496, "learning_rate": 1.2823977398354012e-06, "loss": 0.22002291679382324, "step": 7720 }, { "epoch": 0.9369008615459289, "grad_norm": 5.059040546417236, "learning_rate": 1.2799410391843755e-06, "loss": 0.2882941663265228, "step": 7721 }, { "epoch": 0.9370222060429559, "grad_norm": 4.248938083648682, "learning_rate": 1.27748433853335e-06, "loss": 0.263386607170105, "step": 7722 }, { "epoch": 0.937143550539983, "grad_norm": 3.1992363929748535, "learning_rate": 1.2750276378823242e-06, "loss": 0.238864004611969, "step": 7723 }, { "epoch": 0.93726489503701, "grad_norm": 3.4653055667877197, "learning_rate": 1.2725709372312987e-06, "loss": 0.23384249210357666, "step": 7724 }, { "epoch": 0.9373862395340371, "grad_norm": 2.051030397415161, "learning_rate": 1.2701142365802727e-06, "loss": 0.0713038519024849, "step": 7725 }, { "epoch": 0.9375075840310642, "grad_norm": 4.537394046783447, "learning_rate": 1.267657535929247e-06, "loss": 0.35191968083381653, "step": 7726 }, { "epoch": 0.9376289285280912, "grad_norm": 2.8108417987823486, "learning_rate": 1.2652008352782215e-06, "loss": 0.4897487759590149, "step": 7727 }, { "epoch": 0.9377502730251183, "grad_norm": 2.4910411834716797, "learning_rate": 1.2627441346271957e-06, "loss": 0.30593183636665344, "step": 7728 }, { "epoch": 0.9378716175221453, "grad_norm": 2.8507323265075684, "learning_rate": 1.26028743397617e-06, "loss": 0.10967987775802612, "step": 7729 }, { "epoch": 0.9379929620191725, "grad_norm": 3.4700119495391846, "learning_rate": 1.2578307333251444e-06, "loss": 0.12149327248334885, "step": 7730 }, { "epoch": 0.9381143065161995, "grad_norm": 3.623185157775879, "learning_rate": 1.2553740326741187e-06, "loss": 0.35553795099258423, "step": 7731 }, { "epoch": 0.9382356510132266, "grad_norm": 4.728524208068848, "learning_rate": 1.2529173320230932e-06, "loss": 0.38562309741973877, "step": 7732 }, { "epoch": 0.9383569955102536, "grad_norm": 2.902444839477539, "learning_rate": 1.2504606313720674e-06, "loss": 0.12159226089715958, "step": 7733 }, { "epoch": 0.9384783400072807, "grad_norm": 3.0655856132507324, "learning_rate": 1.2480039307210417e-06, "loss": 0.3510741591453552, "step": 7734 }, { "epoch": 0.9385996845043078, "grad_norm": 2.011171817779541, "learning_rate": 1.2455472300700162e-06, "loss": 0.08836576342582703, "step": 7735 }, { "epoch": 0.9387210290013348, "grad_norm": 3.489607095718384, "learning_rate": 1.2430905294189904e-06, "loss": 0.167584627866745, "step": 7736 }, { "epoch": 0.9388423734983619, "grad_norm": 3.76141357421875, "learning_rate": 1.2406338287679649e-06, "loss": 0.19348521530628204, "step": 7737 }, { "epoch": 0.9389637179953889, "grad_norm": 2.305687189102173, "learning_rate": 1.238177128116939e-06, "loss": 0.15586811304092407, "step": 7738 }, { "epoch": 0.939085062492416, "grad_norm": 3.143094778060913, "learning_rate": 1.2357204274659134e-06, "loss": 0.14542751014232635, "step": 7739 }, { "epoch": 0.939206406989443, "grad_norm": 5.055068492889404, "learning_rate": 1.2332637268148877e-06, "loss": 0.44920188188552856, "step": 7740 }, { "epoch": 0.9393277514864701, "grad_norm": 2.7742958068847656, "learning_rate": 1.230807026163862e-06, "loss": 0.1275118589401245, "step": 7741 }, { "epoch": 0.9394490959834971, "grad_norm": 3.652402400970459, "learning_rate": 1.2283503255128364e-06, "loss": 0.23869803547859192, "step": 7742 }, { "epoch": 0.9395704404805242, "grad_norm": 4.132966995239258, "learning_rate": 1.2258936248618106e-06, "loss": 0.2960548400878906, "step": 7743 }, { "epoch": 0.9396917849775512, "grad_norm": 4.897154808044434, "learning_rate": 1.2234369242107851e-06, "loss": 0.536248505115509, "step": 7744 }, { "epoch": 0.9398131294745783, "grad_norm": 3.24900221824646, "learning_rate": 1.2209802235597594e-06, "loss": 0.3594883680343628, "step": 7745 }, { "epoch": 0.9399344739716053, "grad_norm": 3.264488697052002, "learning_rate": 1.2185235229087336e-06, "loss": 0.15447860956192017, "step": 7746 }, { "epoch": 0.9400558184686324, "grad_norm": 1.7284207344055176, "learning_rate": 1.2160668222577079e-06, "loss": 0.022626476362347603, "step": 7747 }, { "epoch": 0.9401771629656595, "grad_norm": 5.274637222290039, "learning_rate": 1.2136101216066824e-06, "loss": 0.30976518988609314, "step": 7748 }, { "epoch": 0.9402985074626866, "grad_norm": 3.4455041885375977, "learning_rate": 1.2111534209556566e-06, "loss": 0.31555888056755066, "step": 7749 }, { "epoch": 0.9404198519597137, "grad_norm": 0.004785396624356508, "learning_rate": 1.2086967203046309e-06, "loss": 4.1874638554872945e-05, "step": 7750 }, { "epoch": 0.9405411964567407, "grad_norm": 2.9381093978881836, "learning_rate": 1.2062400196536053e-06, "loss": 0.24553848803043365, "step": 7751 }, { "epoch": 0.9406625409537678, "grad_norm": 3.9928903579711914, "learning_rate": 1.2037833190025796e-06, "loss": 0.2613721787929535, "step": 7752 }, { "epoch": 0.9407838854507948, "grad_norm": 1.2125452756881714, "learning_rate": 1.201326618351554e-06, "loss": 0.055786747485399246, "step": 7753 }, { "epoch": 0.9409052299478219, "grad_norm": 3.056671619415283, "learning_rate": 1.1988699177005283e-06, "loss": 0.3547486960887909, "step": 7754 }, { "epoch": 0.9410265744448489, "grad_norm": 3.317507743835449, "learning_rate": 1.1964132170495026e-06, "loss": 0.26206251978874207, "step": 7755 }, { "epoch": 0.941147918941876, "grad_norm": 1.1664596796035767, "learning_rate": 1.1939565163984768e-06, "loss": 0.03508451208472252, "step": 7756 }, { "epoch": 0.941269263438903, "grad_norm": 3.0716500282287598, "learning_rate": 1.1914998157474513e-06, "loss": 0.2291264832019806, "step": 7757 }, { "epoch": 0.9413906079359301, "grad_norm": 2.703834295272827, "learning_rate": 1.1890431150964256e-06, "loss": 0.16631676256656647, "step": 7758 }, { "epoch": 0.9415119524329572, "grad_norm": 2.5542733669281006, "learning_rate": 1.1865864144453998e-06, "loss": 0.3290342688560486, "step": 7759 }, { "epoch": 0.9416332969299842, "grad_norm": 3.0216946601867676, "learning_rate": 1.1841297137943743e-06, "loss": 0.169298455119133, "step": 7760 }, { "epoch": 0.9417546414270113, "grad_norm": 3.805272340774536, "learning_rate": 1.1816730131433486e-06, "loss": 0.26143383979797363, "step": 7761 }, { "epoch": 0.9418759859240383, "grad_norm": 1.9782791137695312, "learning_rate": 1.179216312492323e-06, "loss": 0.25400373339653015, "step": 7762 }, { "epoch": 0.9419973304210654, "grad_norm": 3.260904550552368, "learning_rate": 1.176759611841297e-06, "loss": 0.13549262285232544, "step": 7763 }, { "epoch": 0.9421186749180924, "grad_norm": 1.2485079765319824, "learning_rate": 1.1743029111902716e-06, "loss": 0.02356737107038498, "step": 7764 }, { "epoch": 0.9422400194151195, "grad_norm": 3.1456594467163086, "learning_rate": 1.1718462105392458e-06, "loss": 0.17982962727546692, "step": 7765 }, { "epoch": 0.9423613639121465, "grad_norm": 2.4721994400024414, "learning_rate": 1.1693895098882203e-06, "loss": 0.03200969099998474, "step": 7766 }, { "epoch": 0.9424827084091737, "grad_norm": 2.8403825759887695, "learning_rate": 1.1669328092371945e-06, "loss": 0.5184905529022217, "step": 7767 }, { "epoch": 0.9426040529062008, "grad_norm": 5.367800712585449, "learning_rate": 1.1644761085861688e-06, "loss": 0.30634695291519165, "step": 7768 }, { "epoch": 0.9427253974032278, "grad_norm": 3.0104660987854004, "learning_rate": 1.1620194079351433e-06, "loss": 0.3312057554721832, "step": 7769 }, { "epoch": 0.9428467419002549, "grad_norm": 3.15285325050354, "learning_rate": 1.1595627072841175e-06, "loss": 0.33677205443382263, "step": 7770 }, { "epoch": 0.9429680863972819, "grad_norm": 4.322443008422852, "learning_rate": 1.157106006633092e-06, "loss": 0.3254617750644684, "step": 7771 }, { "epoch": 0.943089430894309, "grad_norm": 2.673431873321533, "learning_rate": 1.154649305982066e-06, "loss": 0.14881163835525513, "step": 7772 }, { "epoch": 0.943210775391336, "grad_norm": 2.535515308380127, "learning_rate": 1.1521926053310405e-06, "loss": 0.1082981675863266, "step": 7773 }, { "epoch": 0.9433321198883631, "grad_norm": 1.620809555053711, "learning_rate": 1.1497359046800148e-06, "loss": 0.030517224222421646, "step": 7774 }, { "epoch": 0.9434534643853901, "grad_norm": 3.4687530994415283, "learning_rate": 1.1472792040289892e-06, "loss": 0.1672358363866806, "step": 7775 }, { "epoch": 0.9435748088824172, "grad_norm": 3.0473620891571045, "learning_rate": 1.1448225033779635e-06, "loss": 0.4854034185409546, "step": 7776 }, { "epoch": 0.9436961533794442, "grad_norm": 2.201353073120117, "learning_rate": 1.1423658027269378e-06, "loss": 0.21234038472175598, "step": 7777 }, { "epoch": 0.9438174978764713, "grad_norm": 3.088956356048584, "learning_rate": 1.1399091020759122e-06, "loss": 0.3780791759490967, "step": 7778 }, { "epoch": 0.9439388423734983, "grad_norm": 2.887418746948242, "learning_rate": 1.1374524014248865e-06, "loss": 0.10637243092060089, "step": 7779 }, { "epoch": 0.9440601868705254, "grad_norm": 2.344541072845459, "learning_rate": 1.1349957007738607e-06, "loss": 0.2510610818862915, "step": 7780 }, { "epoch": 0.9441815313675525, "grad_norm": 3.1485605239868164, "learning_rate": 1.132539000122835e-06, "loss": 0.2005797177553177, "step": 7781 }, { "epoch": 0.9443028758645795, "grad_norm": 1.8344240188598633, "learning_rate": 1.1300822994718095e-06, "loss": 0.161865234375, "step": 7782 }, { "epoch": 0.9444242203616066, "grad_norm": 2.7477471828460693, "learning_rate": 1.1276255988207837e-06, "loss": 0.1903497278690338, "step": 7783 }, { "epoch": 0.9445455648586336, "grad_norm": 2.133188009262085, "learning_rate": 1.1251688981697582e-06, "loss": 0.20684421062469482, "step": 7784 }, { "epoch": 0.9446669093556607, "grad_norm": 2.7124621868133545, "learning_rate": 1.1227121975187325e-06, "loss": 0.12692318856716156, "step": 7785 }, { "epoch": 0.9447882538526878, "grad_norm": 3.450451612472534, "learning_rate": 1.1202554968677067e-06, "loss": 0.3387119770050049, "step": 7786 }, { "epoch": 0.9449095983497149, "grad_norm": 1.7586647272109985, "learning_rate": 1.1177987962166812e-06, "loss": 0.22231486439704895, "step": 7787 }, { "epoch": 0.9450309428467419, "grad_norm": 2.835451126098633, "learning_rate": 1.1153420955656554e-06, "loss": 0.09578491002321243, "step": 7788 }, { "epoch": 0.945152287343769, "grad_norm": 4.861599445343018, "learning_rate": 1.1128853949146297e-06, "loss": 0.33029377460479736, "step": 7789 }, { "epoch": 0.945273631840796, "grad_norm": 3.6189284324645996, "learning_rate": 1.110428694263604e-06, "loss": 0.17705810070037842, "step": 7790 }, { "epoch": 0.9453949763378231, "grad_norm": 3.0145437717437744, "learning_rate": 1.1079719936125784e-06, "loss": 0.261701762676239, "step": 7791 }, { "epoch": 0.9455163208348502, "grad_norm": 2.9041213989257812, "learning_rate": 1.1055152929615527e-06, "loss": 0.5005735754966736, "step": 7792 }, { "epoch": 0.9456376653318772, "grad_norm": 1.9305404424667358, "learning_rate": 1.1030585923105272e-06, "loss": 0.10803613066673279, "step": 7793 }, { "epoch": 0.9457590098289043, "grad_norm": 3.6003432273864746, "learning_rate": 1.1006018916595014e-06, "loss": 0.2311781495809555, "step": 7794 }, { "epoch": 0.9458803543259313, "grad_norm": 3.5689187049865723, "learning_rate": 1.0981451910084757e-06, "loss": 0.44839027523994446, "step": 7795 }, { "epoch": 0.9460016988229584, "grad_norm": 1.411562204360962, "learning_rate": 1.0956884903574501e-06, "loss": 0.021551068872213364, "step": 7796 }, { "epoch": 0.9461230433199854, "grad_norm": 1.704988956451416, "learning_rate": 1.0932317897064244e-06, "loss": 0.08966219425201416, "step": 7797 }, { "epoch": 0.9462443878170125, "grad_norm": 2.121044397354126, "learning_rate": 1.0907750890553987e-06, "loss": 0.2672261595726013, "step": 7798 }, { "epoch": 0.9463657323140395, "grad_norm": 1.4736442565917969, "learning_rate": 1.088318388404373e-06, "loss": 0.017919592559337616, "step": 7799 }, { "epoch": 0.9464870768110666, "grad_norm": 3.490145206451416, "learning_rate": 1.0858616877533474e-06, "loss": 0.2591620087623596, "step": 7800 }, { "epoch": 0.9466084213080936, "grad_norm": 6.508039951324463, "learning_rate": 1.0834049871023216e-06, "loss": 0.19484935700893402, "step": 7801 }, { "epoch": 0.9467297658051207, "grad_norm": 1.7342473268508911, "learning_rate": 1.0809482864512961e-06, "loss": 0.16916580498218536, "step": 7802 }, { "epoch": 0.9468511103021477, "grad_norm": 5.028873920440674, "learning_rate": 1.0784915858002704e-06, "loss": 0.28472405672073364, "step": 7803 }, { "epoch": 0.9469724547991749, "grad_norm": 3.037914991378784, "learning_rate": 1.0760348851492446e-06, "loss": 0.22378169000148773, "step": 7804 }, { "epoch": 0.947093799296202, "grad_norm": 3.23246431350708, "learning_rate": 1.073578184498219e-06, "loss": 0.3720892071723938, "step": 7805 }, { "epoch": 0.947215143793229, "grad_norm": 2.4554266929626465, "learning_rate": 1.0711214838471931e-06, "loss": 0.1774751842021942, "step": 7806 }, { "epoch": 0.9473364882902561, "grad_norm": 1.6978445053100586, "learning_rate": 1.0686647831961676e-06, "loss": 0.12968480587005615, "step": 7807 }, { "epoch": 0.9474578327872831, "grad_norm": 3.584123373031616, "learning_rate": 1.0662080825451419e-06, "loss": 0.27817025780677795, "step": 7808 }, { "epoch": 0.9475791772843102, "grad_norm": 4.332122802734375, "learning_rate": 1.0637513818941163e-06, "loss": 0.3896644711494446, "step": 7809 }, { "epoch": 0.9477005217813372, "grad_norm": 2.6040537357330322, "learning_rate": 1.0612946812430906e-06, "loss": 0.28695106506347656, "step": 7810 }, { "epoch": 0.9478218662783643, "grad_norm": 3.85866379737854, "learning_rate": 1.058837980592065e-06, "loss": 0.4249591827392578, "step": 7811 }, { "epoch": 0.9479432107753913, "grad_norm": 3.0386016368865967, "learning_rate": 1.0563812799410393e-06, "loss": 0.31773895025253296, "step": 7812 }, { "epoch": 0.9480645552724184, "grad_norm": 3.6719658374786377, "learning_rate": 1.0539245792900136e-06, "loss": 0.18650908768177032, "step": 7813 }, { "epoch": 0.9481858997694455, "grad_norm": 3.387835741043091, "learning_rate": 1.0514678786389878e-06, "loss": 0.2576434016227722, "step": 7814 }, { "epoch": 0.9483072442664725, "grad_norm": 2.6208126544952393, "learning_rate": 1.0490111779879621e-06, "loss": 0.16077911853790283, "step": 7815 }, { "epoch": 0.9484285887634996, "grad_norm": 3.311572790145874, "learning_rate": 1.0465544773369366e-06, "loss": 0.31560176610946655, "step": 7816 }, { "epoch": 0.9485499332605266, "grad_norm": 1.8237146139144897, "learning_rate": 1.0440977766859108e-06, "loss": 0.11086104810237885, "step": 7817 }, { "epoch": 0.9486712777575537, "grad_norm": 3.3984038829803467, "learning_rate": 1.0416410760348853e-06, "loss": 0.5098788142204285, "step": 7818 }, { "epoch": 0.9487926222545807, "grad_norm": 3.0104126930236816, "learning_rate": 1.0391843753838596e-06, "loss": 0.2441672682762146, "step": 7819 }, { "epoch": 0.9489139667516078, "grad_norm": 3.652759313583374, "learning_rate": 1.0367276747328338e-06, "loss": 0.16642239689826965, "step": 7820 }, { "epoch": 0.9490353112486348, "grad_norm": 3.7115299701690674, "learning_rate": 1.0342709740818083e-06, "loss": 0.14152273535728455, "step": 7821 }, { "epoch": 0.9491566557456619, "grad_norm": 2.323883533477783, "learning_rate": 1.0318142734307825e-06, "loss": 0.1961551457643509, "step": 7822 }, { "epoch": 0.949278000242689, "grad_norm": 2.853586196899414, "learning_rate": 1.0293575727797568e-06, "loss": 0.521477222442627, "step": 7823 }, { "epoch": 0.9493993447397161, "grad_norm": 1.1162757873535156, "learning_rate": 1.026900872128731e-06, "loss": 0.02232605218887329, "step": 7824 }, { "epoch": 0.9495206892367432, "grad_norm": 4.015036582946777, "learning_rate": 1.0244441714777055e-06, "loss": 0.30441439151763916, "step": 7825 }, { "epoch": 0.9496420337337702, "grad_norm": 1.7470495700836182, "learning_rate": 1.0219874708266798e-06, "loss": 0.06380728632211685, "step": 7826 }, { "epoch": 0.9497633782307973, "grad_norm": 1.8475502729415894, "learning_rate": 1.0195307701756543e-06, "loss": 0.13326765596866608, "step": 7827 }, { "epoch": 0.9498847227278243, "grad_norm": 3.670280933380127, "learning_rate": 1.0170740695246285e-06, "loss": 0.12661758065223694, "step": 7828 }, { "epoch": 0.9500060672248514, "grad_norm": 5.783466815948486, "learning_rate": 1.0146173688736028e-06, "loss": 0.16956567764282227, "step": 7829 }, { "epoch": 0.9501274117218784, "grad_norm": 5.360352039337158, "learning_rate": 1.0121606682225772e-06, "loss": 0.6883335113525391, "step": 7830 }, { "epoch": 0.9502487562189055, "grad_norm": 2.8863415718078613, "learning_rate": 1.0097039675715515e-06, "loss": 0.17198000848293304, "step": 7831 }, { "epoch": 0.9503701007159325, "grad_norm": 1.9041783809661865, "learning_rate": 1.0072472669205258e-06, "loss": 0.19008484482765198, "step": 7832 }, { "epoch": 0.9504914452129596, "grad_norm": 4.760650157928467, "learning_rate": 1.0047905662695e-06, "loss": 0.26236963272094727, "step": 7833 }, { "epoch": 0.9506127897099866, "grad_norm": 2.4655110836029053, "learning_rate": 1.0023338656184745e-06, "loss": 0.45303910970687866, "step": 7834 }, { "epoch": 0.9507341342070137, "grad_norm": 3.124277353286743, "learning_rate": 9.998771649674488e-07, "loss": 0.2639216184616089, "step": 7835 }, { "epoch": 0.9508554787040407, "grad_norm": 2.077195882797241, "learning_rate": 9.974204643164232e-07, "loss": 0.37368372082710266, "step": 7836 }, { "epoch": 0.9509768232010678, "grad_norm": 3.0093061923980713, "learning_rate": 9.949637636653975e-07, "loss": 0.5001065135002136, "step": 7837 }, { "epoch": 0.9510981676980949, "grad_norm": 2.4959075450897217, "learning_rate": 9.925070630143717e-07, "loss": 0.4869372248649597, "step": 7838 }, { "epoch": 0.9512195121951219, "grad_norm": 3.088595390319824, "learning_rate": 9.900503623633462e-07, "loss": 0.3885296881198883, "step": 7839 }, { "epoch": 0.951340856692149, "grad_norm": 2.049753189086914, "learning_rate": 9.875936617123205e-07, "loss": 0.22175203263759613, "step": 7840 }, { "epoch": 0.951462201189176, "grad_norm": 2.704782485961914, "learning_rate": 9.851369610612947e-07, "loss": 0.19625632464885712, "step": 7841 }, { "epoch": 0.9515835456862032, "grad_norm": 2.6180198192596436, "learning_rate": 9.82680260410269e-07, "loss": 0.45969143509864807, "step": 7842 }, { "epoch": 0.9517048901832302, "grad_norm": 2.7049448490142822, "learning_rate": 9.802235597592435e-07, "loss": 0.5751237869262695, "step": 7843 }, { "epoch": 0.9518262346802573, "grad_norm": 5.345637798309326, "learning_rate": 9.777668591082177e-07, "loss": 0.17650985717773438, "step": 7844 }, { "epoch": 0.9519475791772843, "grad_norm": 6.753096103668213, "learning_rate": 9.753101584571922e-07, "loss": 0.33544835448265076, "step": 7845 }, { "epoch": 0.9520689236743114, "grad_norm": 3.1077709197998047, "learning_rate": 9.728534578061664e-07, "loss": 0.24875032901763916, "step": 7846 }, { "epoch": 0.9521902681713384, "grad_norm": 2.448488712310791, "learning_rate": 9.703967571551407e-07, "loss": 0.05973612889647484, "step": 7847 }, { "epoch": 0.9523116126683655, "grad_norm": 2.594780683517456, "learning_rate": 9.67940056504115e-07, "loss": 0.1589660495519638, "step": 7848 }, { "epoch": 0.9524329571653926, "grad_norm": 3.5705206394195557, "learning_rate": 9.654833558530894e-07, "loss": 0.25810468196868896, "step": 7849 }, { "epoch": 0.9525543016624196, "grad_norm": 2.0141921043395996, "learning_rate": 9.630266552020637e-07, "loss": 0.36318957805633545, "step": 7850 }, { "epoch": 0.9526756461594467, "grad_norm": 1.4428545236587524, "learning_rate": 9.60569954551038e-07, "loss": 0.04250238463282585, "step": 7851 }, { "epoch": 0.9527969906564737, "grad_norm": 3.8955764770507812, "learning_rate": 9.581132539000124e-07, "loss": 0.26648643612861633, "step": 7852 }, { "epoch": 0.9529183351535008, "grad_norm": 2.4975740909576416, "learning_rate": 9.556565532489867e-07, "loss": 0.13401541113853455, "step": 7853 }, { "epoch": 0.9530396796505278, "grad_norm": 0.8228896260261536, "learning_rate": 9.53199852597961e-07, "loss": 0.014080810360610485, "step": 7854 }, { "epoch": 0.9531610241475549, "grad_norm": 2.9554057121276855, "learning_rate": 9.507431519469354e-07, "loss": 0.48180025815963745, "step": 7855 }, { "epoch": 0.9532823686445819, "grad_norm": 2.626617670059204, "learning_rate": 9.482864512959098e-07, "loss": 0.26008087396621704, "step": 7856 }, { "epoch": 0.953403713141609, "grad_norm": 2.5600061416625977, "learning_rate": 9.458297506448839e-07, "loss": 0.1543760448694229, "step": 7857 }, { "epoch": 0.953525057638636, "grad_norm": 4.203423023223877, "learning_rate": 9.433730499938583e-07, "loss": 0.5097906589508057, "step": 7858 }, { "epoch": 0.9536464021356631, "grad_norm": 1.8074662685394287, "learning_rate": 9.409163493428326e-07, "loss": 0.0946711003780365, "step": 7859 }, { "epoch": 0.9537677466326903, "grad_norm": 1.6363354921340942, "learning_rate": 9.38459648691807e-07, "loss": 0.14806164801120758, "step": 7860 }, { "epoch": 0.9538890911297173, "grad_norm": 2.3720626831054688, "learning_rate": 9.360029480407813e-07, "loss": 0.2444186955690384, "step": 7861 }, { "epoch": 0.9540104356267444, "grad_norm": 3.175133228302002, "learning_rate": 9.335462473897556e-07, "loss": 0.11319217085838318, "step": 7862 }, { "epoch": 0.9541317801237714, "grad_norm": 3.757995367050171, "learning_rate": 9.3108954673873e-07, "loss": 0.3447774648666382, "step": 7863 }, { "epoch": 0.9542531246207985, "grad_norm": 3.8200862407684326, "learning_rate": 9.286328460877044e-07, "loss": 0.35407742857933044, "step": 7864 }, { "epoch": 0.9543744691178255, "grad_norm": 2.783705234527588, "learning_rate": 9.261761454366785e-07, "loss": 0.6596783995628357, "step": 7865 }, { "epoch": 0.9544958136148526, "grad_norm": 3.068283796310425, "learning_rate": 9.237194447856529e-07, "loss": 0.2303699254989624, "step": 7866 }, { "epoch": 0.9546171581118796, "grad_norm": 3.7777249813079834, "learning_rate": 9.212627441346272e-07, "loss": 0.15921466052532196, "step": 7867 }, { "epoch": 0.9547385026089067, "grad_norm": 1.3980931043624878, "learning_rate": 9.188060434836016e-07, "loss": 0.03198691084980965, "step": 7868 }, { "epoch": 0.9548598471059337, "grad_norm": 4.3535356521606445, "learning_rate": 9.16349342832576e-07, "loss": 0.13515742123126984, "step": 7869 }, { "epoch": 0.9549811916029608, "grad_norm": 3.2996747493743896, "learning_rate": 9.138926421815502e-07, "loss": 0.2880602478981018, "step": 7870 }, { "epoch": 0.9551025360999879, "grad_norm": 3.6439859867095947, "learning_rate": 9.114359415305246e-07, "loss": 0.5296615362167358, "step": 7871 }, { "epoch": 0.9552238805970149, "grad_norm": 3.095430374145508, "learning_rate": 9.08979240879499e-07, "loss": 0.18423676490783691, "step": 7872 }, { "epoch": 0.955345225094042, "grad_norm": 3.168898820877075, "learning_rate": 9.065225402284733e-07, "loss": 0.6742936968803406, "step": 7873 }, { "epoch": 0.955466569591069, "grad_norm": 3.451775312423706, "learning_rate": 9.040658395774475e-07, "loss": 0.08914196491241455, "step": 7874 }, { "epoch": 0.9555879140880961, "grad_norm": 2.2088730335235596, "learning_rate": 9.016091389264218e-07, "loss": 0.04968889430165291, "step": 7875 }, { "epoch": 0.9557092585851231, "grad_norm": 2.1526970863342285, "learning_rate": 8.991524382753962e-07, "loss": 0.09794817864894867, "step": 7876 }, { "epoch": 0.9558306030821502, "grad_norm": 2.9504287242889404, "learning_rate": 8.966957376243706e-07, "loss": 0.306149423122406, "step": 7877 }, { "epoch": 0.9559519475791772, "grad_norm": 2.565030574798584, "learning_rate": 8.942390369733449e-07, "loss": 0.2874588072299957, "step": 7878 }, { "epoch": 0.9560732920762044, "grad_norm": 2.3507254123687744, "learning_rate": 8.917823363223192e-07, "loss": 0.17302533984184265, "step": 7879 }, { "epoch": 0.9561946365732314, "grad_norm": 2.4506943225860596, "learning_rate": 8.893256356712935e-07, "loss": 0.13234300911426544, "step": 7880 }, { "epoch": 0.9563159810702585, "grad_norm": 2.684302806854248, "learning_rate": 8.868689350202679e-07, "loss": 0.4583718180656433, "step": 7881 }, { "epoch": 0.9564373255672856, "grad_norm": 2.4883241653442383, "learning_rate": 8.844122343692422e-07, "loss": 0.08473912626504898, "step": 7882 }, { "epoch": 0.9565586700643126, "grad_norm": 4.1125969886779785, "learning_rate": 8.819555337182164e-07, "loss": 0.1911337971687317, "step": 7883 }, { "epoch": 0.9566800145613397, "grad_norm": 1.5903441905975342, "learning_rate": 8.794988330671908e-07, "loss": 0.03131105378270149, "step": 7884 }, { "epoch": 0.9568013590583667, "grad_norm": 1.1099869012832642, "learning_rate": 8.770421324161652e-07, "loss": 0.0715690329670906, "step": 7885 }, { "epoch": 0.9569227035553938, "grad_norm": 2.8287768363952637, "learning_rate": 8.745854317651395e-07, "loss": 0.7676157355308533, "step": 7886 }, { "epoch": 0.9570440480524208, "grad_norm": 1.5631098747253418, "learning_rate": 8.721287311141139e-07, "loss": 0.06726650148630142, "step": 7887 }, { "epoch": 0.9571653925494479, "grad_norm": 3.1624066829681396, "learning_rate": 8.696720304630881e-07, "loss": 0.44372379779815674, "step": 7888 }, { "epoch": 0.9572867370464749, "grad_norm": 2.2251269817352295, "learning_rate": 8.672153298120625e-07, "loss": 0.08869171142578125, "step": 7889 }, { "epoch": 0.957408081543502, "grad_norm": 2.507517099380493, "learning_rate": 8.647586291610369e-07, "loss": 0.1120719164609909, "step": 7890 }, { "epoch": 0.957529426040529, "grad_norm": 3.830821990966797, "learning_rate": 8.62301928510011e-07, "loss": 0.32974526286125183, "step": 7891 }, { "epoch": 0.9576507705375561, "grad_norm": 3.0191774368286133, "learning_rate": 8.598452278589854e-07, "loss": 0.45083338022232056, "step": 7892 }, { "epoch": 0.9577721150345831, "grad_norm": 2.3206052780151367, "learning_rate": 8.573885272079598e-07, "loss": 0.21293078362941742, "step": 7893 }, { "epoch": 0.9578934595316102, "grad_norm": 3.81679368019104, "learning_rate": 8.549318265569341e-07, "loss": 0.29027408361434937, "step": 7894 }, { "epoch": 0.9580148040286373, "grad_norm": 2.8030316829681396, "learning_rate": 8.524751259059085e-07, "loss": 0.503628134727478, "step": 7895 }, { "epoch": 0.9581361485256643, "grad_norm": 1.6951390504837036, "learning_rate": 8.500184252548827e-07, "loss": 0.06144610792398453, "step": 7896 }, { "epoch": 0.9582574930226915, "grad_norm": 3.3977105617523193, "learning_rate": 8.475617246038571e-07, "loss": 0.2305852621793747, "step": 7897 }, { "epoch": 0.9583788375197185, "grad_norm": 2.1307830810546875, "learning_rate": 8.451050239528315e-07, "loss": 0.09264960885047913, "step": 7898 }, { "epoch": 0.9585001820167456, "grad_norm": 3.14979887008667, "learning_rate": 8.426483233018057e-07, "loss": 0.2149835228919983, "step": 7899 }, { "epoch": 0.9586215265137726, "grad_norm": 2.1775331497192383, "learning_rate": 8.4019162265078e-07, "loss": 0.1885920763015747, "step": 7900 }, { "epoch": 0.9587428710107997, "grad_norm": 0.3252418041229248, "learning_rate": 8.377349219997543e-07, "loss": 0.00818142481148243, "step": 7901 }, { "epoch": 0.9588642155078267, "grad_norm": 2.461092472076416, "learning_rate": 8.352782213487287e-07, "loss": 0.1598881632089615, "step": 7902 }, { "epoch": 0.9589855600048538, "grad_norm": 2.2760009765625, "learning_rate": 8.328215206977031e-07, "loss": 0.18402983248233795, "step": 7903 }, { "epoch": 0.9591069045018809, "grad_norm": 1.855074405670166, "learning_rate": 8.303648200466774e-07, "loss": 0.2035462111234665, "step": 7904 }, { "epoch": 0.9592282489989079, "grad_norm": 4.009053707122803, "learning_rate": 8.279081193956517e-07, "loss": 0.261350154876709, "step": 7905 }, { "epoch": 0.959349593495935, "grad_norm": 2.6432864665985107, "learning_rate": 8.254514187446261e-07, "loss": 0.12321817874908447, "step": 7906 }, { "epoch": 0.959470937992962, "grad_norm": 4.217153072357178, "learning_rate": 8.229947180936004e-07, "loss": 0.11636294424533844, "step": 7907 }, { "epoch": 0.9595922824899891, "grad_norm": 3.455448865890503, "learning_rate": 8.205380174425747e-07, "loss": 0.374325692653656, "step": 7908 }, { "epoch": 0.9597136269870161, "grad_norm": 4.506859302520752, "learning_rate": 8.180813167915489e-07, "loss": 0.15391072630882263, "step": 7909 }, { "epoch": 0.9598349714840432, "grad_norm": 3.161404848098755, "learning_rate": 8.156246161405233e-07, "loss": 0.15494774281978607, "step": 7910 }, { "epoch": 0.9599563159810702, "grad_norm": 1.999456524848938, "learning_rate": 8.131679154894977e-07, "loss": 0.062022123485803604, "step": 7911 }, { "epoch": 0.9600776604780973, "grad_norm": 3.268637180328369, "learning_rate": 8.10711214838472e-07, "loss": 0.40158611536026, "step": 7912 }, { "epoch": 0.9601990049751243, "grad_norm": 4.26248836517334, "learning_rate": 8.082545141874464e-07, "loss": 0.3491218090057373, "step": 7913 }, { "epoch": 0.9603203494721514, "grad_norm": 3.431417226791382, "learning_rate": 8.057978135364207e-07, "loss": 0.8332284688949585, "step": 7914 }, { "epoch": 0.9604416939691784, "grad_norm": 3.1961565017700195, "learning_rate": 8.03341112885395e-07, "loss": 0.49827903509140015, "step": 7915 }, { "epoch": 0.9605630384662056, "grad_norm": 3.3630588054656982, "learning_rate": 8.008844122343693e-07, "loss": 0.33448243141174316, "step": 7916 }, { "epoch": 0.9606843829632327, "grad_norm": 7.66854190826416, "learning_rate": 7.984277115833436e-07, "loss": 0.20237378776073456, "step": 7917 }, { "epoch": 0.9608057274602597, "grad_norm": 2.4182910919189453, "learning_rate": 7.959710109323179e-07, "loss": 0.22386577725410461, "step": 7918 }, { "epoch": 0.9609270719572868, "grad_norm": 2.571340322494507, "learning_rate": 7.935143102812923e-07, "loss": 0.360904723405838, "step": 7919 }, { "epoch": 0.9610484164543138, "grad_norm": 2.1754395961761475, "learning_rate": 7.910576096302666e-07, "loss": 0.08950668573379517, "step": 7920 }, { "epoch": 0.9611697609513409, "grad_norm": 3.66198468208313, "learning_rate": 7.88600908979241e-07, "loss": 0.31132248044013977, "step": 7921 }, { "epoch": 0.9612911054483679, "grad_norm": 3.0433566570281982, "learning_rate": 7.861442083282154e-07, "loss": 0.19213582575321198, "step": 7922 }, { "epoch": 0.961412449945395, "grad_norm": 3.143679618835449, "learning_rate": 7.836875076771896e-07, "loss": 0.3043483793735504, "step": 7923 }, { "epoch": 0.961533794442422, "grad_norm": 4.061753749847412, "learning_rate": 7.812308070261639e-07, "loss": 0.40942084789276123, "step": 7924 }, { "epoch": 0.9616551389394491, "grad_norm": 2.057485818862915, "learning_rate": 7.787741063751382e-07, "loss": 0.45508241653442383, "step": 7925 }, { "epoch": 0.9617764834364761, "grad_norm": 3.4724273681640625, "learning_rate": 7.763174057241125e-07, "loss": 0.3207148015499115, "step": 7926 }, { "epoch": 0.9618978279335032, "grad_norm": 2.308500289916992, "learning_rate": 7.738607050730869e-07, "loss": 0.46270430088043213, "step": 7927 }, { "epoch": 0.9620191724305303, "grad_norm": 5.799560546875, "learning_rate": 7.714040044220612e-07, "loss": 0.4069652855396271, "step": 7928 }, { "epoch": 0.9621405169275573, "grad_norm": 2.761258125305176, "learning_rate": 7.689473037710356e-07, "loss": 0.2092076539993286, "step": 7929 }, { "epoch": 0.9622618614245844, "grad_norm": 2.553238868713379, "learning_rate": 7.6649060312001e-07, "loss": 0.17968635261058807, "step": 7930 }, { "epoch": 0.9623832059216114, "grad_norm": 2.6732804775238037, "learning_rate": 7.640339024689843e-07, "loss": 0.6900864839553833, "step": 7931 }, { "epoch": 0.9625045504186385, "grad_norm": 5.017135143280029, "learning_rate": 7.615772018179586e-07, "loss": 0.5799640417098999, "step": 7932 }, { "epoch": 0.9626258949156655, "grad_norm": 2.8801865577697754, "learning_rate": 7.591205011669328e-07, "loss": 0.3171132206916809, "step": 7933 }, { "epoch": 0.9627472394126926, "grad_norm": 3.987372398376465, "learning_rate": 7.566638005159072e-07, "loss": 0.4332514703273773, "step": 7934 }, { "epoch": 0.9628685839097197, "grad_norm": 2.6968657970428467, "learning_rate": 7.542070998648815e-07, "loss": 0.12070680409669876, "step": 7935 }, { "epoch": 0.9629899284067468, "grad_norm": 3.9448254108428955, "learning_rate": 7.517503992138558e-07, "loss": 0.17305317521095276, "step": 7936 }, { "epoch": 0.9631112729037739, "grad_norm": 3.5824899673461914, "learning_rate": 7.492936985628302e-07, "loss": 0.16777774691581726, "step": 7937 }, { "epoch": 0.9632326174008009, "grad_norm": 2.3919737339019775, "learning_rate": 7.468369979118045e-07, "loss": 0.0630592331290245, "step": 7938 }, { "epoch": 0.963353961897828, "grad_norm": 4.683971881866455, "learning_rate": 7.443802972607789e-07, "loss": 0.3601974844932556, "step": 7939 }, { "epoch": 0.963475306394855, "grad_norm": 2.2994611263275146, "learning_rate": 7.419235966097532e-07, "loss": 0.37967389822006226, "step": 7940 }, { "epoch": 0.9635966508918821, "grad_norm": 2.3091518878936768, "learning_rate": 7.394668959587274e-07, "loss": 0.09787468612194061, "step": 7941 }, { "epoch": 0.9637179953889091, "grad_norm": 2.3024415969848633, "learning_rate": 7.370101953077018e-07, "loss": 0.12726274132728577, "step": 7942 }, { "epoch": 0.9638393398859362, "grad_norm": 4.135954856872559, "learning_rate": 7.345534946566762e-07, "loss": 0.36145249009132385, "step": 7943 }, { "epoch": 0.9639606843829632, "grad_norm": 6.047504425048828, "learning_rate": 7.320967940056504e-07, "loss": 0.4619120955467224, "step": 7944 }, { "epoch": 0.9640820288799903, "grad_norm": 3.6637415885925293, "learning_rate": 7.296400933546248e-07, "loss": 0.11983911693096161, "step": 7945 }, { "epoch": 0.9642033733770173, "grad_norm": 0.011507936753332615, "learning_rate": 7.271833927035991e-07, "loss": 0.00011489490862004459, "step": 7946 }, { "epoch": 0.9643247178740444, "grad_norm": 2.1599011421203613, "learning_rate": 7.247266920525735e-07, "loss": 0.13275368511676788, "step": 7947 }, { "epoch": 0.9644460623710714, "grad_norm": 5.150091171264648, "learning_rate": 7.222699914015479e-07, "loss": 0.08994501829147339, "step": 7948 }, { "epoch": 0.9645674068680985, "grad_norm": 3.5626416206359863, "learning_rate": 7.198132907505221e-07, "loss": 0.29300791025161743, "step": 7949 }, { "epoch": 0.9646887513651256, "grad_norm": 1.921974778175354, "learning_rate": 7.173565900994964e-07, "loss": 0.5204575657844543, "step": 7950 }, { "epoch": 0.9648100958621526, "grad_norm": 5.062069416046143, "learning_rate": 7.148998894484707e-07, "loss": 0.3412650227546692, "step": 7951 }, { "epoch": 0.9649314403591797, "grad_norm": 3.3679237365722656, "learning_rate": 7.124431887974451e-07, "loss": 0.48037001490592957, "step": 7952 }, { "epoch": 0.9650527848562068, "grad_norm": 3.8031041622161865, "learning_rate": 7.099864881464194e-07, "loss": 0.511989414691925, "step": 7953 }, { "epoch": 0.9651741293532339, "grad_norm": 3.5611443519592285, "learning_rate": 7.075297874953937e-07, "loss": 0.2502305805683136, "step": 7954 }, { "epoch": 0.9652954738502609, "grad_norm": 4.304204940795898, "learning_rate": 7.050730868443681e-07, "loss": 0.270717054605484, "step": 7955 }, { "epoch": 0.965416818347288, "grad_norm": 3.9980058670043945, "learning_rate": 7.026163861933425e-07, "loss": 0.7489833235740662, "step": 7956 }, { "epoch": 0.965538162844315, "grad_norm": 3.366884469985962, "learning_rate": 7.001596855423168e-07, "loss": 0.035697609186172485, "step": 7957 }, { "epoch": 0.9656595073413421, "grad_norm": 3.33687424659729, "learning_rate": 6.97702984891291e-07, "loss": 0.6773338913917542, "step": 7958 }, { "epoch": 0.9657808518383691, "grad_norm": 4.1891560554504395, "learning_rate": 6.952462842402653e-07, "loss": 0.4635056257247925, "step": 7959 }, { "epoch": 0.9659021963353962, "grad_norm": 3.1431467533111572, "learning_rate": 6.927895835892397e-07, "loss": 0.22556090354919434, "step": 7960 }, { "epoch": 0.9660235408324233, "grad_norm": 2.864940881729126, "learning_rate": 6.90332882938214e-07, "loss": 0.17001225054264069, "step": 7961 }, { "epoch": 0.9661448853294503, "grad_norm": 2.998119592666626, "learning_rate": 6.878761822871883e-07, "loss": 0.10588537901639938, "step": 7962 }, { "epoch": 0.9662662298264774, "grad_norm": 5.751261234283447, "learning_rate": 6.854194816361627e-07, "loss": 0.21144995093345642, "step": 7963 }, { "epoch": 0.9663875743235044, "grad_norm": 1.186421513557434, "learning_rate": 6.829627809851371e-07, "loss": 0.11125794053077698, "step": 7964 }, { "epoch": 0.9665089188205315, "grad_norm": 0.10963766276836395, "learning_rate": 6.805060803341114e-07, "loss": 0.0012716248165816069, "step": 7965 }, { "epoch": 0.9666302633175585, "grad_norm": 3.3122103214263916, "learning_rate": 6.780493796830858e-07, "loss": 0.36914873123168945, "step": 7966 }, { "epoch": 0.9667516078145856, "grad_norm": 5.991551399230957, "learning_rate": 6.755926790320599e-07, "loss": 0.48905646800994873, "step": 7967 }, { "epoch": 0.9668729523116126, "grad_norm": 0.5856578946113586, "learning_rate": 6.731359783810343e-07, "loss": 0.008250446990132332, "step": 7968 }, { "epoch": 0.9669942968086397, "grad_norm": 3.88533878326416, "learning_rate": 6.706792777300087e-07, "loss": 0.4692520499229431, "step": 7969 }, { "epoch": 0.9671156413056667, "grad_norm": 4.396678924560547, "learning_rate": 6.682225770789829e-07, "loss": 0.3979184329509735, "step": 7970 }, { "epoch": 0.9672369858026938, "grad_norm": 3.2962570190429688, "learning_rate": 6.657658764279573e-07, "loss": 0.28521528840065, "step": 7971 }, { "epoch": 0.967358330299721, "grad_norm": 3.511728286743164, "learning_rate": 6.633091757769317e-07, "loss": 0.7172317504882812, "step": 7972 }, { "epoch": 0.967479674796748, "grad_norm": 3.1661734580993652, "learning_rate": 6.60852475125906e-07, "loss": 0.337493896484375, "step": 7973 }, { "epoch": 0.9676010192937751, "grad_norm": 1.9824734926223755, "learning_rate": 6.583957744748804e-07, "loss": 0.11007031053304672, "step": 7974 }, { "epoch": 0.9677223637908021, "grad_norm": 4.08870792388916, "learning_rate": 6.559390738238545e-07, "loss": 0.4382845461368561, "step": 7975 }, { "epoch": 0.9678437082878292, "grad_norm": 0.7976673245429993, "learning_rate": 6.534823731728289e-07, "loss": 0.018051166087388992, "step": 7976 }, { "epoch": 0.9679650527848562, "grad_norm": 2.4754157066345215, "learning_rate": 6.510256725218033e-07, "loss": 0.17027176916599274, "step": 7977 }, { "epoch": 0.9680863972818833, "grad_norm": 3.971200466156006, "learning_rate": 6.485689718707776e-07, "loss": 0.2869623899459839, "step": 7978 }, { "epoch": 0.9682077417789103, "grad_norm": 3.9596965312957764, "learning_rate": 6.461122712197519e-07, "loss": 0.17685894668102264, "step": 7979 }, { "epoch": 0.9683290862759374, "grad_norm": 4.569477081298828, "learning_rate": 6.436555705687262e-07, "loss": 0.08662522584199905, "step": 7980 }, { "epoch": 0.9684504307729644, "grad_norm": 2.6751413345336914, "learning_rate": 6.411988699177006e-07, "loss": 0.1112760677933693, "step": 7981 }, { "epoch": 0.9685717752699915, "grad_norm": 4.352506160736084, "learning_rate": 6.38742169266675e-07, "loss": 0.13355319201946259, "step": 7982 }, { "epoch": 0.9686931197670186, "grad_norm": 2.4656338691711426, "learning_rate": 6.362854686156493e-07, "loss": 0.32578545808792114, "step": 7983 }, { "epoch": 0.9688144642640456, "grad_norm": 2.8347713947296143, "learning_rate": 6.338287679646235e-07, "loss": 0.23368747532367706, "step": 7984 }, { "epoch": 0.9689358087610727, "grad_norm": 3.175974130630493, "learning_rate": 6.313720673135979e-07, "loss": 0.23475497961044312, "step": 7985 }, { "epoch": 0.9690571532580997, "grad_norm": 2.9720115661621094, "learning_rate": 6.289153666625722e-07, "loss": 0.3744368851184845, "step": 7986 }, { "epoch": 0.9691784977551268, "grad_norm": 3.2594106197357178, "learning_rate": 6.264586660115466e-07, "loss": 0.26197558641433716, "step": 7987 }, { "epoch": 0.9692998422521538, "grad_norm": 3.5036184787750244, "learning_rate": 6.240019653605208e-07, "loss": 0.2104101926088333, "step": 7988 }, { "epoch": 0.9694211867491809, "grad_norm": 3.3197720050811768, "learning_rate": 6.215452647094952e-07, "loss": 0.1905415654182434, "step": 7989 }, { "epoch": 0.969542531246208, "grad_norm": 2.2914814949035645, "learning_rate": 6.190885640584695e-07, "loss": 0.09242109954357147, "step": 7990 }, { "epoch": 0.9696638757432351, "grad_norm": 3.04522967338562, "learning_rate": 6.166318634074438e-07, "loss": 0.3486882150173187, "step": 7991 }, { "epoch": 0.9697852202402621, "grad_norm": 2.9246022701263428, "learning_rate": 6.141751627564182e-07, "loss": 0.09958159923553467, "step": 7992 }, { "epoch": 0.9699065647372892, "grad_norm": 1.7992048263549805, "learning_rate": 6.117184621053926e-07, "loss": 0.12658990919589996, "step": 7993 }, { "epoch": 0.9700279092343163, "grad_norm": 2.7368500232696533, "learning_rate": 6.092617614543668e-07, "loss": 0.14625921845436096, "step": 7994 }, { "epoch": 0.9701492537313433, "grad_norm": 3.1359643936157227, "learning_rate": 6.068050608033412e-07, "loss": 0.19749733805656433, "step": 7995 }, { "epoch": 0.9702705982283704, "grad_norm": 1.9864304065704346, "learning_rate": 6.043483601523154e-07, "loss": 0.3252110481262207, "step": 7996 }, { "epoch": 0.9703919427253974, "grad_norm": 4.464076995849609, "learning_rate": 6.018916595012898e-07, "loss": 0.21974925696849823, "step": 7997 }, { "epoch": 0.9705132872224245, "grad_norm": 2.414304494857788, "learning_rate": 5.994349588502642e-07, "loss": 0.07756417989730835, "step": 7998 }, { "epoch": 0.9706346317194515, "grad_norm": 3.7340261936187744, "learning_rate": 5.969782581992384e-07, "loss": 0.22269098460674286, "step": 7999 }, { "epoch": 0.9707559762164786, "grad_norm": 3.2792184352874756, "learning_rate": 5.945215575482128e-07, "loss": 0.16499823331832886, "step": 8000 }, { "epoch": 0.9708773207135056, "grad_norm": 2.2741506099700928, "learning_rate": 5.920648568971872e-07, "loss": 0.11837846040725708, "step": 8001 }, { "epoch": 0.9709986652105327, "grad_norm": 1.936371088027954, "learning_rate": 5.896081562461615e-07, "loss": 0.20412687957286835, "step": 8002 }, { "epoch": 0.9711200097075597, "grad_norm": 2.6052792072296143, "learning_rate": 5.871514555951358e-07, "loss": 0.3583765923976898, "step": 8003 }, { "epoch": 0.9712413542045868, "grad_norm": 2.475748300552368, "learning_rate": 5.846947549441101e-07, "loss": 0.2710685133934021, "step": 8004 }, { "epoch": 0.9713626987016138, "grad_norm": 2.4831652641296387, "learning_rate": 5.822380542930844e-07, "loss": 0.16427268087863922, "step": 8005 }, { "epoch": 0.9714840431986409, "grad_norm": 5.110856056213379, "learning_rate": 5.797813536420588e-07, "loss": 0.4366946518421173, "step": 8006 }, { "epoch": 0.971605387695668, "grad_norm": 2.5188827514648438, "learning_rate": 5.77324652991033e-07, "loss": 0.1802208423614502, "step": 8007 }, { "epoch": 0.971726732192695, "grad_norm": 3.011776924133301, "learning_rate": 5.748679523400074e-07, "loss": 0.6094852089881897, "step": 8008 }, { "epoch": 0.9718480766897222, "grad_norm": 2.370227336883545, "learning_rate": 5.724112516889817e-07, "loss": 0.15361112356185913, "step": 8009 }, { "epoch": 0.9719694211867492, "grad_norm": 1.9584566354751587, "learning_rate": 5.699545510379561e-07, "loss": 0.20595978200435638, "step": 8010 }, { "epoch": 0.9720907656837763, "grad_norm": 3.20078182220459, "learning_rate": 5.674978503869304e-07, "loss": 0.4513453245162964, "step": 8011 }, { "epoch": 0.9722121101808033, "grad_norm": 2.6884684562683105, "learning_rate": 5.650411497359047e-07, "loss": 0.32029762864112854, "step": 8012 }, { "epoch": 0.9723334546778304, "grad_norm": 3.8702406883239746, "learning_rate": 5.625844490848791e-07, "loss": 0.3958131968975067, "step": 8013 }, { "epoch": 0.9724547991748574, "grad_norm": 3.2293291091918945, "learning_rate": 5.601277484338534e-07, "loss": 0.22284071147441864, "step": 8014 }, { "epoch": 0.9725761436718845, "grad_norm": 2.01242733001709, "learning_rate": 5.576710477828277e-07, "loss": 0.408892959356308, "step": 8015 }, { "epoch": 0.9726974881689116, "grad_norm": 3.1618263721466064, "learning_rate": 5.55214347131802e-07, "loss": 0.23851387202739716, "step": 8016 }, { "epoch": 0.9728188326659386, "grad_norm": 3.6827187538146973, "learning_rate": 5.527576464807763e-07, "loss": 0.23756277561187744, "step": 8017 }, { "epoch": 0.9729401771629657, "grad_norm": 3.352854013442993, "learning_rate": 5.503009458297507e-07, "loss": 0.6379824876785278, "step": 8018 }, { "epoch": 0.9730615216599927, "grad_norm": 2.586066722869873, "learning_rate": 5.478442451787251e-07, "loss": 0.2642971873283386, "step": 8019 }, { "epoch": 0.9731828661570198, "grad_norm": 1.598549246788025, "learning_rate": 5.453875445276993e-07, "loss": 0.08604611456394196, "step": 8020 }, { "epoch": 0.9733042106540468, "grad_norm": 1.926895260810852, "learning_rate": 5.429308438766737e-07, "loss": 0.03974687308073044, "step": 8021 }, { "epoch": 0.9734255551510739, "grad_norm": 2.9098961353302, "learning_rate": 5.404741432256481e-07, "loss": 0.18133467435836792, "step": 8022 }, { "epoch": 0.9735468996481009, "grad_norm": 3.4711601734161377, "learning_rate": 5.380174425746223e-07, "loss": 0.20705561339855194, "step": 8023 }, { "epoch": 0.973668244145128, "grad_norm": 0.8246050477027893, "learning_rate": 5.355607419235966e-07, "loss": 0.011186044663190842, "step": 8024 }, { "epoch": 0.973789588642155, "grad_norm": 4.583596229553223, "learning_rate": 5.331040412725709e-07, "loss": 0.2717769145965576, "step": 8025 }, { "epoch": 0.9739109331391821, "grad_norm": 3.9553446769714355, "learning_rate": 5.306473406215453e-07, "loss": 0.2031843364238739, "step": 8026 }, { "epoch": 0.9740322776362091, "grad_norm": 2.772916078567505, "learning_rate": 5.281906399705197e-07, "loss": 0.24932214617729187, "step": 8027 }, { "epoch": 0.9741536221332363, "grad_norm": 2.933384656906128, "learning_rate": 5.257339393194939e-07, "loss": 0.2576424181461334, "step": 8028 }, { "epoch": 0.9742749666302634, "grad_norm": 4.053450584411621, "learning_rate": 5.232772386684683e-07, "loss": 0.4961802661418915, "step": 8029 }, { "epoch": 0.9743963111272904, "grad_norm": 3.6830716133117676, "learning_rate": 5.208205380174427e-07, "loss": 0.3001202642917633, "step": 8030 }, { "epoch": 0.9745176556243175, "grad_norm": 4.133368015289307, "learning_rate": 5.183638373664169e-07, "loss": 0.22824418544769287, "step": 8031 }, { "epoch": 0.9746390001213445, "grad_norm": 2.1026504039764404, "learning_rate": 5.159071367153913e-07, "loss": 0.18196791410446167, "step": 8032 }, { "epoch": 0.9747603446183716, "grad_norm": 2.5015740394592285, "learning_rate": 5.134504360643655e-07, "loss": 0.17556096613407135, "step": 8033 }, { "epoch": 0.9748816891153986, "grad_norm": 2.8434247970581055, "learning_rate": 5.109937354133399e-07, "loss": 0.1827562004327774, "step": 8034 }, { "epoch": 0.9750030336124257, "grad_norm": 1.8038930892944336, "learning_rate": 5.085370347623143e-07, "loss": 0.08384514600038528, "step": 8035 }, { "epoch": 0.9751243781094527, "grad_norm": 3.091498613357544, "learning_rate": 5.060803341112886e-07, "loss": 0.12789833545684814, "step": 8036 }, { "epoch": 0.9752457226064798, "grad_norm": 0.09367174655199051, "learning_rate": 5.036236334602629e-07, "loss": 0.001040774630382657, "step": 8037 }, { "epoch": 0.9753670671035068, "grad_norm": 1.8995803594589233, "learning_rate": 5.011669328092372e-07, "loss": 0.3796798884868622, "step": 8038 }, { "epoch": 0.9754884116005339, "grad_norm": 3.0257623195648193, "learning_rate": 4.987102321582116e-07, "loss": 0.2778775095939636, "step": 8039 }, { "epoch": 0.975609756097561, "grad_norm": 3.2603647708892822, "learning_rate": 4.962535315071859e-07, "loss": 0.03183357045054436, "step": 8040 }, { "epoch": 0.975731100594588, "grad_norm": 3.2065699100494385, "learning_rate": 4.937968308561602e-07, "loss": 0.6497611999511719, "step": 8041 }, { "epoch": 0.9758524450916151, "grad_norm": 2.0367579460144043, "learning_rate": 4.913401302051345e-07, "loss": 0.21571645140647888, "step": 8042 }, { "epoch": 0.9759737895886421, "grad_norm": 3.73067307472229, "learning_rate": 4.888834295541089e-07, "loss": 0.295085072517395, "step": 8043 }, { "epoch": 0.9760951340856692, "grad_norm": 3.9510042667388916, "learning_rate": 4.864267289030832e-07, "loss": 0.3842081129550934, "step": 8044 }, { "epoch": 0.9762164785826962, "grad_norm": 3.7136118412017822, "learning_rate": 4.839700282520575e-07, "loss": 0.17222243547439575, "step": 8045 }, { "epoch": 0.9763378230797234, "grad_norm": 3.296497106552124, "learning_rate": 4.815133276010318e-07, "loss": 0.13662958145141602, "step": 8046 }, { "epoch": 0.9764591675767504, "grad_norm": 1.992224097251892, "learning_rate": 4.790566269500062e-07, "loss": 0.24687139689922333, "step": 8047 }, { "epoch": 0.9765805120737775, "grad_norm": 0.5473794937133789, "learning_rate": 4.765999262989805e-07, "loss": 0.00890967808663845, "step": 8048 }, { "epoch": 0.9767018565708045, "grad_norm": 2.5760724544525146, "learning_rate": 4.741432256479549e-07, "loss": 0.1137750893831253, "step": 8049 }, { "epoch": 0.9768232010678316, "grad_norm": 3.269500255584717, "learning_rate": 4.7168652499692914e-07, "loss": 0.556832492351532, "step": 8050 }, { "epoch": 0.9769445455648587, "grad_norm": 4.887061595916748, "learning_rate": 4.692298243459035e-07, "loss": 0.5353174209594727, "step": 8051 }, { "epoch": 0.9770658900618857, "grad_norm": 2.0631234645843506, "learning_rate": 4.667731236948778e-07, "loss": 0.37828293442726135, "step": 8052 }, { "epoch": 0.9771872345589128, "grad_norm": 1.8645737171173096, "learning_rate": 4.643164230438522e-07, "loss": 0.09912809729576111, "step": 8053 }, { "epoch": 0.9773085790559398, "grad_norm": 2.9436941146850586, "learning_rate": 4.6185972239282644e-07, "loss": 0.1767445206642151, "step": 8054 }, { "epoch": 0.9774299235529669, "grad_norm": 2.902066230773926, "learning_rate": 4.594030217418008e-07, "loss": 0.37270790338516235, "step": 8055 }, { "epoch": 0.9775512680499939, "grad_norm": 3.983527421951294, "learning_rate": 4.569463210907751e-07, "loss": 0.1015276163816452, "step": 8056 }, { "epoch": 0.977672612547021, "grad_norm": 3.168600559234619, "learning_rate": 4.544896204397495e-07, "loss": 0.32618552446365356, "step": 8057 }, { "epoch": 0.977793957044048, "grad_norm": 2.6500771045684814, "learning_rate": 4.5203291978872373e-07, "loss": 0.2651030719280243, "step": 8058 }, { "epoch": 0.9779153015410751, "grad_norm": 3.4707014560699463, "learning_rate": 4.495762191376981e-07, "loss": 0.43146318197250366, "step": 8059 }, { "epoch": 0.9780366460381021, "grad_norm": 2.94201922416687, "learning_rate": 4.4711951848667246e-07, "loss": 0.39479559659957886, "step": 8060 }, { "epoch": 0.9781579905351292, "grad_norm": 0.184917151927948, "learning_rate": 4.446628178356468e-07, "loss": 0.0023350557312369347, "step": 8061 }, { "epoch": 0.9782793350321562, "grad_norm": 3.382920265197754, "learning_rate": 4.422061171846211e-07, "loss": 0.20173312723636627, "step": 8062 }, { "epoch": 0.9784006795291833, "grad_norm": 3.7760398387908936, "learning_rate": 4.397494165335954e-07, "loss": 0.4683387875556946, "step": 8063 }, { "epoch": 0.9785220240262104, "grad_norm": 2.8736021518707275, "learning_rate": 4.3729271588256976e-07, "loss": 0.3856116533279419, "step": 8064 }, { "epoch": 0.9786433685232375, "grad_norm": 3.6910316944122314, "learning_rate": 4.3483601523154407e-07, "loss": 0.23004640638828278, "step": 8065 }, { "epoch": 0.9787647130202646, "grad_norm": 3.9581284523010254, "learning_rate": 4.3237931458051844e-07, "loss": 0.31450703740119934, "step": 8066 }, { "epoch": 0.9788860575172916, "grad_norm": 2.824986696243286, "learning_rate": 4.299226139294927e-07, "loss": 0.06652854382991791, "step": 8067 }, { "epoch": 0.9790074020143187, "grad_norm": 3.534966468811035, "learning_rate": 4.2746591327846706e-07, "loss": 0.1945396363735199, "step": 8068 }, { "epoch": 0.9791287465113457, "grad_norm": 1.0673497915267944, "learning_rate": 4.2500921262744137e-07, "loss": 0.017983462661504745, "step": 8069 }, { "epoch": 0.9792500910083728, "grad_norm": 3.994001865386963, "learning_rate": 4.2255251197641573e-07, "loss": 0.44196560978889465, "step": 8070 }, { "epoch": 0.9793714355053998, "grad_norm": 0.8996119499206543, "learning_rate": 4.2009581132539e-07, "loss": 0.01310754381120205, "step": 8071 }, { "epoch": 0.9794927800024269, "grad_norm": 3.924530506134033, "learning_rate": 4.1763911067436435e-07, "loss": 0.3667697608470917, "step": 8072 }, { "epoch": 0.979614124499454, "grad_norm": 2.178114652633667, "learning_rate": 4.151824100233387e-07, "loss": 0.09098240733146667, "step": 8073 }, { "epoch": 0.979735468996481, "grad_norm": 2.2387211322784424, "learning_rate": 4.1272570937231303e-07, "loss": 0.09443448483943939, "step": 8074 }, { "epoch": 0.9798568134935081, "grad_norm": 2.0665245056152344, "learning_rate": 4.1026900872128734e-07, "loss": 0.23772157728672028, "step": 8075 }, { "epoch": 0.9799781579905351, "grad_norm": 3.7155840396881104, "learning_rate": 4.0781230807026165e-07, "loss": 0.7512916326522827, "step": 8076 }, { "epoch": 0.9800995024875622, "grad_norm": 4.054144859313965, "learning_rate": 4.05355607419236e-07, "loss": 0.3023231625556946, "step": 8077 }, { "epoch": 0.9802208469845892, "grad_norm": 3.678654670715332, "learning_rate": 4.0289890676821033e-07, "loss": 0.28166577219963074, "step": 8078 }, { "epoch": 0.9803421914816163, "grad_norm": 3.5107340812683105, "learning_rate": 4.0044220611718464e-07, "loss": 0.16149196028709412, "step": 8079 }, { "epoch": 0.9804635359786433, "grad_norm": 1.5764837265014648, "learning_rate": 3.9798550546615895e-07, "loss": 0.2644275724887848, "step": 8080 }, { "epoch": 0.9805848804756704, "grad_norm": 1.3301101922988892, "learning_rate": 3.955288048151333e-07, "loss": 0.05436298996210098, "step": 8081 }, { "epoch": 0.9807062249726974, "grad_norm": 3.1911044120788574, "learning_rate": 3.930721041641077e-07, "loss": 0.20677927136421204, "step": 8082 }, { "epoch": 0.9808275694697246, "grad_norm": 1.9102050065994263, "learning_rate": 3.9061540351308194e-07, "loss": 0.2845415771007538, "step": 8083 }, { "epoch": 0.9809489139667517, "grad_norm": 2.007875442504883, "learning_rate": 3.8815870286205625e-07, "loss": 0.162248894572258, "step": 8084 }, { "epoch": 0.9810702584637787, "grad_norm": 2.520707368850708, "learning_rate": 3.857020022110306e-07, "loss": 0.13463200628757477, "step": 8085 }, { "epoch": 0.9811916029608058, "grad_norm": 3.9976744651794434, "learning_rate": 3.83245301560005e-07, "loss": 0.7113618850708008, "step": 8086 }, { "epoch": 0.9813129474578328, "grad_norm": 4.662350177764893, "learning_rate": 3.807886009089793e-07, "loss": 0.2524140179157257, "step": 8087 }, { "epoch": 0.9814342919548599, "grad_norm": 3.061037302017212, "learning_rate": 3.783319002579536e-07, "loss": 0.28856053948402405, "step": 8088 }, { "epoch": 0.9815556364518869, "grad_norm": 4.604337215423584, "learning_rate": 3.758751996069279e-07, "loss": 0.19506657123565674, "step": 8089 }, { "epoch": 0.981676980948914, "grad_norm": 2.887908458709717, "learning_rate": 3.7341849895590227e-07, "loss": 0.19156521558761597, "step": 8090 }, { "epoch": 0.981798325445941, "grad_norm": 3.9081521034240723, "learning_rate": 3.709617983048766e-07, "loss": 0.38929563760757446, "step": 8091 }, { "epoch": 0.9819196699429681, "grad_norm": 1.771638035774231, "learning_rate": 3.685050976538509e-07, "loss": 0.1222287192940712, "step": 8092 }, { "epoch": 0.9820410144399951, "grad_norm": 2.114978551864624, "learning_rate": 3.660483970028252e-07, "loss": 0.14033587276935577, "step": 8093 }, { "epoch": 0.9821623589370222, "grad_norm": 2.6302406787872314, "learning_rate": 3.6359169635179957e-07, "loss": 0.46363893151283264, "step": 8094 }, { "epoch": 0.9822837034340492, "grad_norm": 3.419023036956787, "learning_rate": 3.6113499570077393e-07, "loss": 0.21908141672611237, "step": 8095 }, { "epoch": 0.9824050479310763, "grad_norm": 2.775230884552002, "learning_rate": 3.586782950497482e-07, "loss": 0.30805662274360657, "step": 8096 }, { "epoch": 0.9825263924281034, "grad_norm": 0.6404641270637512, "learning_rate": 3.5622159439872256e-07, "loss": 0.013068201020359993, "step": 8097 }, { "epoch": 0.9826477369251304, "grad_norm": 3.9912517070770264, "learning_rate": 3.5376489374769687e-07, "loss": 0.17237721383571625, "step": 8098 }, { "epoch": 0.9827690814221575, "grad_norm": 1.7178115844726562, "learning_rate": 3.5130819309667123e-07, "loss": 0.06579326093196869, "step": 8099 }, { "epoch": 0.9828904259191845, "grad_norm": 3.006591796875, "learning_rate": 3.488514924456455e-07, "loss": 0.1720815896987915, "step": 8100 }, { "epoch": 0.9830117704162116, "grad_norm": 2.2581772804260254, "learning_rate": 3.4639479179461985e-07, "loss": 0.4876437783241272, "step": 8101 }, { "epoch": 0.9831331149132387, "grad_norm": 3.04974365234375, "learning_rate": 3.4393809114359417e-07, "loss": 0.20000283420085907, "step": 8102 }, { "epoch": 0.9832544594102658, "grad_norm": 1.0660133361816406, "learning_rate": 3.4148139049256853e-07, "loss": 0.01860252395272255, "step": 8103 }, { "epoch": 0.9833758039072928, "grad_norm": 2.2222540378570557, "learning_rate": 3.390246898415429e-07, "loss": 0.09651230275630951, "step": 8104 }, { "epoch": 0.9834971484043199, "grad_norm": 2.9970884323120117, "learning_rate": 3.3656798919051715e-07, "loss": 0.7961016893386841, "step": 8105 }, { "epoch": 0.983618492901347, "grad_norm": 2.143347978591919, "learning_rate": 3.3411128853949146e-07, "loss": 0.2510548532009125, "step": 8106 }, { "epoch": 0.983739837398374, "grad_norm": 3.9725635051727295, "learning_rate": 3.3165458788846583e-07, "loss": 0.12912032008171082, "step": 8107 }, { "epoch": 0.9838611818954011, "grad_norm": 3.6573069095611572, "learning_rate": 3.291978872374402e-07, "loss": 0.42165759205818176, "step": 8108 }, { "epoch": 0.9839825263924281, "grad_norm": 2.277338743209839, "learning_rate": 3.2674118658641445e-07, "loss": 0.31514203548431396, "step": 8109 }, { "epoch": 0.9841038708894552, "grad_norm": 4.461212635040283, "learning_rate": 3.242844859353888e-07, "loss": 0.18696101009845734, "step": 8110 }, { "epoch": 0.9842252153864822, "grad_norm": 2.940528392791748, "learning_rate": 3.218277852843631e-07, "loss": 0.1494307518005371, "step": 8111 }, { "epoch": 0.9843465598835093, "grad_norm": 3.6453797817230225, "learning_rate": 3.193710846333375e-07, "loss": 0.42490461468696594, "step": 8112 }, { "epoch": 0.9844679043805363, "grad_norm": 0.6592462658882141, "learning_rate": 3.1691438398231175e-07, "loss": 0.04030952975153923, "step": 8113 }, { "epoch": 0.9845892488775634, "grad_norm": 2.0120794773101807, "learning_rate": 3.144576833312861e-07, "loss": 0.10096409916877747, "step": 8114 }, { "epoch": 0.9847105933745904, "grad_norm": 2.481167793273926, "learning_rate": 3.120009826802604e-07, "loss": 0.17462635040283203, "step": 8115 }, { "epoch": 0.9848319378716175, "grad_norm": 1.6302645206451416, "learning_rate": 3.0954428202923473e-07, "loss": 0.13646431267261505, "step": 8116 }, { "epoch": 0.9849532823686445, "grad_norm": 2.466959238052368, "learning_rate": 3.070875813782091e-07, "loss": 0.1938471794128418, "step": 8117 }, { "epoch": 0.9850746268656716, "grad_norm": 3.714644432067871, "learning_rate": 3.046308807271834e-07, "loss": 0.5378848314285278, "step": 8118 }, { "epoch": 0.9851959713626987, "grad_norm": 1.8727396726608276, "learning_rate": 3.021741800761577e-07, "loss": 0.04817209020256996, "step": 8119 }, { "epoch": 0.9853173158597257, "grad_norm": 1.5516796112060547, "learning_rate": 2.997174794251321e-07, "loss": 0.3015582859516144, "step": 8120 }, { "epoch": 0.9854386603567529, "grad_norm": 2.540365219116211, "learning_rate": 2.972607787741064e-07, "loss": 0.22312861680984497, "step": 8121 }, { "epoch": 0.9855600048537799, "grad_norm": 3.198859214782715, "learning_rate": 2.9480407812308076e-07, "loss": 0.37547188997268677, "step": 8122 }, { "epoch": 0.985681349350807, "grad_norm": 0.24127602577209473, "learning_rate": 2.9234737747205507e-07, "loss": 0.0021177439484745264, "step": 8123 }, { "epoch": 0.985802693847834, "grad_norm": 2.247511625289917, "learning_rate": 2.898906768210294e-07, "loss": 0.2518605589866638, "step": 8124 }, { "epoch": 0.9859240383448611, "grad_norm": 1.8647449016571045, "learning_rate": 2.874339761700037e-07, "loss": 0.3303447961807251, "step": 8125 }, { "epoch": 0.9860453828418881, "grad_norm": 2.7201974391937256, "learning_rate": 2.8497727551897806e-07, "loss": 0.17956596612930298, "step": 8126 }, { "epoch": 0.9861667273389152, "grad_norm": 6.016379356384277, "learning_rate": 2.8252057486795237e-07, "loss": 1.030609369277954, "step": 8127 }, { "epoch": 0.9862880718359422, "grad_norm": 3.5695765018463135, "learning_rate": 2.800638742169267e-07, "loss": 0.255485862493515, "step": 8128 }, { "epoch": 0.9864094163329693, "grad_norm": 2.9865846633911133, "learning_rate": 2.77607173565901e-07, "loss": 0.15963327884674072, "step": 8129 }, { "epoch": 0.9865307608299964, "grad_norm": 1.6358180046081543, "learning_rate": 2.7515047291487535e-07, "loss": 0.030295897275209427, "step": 8130 }, { "epoch": 0.9866521053270234, "grad_norm": 1.4260319471359253, "learning_rate": 2.7269377226384966e-07, "loss": 0.07341703027486801, "step": 8131 }, { "epoch": 0.9867734498240505, "grad_norm": 1.558876633644104, "learning_rate": 2.7023707161282403e-07, "loss": 0.08085636049509048, "step": 8132 }, { "epoch": 0.9868947943210775, "grad_norm": 3.185612201690674, "learning_rate": 2.677803709617983e-07, "loss": 0.5367431044578552, "step": 8133 }, { "epoch": 0.9870161388181046, "grad_norm": 5.342649936676025, "learning_rate": 2.6532367031077265e-07, "loss": 0.32681703567504883, "step": 8134 }, { "epoch": 0.9871374833151316, "grad_norm": 3.1077311038970947, "learning_rate": 2.6286696965974696e-07, "loss": 0.21376961469650269, "step": 8135 }, { "epoch": 0.9872588278121587, "grad_norm": 2.102519989013672, "learning_rate": 2.604102690087213e-07, "loss": 0.5633777976036072, "step": 8136 }, { "epoch": 0.9873801723091857, "grad_norm": 2.8061654567718506, "learning_rate": 2.5795356835769564e-07, "loss": 0.3500403165817261, "step": 8137 }, { "epoch": 0.9875015168062128, "grad_norm": 3.073956251144409, "learning_rate": 2.5549686770666995e-07, "loss": 0.507943868637085, "step": 8138 }, { "epoch": 0.98762286130324, "grad_norm": 3.751018524169922, "learning_rate": 2.530401670556443e-07, "loss": 0.32101625204086304, "step": 8139 }, { "epoch": 0.987744205800267, "grad_norm": 2.336937427520752, "learning_rate": 2.505834664046186e-07, "loss": 0.10001513361930847, "step": 8140 }, { "epoch": 0.9878655502972941, "grad_norm": 1.484362006187439, "learning_rate": 2.4812676575359293e-07, "loss": 0.04645594209432602, "step": 8141 }, { "epoch": 0.9879868947943211, "grad_norm": 3.109088897705078, "learning_rate": 2.4567006510256725e-07, "loss": 0.12971867620944977, "step": 8142 }, { "epoch": 0.9881082392913482, "grad_norm": 3.2385387420654297, "learning_rate": 2.432133644515416e-07, "loss": 0.2928641438484192, "step": 8143 }, { "epoch": 0.9882295837883752, "grad_norm": 5.660645961761475, "learning_rate": 2.407566638005159e-07, "loss": 0.2920892834663391, "step": 8144 }, { "epoch": 0.9883509282854023, "grad_norm": 0.6422171592712402, "learning_rate": 2.3829996314949026e-07, "loss": 0.021814577281475067, "step": 8145 }, { "epoch": 0.9884722727824293, "grad_norm": 2.600780487060547, "learning_rate": 2.3584326249846457e-07, "loss": 0.3104495108127594, "step": 8146 }, { "epoch": 0.9885936172794564, "grad_norm": 0.2896110713481903, "learning_rate": 2.333865618474389e-07, "loss": 0.0044311657547950745, "step": 8147 }, { "epoch": 0.9887149617764834, "grad_norm": 1.7917742729187012, "learning_rate": 2.3092986119641322e-07, "loss": 0.07974135875701904, "step": 8148 }, { "epoch": 0.9888363062735105, "grad_norm": 2.4561219215393066, "learning_rate": 2.2847316054538756e-07, "loss": 0.20624814927577972, "step": 8149 }, { "epoch": 0.9889576507705375, "grad_norm": 1.6273305416107178, "learning_rate": 2.2601645989436187e-07, "loss": 0.04462714493274689, "step": 8150 }, { "epoch": 0.9890789952675646, "grad_norm": 3.7837259769439697, "learning_rate": 2.2355975924333623e-07, "loss": 0.07709643244743347, "step": 8151 }, { "epoch": 0.9892003397645917, "grad_norm": 2.918287754058838, "learning_rate": 2.2110305859231054e-07, "loss": 0.310965359210968, "step": 8152 }, { "epoch": 0.9893216842616187, "grad_norm": 2.7454335689544678, "learning_rate": 2.1864635794128488e-07, "loss": 0.12769421935081482, "step": 8153 }, { "epoch": 0.9894430287586458, "grad_norm": 2.7663915157318115, "learning_rate": 2.1618965729025922e-07, "loss": 0.1351376473903656, "step": 8154 }, { "epoch": 0.9895643732556728, "grad_norm": 2.1215829849243164, "learning_rate": 2.1373295663923353e-07, "loss": 0.1294950693845749, "step": 8155 }, { "epoch": 0.9896857177526999, "grad_norm": 4.0904693603515625, "learning_rate": 2.1127625598820787e-07, "loss": 0.09840081632137299, "step": 8156 }, { "epoch": 0.9898070622497269, "grad_norm": 3.3264482021331787, "learning_rate": 2.0881955533718218e-07, "loss": 0.2552015781402588, "step": 8157 }, { "epoch": 0.9899284067467541, "grad_norm": 2.010719060897827, "learning_rate": 2.0636285468615652e-07, "loss": 0.2282073199748993, "step": 8158 }, { "epoch": 0.9900497512437811, "grad_norm": 2.6469497680664062, "learning_rate": 2.0390615403513083e-07, "loss": 0.10432097315788269, "step": 8159 }, { "epoch": 0.9901710957408082, "grad_norm": 3.5745718479156494, "learning_rate": 2.0144945338410516e-07, "loss": 0.3126499354839325, "step": 8160 }, { "epoch": 0.9902924402378352, "grad_norm": 1.9101555347442627, "learning_rate": 1.9899275273307947e-07, "loss": 0.038711465895175934, "step": 8161 }, { "epoch": 0.9904137847348623, "grad_norm": 3.4719655513763428, "learning_rate": 1.9653605208205384e-07, "loss": 0.4475652575492859, "step": 8162 }, { "epoch": 0.9905351292318894, "grad_norm": 2.5514416694641113, "learning_rate": 1.9407935143102812e-07, "loss": 0.2257295846939087, "step": 8163 }, { "epoch": 0.9906564737289164, "grad_norm": 1.8199118375778198, "learning_rate": 1.916226507800025e-07, "loss": 0.0902881771326065, "step": 8164 }, { "epoch": 0.9907778182259435, "grad_norm": 3.203481674194336, "learning_rate": 1.891659501289768e-07, "loss": 0.44435223937034607, "step": 8165 }, { "epoch": 0.9908991627229705, "grad_norm": 2.8003132343292236, "learning_rate": 1.8670924947795114e-07, "loss": 0.37334299087524414, "step": 8166 }, { "epoch": 0.9910205072199976, "grad_norm": 3.1576292514801025, "learning_rate": 1.8425254882692545e-07, "loss": 0.750487208366394, "step": 8167 }, { "epoch": 0.9911418517170246, "grad_norm": 4.1352996826171875, "learning_rate": 1.8179584817589979e-07, "loss": 0.4385581612586975, "step": 8168 }, { "epoch": 0.9912631962140517, "grad_norm": 2.2557601928710938, "learning_rate": 1.793391475248741e-07, "loss": 0.14517828822135925, "step": 8169 }, { "epoch": 0.9913845407110787, "grad_norm": 5.140727996826172, "learning_rate": 1.7688244687384843e-07, "loss": 0.29728055000305176, "step": 8170 }, { "epoch": 0.9915058852081058, "grad_norm": 3.9052574634552, "learning_rate": 1.7442574622282275e-07, "loss": 0.2552011013031006, "step": 8171 }, { "epoch": 0.9916272297051328, "grad_norm": 4.304445743560791, "learning_rate": 1.7196904557179708e-07, "loss": 0.6415797472000122, "step": 8172 }, { "epoch": 0.9917485742021599, "grad_norm": 2.3857357501983643, "learning_rate": 1.6951234492077145e-07, "loss": 0.14873909950256348, "step": 8173 }, { "epoch": 0.991869918699187, "grad_norm": 2.325669527053833, "learning_rate": 1.6705564426974573e-07, "loss": 0.13116228580474854, "step": 8174 }, { "epoch": 0.991991263196214, "grad_norm": 3.275754451751709, "learning_rate": 1.645989436187201e-07, "loss": 0.6214953660964966, "step": 8175 }, { "epoch": 0.9921126076932412, "grad_norm": 2.735649585723877, "learning_rate": 1.621422429676944e-07, "loss": 0.5071886777877808, "step": 8176 }, { "epoch": 0.9922339521902682, "grad_norm": 1.2962099313735962, "learning_rate": 1.5968554231666874e-07, "loss": 0.13184018433094025, "step": 8177 }, { "epoch": 0.9923552966872953, "grad_norm": 2.656682252883911, "learning_rate": 1.5722884166564306e-07, "loss": 0.27576905488967896, "step": 8178 }, { "epoch": 0.9924766411843223, "grad_norm": 2.6721622943878174, "learning_rate": 1.5477214101461737e-07, "loss": 0.30925267934799194, "step": 8179 }, { "epoch": 0.9925979856813494, "grad_norm": 2.8136658668518066, "learning_rate": 1.523154403635917e-07, "loss": 0.2670142352581024, "step": 8180 }, { "epoch": 0.9927193301783764, "grad_norm": 2.676511764526367, "learning_rate": 1.4985873971256604e-07, "loss": 0.37768274545669556, "step": 8181 }, { "epoch": 0.9928406746754035, "grad_norm": 3.46738338470459, "learning_rate": 1.4740203906154038e-07, "loss": 0.23713327944278717, "step": 8182 }, { "epoch": 0.9929620191724305, "grad_norm": 0.7744072675704956, "learning_rate": 1.449453384105147e-07, "loss": 0.007564460393041372, "step": 8183 }, { "epoch": 0.9930833636694576, "grad_norm": 3.042039155960083, "learning_rate": 1.4248863775948903e-07, "loss": 0.562218189239502, "step": 8184 }, { "epoch": 0.9932047081664847, "grad_norm": 1.4645085334777832, "learning_rate": 1.4003193710846334e-07, "loss": 0.03484513610601425, "step": 8185 }, { "epoch": 0.9933260526635117, "grad_norm": 1.5418792963027954, "learning_rate": 1.3757523645743768e-07, "loss": 0.04805825278162956, "step": 8186 }, { "epoch": 0.9934473971605388, "grad_norm": 3.963831901550293, "learning_rate": 1.3511853580641201e-07, "loss": 0.1557963788509369, "step": 8187 }, { "epoch": 0.9935687416575658, "grad_norm": 2.0418667793273926, "learning_rate": 1.3266183515538633e-07, "loss": 0.06207454577088356, "step": 8188 }, { "epoch": 0.9936900861545929, "grad_norm": 4.718974590301514, "learning_rate": 1.3020513450436066e-07, "loss": 0.0847591683268547, "step": 8189 }, { "epoch": 0.9938114306516199, "grad_norm": 3.536233901977539, "learning_rate": 1.2774843385333497e-07, "loss": 0.42564043402671814, "step": 8190 }, { "epoch": 0.993932775148647, "grad_norm": 2.7669475078582764, "learning_rate": 1.252917332023093e-07, "loss": 0.11937233805656433, "step": 8191 }, { "epoch": 0.994054119645674, "grad_norm": 3.0169365406036377, "learning_rate": 1.2283503255128362e-07, "loss": 0.6079370379447937, "step": 8192 }, { "epoch": 0.9941754641427011, "grad_norm": 3.967857599258423, "learning_rate": 1.2037833190025796e-07, "loss": 0.7374891638755798, "step": 8193 }, { "epoch": 0.9942968086397281, "grad_norm": 4.546518325805664, "learning_rate": 1.1792163124923228e-07, "loss": 0.4368818700313568, "step": 8194 }, { "epoch": 0.9944181531367553, "grad_norm": 4.486617565155029, "learning_rate": 1.1546493059820661e-07, "loss": 0.1656920462846756, "step": 8195 }, { "epoch": 0.9945394976337824, "grad_norm": 3.5295090675354004, "learning_rate": 1.1300822994718093e-07, "loss": 0.4423447847366333, "step": 8196 }, { "epoch": 0.9946608421308094, "grad_norm": 3.7926464080810547, "learning_rate": 1.1055152929615527e-07, "loss": 0.17174115777015686, "step": 8197 }, { "epoch": 0.9947821866278365, "grad_norm": 2.2528209686279297, "learning_rate": 1.0809482864512961e-07, "loss": 0.1615927517414093, "step": 8198 }, { "epoch": 0.9949035311248635, "grad_norm": 4.124000072479248, "learning_rate": 1.0563812799410393e-07, "loss": 0.14602503180503845, "step": 8199 }, { "epoch": 0.9950248756218906, "grad_norm": 3.5572290420532227, "learning_rate": 1.0318142734307826e-07, "loss": 0.13812923431396484, "step": 8200 }, { "epoch": 0.9951462201189176, "grad_norm": 2.669635772705078, "learning_rate": 1.0072472669205258e-07, "loss": 0.09314485639333725, "step": 8201 }, { "epoch": 0.9952675646159447, "grad_norm": 2.8807713985443115, "learning_rate": 9.826802604102692e-08, "loss": 0.08343902975320816, "step": 8202 }, { "epoch": 0.9953889091129717, "grad_norm": 3.005725860595703, "learning_rate": 9.581132539000124e-08, "loss": 0.5203794836997986, "step": 8203 }, { "epoch": 0.9955102536099988, "grad_norm": 2.7536556720733643, "learning_rate": 9.335462473897557e-08, "loss": 0.22589141130447388, "step": 8204 }, { "epoch": 0.9956315981070258, "grad_norm": 0.6251642107963562, "learning_rate": 9.089792408794989e-08, "loss": 0.007485697045922279, "step": 8205 }, { "epoch": 0.9957529426040529, "grad_norm": 2.9699032306671143, "learning_rate": 8.844122343692422e-08, "loss": 0.26268380880355835, "step": 8206 }, { "epoch": 0.99587428710108, "grad_norm": 3.975344657897949, "learning_rate": 8.598452278589854e-08, "loss": 0.12206026911735535, "step": 8207 }, { "epoch": 0.995995631598107, "grad_norm": 2.6646511554718018, "learning_rate": 8.352782213487287e-08, "loss": 0.38092827796936035, "step": 8208 }, { "epoch": 0.996116976095134, "grad_norm": 3.843729019165039, "learning_rate": 8.10711214838472e-08, "loss": 0.20459771156311035, "step": 8209 }, { "epoch": 0.9962383205921611, "grad_norm": 3.0742721557617188, "learning_rate": 7.861442083282153e-08, "loss": 0.5630477070808411, "step": 8210 }, { "epoch": 0.9963596650891882, "grad_norm": 1.3999626636505127, "learning_rate": 7.615772018179585e-08, "loss": 0.04356493055820465, "step": 8211 }, { "epoch": 0.9964810095862152, "grad_norm": 1.9086469411849976, "learning_rate": 7.370101953077019e-08, "loss": 0.22079302370548248, "step": 8212 }, { "epoch": 0.9966023540832423, "grad_norm": 0.015656478703022003, "learning_rate": 7.124431887974451e-08, "loss": 0.00023217940179165453, "step": 8213 }, { "epoch": 0.9967236985802694, "grad_norm": 1.4900484085083008, "learning_rate": 6.878761822871884e-08, "loss": 0.03433198854327202, "step": 8214 }, { "epoch": 0.9968450430772965, "grad_norm": 0.32612621784210205, "learning_rate": 6.633091757769316e-08, "loss": 0.003940375056117773, "step": 8215 }, { "epoch": 0.9969663875743235, "grad_norm": 3.393261671066284, "learning_rate": 6.387421692666749e-08, "loss": 0.12713110446929932, "step": 8216 }, { "epoch": 0.9970877320713506, "grad_norm": 2.67403244972229, "learning_rate": 6.141751627564181e-08, "loss": 0.1976781189441681, "step": 8217 }, { "epoch": 0.9972090765683777, "grad_norm": 0.8744733333587646, "learning_rate": 5.896081562461614e-08, "loss": 0.13486960530281067, "step": 8218 }, { "epoch": 0.9973304210654047, "grad_norm": 3.9463884830474854, "learning_rate": 5.650411497359047e-08, "loss": 0.28171488642692566, "step": 8219 }, { "epoch": 0.9974517655624318, "grad_norm": 3.6775624752044678, "learning_rate": 5.4047414322564804e-08, "loss": 0.23265552520751953, "step": 8220 }, { "epoch": 0.9975731100594588, "grad_norm": 6.057101726531982, "learning_rate": 5.159071367153913e-08, "loss": 0.24588251113891602, "step": 8221 }, { "epoch": 0.9976944545564859, "grad_norm": 3.1650969982147217, "learning_rate": 4.913401302051346e-08, "loss": 0.19915533065795898, "step": 8222 }, { "epoch": 0.9978157990535129, "grad_norm": 2.436814308166504, "learning_rate": 4.6677312369487784e-08, "loss": 0.3665306866168976, "step": 8223 }, { "epoch": 0.99793714355054, "grad_norm": 2.0944769382476807, "learning_rate": 4.422061171846211e-08, "loss": 0.01665383391082287, "step": 8224 }, { "epoch": 0.998058488047567, "grad_norm": 3.073438882827759, "learning_rate": 4.176391106743643e-08, "loss": 0.21553675830364227, "step": 8225 }, { "epoch": 0.9981798325445941, "grad_norm": 3.467939853668213, "learning_rate": 3.9307210416410764e-08, "loss": 0.19214603304862976, "step": 8226 }, { "epoch": 0.9983011770416211, "grad_norm": 2.562479257583618, "learning_rate": 3.6850509765385095e-08, "loss": 0.19107551872730255, "step": 8227 }, { "epoch": 0.9984225215386482, "grad_norm": 3.2016451358795166, "learning_rate": 3.439380911435942e-08, "loss": 0.4586966633796692, "step": 8228 }, { "epoch": 0.9985438660356752, "grad_norm": 2.893289566040039, "learning_rate": 3.1937108463333744e-08, "loss": 0.7293487191200256, "step": 8229 }, { "epoch": 0.9986652105327023, "grad_norm": 2.645019769668579, "learning_rate": 2.948040781230807e-08, "loss": 0.30666327476501465, "step": 8230 }, { "epoch": 0.9987865550297294, "grad_norm": 2.5953478813171387, "learning_rate": 2.7023707161282402e-08, "loss": 0.3168795704841614, "step": 8231 }, { "epoch": 0.9989078995267565, "grad_norm": 2.093499183654785, "learning_rate": 2.456700651025673e-08, "loss": 0.21758435666561127, "step": 8232 }, { "epoch": 0.9990292440237836, "grad_norm": 4.87149715423584, "learning_rate": 2.2110305859231054e-08, "loss": 0.3215257525444031, "step": 8233 }, { "epoch": 0.9991505885208106, "grad_norm": 2.537774085998535, "learning_rate": 1.9653605208205382e-08, "loss": 0.06136220693588257, "step": 8234 }, { "epoch": 0.9992719330178377, "grad_norm": 1.7183773517608643, "learning_rate": 1.719690455717971e-08, "loss": 0.10423365980386734, "step": 8235 }, { "epoch": 0.9993932775148647, "grad_norm": 5.249739170074463, "learning_rate": 1.4740203906154036e-08, "loss": 0.27854207158088684, "step": 8236 }, { "epoch": 0.9995146220118918, "grad_norm": 2.115219831466675, "learning_rate": 1.2283503255128365e-08, "loss": 0.10702306032180786, "step": 8237 }, { "epoch": 0.9996359665089188, "grad_norm": 3.1493079662323, "learning_rate": 9.826802604102691e-09, "loss": 0.29594406485557556, "step": 8238 }, { "epoch": 0.9997573110059459, "grad_norm": 2.2682580947875977, "learning_rate": 7.370101953077018e-09, "loss": 0.37829381227493286, "step": 8239 }, { "epoch": 0.999878655502973, "grad_norm": 3.544779062271118, "learning_rate": 4.9134013020513455e-09, "loss": 0.4875047206878662, "step": 8240 }, { "epoch": 1.0, "grad_norm": 2.911996603012085, "learning_rate": 2.4567006510256727e-09, "loss": 0.30747270584106445, "step": 8241 }, { "epoch": 1.0, "step": 8241, "total_flos": 9.07891627152601e+18, "train_loss": 0.2817817049306414, "train_runtime": 69591.8553, "train_samples_per_second": 0.947, "train_steps_per_second": 0.118 } ], "logging_steps": 1, "max_steps": 8241, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.07891627152601e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }