Instructions to use rombodawg/rombos_Adapter_For_Replete-Coder-Qwen2-1.5b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use rombodawg/rombos_Adapter_For_Replete-Coder-Qwen2-1.5b with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("rombodawg/rombos_Adapter_For_Replete-Coder-Qwen2-1.5b", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- Unsloth Studio
How to use rombodawg/rombos_Adapter_For_Replete-Coder-Qwen2-1.5b with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for rombodawg/rombos_Adapter_For_Replete-Coder-Qwen2-1.5b to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for rombodawg/rombos_Adapter_For_Replete-Coder-Qwen2-1.5b to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for rombodawg/rombos_Adapter_For_Replete-Coder-Qwen2-1.5b to start chatting
Load model with FastModel
pip install unsloth

from unsloth import FastModel
model, tokenizer = FastModel.from_pretrained(
    model_name="rombodawg/rombos_Adapter_For_Replete-Coder-Qwen2-1.5b",
    max_seq_length=2048,
)
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.956126096847579, | |
| "eval_steps": 500, | |
| "global_step": 30500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0025071034598027745, | |
| "grad_norm": 0.09887482225894928, | |
| "learning_rate": 3e-05, | |
| "loss": 1.1694, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.005014206919605549, | |
| "grad_norm": 0.04257430136203766, | |
| "learning_rate": 6e-05, | |
| "loss": 1.0648, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.007521310379408324, | |
| "grad_norm": 0.025177787989377975, | |
| "learning_rate": 9e-05, | |
| "loss": 0.9901, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.010028413839211098, | |
| "grad_norm": 0.014405222609639168, | |
| "learning_rate": 0.00012, | |
| "loss": 0.9677, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.012535517299013872, | |
| "grad_norm": 0.012216474860906601, | |
| "learning_rate": 0.00015000000000000001, | |
| "loss": 0.9271, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.015042620758816648, | |
| "grad_norm": 0.01270276214927435, | |
| "learning_rate": 0.00018, | |
| "loss": 0.9087, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.01754972421861942, | |
| "grad_norm": 0.014931446872651577, | |
| "learning_rate": 0.00019996645983565321, | |
| "loss": 0.8911, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.020056827678422196, | |
| "grad_norm": 0.013434696942567825, | |
| "learning_rate": 0.00019986583934261277, | |
| "loss": 0.9009, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.022563931138224972, | |
| "grad_norm": 0.014926938340067863, | |
| "learning_rate": 0.00019976521884957238, | |
| "loss": 0.8827, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.025071034598027744, | |
| "grad_norm": 0.018700918182730675, | |
| "learning_rate": 0.00019966459835653194, | |
| "loss": 0.8815, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.02757813805783052, | |
| "grad_norm": 0.01647135801613331, | |
| "learning_rate": 0.00019956397786349156, | |
| "loss": 0.8762, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.030085241517633296, | |
| "grad_norm": 0.016942940652370453, | |
| "learning_rate": 0.00019946335737045111, | |
| "loss": 0.8648, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.03259234497743607, | |
| "grad_norm": 0.019231606274843216, | |
| "learning_rate": 0.00019936273687741073, | |
| "loss": 0.8567, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.03509944843723884, | |
| "grad_norm": 0.019707536324858665, | |
| "learning_rate": 0.00019926211638437028, | |
| "loss": 0.8568, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.03760655189704162, | |
| "grad_norm": 0.023225486278533936, | |
| "learning_rate": 0.0001991614958913299, | |
| "loss": 0.8507, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.04011365535684439, | |
| "grad_norm": 0.019466817378997803, | |
| "learning_rate": 0.00019906087539828946, | |
| "loss": 0.8559, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.04262075881664717, | |
| "grad_norm": 0.020322684198617935, | |
| "learning_rate": 0.00019896025490524907, | |
| "loss": 0.8411, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.045127862276449944, | |
| "grad_norm": 0.018744077533483505, | |
| "learning_rate": 0.00019885963441220863, | |
| "loss": 0.8587, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.04763496573625271, | |
| "grad_norm": 0.018990306183695793, | |
| "learning_rate": 0.0001987590139191682, | |
| "loss": 0.8434, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.05014206919605549, | |
| "grad_norm": 0.018513506278395653, | |
| "learning_rate": 0.0001986583934261278, | |
| "loss": 0.8573, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.052649172655858265, | |
| "grad_norm": 0.019561799243092537, | |
| "learning_rate": 0.00019855777293308738, | |
| "loss": 0.8459, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.05515627611566104, | |
| "grad_norm": 0.019172094762325287, | |
| "learning_rate": 0.00019845715244004697, | |
| "loss": 0.8653, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.057663379575463816, | |
| "grad_norm": 0.018002351745963097, | |
| "learning_rate": 0.00019835653194700655, | |
| "loss": 0.837, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.06017048303526659, | |
| "grad_norm": 0.01977609097957611, | |
| "learning_rate": 0.00019825591145396614, | |
| "loss": 0.8352, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.06267758649506937, | |
| "grad_norm": 0.019932597875595093, | |
| "learning_rate": 0.0001981552909609257, | |
| "loss": 0.8309, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.06518468995487214, | |
| "grad_norm": 0.01805214211344719, | |
| "learning_rate": 0.0001980546704678853, | |
| "loss": 0.8427, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.0676917934146749, | |
| "grad_norm": 0.018298575654625893, | |
| "learning_rate": 0.00019795404997484487, | |
| "loss": 0.8286, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.07019889687447768, | |
| "grad_norm": 0.01844840496778488, | |
| "learning_rate": 0.00019785342948180448, | |
| "loss": 0.8455, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.07270600033428046, | |
| "grad_norm": 0.018125606700778008, | |
| "learning_rate": 0.00019775280898876404, | |
| "loss": 0.8376, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.07521310379408323, | |
| "grad_norm": 0.01847078464925289, | |
| "learning_rate": 0.00019765218849572365, | |
| "loss": 0.8387, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.07772020725388601, | |
| "grad_norm": 0.0198803897947073, | |
| "learning_rate": 0.0001975515680026832, | |
| "loss": 0.8123, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.08022731071368878, | |
| "grad_norm": 0.018840806558728218, | |
| "learning_rate": 0.00019745094750964282, | |
| "loss": 0.8182, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.08273441417349156, | |
| "grad_norm": 0.02007896639406681, | |
| "learning_rate": 0.00019735032701660238, | |
| "loss": 0.8417, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.08524151763329434, | |
| "grad_norm": 0.02028510719537735, | |
| "learning_rate": 0.000197249706523562, | |
| "loss": 0.8359, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.08774862109309711, | |
| "grad_norm": 0.019926371052861214, | |
| "learning_rate": 0.00019714908603052155, | |
| "loss": 0.8253, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.09025572455289989, | |
| "grad_norm": 0.02000526711344719, | |
| "learning_rate": 0.00019704846553748117, | |
| "loss": 0.8136, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.09276282801270266, | |
| "grad_norm": 0.019949857145547867, | |
| "learning_rate": 0.00019694784504444072, | |
| "loss": 0.814, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.09526993147250543, | |
| "grad_norm": 0.018684815615415573, | |
| "learning_rate": 0.0001968472245514003, | |
| "loss": 0.8251, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.0977770349323082, | |
| "grad_norm": 0.01959558017551899, | |
| "learning_rate": 0.0001967466040583599, | |
| "loss": 0.8176, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.10028413839211098, | |
| "grad_norm": 0.02020624279975891, | |
| "learning_rate": 0.00019664598356531948, | |
| "loss": 0.8181, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.10279124185191375, | |
| "grad_norm": 0.019187506288290024, | |
| "learning_rate": 0.00019654536307227907, | |
| "loss": 0.8269, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.10529834531171653, | |
| "grad_norm": 0.019997362047433853, | |
| "learning_rate": 0.00019644474257923865, | |
| "loss": 0.8208, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.1078054487715193, | |
| "grad_norm": 0.02035447023808956, | |
| "learning_rate": 0.00019634412208619824, | |
| "loss": 0.822, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.11031255223132208, | |
| "grad_norm": 0.019842060282826424, | |
| "learning_rate": 0.0001962435015931578, | |
| "loss": 0.8123, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.11281965569112486, | |
| "grad_norm": 0.0202711783349514, | |
| "learning_rate": 0.0001961428811001174, | |
| "loss": 0.8137, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.11532675915092763, | |
| "grad_norm": 0.020544525235891342, | |
| "learning_rate": 0.00019604226060707697, | |
| "loss": 0.807, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.11783386261073041, | |
| "grad_norm": 0.02084393985569477, | |
| "learning_rate": 0.00019594164011403658, | |
| "loss": 0.8136, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.12034096607053318, | |
| "grad_norm": 0.020337115973234177, | |
| "learning_rate": 0.00019584101962099614, | |
| "loss": 0.8162, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.12284806953033595, | |
| "grad_norm": 0.019300837069749832, | |
| "learning_rate": 0.00019574039912795575, | |
| "loss": 0.8116, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.12535517299013874, | |
| "grad_norm": 0.020422646775841713, | |
| "learning_rate": 0.0001956397786349153, | |
| "loss": 0.8208, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.1278622764499415, | |
| "grad_norm": 0.019620511680841446, | |
| "learning_rate": 0.00019553915814187492, | |
| "loss": 0.7984, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.1303693799097443, | |
| "grad_norm": 0.018732598051428795, | |
| "learning_rate": 0.00019543853764883448, | |
| "loss": 0.8121, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.13287648336954705, | |
| "grad_norm": 0.020731749013066292, | |
| "learning_rate": 0.0001953379171557941, | |
| "loss": 0.8106, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.1353835868293498, | |
| "grad_norm": 0.019481362774968147, | |
| "learning_rate": 0.00019523729666275365, | |
| "loss": 0.8098, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.1378906902891526, | |
| "grad_norm": 0.021802278235554695, | |
| "learning_rate": 0.00019513667616971326, | |
| "loss": 0.7904, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.14039779374895536, | |
| "grad_norm": 0.02061532624065876, | |
| "learning_rate": 0.00019503605567667282, | |
| "loss": 0.8084, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.14290489720875815, | |
| "grad_norm": 0.01921633817255497, | |
| "learning_rate": 0.00019493543518363243, | |
| "loss": 0.8099, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.14541200066856091, | |
| "grad_norm": 0.020100874826312065, | |
| "learning_rate": 0.000194834814690592, | |
| "loss": 0.7984, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.1479191041283637, | |
| "grad_norm": 0.019538206979632378, | |
| "learning_rate": 0.00019473419419755158, | |
| "loss": 0.8003, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.15042620758816647, | |
| "grad_norm": 0.021109605208039284, | |
| "learning_rate": 0.00019463357370451116, | |
| "loss": 0.8027, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.15293331104796926, | |
| "grad_norm": 0.023268043994903564, | |
| "learning_rate": 0.00019453295321147075, | |
| "loss": 0.8137, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.15544041450777202, | |
| "grad_norm": 0.020138578489422798, | |
| "learning_rate": 0.00019443233271843033, | |
| "loss": 0.8015, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.1579475179675748, | |
| "grad_norm": 0.02234073542058468, | |
| "learning_rate": 0.00019433171222538992, | |
| "loss": 0.812, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.16045462142737757, | |
| "grad_norm": 0.02045338600873947, | |
| "learning_rate": 0.0001942310917323495, | |
| "loss": 0.7988, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.16296172488718033, | |
| "grad_norm": 0.020514754578471184, | |
| "learning_rate": 0.00019413047123930906, | |
| "loss": 0.7954, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.16546882834698312, | |
| "grad_norm": 0.020174162462353706, | |
| "learning_rate": 0.00019402985074626867, | |
| "loss": 0.7998, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.16797593180678588, | |
| "grad_norm": 0.020632125437259674, | |
| "learning_rate": 0.00019392923025322823, | |
| "loss": 0.8049, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.17048303526658867, | |
| "grad_norm": 0.02562854066491127, | |
| "learning_rate": 0.00019382860976018785, | |
| "loss": 0.8058, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.17299013872639143, | |
| "grad_norm": 0.019526248797774315, | |
| "learning_rate": 0.0001937279892671474, | |
| "loss": 0.7936, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.17549724218619422, | |
| "grad_norm": 0.020038483664393425, | |
| "learning_rate": 0.00019362736877410702, | |
| "loss": 0.8057, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.178004345645997, | |
| "grad_norm": 0.022498290985822678, | |
| "learning_rate": 0.00019352674828106658, | |
| "loss": 0.801, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.18051144910579978, | |
| "grad_norm": 0.020256614312529564, | |
| "learning_rate": 0.0001934261277880262, | |
| "loss": 0.7984, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.18301855256560254, | |
| "grad_norm": 0.021500416100025177, | |
| "learning_rate": 0.00019332550729498575, | |
| "loss": 0.8026, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.18552565602540533, | |
| "grad_norm": 0.02118818834424019, | |
| "learning_rate": 0.00019322488680194536, | |
| "loss": 0.805, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.1880327594852081, | |
| "grad_norm": 0.020503008738160133, | |
| "learning_rate": 0.00019312426630890492, | |
| "loss": 0.7991, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.19053986294501085, | |
| "grad_norm": 0.02011336386203766, | |
| "learning_rate": 0.00019302364581586453, | |
| "loss": 0.7966, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.19304696640481364, | |
| "grad_norm": 0.020229632034897804, | |
| "learning_rate": 0.0001929230253228241, | |
| "loss": 0.7925, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.1955540698646164, | |
| "grad_norm": 0.021130822598934174, | |
| "learning_rate": 0.00019282240482978367, | |
| "loss": 0.7942, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.1980611733244192, | |
| "grad_norm": 0.02094241976737976, | |
| "learning_rate": 0.00019272178433674326, | |
| "loss": 0.7857, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.20056827678422195, | |
| "grad_norm": 0.01990380696952343, | |
| "learning_rate": 0.00019262116384370284, | |
| "loss": 0.7821, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.20307538024402474, | |
| "grad_norm": 0.020579956471920013, | |
| "learning_rate": 0.00019252054335066243, | |
| "loss": 0.8054, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.2055824837038275, | |
| "grad_norm": 0.02037345990538597, | |
| "learning_rate": 0.00019241992285762202, | |
| "loss": 0.789, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.2080895871636303, | |
| "grad_norm": 0.02104773558676243, | |
| "learning_rate": 0.0001923193023645816, | |
| "loss": 0.7812, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.21059669062343306, | |
| "grad_norm": 0.020640334114432335, | |
| "learning_rate": 0.00019221868187154119, | |
| "loss": 0.7976, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.21310379408323585, | |
| "grad_norm": 0.02236183173954487, | |
| "learning_rate": 0.00019211806137850077, | |
| "loss": 0.7855, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.2156108975430386, | |
| "grad_norm": 0.021454576402902603, | |
| "learning_rate": 0.00019201744088546033, | |
| "loss": 0.7921, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.21811800100284137, | |
| "grad_norm": 0.0205401424318552, | |
| "learning_rate": 0.00019191682039241994, | |
| "loss": 0.7919, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.22062510446264416, | |
| "grad_norm": 0.020427586510777473, | |
| "learning_rate": 0.0001918161998993795, | |
| "loss": 0.7842, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.22313220792244692, | |
| "grad_norm": 0.020643971860408783, | |
| "learning_rate": 0.0001917155794063391, | |
| "loss": 0.8012, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.2256393113822497, | |
| "grad_norm": 0.02139684371650219, | |
| "learning_rate": 0.00019161495891329867, | |
| "loss": 0.7853, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.22814641484205248, | |
| "grad_norm": 0.020423240959644318, | |
| "learning_rate": 0.00019151433842025828, | |
| "loss": 0.7918, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.23065351830185526, | |
| "grad_norm": 0.022509122267365456, | |
| "learning_rate": 0.00019141371792721784, | |
| "loss": 0.7867, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.23316062176165803, | |
| "grad_norm": 0.021511022001504898, | |
| "learning_rate": 0.00019131309743417746, | |
| "loss": 0.7785, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.23566772522146082, | |
| "grad_norm": 0.021113473922014236, | |
| "learning_rate": 0.00019121247694113701, | |
| "loss": 0.7835, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.23817482868126358, | |
| "grad_norm": 0.02204412780702114, | |
| "learning_rate": 0.00019111185644809663, | |
| "loss": 0.7796, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.24068193214106637, | |
| "grad_norm": 0.021546153351664543, | |
| "learning_rate": 0.00019101123595505618, | |
| "loss": 0.795, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.24318903560086913, | |
| "grad_norm": 0.02130185067653656, | |
| "learning_rate": 0.00019091061546201577, | |
| "loss": 0.7743, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.2456961390606719, | |
| "grad_norm": 0.020676780492067337, | |
| "learning_rate": 0.00019080999496897536, | |
| "loss": 0.7827, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.24820324252047468, | |
| "grad_norm": 0.02096562273800373, | |
| "learning_rate": 0.00019070937447593494, | |
| "loss": 0.8017, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.25071034598027747, | |
| "grad_norm": 0.021092170849442482, | |
| "learning_rate": 0.00019060875398289453, | |
| "loss": 0.7752, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.25321744944008023, | |
| "grad_norm": 0.02107168734073639, | |
| "learning_rate": 0.0001905081334898541, | |
| "loss": 0.7819, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.255724552899883, | |
| "grad_norm": 0.021778512746095657, | |
| "learning_rate": 0.0001904075129968137, | |
| "loss": 0.7911, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.25823165635968576, | |
| "grad_norm": 0.020381765440106392, | |
| "learning_rate": 0.00019030689250377328, | |
| "loss": 0.787, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.2607387598194886, | |
| "grad_norm": 0.02274371311068535, | |
| "learning_rate": 0.00019020627201073287, | |
| "loss": 0.7828, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.26324586327929134, | |
| "grad_norm": 0.02126036398112774, | |
| "learning_rate": 0.00019010565151769243, | |
| "loss": 0.7798, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.2657529667390941, | |
| "grad_norm": 0.02086903154850006, | |
| "learning_rate": 0.00019000503102465204, | |
| "loss": 0.7723, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.26826007019889686, | |
| "grad_norm": 0.05506217107176781, | |
| "learning_rate": 0.0001899044105316116, | |
| "loss": 0.7798, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.2707671736586996, | |
| "grad_norm": 0.02119087241590023, | |
| "learning_rate": 0.0001898037900385712, | |
| "loss": 0.7809, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.27327427711850244, | |
| "grad_norm": 0.02315429411828518, | |
| "learning_rate": 0.00018970316954553077, | |
| "loss": 0.7881, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.2757813805783052, | |
| "grad_norm": 0.021781641989946365, | |
| "learning_rate": 0.00018960254905249038, | |
| "loss": 0.7902, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.27828848403810796, | |
| "grad_norm": 0.022906338796019554, | |
| "learning_rate": 0.00018950192855944994, | |
| "loss": 0.7766, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.2807955874979107, | |
| "grad_norm": 0.021640203893184662, | |
| "learning_rate": 0.00018940130806640955, | |
| "loss": 0.777, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.28330269095771354, | |
| "grad_norm": 0.02225816249847412, | |
| "learning_rate": 0.0001893006875733691, | |
| "loss": 0.7777, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.2858097944175163, | |
| "grad_norm": 0.021424556151032448, | |
| "learning_rate": 0.00018920006708032872, | |
| "loss": 0.7609, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.28831689787731907, | |
| "grad_norm": 0.02180912159383297, | |
| "learning_rate": 0.00018909944658728828, | |
| "loss": 0.7691, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.29082400133712183, | |
| "grad_norm": 0.021193066611886024, | |
| "learning_rate": 0.00018899882609424787, | |
| "loss": 0.7636, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.29333110479692465, | |
| "grad_norm": 0.021105512976646423, | |
| "learning_rate": 0.00018889820560120745, | |
| "loss": 0.7756, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.2958382082567274, | |
| "grad_norm": 0.021696053445339203, | |
| "learning_rate": 0.00018879758510816704, | |
| "loss": 0.7814, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.29834531171653017, | |
| "grad_norm": 0.021872224286198616, | |
| "learning_rate": 0.00018869696461512662, | |
| "loss": 0.7912, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.30085241517633293, | |
| "grad_norm": 0.02113959938287735, | |
| "learning_rate": 0.0001885963441220862, | |
| "loss": 0.7775, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.3033595186361357, | |
| "grad_norm": 0.020779291167855263, | |
| "learning_rate": 0.0001884957236290458, | |
| "loss": 0.7752, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.3058666220959385, | |
| "grad_norm": 0.021366087719798088, | |
| "learning_rate": 0.00018839510313600538, | |
| "loss": 0.7748, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.3083737255557413, | |
| "grad_norm": 0.02154374308884144, | |
| "learning_rate": 0.00018829448264296497, | |
| "loss": 0.7774, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.31088082901554404, | |
| "grad_norm": 0.020630501210689545, | |
| "learning_rate": 0.00018819386214992455, | |
| "loss": 0.7832, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.3133879324753468, | |
| "grad_norm": 0.022217195481061935, | |
| "learning_rate": 0.00018809324165688414, | |
| "loss": 0.7742, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.3158950359351496, | |
| "grad_norm": 0.021622564643621445, | |
| "learning_rate": 0.0001879926211638437, | |
| "loss": 0.7782, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.3184021393949524, | |
| "grad_norm": 0.02158367820084095, | |
| "learning_rate": 0.0001878920006708033, | |
| "loss": 0.7753, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.32090924285475514, | |
| "grad_norm": 0.021993108093738556, | |
| "learning_rate": 0.00018779138017776287, | |
| "loss": 0.7801, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.3234163463145579, | |
| "grad_norm": 0.02169063873589039, | |
| "learning_rate": 0.00018769075968472248, | |
| "loss": 0.7937, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.32592344977436066, | |
| "grad_norm": 0.023950908333063126, | |
| "learning_rate": 0.00018759013919168204, | |
| "loss": 0.7668, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.3284305532341635, | |
| "grad_norm": 0.02253536880016327, | |
| "learning_rate": 0.00018748951869864165, | |
| "loss": 0.7796, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.33093765669396624, | |
| "grad_norm": 0.021693330258131027, | |
| "learning_rate": 0.0001873888982056012, | |
| "loss": 0.7634, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.333444760153769, | |
| "grad_norm": 0.022510211914777756, | |
| "learning_rate": 0.00018728827771256082, | |
| "loss": 0.7664, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.33595186361357177, | |
| "grad_norm": 0.021836843341588974, | |
| "learning_rate": 0.00018718765721952038, | |
| "loss": 0.7849, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.3384589670733746, | |
| "grad_norm": 0.021421095356345177, | |
| "learning_rate": 0.00018708703672647996, | |
| "loss": 0.78, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.34096607053317735, | |
| "grad_norm": 0.02277962490916252, | |
| "learning_rate": 0.00018698641623343955, | |
| "loss": 0.7935, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.3434731739929801, | |
| "grad_norm": 0.022962411865592003, | |
| "learning_rate": 0.00018688579574039913, | |
| "loss": 0.7573, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.34598027745278287, | |
| "grad_norm": 0.021092860028147697, | |
| "learning_rate": 0.00018678517524735872, | |
| "loss": 0.7725, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.3484873809125857, | |
| "grad_norm": 0.0216389037668705, | |
| "learning_rate": 0.0001866845547543183, | |
| "loss": 0.7717, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.35099448437238845, | |
| "grad_norm": 0.022193802520632744, | |
| "learning_rate": 0.0001865839342612779, | |
| "loss": 0.7671, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.3535015878321912, | |
| "grad_norm": 0.021959876641631126, | |
| "learning_rate": 0.00018648331376823748, | |
| "loss": 0.7893, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.356008691291994, | |
| "grad_norm": 0.022308630868792534, | |
| "learning_rate": 0.00018638269327519706, | |
| "loss": 0.7719, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.35851579475179673, | |
| "grad_norm": 0.022814404219388962, | |
| "learning_rate": 0.00018628207278215665, | |
| "loss": 0.7687, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.36102289821159955, | |
| "grad_norm": 0.021741073578596115, | |
| "learning_rate": 0.00018618145228911623, | |
| "loss": 0.7757, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.3635300016714023, | |
| "grad_norm": 0.022700047120451927, | |
| "learning_rate": 0.00018608083179607582, | |
| "loss": 0.7724, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.3660371051312051, | |
| "grad_norm": 0.023608332499861717, | |
| "learning_rate": 0.0001859802113030354, | |
| "loss": 0.7697, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.36854420859100784, | |
| "grad_norm": 0.02221842296421528, | |
| "learning_rate": 0.00018587959080999496, | |
| "loss": 0.7746, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.37105131205081066, | |
| "grad_norm": 0.022841554135084152, | |
| "learning_rate": 0.00018577897031695457, | |
| "loss": 0.7798, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.3735584155106134, | |
| "grad_norm": 0.021496908739209175, | |
| "learning_rate": 0.00018567834982391413, | |
| "loss": 0.7608, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.3760655189704162, | |
| "grad_norm": 0.022609667852520943, | |
| "learning_rate": 0.00018557772933087375, | |
| "loss": 0.7582, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.37857262243021894, | |
| "grad_norm": 0.022388063371181488, | |
| "learning_rate": 0.0001854771088378333, | |
| "loss": 0.7736, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.3810797258900217, | |
| "grad_norm": 0.021875958889722824, | |
| "learning_rate": 0.00018537648834479292, | |
| "loss": 0.764, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.3835868293498245, | |
| "grad_norm": 0.023109521716833115, | |
| "learning_rate": 0.00018527586785175247, | |
| "loss": 0.7646, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.3860939328096273, | |
| "grad_norm": 0.02191918157041073, | |
| "learning_rate": 0.00018517524735871206, | |
| "loss": 0.7688, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.38860103626943004, | |
| "grad_norm": 0.022137146443128586, | |
| "learning_rate": 0.00018507462686567165, | |
| "loss": 0.7708, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.3911081397292328, | |
| "grad_norm": 0.023074300959706306, | |
| "learning_rate": 0.00018497400637263123, | |
| "loss": 0.77, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.3936152431890356, | |
| "grad_norm": 0.023129386827349663, | |
| "learning_rate": 0.00018487338587959082, | |
| "loss": 0.7645, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.3961223466488384, | |
| "grad_norm": 0.022260216996073723, | |
| "learning_rate": 0.0001847727653865504, | |
| "loss": 0.7739, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.39862945010864115, | |
| "grad_norm": 0.022797416895627975, | |
| "learning_rate": 0.00018467214489351, | |
| "loss": 0.7631, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.4011365535684439, | |
| "grad_norm": 0.02237161435186863, | |
| "learning_rate": 0.00018457152440046957, | |
| "loss": 0.7708, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.40364365702824667, | |
| "grad_norm": 0.023264579474925995, | |
| "learning_rate": 0.00018447090390742916, | |
| "loss": 0.7702, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.4061507604880495, | |
| "grad_norm": 0.022827420383691788, | |
| "learning_rate": 0.00018437028341438874, | |
| "loss": 0.7696, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.40865786394785225, | |
| "grad_norm": 0.022284789010882378, | |
| "learning_rate": 0.00018426966292134833, | |
| "loss": 0.7759, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.411164967407655, | |
| "grad_norm": 0.0228969044983387, | |
| "learning_rate": 0.00018416904242830791, | |
| "loss": 0.7711, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.4136720708674578, | |
| "grad_norm": 0.02203362248837948, | |
| "learning_rate": 0.0001840684219352675, | |
| "loss": 0.7557, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.4161791743272606, | |
| "grad_norm": 0.022419359534978867, | |
| "learning_rate": 0.00018396780144222706, | |
| "loss": 0.7806, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.41868627778706335, | |
| "grad_norm": 0.02259223349392414, | |
| "learning_rate": 0.00018386718094918667, | |
| "loss": 0.7616, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.4211933812468661, | |
| "grad_norm": 0.023276396095752716, | |
| "learning_rate": 0.00018376656045614623, | |
| "loss": 0.7597, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.4237004847066689, | |
| "grad_norm": 0.022209784016013145, | |
| "learning_rate": 0.00018366593996310584, | |
| "loss": 0.7554, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.4262075881664717, | |
| "grad_norm": 0.022717982530593872, | |
| "learning_rate": 0.0001835653194700654, | |
| "loss": 0.7723, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.42871469162627446, | |
| "grad_norm": 0.022251484915614128, | |
| "learning_rate": 0.000183464698977025, | |
| "loss": 0.7716, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.4312217950860772, | |
| "grad_norm": 0.022392725571990013, | |
| "learning_rate": 0.00018336407848398457, | |
| "loss": 0.7654, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.43372889854588, | |
| "grad_norm": 0.023053428158164024, | |
| "learning_rate": 0.00018326345799094416, | |
| "loss": 0.7579, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.43623600200568274, | |
| "grad_norm": 0.02315950021147728, | |
| "learning_rate": 0.00018316283749790374, | |
| "loss": 0.7655, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.43874310546548556, | |
| "grad_norm": 0.02267162874341011, | |
| "learning_rate": 0.00018306221700486333, | |
| "loss": 0.7503, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.4412502089252883, | |
| "grad_norm": 0.022932684049010277, | |
| "learning_rate": 0.0001829615965118229, | |
| "loss": 0.7592, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.4437573123850911, | |
| "grad_norm": 0.023299789056181908, | |
| "learning_rate": 0.0001828609760187825, | |
| "loss": 0.7805, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.44626441584489385, | |
| "grad_norm": 0.022324666380882263, | |
| "learning_rate": 0.00018276035552574208, | |
| "loss": 0.7639, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.44877151930469666, | |
| "grad_norm": 0.023942479863762856, | |
| "learning_rate": 0.00018265973503270167, | |
| "loss": 0.7506, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.4512786227644994, | |
| "grad_norm": 0.022840656340122223, | |
| "learning_rate": 0.00018255911453966126, | |
| "loss": 0.7568, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.4537857262243022, | |
| "grad_norm": 0.022889986634254456, | |
| "learning_rate": 0.00018245849404662084, | |
| "loss": 0.757, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.45629282968410495, | |
| "grad_norm": 0.02276541106402874, | |
| "learning_rate": 0.00018235787355358043, | |
| "loss": 0.7702, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.4587999331439077, | |
| "grad_norm": 0.022805610671639442, | |
| "learning_rate": 0.00018225725306054, | |
| "loss": 0.7522, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.46130703660371053, | |
| "grad_norm": 0.02356228232383728, | |
| "learning_rate": 0.0001821566325674996, | |
| "loss": 0.7584, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.4638141400635133, | |
| "grad_norm": 0.02339334785938263, | |
| "learning_rate": 0.00018205601207445918, | |
| "loss": 0.7551, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.46632124352331605, | |
| "grad_norm": 0.022267676889896393, | |
| "learning_rate": 0.00018195539158141877, | |
| "loss": 0.7551, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.4688283469831188, | |
| "grad_norm": 0.02209157682955265, | |
| "learning_rate": 0.00018185477108837833, | |
| "loss": 0.7555, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 0.47133545044292163, | |
| "grad_norm": 0.023798322305083275, | |
| "learning_rate": 0.00018175415059533794, | |
| "loss": 0.7672, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.4738425539027244, | |
| "grad_norm": 0.02182634547352791, | |
| "learning_rate": 0.0001816535301022975, | |
| "loss": 0.7523, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.47634965736252716, | |
| "grad_norm": 0.02280135080218315, | |
| "learning_rate": 0.0001815529096092571, | |
| "loss": 0.7523, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.4788567608223299, | |
| "grad_norm": 0.022913530468940735, | |
| "learning_rate": 0.00018145228911621667, | |
| "loss": 0.7664, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 0.48136386428213274, | |
| "grad_norm": 0.022897444665431976, | |
| "learning_rate": 0.00018135166862317625, | |
| "loss": 0.7626, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.4838709677419355, | |
| "grad_norm": 0.022669149562716484, | |
| "learning_rate": 0.00018125104813013584, | |
| "loss": 0.7666, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 0.48637807120173826, | |
| "grad_norm": 0.022428149357438087, | |
| "learning_rate": 0.00018115042763709542, | |
| "loss": 0.7574, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.488885174661541, | |
| "grad_norm": 0.02266399934887886, | |
| "learning_rate": 0.000181049807144055, | |
| "loss": 0.7713, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.4913922781213438, | |
| "grad_norm": 0.022166673094034195, | |
| "learning_rate": 0.0001809491866510146, | |
| "loss": 0.7526, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.4938993815811466, | |
| "grad_norm": 0.022409655153751373, | |
| "learning_rate": 0.00018084856615797418, | |
| "loss": 0.7378, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 0.49640648504094936, | |
| "grad_norm": 0.02232409082353115, | |
| "learning_rate": 0.00018074794566493377, | |
| "loss": 0.7632, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.4989135885007521, | |
| "grad_norm": 0.022124771028757095, | |
| "learning_rate": 0.00018064732517189335, | |
| "loss": 0.7493, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 0.5014206919605549, | |
| "grad_norm": 0.023450786247849464, | |
| "learning_rate": 0.00018054670467885294, | |
| "loss": 0.751, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.5039277954203577, | |
| "grad_norm": 0.023552143946290016, | |
| "learning_rate": 0.00018044608418581252, | |
| "loss": 0.7489, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 0.5064348988801605, | |
| "grad_norm": 0.022822733968496323, | |
| "learning_rate": 0.0001803454636927721, | |
| "loss": 0.7464, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.5089420023399632, | |
| "grad_norm": 0.02279839850962162, | |
| "learning_rate": 0.0001802448431997317, | |
| "loss": 0.7613, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 0.511449105799766, | |
| "grad_norm": 0.023819871246814728, | |
| "learning_rate": 0.00018014422270669128, | |
| "loss": 0.7368, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.5139562092595688, | |
| "grad_norm": 0.02348748780786991, | |
| "learning_rate": 0.00018004360221365086, | |
| "loss": 0.7485, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.5164633127193715, | |
| "grad_norm": 0.02394930087029934, | |
| "learning_rate": 0.00017994298172061045, | |
| "loss": 0.7504, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.5189704161791743, | |
| "grad_norm": 0.023166505619883537, | |
| "learning_rate": 0.00017984236122757004, | |
| "loss": 0.7526, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 0.5214775196389771, | |
| "grad_norm": 0.023279821500182152, | |
| "learning_rate": 0.0001797417407345296, | |
| "loss": 0.7474, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.5239846230987799, | |
| "grad_norm": 0.022907249629497528, | |
| "learning_rate": 0.0001796411202414892, | |
| "loss": 0.7555, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 0.5264917265585827, | |
| "grad_norm": 0.023161666467785835, | |
| "learning_rate": 0.00017954049974844877, | |
| "loss": 0.7587, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.5289988300183854, | |
| "grad_norm": 0.02453703060746193, | |
| "learning_rate": 0.00017943987925540835, | |
| "loss": 0.7623, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 0.5315059334781882, | |
| "grad_norm": 0.02323891967535019, | |
| "learning_rate": 0.00017933925876236794, | |
| "loss": 0.7546, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.534013036937991, | |
| "grad_norm": 0.022658636793494225, | |
| "learning_rate": 0.00017923863826932752, | |
| "loss": 0.7544, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 0.5365201403977937, | |
| "grad_norm": 0.023256490007042885, | |
| "learning_rate": 0.0001791380177762871, | |
| "loss": 0.7631, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.5390272438575965, | |
| "grad_norm": 0.02328312210738659, | |
| "learning_rate": 0.0001790373972832467, | |
| "loss": 0.7576, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.5415343473173992, | |
| "grad_norm": 0.023502754047513008, | |
| "learning_rate": 0.00017893677679020628, | |
| "loss": 0.7558, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.5440414507772021, | |
| "grad_norm": 0.02397795580327511, | |
| "learning_rate": 0.00017883615629716586, | |
| "loss": 0.7465, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 0.5465485542370049, | |
| "grad_norm": 0.023231035098433495, | |
| "learning_rate": 0.00017873553580412545, | |
| "loss": 0.7596, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.5490556576968076, | |
| "grad_norm": 0.023429760709404945, | |
| "learning_rate": 0.00017863491531108503, | |
| "loss": 0.7631, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 0.5515627611566104, | |
| "grad_norm": 0.02327948808670044, | |
| "learning_rate": 0.00017853429481804462, | |
| "loss": 0.7493, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.5540698646164132, | |
| "grad_norm": 0.023450564593076706, | |
| "learning_rate": 0.0001784336743250042, | |
| "loss": 0.7489, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 0.5565769680762159, | |
| "grad_norm": 0.02356708236038685, | |
| "learning_rate": 0.0001783330538319638, | |
| "loss": 0.7541, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.5590840715360187, | |
| "grad_norm": 0.024269040673971176, | |
| "learning_rate": 0.00017823243333892338, | |
| "loss": 0.7717, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 0.5615911749958215, | |
| "grad_norm": 0.02358848787844181, | |
| "learning_rate": 0.00017813181284588296, | |
| "loss": 0.7553, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.5640982784556242, | |
| "grad_norm": 0.02385580912232399, | |
| "learning_rate": 0.00017803119235284255, | |
| "loss": 0.7484, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.5666053819154271, | |
| "grad_norm": 0.023820120841264725, | |
| "learning_rate": 0.00017793057185980213, | |
| "loss": 0.7529, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.5691124853752298, | |
| "grad_norm": 0.023704256862401962, | |
| "learning_rate": 0.00017782995136676172, | |
| "loss": 0.763, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 0.5716195888350326, | |
| "grad_norm": 0.02363293431699276, | |
| "learning_rate": 0.0001777293308737213, | |
| "loss": 0.7552, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.5741266922948354, | |
| "grad_norm": 0.023471953347325325, | |
| "learning_rate": 0.00017762871038068086, | |
| "loss": 0.7516, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 0.5766337957546381, | |
| "grad_norm": 0.023572325706481934, | |
| "learning_rate": 0.00017752808988764045, | |
| "loss": 0.7635, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.5791408992144409, | |
| "grad_norm": 0.023114044219255447, | |
| "learning_rate": 0.00017742746939460003, | |
| "loss": 0.7376, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 0.5816480026742437, | |
| "grad_norm": 0.022982290014624596, | |
| "learning_rate": 0.00017732684890155962, | |
| "loss": 0.7548, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.5841551061340464, | |
| "grad_norm": 0.024818824604153633, | |
| "learning_rate": 0.0001772262284085192, | |
| "loss": 0.7555, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 0.5866622095938493, | |
| "grad_norm": 0.024532759562134743, | |
| "learning_rate": 0.0001771256079154788, | |
| "loss": 0.7543, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.589169313053652, | |
| "grad_norm": 0.023687878623604774, | |
| "learning_rate": 0.00017702498742243837, | |
| "loss": 0.7574, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 0.5916764165134548, | |
| "grad_norm": 0.023244835436344147, | |
| "learning_rate": 0.00017692436692939796, | |
| "loss": 0.738, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.5941835199732576, | |
| "grad_norm": 0.023271916434168816, | |
| "learning_rate": 0.00017682374643635755, | |
| "loss": 0.7472, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 0.5966906234330603, | |
| "grad_norm": 0.023334383964538574, | |
| "learning_rate": 0.00017672312594331713, | |
| "loss": 0.7547, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.5991977268928631, | |
| "grad_norm": 0.024023573845624924, | |
| "learning_rate": 0.00017662250545027672, | |
| "loss": 0.7516, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 0.6017048303526659, | |
| "grad_norm": 0.023526392877101898, | |
| "learning_rate": 0.0001765218849572363, | |
| "loss": 0.7484, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.6042119338124686, | |
| "grad_norm": 0.023420479148626328, | |
| "learning_rate": 0.0001764212644641959, | |
| "loss": 0.7368, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 0.6067190372722714, | |
| "grad_norm": 0.024068370461463928, | |
| "learning_rate": 0.00017632064397115547, | |
| "loss": 0.7448, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.6092261407320743, | |
| "grad_norm": 0.024318361654877663, | |
| "learning_rate": 0.00017622002347811506, | |
| "loss": 0.7544, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 0.611733244191877, | |
| "grad_norm": 0.023683857172727585, | |
| "learning_rate": 0.00017611940298507464, | |
| "loss": 0.7583, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.6142403476516798, | |
| "grad_norm": 0.023911328986287117, | |
| "learning_rate": 0.00017601878249203423, | |
| "loss": 0.7482, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 0.6167474511114825, | |
| "grad_norm": 0.023844299837946892, | |
| "learning_rate": 0.00017591816199899381, | |
| "loss": 0.7406, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.6192545545712853, | |
| "grad_norm": 0.023253358900547028, | |
| "learning_rate": 0.0001758175415059534, | |
| "loss": 0.7476, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 0.6217616580310881, | |
| "grad_norm": 0.022935032844543457, | |
| "learning_rate": 0.00017571692101291296, | |
| "loss": 0.7563, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.6242687614908908, | |
| "grad_norm": 0.02410741336643696, | |
| "learning_rate": 0.00017561630051987254, | |
| "loss": 0.7553, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 0.6267758649506936, | |
| "grad_norm": 0.023733945563435555, | |
| "learning_rate": 0.00017551568002683213, | |
| "loss": 0.7395, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.6292829684104964, | |
| "grad_norm": 0.024090424180030823, | |
| "learning_rate": 0.00017541505953379171, | |
| "loss": 0.7615, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 0.6317900718702992, | |
| "grad_norm": 0.023794986307621002, | |
| "learning_rate": 0.0001753144390407513, | |
| "loss": 0.7527, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.634297175330102, | |
| "grad_norm": 0.02363026887178421, | |
| "learning_rate": 0.00017521381854771089, | |
| "loss": 0.748, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 0.6368042787899048, | |
| "grad_norm": 0.024967040866613388, | |
| "learning_rate": 0.00017511319805467047, | |
| "loss": 0.7501, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.6393113822497075, | |
| "grad_norm": 0.02417265996336937, | |
| "learning_rate": 0.00017501257756163006, | |
| "loss": 0.7453, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 0.6418184857095103, | |
| "grad_norm": 0.024464495480060577, | |
| "learning_rate": 0.00017491195706858964, | |
| "loss": 0.758, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.644325589169313, | |
| "grad_norm": 0.023871179670095444, | |
| "learning_rate": 0.00017481133657554923, | |
| "loss": 0.7616, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 0.6468326926291158, | |
| "grad_norm": 0.023780934512615204, | |
| "learning_rate": 0.0001747107160825088, | |
| "loss": 0.7453, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.6493397960889186, | |
| "grad_norm": 0.02408822439610958, | |
| "learning_rate": 0.0001746100955894684, | |
| "loss": 0.7471, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 0.6518468995487213, | |
| "grad_norm": 0.024668745696544647, | |
| "learning_rate": 0.00017450947509642798, | |
| "loss": 0.7333, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.6543540030085242, | |
| "grad_norm": 0.023561371490359306, | |
| "learning_rate": 0.00017440885460338757, | |
| "loss": 0.7454, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 0.656861106468327, | |
| "grad_norm": 0.02355646714568138, | |
| "learning_rate": 0.00017430823411034716, | |
| "loss": 0.7505, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.6593682099281297, | |
| "grad_norm": 0.02338649332523346, | |
| "learning_rate": 0.00017420761361730674, | |
| "loss": 0.7615, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 0.6618753133879325, | |
| "grad_norm": 0.024536214768886566, | |
| "learning_rate": 0.00017410699312426633, | |
| "loss": 0.7497, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.6643824168477352, | |
| "grad_norm": 0.023618606850504875, | |
| "learning_rate": 0.0001740063726312259, | |
| "loss": 0.741, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 0.666889520307538, | |
| "grad_norm": 0.023363051936030388, | |
| "learning_rate": 0.0001739057521381855, | |
| "loss": 0.7498, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.6693966237673408, | |
| "grad_norm": 0.023151425644755363, | |
| "learning_rate": 0.00017380513164514508, | |
| "loss": 0.7436, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 0.6719037272271435, | |
| "grad_norm": 0.024613911285996437, | |
| "learning_rate": 0.00017370451115210464, | |
| "loss": 0.7484, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.6744108306869463, | |
| "grad_norm": 0.023703262209892273, | |
| "learning_rate": 0.00017360389065906423, | |
| "loss": 0.7401, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 0.6769179341467492, | |
| "grad_norm": 0.02323344349861145, | |
| "learning_rate": 0.0001735032701660238, | |
| "loss": 0.7372, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.6794250376065519, | |
| "grad_norm": 0.023779282346367836, | |
| "learning_rate": 0.0001734026496729834, | |
| "loss": 0.7474, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 0.6819321410663547, | |
| "grad_norm": 0.024744119495153427, | |
| "learning_rate": 0.00017330202917994298, | |
| "loss": 0.7337, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.6844392445261575, | |
| "grad_norm": 0.02366352453827858, | |
| "learning_rate": 0.00017320140868690257, | |
| "loss": 0.7755, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 0.6869463479859602, | |
| "grad_norm": 0.02404959499835968, | |
| "learning_rate": 0.00017310078819386215, | |
| "loss": 0.7412, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.689453451445763, | |
| "grad_norm": 0.024871889501810074, | |
| "learning_rate": 0.00017300016770082174, | |
| "loss": 0.7521, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 0.6919605549055657, | |
| "grad_norm": 0.02386365458369255, | |
| "learning_rate": 0.00017289954720778132, | |
| "loss": 0.7431, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.6944676583653685, | |
| "grad_norm": 0.025385569781064987, | |
| "learning_rate": 0.0001727989267147409, | |
| "loss": 0.73, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 0.6969747618251714, | |
| "grad_norm": 0.024604368954896927, | |
| "learning_rate": 0.0001726983062217005, | |
| "loss": 0.7474, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.6994818652849741, | |
| "grad_norm": 0.025954630225896835, | |
| "learning_rate": 0.00017259768572866008, | |
| "loss": 0.7473, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 0.7019889687447769, | |
| "grad_norm": 0.02412698231637478, | |
| "learning_rate": 0.00017249706523561967, | |
| "loss": 0.7498, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.7044960722045797, | |
| "grad_norm": 0.02433890663087368, | |
| "learning_rate": 0.00017239644474257925, | |
| "loss": 0.7474, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 0.7070031756643824, | |
| "grad_norm": 0.02414149045944214, | |
| "learning_rate": 0.00017229582424953884, | |
| "loss": 0.7416, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.7095102791241852, | |
| "grad_norm": 0.03919633850455284, | |
| "learning_rate": 0.00017219520375649842, | |
| "loss": 0.7502, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 0.712017382583988, | |
| "grad_norm": 0.02408537268638611, | |
| "learning_rate": 0.000172094583263458, | |
| "loss": 0.7456, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.7145244860437907, | |
| "grad_norm": 0.02505289390683174, | |
| "learning_rate": 0.0001719939627704176, | |
| "loss": 0.7412, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 0.7170315895035935, | |
| "grad_norm": 0.02388434298336506, | |
| "learning_rate": 0.00017189334227737718, | |
| "loss": 0.739, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.7195386929633963, | |
| "grad_norm": 0.02636132948100567, | |
| "learning_rate": 0.00017179272178433674, | |
| "loss": 0.7405, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 0.7220457964231991, | |
| "grad_norm": 0.02557826228439808, | |
| "learning_rate": 0.00017169210129129635, | |
| "loss": 0.7424, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.7245528998830019, | |
| "grad_norm": 0.02385845221579075, | |
| "learning_rate": 0.0001715914807982559, | |
| "loss": 0.7388, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 0.7270600033428046, | |
| "grad_norm": 0.02581110969185829, | |
| "learning_rate": 0.0001714908603052155, | |
| "loss": 0.7414, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.7295671068026074, | |
| "grad_norm": 0.025572916492819786, | |
| "learning_rate": 0.00017139023981217508, | |
| "loss": 0.7526, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 0.7320742102624102, | |
| "grad_norm": 0.024279674515128136, | |
| "learning_rate": 0.00017128961931913466, | |
| "loss": 0.738, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.7345813137222129, | |
| "grad_norm": 0.02414841763675213, | |
| "learning_rate": 0.00017118899882609425, | |
| "loss": 0.7387, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 0.7370884171820157, | |
| "grad_norm": 0.024131467565894127, | |
| "learning_rate": 0.00017108837833305384, | |
| "loss": 0.743, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.7395955206418184, | |
| "grad_norm": 0.024498678743839264, | |
| "learning_rate": 0.00017098775784001342, | |
| "loss": 0.7531, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 0.7421026241016213, | |
| "grad_norm": 0.024572541937232018, | |
| "learning_rate": 0.000170887137346973, | |
| "loss": 0.7489, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.7446097275614241, | |
| "grad_norm": 0.02463640458881855, | |
| "learning_rate": 0.0001707865168539326, | |
| "loss": 0.7379, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 0.7471168310212268, | |
| "grad_norm": 0.024474984034895897, | |
| "learning_rate": 0.00017068589636089218, | |
| "loss": 0.7532, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.7496239344810296, | |
| "grad_norm": 0.023911593481898308, | |
| "learning_rate": 0.00017058527586785176, | |
| "loss": 0.7346, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 0.7521310379408324, | |
| "grad_norm": 0.024990247562527657, | |
| "learning_rate": 0.00017048465537481135, | |
| "loss": 0.727, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.7546381414006351, | |
| "grad_norm": 0.024192336946725845, | |
| "learning_rate": 0.00017038403488177093, | |
| "loss": 0.7462, | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 0.7571452448604379, | |
| "grad_norm": 0.02413538470864296, | |
| "learning_rate": 0.00017028341438873052, | |
| "loss": 0.7364, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.7596523483202406, | |
| "grad_norm": 0.02461206167936325, | |
| "learning_rate": 0.0001701827938956901, | |
| "loss": 0.7321, | |
| "step": 4545 | |
| }, | |
| { | |
| "epoch": 0.7621594517800434, | |
| "grad_norm": 0.024669578298926353, | |
| "learning_rate": 0.0001700821734026497, | |
| "loss": 0.7503, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.7646665552398463, | |
| "grad_norm": 0.02436312846839428, | |
| "learning_rate": 0.00016998155290960928, | |
| "loss": 0.7346, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 0.767173658699649, | |
| "grad_norm": 0.025169432163238525, | |
| "learning_rate": 0.00016988093241656883, | |
| "loss": 0.7219, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.7696807621594518, | |
| "grad_norm": 0.025311505421996117, | |
| "learning_rate": 0.00016978031192352845, | |
| "loss": 0.742, | |
| "step": 4605 | |
| }, | |
| { | |
| "epoch": 0.7721878656192546, | |
| "grad_norm": 0.024896448478102684, | |
| "learning_rate": 0.000169679691430488, | |
| "loss": 0.746, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.7746949690790573, | |
| "grad_norm": 0.025063227862119675, | |
| "learning_rate": 0.0001695790709374476, | |
| "loss": 0.7399, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 0.7772020725388601, | |
| "grad_norm": 0.024744588881731033, | |
| "learning_rate": 0.00016947845044440718, | |
| "loss": 0.742, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.7797091759986629, | |
| "grad_norm": 0.025170577690005302, | |
| "learning_rate": 0.00016937782995136676, | |
| "loss": 0.7288, | |
| "step": 4665 | |
| }, | |
| { | |
| "epoch": 0.7822162794584656, | |
| "grad_norm": 0.024757632985711098, | |
| "learning_rate": 0.00016927720945832635, | |
| "loss": 0.7407, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.7847233829182684, | |
| "grad_norm": 0.025282783433794975, | |
| "learning_rate": 0.00016917658896528593, | |
| "loss": 0.7294, | |
| "step": 4695 | |
| }, | |
| { | |
| "epoch": 0.7872304863780712, | |
| "grad_norm": 0.025306588038802147, | |
| "learning_rate": 0.00016907596847224552, | |
| "loss": 0.7414, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.789737589837874, | |
| "grad_norm": 0.024476177990436554, | |
| "learning_rate": 0.0001689753479792051, | |
| "loss": 0.7377, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 0.7922446932976768, | |
| "grad_norm": 0.025107109919190407, | |
| "learning_rate": 0.0001688747274861647, | |
| "loss": 0.7378, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.7947517967574795, | |
| "grad_norm": 0.024397587403655052, | |
| "learning_rate": 0.00016877410699312427, | |
| "loss": 0.7308, | |
| "step": 4755 | |
| }, | |
| { | |
| "epoch": 0.7972589002172823, | |
| "grad_norm": 0.02418595738708973, | |
| "learning_rate": 0.00016867348650008386, | |
| "loss": 0.7437, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.7997660036770851, | |
| "grad_norm": 0.025148652493953705, | |
| "learning_rate": 0.00016857286600704345, | |
| "loss": 0.7365, | |
| "step": 4785 | |
| }, | |
| { | |
| "epoch": 0.8022731071368878, | |
| "grad_norm": 0.025669820606708527, | |
| "learning_rate": 0.00016847224551400303, | |
| "loss": 0.7392, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.8047802105966906, | |
| "grad_norm": 0.02602335438132286, | |
| "learning_rate": 0.00016837162502096262, | |
| "loss": 0.7377, | |
| "step": 4815 | |
| }, | |
| { | |
| "epoch": 0.8072873140564933, | |
| "grad_norm": 0.02492678537964821, | |
| "learning_rate": 0.0001682710045279222, | |
| "loss": 0.7435, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.8097944175162962, | |
| "grad_norm": 0.02486814185976982, | |
| "learning_rate": 0.0001681703840348818, | |
| "loss": 0.7392, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 0.812301520976099, | |
| "grad_norm": 0.026057204231619835, | |
| "learning_rate": 0.00016806976354184137, | |
| "loss": 0.7371, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.8148086244359017, | |
| "grad_norm": 0.025231441482901573, | |
| "learning_rate": 0.00016796914304880093, | |
| "loss": 0.7429, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 0.8173157278957045, | |
| "grad_norm": 0.025132806971669197, | |
| "learning_rate": 0.00016786852255576054, | |
| "loss": 0.7398, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.8198228313555073, | |
| "grad_norm": 0.02506762556731701, | |
| "learning_rate": 0.0001677679020627201, | |
| "loss": 0.7463, | |
| "step": 4905 | |
| }, | |
| { | |
| "epoch": 0.82232993481531, | |
| "grad_norm": 0.02398357354104519, | |
| "learning_rate": 0.00016766728156967971, | |
| "loss": 0.7309, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.8248370382751128, | |
| "grad_norm": 0.025060344487428665, | |
| "learning_rate": 0.00016756666107663927, | |
| "loss": 0.7287, | |
| "step": 4935 | |
| }, | |
| { | |
| "epoch": 0.8273441417349156, | |
| "grad_norm": 0.024265987798571587, | |
| "learning_rate": 0.00016746604058359886, | |
| "loss": 0.7439, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.8298512451947184, | |
| "grad_norm": 0.025207631289958954, | |
| "learning_rate": 0.00016736542009055844, | |
| "loss": 0.7332, | |
| "step": 4965 | |
| }, | |
| { | |
| "epoch": 0.8323583486545212, | |
| "grad_norm": 0.025070613250136375, | |
| "learning_rate": 0.00016726479959751803, | |
| "loss": 0.7356, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.834865452114324, | |
| "grad_norm": 0.025521699339151382, | |
| "learning_rate": 0.00016716417910447761, | |
| "loss": 0.7345, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 0.8373725555741267, | |
| "grad_norm": 0.025154948234558105, | |
| "learning_rate": 0.0001670635586114372, | |
| "loss": 0.7362, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.8398796590339295, | |
| "grad_norm": 0.025558389723300934, | |
| "learning_rate": 0.00016696293811839679, | |
| "loss": 0.7294, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 0.8423867624937322, | |
| "grad_norm": 0.026137180626392365, | |
| "learning_rate": 0.00016686231762535637, | |
| "loss": 0.7331, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.844893865953535, | |
| "grad_norm": 0.024644847959280014, | |
| "learning_rate": 0.00016676169713231596, | |
| "loss": 0.7382, | |
| "step": 5055 | |
| }, | |
| { | |
| "epoch": 0.8474009694133378, | |
| "grad_norm": 0.024775272235274315, | |
| "learning_rate": 0.00016666107663927554, | |
| "loss": 0.7242, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.8499080728731405, | |
| "grad_norm": 0.025577571243047714, | |
| "learning_rate": 0.00016656045614623513, | |
| "loss": 0.7192, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 0.8524151763329434, | |
| "grad_norm": 0.024751491844654083, | |
| "learning_rate": 0.0001664598356531947, | |
| "loss": 0.7219, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.8549222797927462, | |
| "grad_norm": 0.025324271991848946, | |
| "learning_rate": 0.0001663592151601543, | |
| "loss": 0.7412, | |
| "step": 5115 | |
| }, | |
| { | |
| "epoch": 0.8574293832525489, | |
| "grad_norm": 0.02564609982073307, | |
| "learning_rate": 0.00016625859466711388, | |
| "loss": 0.7366, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.8599364867123517, | |
| "grad_norm": 0.02468453161418438, | |
| "learning_rate": 0.00016615797417407347, | |
| "loss": 0.7387, | |
| "step": 5145 | |
| }, | |
| { | |
| "epoch": 0.8624435901721544, | |
| "grad_norm": 0.025196226313710213, | |
| "learning_rate": 0.00016605735368103303, | |
| "loss": 0.7299, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.8649506936319572, | |
| "grad_norm": 0.02621576189994812, | |
| "learning_rate": 0.00016595673318799264, | |
| "loss": 0.7495, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 0.86745779709176, | |
| "grad_norm": 0.025252273306250572, | |
| "learning_rate": 0.0001658561126949522, | |
| "loss": 0.7322, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.8699649005515627, | |
| "grad_norm": 0.025535358116030693, | |
| "learning_rate": 0.0001657554922019118, | |
| "loss": 0.7281, | |
| "step": 5205 | |
| }, | |
| { | |
| "epoch": 0.8724720040113655, | |
| "grad_norm": 0.024804269894957542, | |
| "learning_rate": 0.00016565487170887137, | |
| "loss": 0.7505, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.8749791074711684, | |
| "grad_norm": 0.02469950169324875, | |
| "learning_rate": 0.00016555425121583098, | |
| "loss": 0.7265, | |
| "step": 5235 | |
| }, | |
| { | |
| "epoch": 0.8774862109309711, | |
| "grad_norm": 0.02518155239522457, | |
| "learning_rate": 0.00016545363072279054, | |
| "loss": 0.7288, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.8799933143907739, | |
| "grad_norm": 0.024804813787341118, | |
| "learning_rate": 0.00016535301022975013, | |
| "loss": 0.7382, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 0.8825004178505766, | |
| "grad_norm": 0.024241533130407333, | |
| "learning_rate": 0.0001652523897367097, | |
| "loss": 0.7408, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.8850075213103794, | |
| "grad_norm": 0.025099163874983788, | |
| "learning_rate": 0.0001651517692436693, | |
| "loss": 0.7324, | |
| "step": 5295 | |
| }, | |
| { | |
| "epoch": 0.8875146247701822, | |
| "grad_norm": 0.025935839861631393, | |
| "learning_rate": 0.00016505114875062888, | |
| "loss": 0.7353, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.8900217282299849, | |
| "grad_norm": 0.024958360940217972, | |
| "learning_rate": 0.00016495052825758847, | |
| "loss": 0.724, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 0.8925288316897877, | |
| "grad_norm": 0.024382906034588814, | |
| "learning_rate": 0.00016484990776454805, | |
| "loss": 0.7372, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.8950359351495905, | |
| "grad_norm": 0.02473212592303753, | |
| "learning_rate": 0.00016474928727150764, | |
| "loss": 0.7531, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 0.8975430386093933, | |
| "grad_norm": 0.024407681077718735, | |
| "learning_rate": 0.00016464866677846722, | |
| "loss": 0.7489, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.9000501420691961, | |
| "grad_norm": 0.02625984139740467, | |
| "learning_rate": 0.0001645480462854268, | |
| "loss": 0.7399, | |
| "step": 5385 | |
| }, | |
| { | |
| "epoch": 0.9025572455289989, | |
| "grad_norm": 0.026630889624357224, | |
| "learning_rate": 0.0001644474257923864, | |
| "loss": 0.7322, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.9050643489888016, | |
| "grad_norm": 0.025531059131026268, | |
| "learning_rate": 0.00016434680529934598, | |
| "loss": 0.7499, | |
| "step": 5415 | |
| }, | |
| { | |
| "epoch": 0.9075714524486044, | |
| "grad_norm": 0.025482535362243652, | |
| "learning_rate": 0.00016424618480630557, | |
| "loss": 0.7287, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.9100785559084071, | |
| "grad_norm": 0.026173191145062447, | |
| "learning_rate": 0.00016414556431326512, | |
| "loss": 0.7363, | |
| "step": 5445 | |
| }, | |
| { | |
| "epoch": 0.9125856593682099, | |
| "grad_norm": 0.024878835305571556, | |
| "learning_rate": 0.00016404494382022474, | |
| "loss": 0.7592, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.9150927628280127, | |
| "grad_norm": 0.025847023352980614, | |
| "learning_rate": 0.0001639443233271843, | |
| "loss": 0.7138, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 0.9175998662878154, | |
| "grad_norm": 0.026683717966079712, | |
| "learning_rate": 0.0001638437028341439, | |
| "loss": 0.7266, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.9201069697476183, | |
| "grad_norm": 0.02581162378191948, | |
| "learning_rate": 0.00016374308234110347, | |
| "loss": 0.7175, | |
| "step": 5505 | |
| }, | |
| { | |
| "epoch": 0.9226140732074211, | |
| "grad_norm": 0.02513813227415085, | |
| "learning_rate": 0.00016364246184806308, | |
| "loss": 0.74, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.9251211766672238, | |
| "grad_norm": 0.024819128215312958, | |
| "learning_rate": 0.00016354184135502264, | |
| "loss": 0.742, | |
| "step": 5535 | |
| }, | |
| { | |
| "epoch": 0.9276282801270266, | |
| "grad_norm": 0.024832414463162422, | |
| "learning_rate": 0.00016344122086198222, | |
| "loss": 0.7465, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.9301353835868293, | |
| "grad_norm": 0.02581876330077648, | |
| "learning_rate": 0.0001633406003689418, | |
| "loss": 0.7383, | |
| "step": 5565 | |
| }, | |
| { | |
| "epoch": 0.9326424870466321, | |
| "grad_norm": 0.024939673021435738, | |
| "learning_rate": 0.0001632399798759014, | |
| "loss": 0.7479, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.9351495905064349, | |
| "grad_norm": 0.025533072650432587, | |
| "learning_rate": 0.00016313935938286098, | |
| "loss": 0.7259, | |
| "step": 5595 | |
| }, | |
| { | |
| "epoch": 0.9376566939662376, | |
| "grad_norm": 0.02547396905720234, | |
| "learning_rate": 0.00016303873888982056, | |
| "loss": 0.7258, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.9401637974260404, | |
| "grad_norm": 0.025361550971865654, | |
| "learning_rate": 0.00016293811839678015, | |
| "loss": 0.7302, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 0.9426709008858433, | |
| "grad_norm": 0.02566991187632084, | |
| "learning_rate": 0.00016283749790373974, | |
| "loss": 0.7319, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.945178004345646, | |
| "grad_norm": 0.026383578777313232, | |
| "learning_rate": 0.00016273687741069932, | |
| "loss": 0.7523, | |
| "step": 5655 | |
| }, | |
| { | |
| "epoch": 0.9476851078054488, | |
| "grad_norm": 0.025949161499738693, | |
| "learning_rate": 0.0001626362569176589, | |
| "loss": 0.7116, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.9501922112652516, | |
| "grad_norm": 0.02509259060025215, | |
| "learning_rate": 0.0001625356364246185, | |
| "loss": 0.7259, | |
| "step": 5685 | |
| }, | |
| { | |
| "epoch": 0.9526993147250543, | |
| "grad_norm": 0.025692781433463097, | |
| "learning_rate": 0.00016243501593157808, | |
| "loss": 0.7263, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.9552064181848571, | |
| "grad_norm": 0.025238677859306335, | |
| "learning_rate": 0.00016233439543853766, | |
| "loss": 0.7315, | |
| "step": 5715 | |
| }, | |
| { | |
| "epoch": 0.9577135216446598, | |
| "grad_norm": 0.025801653042435646, | |
| "learning_rate": 0.00016223377494549722, | |
| "loss": 0.7329, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.9602206251044626, | |
| "grad_norm": 0.025331363081932068, | |
| "learning_rate": 0.00016213315445245683, | |
| "loss": 0.7245, | |
| "step": 5745 | |
| }, | |
| { | |
| "epoch": 0.9627277285642655, | |
| "grad_norm": 0.025975272059440613, | |
| "learning_rate": 0.0001620325339594164, | |
| "loss": 0.7119, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.9652348320240682, | |
| "grad_norm": 0.025318987667560577, | |
| "learning_rate": 0.000161931913466376, | |
| "loss": 0.7481, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 0.967741935483871, | |
| "grad_norm": 0.02570466138422489, | |
| "learning_rate": 0.00016183129297333556, | |
| "loss": 0.7506, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.9702490389436738, | |
| "grad_norm": 0.024957410991191864, | |
| "learning_rate": 0.00016173067248029518, | |
| "loss": 0.7277, | |
| "step": 5805 | |
| }, | |
| { | |
| "epoch": 0.9727561424034765, | |
| "grad_norm": 0.026068007573485374, | |
| "learning_rate": 0.00016163005198725473, | |
| "loss": 0.7305, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.9752632458632793, | |
| "grad_norm": 0.026244519278407097, | |
| "learning_rate": 0.00016152943149421435, | |
| "loss": 0.7184, | |
| "step": 5835 | |
| }, | |
| { | |
| "epoch": 0.977770349323082, | |
| "grad_norm": 0.025324849411845207, | |
| "learning_rate": 0.0001614288110011739, | |
| "loss": 0.7264, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.9802774527828848, | |
| "grad_norm": 0.025065554305911064, | |
| "learning_rate": 0.0001613281905081335, | |
| "loss": 0.7294, | |
| "step": 5865 | |
| }, | |
| { | |
| "epoch": 0.9827845562426876, | |
| "grad_norm": 0.025444064289331436, | |
| "learning_rate": 0.00016122757001509308, | |
| "loss": 0.728, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.9852916597024904, | |
| "grad_norm": 0.026068173348903656, | |
| "learning_rate": 0.00016112694952205266, | |
| "loss": 0.741, | |
| "step": 5895 | |
| }, | |
| { | |
| "epoch": 0.9877987631622932, | |
| "grad_norm": 0.024954237043857574, | |
| "learning_rate": 0.00016102632902901225, | |
| "loss": 0.7375, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.990305866622096, | |
| "grad_norm": 0.0247243270277977, | |
| "learning_rate": 0.00016092570853597183, | |
| "loss": 0.7375, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 0.9928129700818987, | |
| "grad_norm": 0.025755500420928, | |
| "learning_rate": 0.00016082508804293142, | |
| "loss": 0.7368, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.9953200735417015, | |
| "grad_norm": 0.026517482474446297, | |
| "learning_rate": 0.000160724467549891, | |
| "loss": 0.7203, | |
| "step": 5955 | |
| }, | |
| { | |
| "epoch": 0.9978271770015043, | |
| "grad_norm": 0.025983400642871857, | |
| "learning_rate": 0.0001606238470568506, | |
| "loss": 0.7142, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.0003342804613071, | |
| "grad_norm": 0.024920133873820305, | |
| "learning_rate": 0.00016052322656381017, | |
| "loss": 0.7166, | |
| "step": 5985 | |
| }, | |
| { | |
| "epoch": 1.0028413839211099, | |
| "grad_norm": 0.02642948552966118, | |
| "learning_rate": 0.00016042260607076976, | |
| "loss": 0.7074, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.0053484873809126, | |
| "grad_norm": 0.026105554774403572, | |
| "learning_rate": 0.00016032198557772932, | |
| "loss": 0.7139, | |
| "step": 6015 | |
| }, | |
| { | |
| "epoch": 1.0078555908407154, | |
| "grad_norm": 0.0251301322132349, | |
| "learning_rate": 0.00016022136508468893, | |
| "loss": 0.7071, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.0103626943005182, | |
| "grad_norm": 0.025702379643917084, | |
| "learning_rate": 0.0001601207445916485, | |
| "loss": 0.6975, | |
| "step": 6045 | |
| }, | |
| { | |
| "epoch": 1.012869797760321, | |
| "grad_norm": 0.02600419521331787, | |
| "learning_rate": 0.0001600201240986081, | |
| "loss": 0.718, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.0153769012201237, | |
| "grad_norm": 0.026151692494750023, | |
| "learning_rate": 0.00015991950360556766, | |
| "loss": 0.7069, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 1.0178840046799265, | |
| "grad_norm": 0.025088109076023102, | |
| "learning_rate": 0.00015981888311252727, | |
| "loss": 0.708, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.0203911081397292, | |
| "grad_norm": 0.026014501228928566, | |
| "learning_rate": 0.00015971826261948683, | |
| "loss": 0.7133, | |
| "step": 6105 | |
| }, | |
| { | |
| "epoch": 1.022898211599532, | |
| "grad_norm": 0.02501731365919113, | |
| "learning_rate": 0.00015961764212644644, | |
| "loss": 0.7191, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.0254053150593347, | |
| "grad_norm": 0.025291357189416885, | |
| "learning_rate": 0.000159517021633406, | |
| "loss": 0.7164, | |
| "step": 6135 | |
| }, | |
| { | |
| "epoch": 1.0279124185191375, | |
| "grad_norm": 0.026282720267772675, | |
| "learning_rate": 0.00015941640114036561, | |
| "loss": 0.72, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.0304195219789403, | |
| "grad_norm": 0.026496944949030876, | |
| "learning_rate": 0.00015931578064732517, | |
| "loss": 0.7212, | |
| "step": 6165 | |
| }, | |
| { | |
| "epoch": 1.032926625438743, | |
| "grad_norm": 0.026129065081477165, | |
| "learning_rate": 0.00015921516015428476, | |
| "loss": 0.7121, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.0354337288985458, | |
| "grad_norm": 0.027691906318068504, | |
| "learning_rate": 0.00015911453966124434, | |
| "loss": 0.7238, | |
| "step": 6195 | |
| }, | |
| { | |
| "epoch": 1.0379408323583486, | |
| "grad_norm": 0.025952916592359543, | |
| "learning_rate": 0.00015901391916820393, | |
| "loss": 0.7141, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.0404479358181513, | |
| "grad_norm": 0.0261197779327631, | |
| "learning_rate": 0.00015891329867516351, | |
| "loss": 0.7116, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 1.0429550392779543, | |
| "grad_norm": 0.02602444589138031, | |
| "learning_rate": 0.0001588126781821231, | |
| "loss": 0.7212, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.045462142737757, | |
| "grad_norm": 0.027341393753886223, | |
| "learning_rate": 0.00015871205768908269, | |
| "loss": 0.7065, | |
| "step": 6255 | |
| }, | |
| { | |
| "epoch": 1.0479692461975598, | |
| "grad_norm": 0.026516225188970566, | |
| "learning_rate": 0.00015861143719604227, | |
| "loss": 0.7137, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.0504763496573626, | |
| "grad_norm": 0.025233183056116104, | |
| "learning_rate": 0.00015851081670300186, | |
| "loss": 0.7286, | |
| "step": 6285 | |
| }, | |
| { | |
| "epoch": 1.0529834531171653, | |
| "grad_norm": 0.025705158710479736, | |
| "learning_rate": 0.00015841019620996141, | |
| "loss": 0.7252, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.055490556576968, | |
| "grad_norm": 0.025452638044953346, | |
| "learning_rate": 0.00015830957571692103, | |
| "loss": 0.7207, | |
| "step": 6315 | |
| }, | |
| { | |
| "epoch": 1.0579976600367709, | |
| "grad_norm": 0.027089523151516914, | |
| "learning_rate": 0.00015820895522388059, | |
| "loss": 0.7123, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.0605047634965736, | |
| "grad_norm": 0.02557321824133396, | |
| "learning_rate": 0.0001581083347308402, | |
| "loss": 0.7153, | |
| "step": 6345 | |
| }, | |
| { | |
| "epoch": 1.0630118669563764, | |
| "grad_norm": 0.026297248899936676, | |
| "learning_rate": 0.00015800771423779976, | |
| "loss": 0.7183, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.0655189704161792, | |
| "grad_norm": 0.026958812028169632, | |
| "learning_rate": 0.00015790709374475937, | |
| "loss": 0.7117, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 1.068026073875982, | |
| "grad_norm": 0.026555512100458145, | |
| "learning_rate": 0.00015780647325171893, | |
| "loss": 0.7025, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.0705331773357847, | |
| "grad_norm": 0.026713771745562553, | |
| "learning_rate": 0.00015770585275867854, | |
| "loss": 0.7158, | |
| "step": 6405 | |
| }, | |
| { | |
| "epoch": 1.0730402807955874, | |
| "grad_norm": 0.02662680670619011, | |
| "learning_rate": 0.0001576052322656381, | |
| "loss": 0.7104, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.0755473842553902, | |
| "grad_norm": 0.02612622268497944, | |
| "learning_rate": 0.0001575046117725977, | |
| "loss": 0.7186, | |
| "step": 6435 | |
| }, | |
| { | |
| "epoch": 1.078054487715193, | |
| "grad_norm": 0.02652982994914055, | |
| "learning_rate": 0.00015740399127955727, | |
| "loss": 0.7097, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.0805615911749957, | |
| "grad_norm": 0.026232892647385597, | |
| "learning_rate": 0.00015730337078651685, | |
| "loss": 0.7203, | |
| "step": 6465 | |
| }, | |
| { | |
| "epoch": 1.0830686946347985, | |
| "grad_norm": 0.02632397972047329, | |
| "learning_rate": 0.00015720275029347644, | |
| "loss": 0.7209, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.0855757980946015, | |
| "grad_norm": 0.02648136578500271, | |
| "learning_rate": 0.00015710212980043603, | |
| "loss": 0.7182, | |
| "step": 6495 | |
| }, | |
| { | |
| "epoch": 1.0880829015544042, | |
| "grad_norm": 0.025636956095695496, | |
| "learning_rate": 0.0001570015093073956, | |
| "loss": 0.7077, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.090590005014207, | |
| "grad_norm": 0.026664093136787415, | |
| "learning_rate": 0.0001569008888143552, | |
| "loss": 0.7216, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 1.0930971084740098, | |
| "grad_norm": 0.02704274095594883, | |
| "learning_rate": 0.00015680026832131478, | |
| "loss": 0.714, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.0956042119338125, | |
| "grad_norm": 0.026222985237836838, | |
| "learning_rate": 0.00015669964782827437, | |
| "loss": 0.7134, | |
| "step": 6555 | |
| }, | |
| { | |
| "epoch": 1.0981113153936153, | |
| "grad_norm": 0.02940414845943451, | |
| "learning_rate": 0.00015659902733523395, | |
| "loss": 0.6986, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.100618418853418, | |
| "grad_norm": 0.025812886655330658, | |
| "learning_rate": 0.0001564984068421935, | |
| "loss": 0.7163, | |
| "step": 6585 | |
| }, | |
| { | |
| "epoch": 1.1031255223132208, | |
| "grad_norm": 0.026331394910812378, | |
| "learning_rate": 0.00015639778634915312, | |
| "loss": 0.7097, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.1056326257730236, | |
| "grad_norm": 0.027025267481803894, | |
| "learning_rate": 0.00015629716585611268, | |
| "loss": 0.6983, | |
| "step": 6615 | |
| }, | |
| { | |
| "epoch": 1.1081397292328263, | |
| "grad_norm": 0.02628287486732006, | |
| "learning_rate": 0.0001561965453630723, | |
| "loss": 0.7113, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.110646832692629, | |
| "grad_norm": 0.0271297600120306, | |
| "learning_rate": 0.00015609592487003185, | |
| "loss": 0.7163, | |
| "step": 6645 | |
| }, | |
| { | |
| "epoch": 1.1131539361524319, | |
| "grad_norm": 0.027640245854854584, | |
| "learning_rate": 0.00015599530437699147, | |
| "loss": 0.6974, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.1156610396122346, | |
| "grad_norm": 0.026571575552225113, | |
| "learning_rate": 0.00015589468388395102, | |
| "loss": 0.7032, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 1.1181681430720374, | |
| "grad_norm": 0.02639468014240265, | |
| "learning_rate": 0.00015579406339091064, | |
| "loss": 0.7139, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.1206752465318401, | |
| "grad_norm": 0.026831267401576042, | |
| "learning_rate": 0.0001556934428978702, | |
| "loss": 0.7209, | |
| "step": 6705 | |
| }, | |
| { | |
| "epoch": 1.123182349991643, | |
| "grad_norm": 0.028162870556116104, | |
| "learning_rate": 0.0001555928224048298, | |
| "loss": 0.7066, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.1256894534514457, | |
| "grad_norm": 0.02714131958782673, | |
| "learning_rate": 0.00015549220191178937, | |
| "loss": 0.721, | |
| "step": 6735 | |
| }, | |
| { | |
| "epoch": 1.1281965569112486, | |
| "grad_norm": 0.0281366016715765, | |
| "learning_rate": 0.00015539158141874898, | |
| "loss": 0.7024, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.1307036603710512, | |
| "grad_norm": 0.027032790705561638, | |
| "learning_rate": 0.00015529096092570854, | |
| "loss": 0.7283, | |
| "step": 6765 | |
| }, | |
| { | |
| "epoch": 1.1332107638308542, | |
| "grad_norm": 0.026658054441213608, | |
| "learning_rate": 0.00015519034043266812, | |
| "loss": 0.7111, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.135717867290657, | |
| "grad_norm": 0.026945605874061584, | |
| "learning_rate": 0.0001550897199396277, | |
| "loss": 0.7298, | |
| "step": 6795 | |
| }, | |
| { | |
| "epoch": 1.1382249707504597, | |
| "grad_norm": 0.02765739895403385, | |
| "learning_rate": 0.0001549890994465873, | |
| "loss": 0.7081, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.1407320742102625, | |
| "grad_norm": 0.02612920291721821, | |
| "learning_rate": 0.00015488847895354688, | |
| "loss": 0.709, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 1.1432391776700652, | |
| "grad_norm": 0.026704227551817894, | |
| "learning_rate": 0.00015478785846050646, | |
| "loss": 0.7088, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.145746281129868, | |
| "grad_norm": 0.027153639122843742, | |
| "learning_rate": 0.00015468723796746605, | |
| "loss": 0.7166, | |
| "step": 6855 | |
| }, | |
| { | |
| "epoch": 1.1482533845896707, | |
| "grad_norm": 0.02730732038617134, | |
| "learning_rate": 0.0001545866174744256, | |
| "loss": 0.7101, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.1507604880494735, | |
| "grad_norm": 0.027596892789006233, | |
| "learning_rate": 0.00015448599698138522, | |
| "loss": 0.7122, | |
| "step": 6885 | |
| }, | |
| { | |
| "epoch": 1.1532675915092763, | |
| "grad_norm": 0.02678474597632885, | |
| "learning_rate": 0.00015438537648834478, | |
| "loss": 0.7226, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.155774694969079, | |
| "grad_norm": 0.02596975676715374, | |
| "learning_rate": 0.0001542847559953044, | |
| "loss": 0.7119, | |
| "step": 6915 | |
| }, | |
| { | |
| "epoch": 1.1582817984288818, | |
| "grad_norm": 0.026990054175257683, | |
| "learning_rate": 0.00015418413550226395, | |
| "loss": 0.7186, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.1607889018886846, | |
| "grad_norm": 0.026957310736179352, | |
| "learning_rate": 0.00015408351500922356, | |
| "loss": 0.7201, | |
| "step": 6945 | |
| }, | |
| { | |
| "epoch": 1.1632960053484873, | |
| "grad_norm": 0.02676299959421158, | |
| "learning_rate": 0.00015398289451618312, | |
| "loss": 0.7116, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.16580310880829, | |
| "grad_norm": 0.026614701375365257, | |
| "learning_rate": 0.00015388227402314273, | |
| "loss": 0.7181, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 1.1683102122680928, | |
| "grad_norm": 0.02804492600262165, | |
| "learning_rate": 0.0001537816535301023, | |
| "loss": 0.7062, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.1708173157278958, | |
| "grad_norm": 0.027462385594844818, | |
| "learning_rate": 0.0001536810330370619, | |
| "loss": 0.7274, | |
| "step": 7005 | |
| }, | |
| { | |
| "epoch": 1.1733244191876984, | |
| "grad_norm": 0.026805778965353966, | |
| "learning_rate": 0.00015358041254402146, | |
| "loss": 0.7176, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.1758315226475013, | |
| "grad_norm": 0.027235226705670357, | |
| "learning_rate": 0.00015347979205098108, | |
| "loss": 0.6944, | |
| "step": 7035 | |
| }, | |
| { | |
| "epoch": 1.178338626107304, | |
| "grad_norm": 0.02651335299015045, | |
| "learning_rate": 0.00015337917155794063, | |
| "loss": 0.7084, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.1808457295671069, | |
| "grad_norm": 0.027431068941950798, | |
| "learning_rate": 0.00015327855106490025, | |
| "loss": 0.7261, | |
| "step": 7065 | |
| }, | |
| { | |
| "epoch": 1.1833528330269096, | |
| "grad_norm": 0.027069034054875374, | |
| "learning_rate": 0.0001531779305718598, | |
| "loss": 0.7269, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.1858599364867124, | |
| "grad_norm": 0.026999959722161293, | |
| "learning_rate": 0.0001530773100788194, | |
| "loss": 0.713, | |
| "step": 7095 | |
| }, | |
| { | |
| "epoch": 1.1883670399465152, | |
| "grad_norm": 0.027173152193427086, | |
| "learning_rate": 0.00015297668958577898, | |
| "loss": 0.7099, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.190874143406318, | |
| "grad_norm": 0.026728777214884758, | |
| "learning_rate": 0.00015287606909273856, | |
| "loss": 0.7006, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 1.1933812468661207, | |
| "grad_norm": 0.02722666971385479, | |
| "learning_rate": 0.00015277544859969815, | |
| "loss": 0.711, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.1958883503259234, | |
| "grad_norm": 0.027167314663529396, | |
| "learning_rate": 0.00015267482810665773, | |
| "loss": 0.708, | |
| "step": 7155 | |
| }, | |
| { | |
| "epoch": 1.1983954537857262, | |
| "grad_norm": 0.027100099250674248, | |
| "learning_rate": 0.00015257420761361732, | |
| "loss": 0.6944, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.200902557245529, | |
| "grad_norm": 0.026492077857255936, | |
| "learning_rate": 0.00015247358712057688, | |
| "loss": 0.7122, | |
| "step": 7185 | |
| }, | |
| { | |
| "epoch": 1.2034096607053317, | |
| "grad_norm": 0.027062034234404564, | |
| "learning_rate": 0.0001523729666275365, | |
| "loss": 0.7333, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.2059167641651345, | |
| "grad_norm": 0.026957035064697266, | |
| "learning_rate": 0.00015227234613449605, | |
| "loss": 0.706, | |
| "step": 7215 | |
| }, | |
| { | |
| "epoch": 1.2084238676249373, | |
| "grad_norm": 0.027580831199884415, | |
| "learning_rate": 0.00015217172564145566, | |
| "loss": 0.7113, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.21093097108474, | |
| "grad_norm": 0.02672952227294445, | |
| "learning_rate": 0.00015207110514841522, | |
| "loss": 0.6934, | |
| "step": 7245 | |
| }, | |
| { | |
| "epoch": 1.2134380745445428, | |
| "grad_norm": 0.026900822296738625, | |
| "learning_rate": 0.00015197048465537483, | |
| "loss": 0.7015, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.2159451780043455, | |
| "grad_norm": 0.028098303824663162, | |
| "learning_rate": 0.0001518698641623344, | |
| "loss": 0.7143, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 1.2184522814641485, | |
| "grad_norm": 0.026865461841225624, | |
| "learning_rate": 0.000151769243669294, | |
| "loss": 0.7253, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.2209593849239513, | |
| "grad_norm": 0.02781241200864315, | |
| "learning_rate": 0.00015166862317625356, | |
| "loss": 0.7099, | |
| "step": 7305 | |
| }, | |
| { | |
| "epoch": 1.223466488383754, | |
| "grad_norm": 0.027126578614115715, | |
| "learning_rate": 0.00015156800268321317, | |
| "loss": 0.6956, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.2259735918435568, | |
| "grad_norm": 0.02705315686762333, | |
| "learning_rate": 0.00015146738219017273, | |
| "loss": 0.7037, | |
| "step": 7335 | |
| }, | |
| { | |
| "epoch": 1.2284806953033596, | |
| "grad_norm": 0.027233878150582314, | |
| "learning_rate": 0.00015136676169713234, | |
| "loss": 0.7137, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.2309877987631623, | |
| "grad_norm": 0.028538642451167107, | |
| "learning_rate": 0.0001512661412040919, | |
| "loss": 0.7123, | |
| "step": 7365 | |
| }, | |
| { | |
| "epoch": 1.233494902222965, | |
| "grad_norm": 0.027490422129631042, | |
| "learning_rate": 0.0001511655207110515, | |
| "loss": 0.7208, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.2360020056827679, | |
| "grad_norm": 0.02747008576989174, | |
| "learning_rate": 0.00015106490021801107, | |
| "loss": 0.7196, | |
| "step": 7395 | |
| }, | |
| { | |
| "epoch": 1.2385091091425706, | |
| "grad_norm": 0.026851654052734375, | |
| "learning_rate": 0.00015096427972497066, | |
| "loss": 0.7149, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.2410162126023734, | |
| "grad_norm": 0.02743196301162243, | |
| "learning_rate": 0.00015086365923193024, | |
| "loss": 0.7175, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 1.2435233160621761, | |
| "grad_norm": 0.028329750522971153, | |
| "learning_rate": 0.00015076303873888983, | |
| "loss": 0.698, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.246030419521979, | |
| "grad_norm": 0.026834193617105484, | |
| "learning_rate": 0.00015066241824584941, | |
| "loss": 0.7063, | |
| "step": 7455 | |
| }, | |
| { | |
| "epoch": 1.2485375229817817, | |
| "grad_norm": 0.028689688071608543, | |
| "learning_rate": 0.000150561797752809, | |
| "loss": 0.704, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.2510446264415844, | |
| "grad_norm": 0.02716403640806675, | |
| "learning_rate": 0.00015046117725976859, | |
| "loss": 0.7095, | |
| "step": 7485 | |
| }, | |
| { | |
| "epoch": 1.2535517299013872, | |
| "grad_norm": 0.027952060103416443, | |
| "learning_rate": 0.00015036055676672814, | |
| "loss": 0.7341, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.2560588333611902, | |
| "grad_norm": 0.028136277571320534, | |
| "learning_rate": 0.00015025993627368776, | |
| "loss": 0.6967, | |
| "step": 7515 | |
| }, | |
| { | |
| "epoch": 1.2585659368209927, | |
| "grad_norm": 0.027513163164258003, | |
| "learning_rate": 0.00015015931578064731, | |
| "loss": 0.7053, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.2610730402807957, | |
| "grad_norm": 0.027584819123148918, | |
| "learning_rate": 0.00015005869528760693, | |
| "loss": 0.7162, | |
| "step": 7545 | |
| }, | |
| { | |
| "epoch": 1.2635801437405982, | |
| "grad_norm": 0.02737903967499733, | |
| "learning_rate": 0.00014995807479456649, | |
| "loss": 0.7211, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.2660872472004012, | |
| "grad_norm": 0.028384409844875336, | |
| "learning_rate": 0.0001498574543015261, | |
| "loss": 0.7059, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 1.268594350660204, | |
| "grad_norm": 0.027213079854846, | |
| "learning_rate": 0.00014975683380848566, | |
| "loss": 0.7064, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.2711014541200067, | |
| "grad_norm": 0.02736794948577881, | |
| "learning_rate": 0.00014965621331544527, | |
| "loss": 0.712, | |
| "step": 7605 | |
| }, | |
| { | |
| "epoch": 1.2736085575798095, | |
| "grad_norm": 0.026495933532714844, | |
| "learning_rate": 0.00014955559282240483, | |
| "loss": 0.7115, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.2761156610396123, | |
| "grad_norm": 0.02718982845544815, | |
| "learning_rate": 0.00014945497232936444, | |
| "loss": 0.7039, | |
| "step": 7635 | |
| }, | |
| { | |
| "epoch": 1.278622764499415, | |
| "grad_norm": 0.027888623997569084, | |
| "learning_rate": 0.000149354351836324, | |
| "loss": 0.6947, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.2811298679592178, | |
| "grad_norm": 0.027887005358934402, | |
| "learning_rate": 0.0001492537313432836, | |
| "loss": 0.7092, | |
| "step": 7665 | |
| }, | |
| { | |
| "epoch": 1.2836369714190206, | |
| "grad_norm": 0.02832951210439205, | |
| "learning_rate": 0.00014915311085024317, | |
| "loss": 0.7253, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.2861440748788233, | |
| "grad_norm": 0.027755776420235634, | |
| "learning_rate": 0.00014905249035720275, | |
| "loss": 0.7051, | |
| "step": 7695 | |
| }, | |
| { | |
| "epoch": 1.288651178338626, | |
| "grad_norm": 0.027755258604884148, | |
| "learning_rate": 0.00014895186986416234, | |
| "loss": 0.7131, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.2911582817984288, | |
| "grad_norm": 0.027515331283211708, | |
| "learning_rate": 0.00014885124937112193, | |
| "loss": 0.6972, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 1.2936653852582316, | |
| "grad_norm": 0.02867818996310234, | |
| "learning_rate": 0.0001487506288780815, | |
| "loss": 0.6909, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.2961724887180344, | |
| "grad_norm": 0.027417359873652458, | |
| "learning_rate": 0.0001486500083850411, | |
| "loss": 0.7112, | |
| "step": 7755 | |
| }, | |
| { | |
| "epoch": 1.2986795921778371, | |
| "grad_norm": 0.02725161798298359, | |
| "learning_rate": 0.00014854938789200068, | |
| "loss": 0.7172, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.30118669563764, | |
| "grad_norm": 0.027100631967186928, | |
| "learning_rate": 0.00014844876739896024, | |
| "loss": 0.7079, | |
| "step": 7785 | |
| }, | |
| { | |
| "epoch": 1.3036937990974429, | |
| "grad_norm": 0.026735814288258553, | |
| "learning_rate": 0.00014834814690591985, | |
| "loss": 0.7134, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.3062009025572454, | |
| "grad_norm": 0.02827010303735733, | |
| "learning_rate": 0.0001482475264128794, | |
| "loss": 0.7077, | |
| "step": 7815 | |
| }, | |
| { | |
| "epoch": 1.3087080060170484, | |
| "grad_norm": 0.02705741673707962, | |
| "learning_rate": 0.00014814690591983902, | |
| "loss": 0.7172, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.3112151094768512, | |
| "grad_norm": 0.02796081081032753, | |
| "learning_rate": 0.00014804628542679858, | |
| "loss": 0.7232, | |
| "step": 7845 | |
| }, | |
| { | |
| "epoch": 1.313722212936654, | |
| "grad_norm": 0.027841266244649887, | |
| "learning_rate": 0.0001479456649337582, | |
| "loss": 0.7113, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.3162293163964567, | |
| "grad_norm": 0.030358731746673584, | |
| "learning_rate": 0.00014784504444071775, | |
| "loss": 0.7014, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 1.3187364198562594, | |
| "grad_norm": 0.02849227376282215, | |
| "learning_rate": 0.00014774442394767737, | |
| "loss": 0.7233, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.3212435233160622, | |
| "grad_norm": 0.02644391544163227, | |
| "learning_rate": 0.00014764380345463692, | |
| "loss": 0.729, | |
| "step": 7905 | |
| }, | |
| { | |
| "epoch": 1.323750626775865, | |
| "grad_norm": 0.027298742905259132, | |
| "learning_rate": 0.00014754318296159654, | |
| "loss": 0.722, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.3262577302356677, | |
| "grad_norm": 0.027199968695640564, | |
| "learning_rate": 0.0001474425624685561, | |
| "loss": 0.7041, | |
| "step": 7935 | |
| }, | |
| { | |
| "epoch": 1.3287648336954705, | |
| "grad_norm": 0.027822501957416534, | |
| "learning_rate": 0.0001473419419755157, | |
| "loss": 0.7044, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.3312719371552733, | |
| "grad_norm": 0.027914773672819138, | |
| "learning_rate": 0.00014724132148247527, | |
| "loss": 0.7074, | |
| "step": 7965 | |
| }, | |
| { | |
| "epoch": 1.333779040615076, | |
| "grad_norm": 0.028190581128001213, | |
| "learning_rate": 0.00014714070098943488, | |
| "loss": 0.7014, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.3362861440748788, | |
| "grad_norm": 0.027638264000415802, | |
| "learning_rate": 0.00014704008049639444, | |
| "loss": 0.6973, | |
| "step": 7995 | |
| }, | |
| { | |
| "epoch": 1.3387932475346815, | |
| "grad_norm": 0.028353575617074966, | |
| "learning_rate": 0.00014693946000335402, | |
| "loss": 0.7191, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.3413003509944843, | |
| "grad_norm": 0.027547866106033325, | |
| "learning_rate": 0.0001468388395103136, | |
| "loss": 0.7089, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 1.343807454454287, | |
| "grad_norm": 0.02667342871427536, | |
| "learning_rate": 0.0001467382190172732, | |
| "loss": 0.7075, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.34631455791409, | |
| "grad_norm": 0.028818530961871147, | |
| "learning_rate": 0.00014663759852423278, | |
| "loss": 0.7008, | |
| "step": 8055 | |
| }, | |
| { | |
| "epoch": 1.3488216613738926, | |
| "grad_norm": 0.02606160379946232, | |
| "learning_rate": 0.00014653697803119236, | |
| "loss": 0.7207, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.3513287648336956, | |
| "grad_norm": 0.028475910425186157, | |
| "learning_rate": 0.00014643635753815195, | |
| "loss": 0.6993, | |
| "step": 8085 | |
| }, | |
| { | |
| "epoch": 1.3538358682934981, | |
| "grad_norm": 0.02790878899395466, | |
| "learning_rate": 0.0001463357370451115, | |
| "loss": 0.7058, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.356342971753301, | |
| "grad_norm": 0.028986552730202675, | |
| "learning_rate": 0.00014623511655207112, | |
| "loss": 0.7056, | |
| "step": 8115 | |
| }, | |
| { | |
| "epoch": 1.3588500752131039, | |
| "grad_norm": 0.02837732620537281, | |
| "learning_rate": 0.00014613449605903068, | |
| "loss": 0.708, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.3613571786729066, | |
| "grad_norm": 0.027905132621526718, | |
| "learning_rate": 0.0001460338755659903, | |
| "loss": 0.7024, | |
| "step": 8145 | |
| }, | |
| { | |
| "epoch": 1.3638642821327094, | |
| "grad_norm": 0.027892014011740685, | |
| "learning_rate": 0.00014593325507294985, | |
| "loss": 0.7011, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.3663713855925121, | |
| "grad_norm": 0.02732338011264801, | |
| "learning_rate": 0.00014583263457990946, | |
| "loss": 0.6972, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 1.368878489052315, | |
| "grad_norm": 0.028097622096538544, | |
| "learning_rate": 0.00014573201408686902, | |
| "loss": 0.6951, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.3713855925121177, | |
| "grad_norm": 0.028016911819577217, | |
| "learning_rate": 0.00014563139359382863, | |
| "loss": 0.7156, | |
| "step": 8205 | |
| }, | |
| { | |
| "epoch": 1.3738926959719204, | |
| "grad_norm": 0.02762255072593689, | |
| "learning_rate": 0.0001455307731007882, | |
| "loss": 0.7021, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.3763997994317232, | |
| "grad_norm": 0.027654899284243584, | |
| "learning_rate": 0.0001454301526077478, | |
| "loss": 0.7074, | |
| "step": 8235 | |
| }, | |
| { | |
| "epoch": 1.378906902891526, | |
| "grad_norm": 0.027378590777516365, | |
| "learning_rate": 0.00014532953211470736, | |
| "loss": 0.706, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.3814140063513287, | |
| "grad_norm": 0.026956256479024887, | |
| "learning_rate": 0.00014522891162166698, | |
| "loss": 0.7183, | |
| "step": 8265 | |
| }, | |
| { | |
| "epoch": 1.3839211098111315, | |
| "grad_norm": 0.027121366932988167, | |
| "learning_rate": 0.00014512829112862653, | |
| "loss": 0.7027, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.3864282132709342, | |
| "grad_norm": 0.02765464223921299, | |
| "learning_rate": 0.00014502767063558612, | |
| "loss": 0.7132, | |
| "step": 8295 | |
| }, | |
| { | |
| "epoch": 1.3889353167307372, | |
| "grad_norm": 0.02817637287080288, | |
| "learning_rate": 0.0001449270501425457, | |
| "loss": 0.6864, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.3914424201905398, | |
| "grad_norm": 0.02854936383664608, | |
| "learning_rate": 0.0001448264296495053, | |
| "loss": 0.7248, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 1.3939495236503427, | |
| "grad_norm": 0.028685523197054863, | |
| "learning_rate": 0.00014472580915646488, | |
| "loss": 0.7036, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.3964566271101453, | |
| "grad_norm": 0.028023192659020424, | |
| "learning_rate": 0.00014462518866342446, | |
| "loss": 0.706, | |
| "step": 8355 | |
| }, | |
| { | |
| "epoch": 1.3989637305699483, | |
| "grad_norm": 0.027805542573332787, | |
| "learning_rate": 0.00014452456817038405, | |
| "loss": 0.7126, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.401470834029751, | |
| "grad_norm": 0.027909213677048683, | |
| "learning_rate": 0.00014442394767734363, | |
| "loss": 0.7133, | |
| "step": 8385 | |
| }, | |
| { | |
| "epoch": 1.4039779374895538, | |
| "grad_norm": 0.02798452228307724, | |
| "learning_rate": 0.00014432332718430322, | |
| "loss": 0.7052, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.4064850409493566, | |
| "grad_norm": 0.02735227160155773, | |
| "learning_rate": 0.00014422270669126278, | |
| "loss": 0.7096, | |
| "step": 8415 | |
| }, | |
| { | |
| "epoch": 1.4089921444091593, | |
| "grad_norm": 0.027850987389683723, | |
| "learning_rate": 0.0001441220861982224, | |
| "loss": 0.718, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.411499247868962, | |
| "grad_norm": 0.028347337618470192, | |
| "learning_rate": 0.00014402146570518195, | |
| "loss": 0.6989, | |
| "step": 8445 | |
| }, | |
| { | |
| "epoch": 1.4140063513287648, | |
| "grad_norm": 0.028133846819400787, | |
| "learning_rate": 0.00014392084521214156, | |
| "loss": 0.7073, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.4165134547885676, | |
| "grad_norm": 0.02889505960047245, | |
| "learning_rate": 0.00014382022471910112, | |
| "loss": 0.7157, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 1.4190205582483704, | |
| "grad_norm": 0.02751564234495163, | |
| "learning_rate": 0.00014371960422606073, | |
| "loss": 0.7115, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 1.4215276617081731, | |
| "grad_norm": 0.027201758697628975, | |
| "learning_rate": 0.0001436189837330203, | |
| "loss": 0.7121, | |
| "step": 8505 | |
| }, | |
| { | |
| "epoch": 1.424034765167976, | |
| "grad_norm": 0.047122806310653687, | |
| "learning_rate": 0.0001435183632399799, | |
| "loss": 0.7103, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 1.4265418686277787, | |
| "grad_norm": 0.028580831363797188, | |
| "learning_rate": 0.00014341774274693946, | |
| "loss": 0.6933, | |
| "step": 8535 | |
| }, | |
| { | |
| "epoch": 1.4290489720875814, | |
| "grad_norm": 0.028754740953445435, | |
| "learning_rate": 0.00014331712225389907, | |
| "loss": 0.7155, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.4315560755473842, | |
| "grad_norm": 0.028142735362052917, | |
| "learning_rate": 0.00014321650176085863, | |
| "loss": 0.7076, | |
| "step": 8565 | |
| }, | |
| { | |
| "epoch": 1.434063179007187, | |
| "grad_norm": 0.02792290225625038, | |
| "learning_rate": 0.00014311588126781822, | |
| "loss": 0.7202, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 1.43657028246699, | |
| "grad_norm": 0.027254393324255943, | |
| "learning_rate": 0.0001430152607747778, | |
| "loss": 0.7116, | |
| "step": 8595 | |
| }, | |
| { | |
| "epoch": 1.4390773859267925, | |
| "grad_norm": 0.027158159762620926, | |
| "learning_rate": 0.0001429146402817374, | |
| "loss": 0.7034, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 1.4415844893865954, | |
| "grad_norm": 0.028217531740665436, | |
| "learning_rate": 0.00014281401978869697, | |
| "loss": 0.7136, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 1.4440915928463982, | |
| "grad_norm": 0.028678081929683685, | |
| "learning_rate": 0.00014271339929565656, | |
| "loss": 0.7053, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.446598696306201, | |
| "grad_norm": 0.028371306136250496, | |
| "learning_rate": 0.00014261277880261614, | |
| "loss": 0.7115, | |
| "step": 8655 | |
| }, | |
| { | |
| "epoch": 1.4491057997660037, | |
| "grad_norm": 0.027796892449259758, | |
| "learning_rate": 0.00014251215830957573, | |
| "loss": 0.7138, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 1.4516129032258065, | |
| "grad_norm": 0.027524475008249283, | |
| "learning_rate": 0.00014241153781653531, | |
| "loss": 0.7048, | |
| "step": 8685 | |
| }, | |
| { | |
| "epoch": 1.4541200066856093, | |
| "grad_norm": 0.02704106830060482, | |
| "learning_rate": 0.00014231091732349487, | |
| "loss": 0.6967, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.456627110145412, | |
| "grad_norm": 0.028332151472568512, | |
| "learning_rate": 0.00014221029683045448, | |
| "loss": 0.7015, | |
| "step": 8715 | |
| }, | |
| { | |
| "epoch": 1.4591342136052148, | |
| "grad_norm": 0.028455249965190887, | |
| "learning_rate": 0.00014210967633741404, | |
| "loss": 0.6973, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 1.4616413170650175, | |
| "grad_norm": 0.028323177248239517, | |
| "learning_rate": 0.00014200905584437366, | |
| "loss": 0.7119, | |
| "step": 8745 | |
| }, | |
| { | |
| "epoch": 1.4641484205248203, | |
| "grad_norm": 0.028827426955103874, | |
| "learning_rate": 0.00014190843535133321, | |
| "loss": 0.6973, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 1.466655523984623, | |
| "grad_norm": 0.029024334624409676, | |
| "learning_rate": 0.00014180781485829283, | |
| "loss": 0.7109, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 1.4691626274444258, | |
| "grad_norm": 0.02851213701069355, | |
| "learning_rate": 0.00014170719436525239, | |
| "loss": 0.7038, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 1.4716697309042286, | |
| "grad_norm": 0.027595283463597298, | |
| "learning_rate": 0.000141606573872212, | |
| "loss": 0.7119, | |
| "step": 8805 | |
| }, | |
| { | |
| "epoch": 1.4741768343640314, | |
| "grad_norm": 0.02817492000758648, | |
| "learning_rate": 0.00014150595337917156, | |
| "loss": 0.7007, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 1.4766839378238341, | |
| "grad_norm": 0.028595896437764168, | |
| "learning_rate": 0.00014140533288613117, | |
| "loss": 0.7031, | |
| "step": 8835 | |
| }, | |
| { | |
| "epoch": 1.479191041283637, | |
| "grad_norm": 0.028396232053637505, | |
| "learning_rate": 0.00014130471239309073, | |
| "loss": 0.6944, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.4816981447434396, | |
| "grad_norm": 0.02777491882443428, | |
| "learning_rate": 0.0001412040919000503, | |
| "loss": 0.7069, | |
| "step": 8865 | |
| }, | |
| { | |
| "epoch": 1.4842052482032426, | |
| "grad_norm": 0.02780229039490223, | |
| "learning_rate": 0.0001411034714070099, | |
| "loss": 0.6955, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 1.4867123516630452, | |
| "grad_norm": 0.02779022604227066, | |
| "learning_rate": 0.00014100285091396948, | |
| "loss": 0.7101, | |
| "step": 8895 | |
| }, | |
| { | |
| "epoch": 1.4892194551228481, | |
| "grad_norm": 0.029339686036109924, | |
| "learning_rate": 0.00014090223042092907, | |
| "loss": 0.7074, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.491726558582651, | |
| "grad_norm": 0.0277661494910717, | |
| "learning_rate": 0.00014080160992788865, | |
| "loss": 0.7007, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 1.4942336620424537, | |
| "grad_norm": 0.028384177014231682, | |
| "learning_rate": 0.00014070098943484824, | |
| "loss": 0.7006, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 1.4967407655022564, | |
| "grad_norm": 0.027173573151230812, | |
| "learning_rate": 0.00014060036894180783, | |
| "loss": 0.7126, | |
| "step": 8955 | |
| }, | |
| { | |
| "epoch": 1.4992478689620592, | |
| "grad_norm": 0.029250754043459892, | |
| "learning_rate": 0.0001404997484487674, | |
| "loss": 0.7053, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 1.501754972421862, | |
| "grad_norm": 0.02840678207576275, | |
| "learning_rate": 0.000140399127955727, | |
| "loss": 0.691, | |
| "step": 8985 | |
| }, | |
| { | |
| "epoch": 1.5042620758816647, | |
| "grad_norm": 0.029002662748098373, | |
| "learning_rate": 0.00014029850746268658, | |
| "loss": 0.6969, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.5067691793414675, | |
| "grad_norm": 0.028643961995840073, | |
| "learning_rate": 0.00014019788696964614, | |
| "loss": 0.7145, | |
| "step": 9015 | |
| }, | |
| { | |
| "epoch": 1.5092762828012702, | |
| "grad_norm": 0.027849212288856506, | |
| "learning_rate": 0.00014009726647660575, | |
| "loss": 0.7056, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 1.511783386261073, | |
| "grad_norm": 0.02838641032576561, | |
| "learning_rate": 0.0001399966459835653, | |
| "loss": 0.7171, | |
| "step": 9045 | |
| }, | |
| { | |
| "epoch": 1.5142904897208758, | |
| "grad_norm": 0.028329892084002495, | |
| "learning_rate": 0.00013989602549052492, | |
| "loss": 0.7021, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 1.5167975931806787, | |
| "grad_norm": 0.0278428103774786, | |
| "learning_rate": 0.00013979540499748448, | |
| "loss": 0.7008, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 1.5193046966404813, | |
| "grad_norm": 0.029085583984851837, | |
| "learning_rate": 0.0001396947845044441, | |
| "loss": 0.7014, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 1.5218118001002843, | |
| "grad_norm": 0.028230642899870872, | |
| "learning_rate": 0.00013959416401140365, | |
| "loss": 0.7028, | |
| "step": 9105 | |
| }, | |
| { | |
| "epoch": 1.5243189035600868, | |
| "grad_norm": 0.02829892747104168, | |
| "learning_rate": 0.00013949354351836327, | |
| "loss": 0.7052, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.5268260070198898, | |
| "grad_norm": 0.02769339270889759, | |
| "learning_rate": 0.00013939292302532282, | |
| "loss": 0.705, | |
| "step": 9135 | |
| }, | |
| { | |
| "epoch": 1.5293331104796923, | |
| "grad_norm": 0.02728847600519657, | |
| "learning_rate": 0.0001392923025322824, | |
| "loss": 0.7129, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.5318402139394953, | |
| "grad_norm": 0.029400475323200226, | |
| "learning_rate": 0.000139191682039242, | |
| "loss": 0.7076, | |
| "step": 9165 | |
| }, | |
| { | |
| "epoch": 1.5343473173992979, | |
| "grad_norm": 0.02829390950500965, | |
| "learning_rate": 0.00013909106154620158, | |
| "loss": 0.7032, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 1.5368544208591008, | |
| "grad_norm": 0.028629042208194733, | |
| "learning_rate": 0.00013899044105316117, | |
| "loss": 0.6992, | |
| "step": 9195 | |
| }, | |
| { | |
| "epoch": 1.5393615243189036, | |
| "grad_norm": 0.028124259784817696, | |
| "learning_rate": 0.00013888982056012075, | |
| "loss": 0.6928, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 1.5418686277787064, | |
| "grad_norm": 0.027618682011961937, | |
| "learning_rate": 0.00013878920006708034, | |
| "loss": 0.6988, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 1.5443757312385091, | |
| "grad_norm": 0.028371086344122887, | |
| "learning_rate": 0.00013868857957403992, | |
| "loss": 0.7068, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 1.546882834698312, | |
| "grad_norm": 0.02925163321197033, | |
| "learning_rate": 0.0001385879590809995, | |
| "loss": 0.7044, | |
| "step": 9255 | |
| }, | |
| { | |
| "epoch": 1.5493899381581147, | |
| "grad_norm": 0.027992991730570793, | |
| "learning_rate": 0.0001384873385879591, | |
| "loss": 0.7147, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 1.5518970416179174, | |
| "grad_norm": 0.02831142581999302, | |
| "learning_rate": 0.00013838671809491868, | |
| "loss": 0.711, | |
| "step": 9285 | |
| }, | |
| { | |
| "epoch": 1.5544041450777202, | |
| "grad_norm": 0.027344243600964546, | |
| "learning_rate": 0.00013828609760187826, | |
| "loss": 0.7043, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.556911248537523, | |
| "grad_norm": 0.027959240600466728, | |
| "learning_rate": 0.00013818547710883785, | |
| "loss": 0.715, | |
| "step": 9315 | |
| }, | |
| { | |
| "epoch": 1.5594183519973257, | |
| "grad_norm": 0.0285944901406765, | |
| "learning_rate": 0.0001380848566157974, | |
| "loss": 0.7104, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 1.5619254554571285, | |
| "grad_norm": 0.02860502153635025, | |
| "learning_rate": 0.00013798423612275702, | |
| "loss": 0.7053, | |
| "step": 9345 | |
| }, | |
| { | |
| "epoch": 1.5644325589169314, | |
| "grad_norm": 0.028087912127375603, | |
| "learning_rate": 0.00013788361562971658, | |
| "loss": 0.715, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 1.566939662376734, | |
| "grad_norm": 0.028339073061943054, | |
| "learning_rate": 0.0001377829951366762, | |
| "loss": 0.7035, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 1.569446765836537, | |
| "grad_norm": 0.027878131717443466, | |
| "learning_rate": 0.00013768237464363575, | |
| "loss": 0.7072, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 1.5719538692963395, | |
| "grad_norm": 0.028305955231189728, | |
| "learning_rate": 0.00013758175415059536, | |
| "loss": 0.6994, | |
| "step": 9405 | |
| }, | |
| { | |
| "epoch": 1.5744609727561425, | |
| "grad_norm": 0.028195269405841827, | |
| "learning_rate": 0.00013748113365755492, | |
| "loss": 0.7172, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 1.576968076215945, | |
| "grad_norm": 0.028301289305090904, | |
| "learning_rate": 0.00013738051316451453, | |
| "loss": 0.6958, | |
| "step": 9435 | |
| }, | |
| { | |
| "epoch": 1.579475179675748, | |
| "grad_norm": 0.029125042259693146, | |
| "learning_rate": 0.0001372798926714741, | |
| "loss": 0.7004, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.5819822831355508, | |
| "grad_norm": 0.02798408642411232, | |
| "learning_rate": 0.00013717927217843368, | |
| "loss": 0.6995, | |
| "step": 9465 | |
| }, | |
| { | |
| "epoch": 1.5844893865953535, | |
| "grad_norm": 0.029614899307489395, | |
| "learning_rate": 0.00013707865168539326, | |
| "loss": 0.7057, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 1.5869964900551563, | |
| "grad_norm": 0.0279951523989439, | |
| "learning_rate": 0.00013697803119235285, | |
| "loss": 0.6949, | |
| "step": 9495 | |
| }, | |
| { | |
| "epoch": 1.589503593514959, | |
| "grad_norm": 0.028490344062447548, | |
| "learning_rate": 0.00013687741069931243, | |
| "loss": 0.7003, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 1.5920106969747618, | |
| "grad_norm": 0.028360631316900253, | |
| "learning_rate": 0.00013677679020627202, | |
| "loss": 0.7008, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 1.5945178004345646, | |
| "grad_norm": 0.029337970539927483, | |
| "learning_rate": 0.0001366761697132316, | |
| "loss": 0.7143, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 1.5970249038943674, | |
| "grad_norm": 0.02845313400030136, | |
| "learning_rate": 0.0001365755492201912, | |
| "loss": 0.7006, | |
| "step": 9555 | |
| }, | |
| { | |
| "epoch": 1.5995320073541701, | |
| "grad_norm": 0.027560876682400703, | |
| "learning_rate": 0.00013647492872715078, | |
| "loss": 0.7102, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 1.6020391108139729, | |
| "grad_norm": 0.028155362233519554, | |
| "learning_rate": 0.00013637430823411036, | |
| "loss": 0.6979, | |
| "step": 9585 | |
| }, | |
| { | |
| "epoch": 1.6045462142737756, | |
| "grad_norm": 0.029344851151108742, | |
| "learning_rate": 0.00013627368774106995, | |
| "loss": 0.704, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.6070533177335786, | |
| "grad_norm": 0.02839244157075882, | |
| "learning_rate": 0.0001361730672480295, | |
| "loss": 0.6977, | |
| "step": 9615 | |
| }, | |
| { | |
| "epoch": 1.6095604211933812, | |
| "grad_norm": 0.027915630489587784, | |
| "learning_rate": 0.00013607244675498912, | |
| "loss": 0.7086, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 1.6120675246531841, | |
| "grad_norm": 0.02826772816479206, | |
| "learning_rate": 0.00013597182626194868, | |
| "loss": 0.6951, | |
| "step": 9645 | |
| }, | |
| { | |
| "epoch": 1.6145746281129867, | |
| "grad_norm": 0.02916094847023487, | |
| "learning_rate": 0.0001358712057689083, | |
| "loss": 0.7103, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 1.6170817315727897, | |
| "grad_norm": 0.02921309880912304, | |
| "learning_rate": 0.00013577058527586785, | |
| "loss": 0.6987, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 1.6195888350325922, | |
| "grad_norm": 0.028561830520629883, | |
| "learning_rate": 0.00013566996478282746, | |
| "loss": 0.7119, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 1.6220959384923952, | |
| "grad_norm": 0.028445105999708176, | |
| "learning_rate": 0.00013556934428978702, | |
| "loss": 0.7022, | |
| "step": 9705 | |
| }, | |
| { | |
| "epoch": 1.6246030419521977, | |
| "grad_norm": 0.029156696051359177, | |
| "learning_rate": 0.00013546872379674663, | |
| "loss": 0.6946, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 1.6271101454120007, | |
| "grad_norm": 0.029195377603173256, | |
| "learning_rate": 0.0001353681033037062, | |
| "loss": 0.6919, | |
| "step": 9735 | |
| }, | |
| { | |
| "epoch": 1.6296172488718035, | |
| "grad_norm": 0.028340883553028107, | |
| "learning_rate": 0.00013526748281066577, | |
| "loss": 0.6949, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.6321243523316062, | |
| "grad_norm": 0.028798367828130722, | |
| "learning_rate": 0.00013516686231762536, | |
| "loss": 0.6939, | |
| "step": 9765 | |
| }, | |
| { | |
| "epoch": 1.634631455791409, | |
| "grad_norm": 0.028108691796660423, | |
| "learning_rate": 0.00013506624182458494, | |
| "loss": 0.6877, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 1.6371385592512118, | |
| "grad_norm": 0.029803916811943054, | |
| "learning_rate": 0.00013496562133154453, | |
| "loss": 0.7063, | |
| "step": 9795 | |
| }, | |
| { | |
| "epoch": 1.6396456627110145, | |
| "grad_norm": 0.02933133766055107, | |
| "learning_rate": 0.00013486500083850412, | |
| "loss": 0.7105, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 1.6421527661708173, | |
| "grad_norm": 0.02795150876045227, | |
| "learning_rate": 0.0001347643803454637, | |
| "loss": 0.7127, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 1.64465986963062, | |
| "grad_norm": 0.028160467743873596, | |
| "learning_rate": 0.00013466375985242329, | |
| "loss": 0.6962, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 1.6471669730904228, | |
| "grad_norm": 0.028696995228528976, | |
| "learning_rate": 0.00013456313935938287, | |
| "loss": 0.7104, | |
| "step": 9855 | |
| }, | |
| { | |
| "epoch": 1.6496740765502258, | |
| "grad_norm": 0.028448186814785004, | |
| "learning_rate": 0.00013446251886634246, | |
| "loss": 0.7164, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 1.6521811800100283, | |
| "grad_norm": 0.028285130858421326, | |
| "learning_rate": 0.00013436189837330204, | |
| "loss": 0.6969, | |
| "step": 9885 | |
| }, | |
| { | |
| "epoch": 1.6546882834698313, | |
| "grad_norm": 0.02930794097483158, | |
| "learning_rate": 0.00013426127788026163, | |
| "loss": 0.6933, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.6571953869296339, | |
| "grad_norm": 0.028923654928803444, | |
| "learning_rate": 0.00013416065738722121, | |
| "loss": 0.7023, | |
| "step": 9915 | |
| }, | |
| { | |
| "epoch": 1.6597024903894368, | |
| "grad_norm": 0.029697788879275322, | |
| "learning_rate": 0.00013406003689418077, | |
| "loss": 0.7149, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 1.6622095938492394, | |
| "grad_norm": 0.02780589461326599, | |
| "learning_rate": 0.00013395941640114038, | |
| "loss": 0.7018, | |
| "step": 9945 | |
| }, | |
| { | |
| "epoch": 1.6647166973090424, | |
| "grad_norm": 0.028592998161911964, | |
| "learning_rate": 0.00013385879590809994, | |
| "loss": 0.6999, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 1.667223800768845, | |
| "grad_norm": 0.028748946264386177, | |
| "learning_rate": 0.00013375817541505956, | |
| "loss": 0.7108, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 1.669730904228648, | |
| "grad_norm": 0.02883664146065712, | |
| "learning_rate": 0.00013365755492201911, | |
| "loss": 0.7014, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 1.6722380076884507, | |
| "grad_norm": 0.027728645130991936, | |
| "learning_rate": 0.00013355693442897873, | |
| "loss": 0.7098, | |
| "step": 10005 | |
| }, | |
| { | |
| "epoch": 1.6747451111482534, | |
| "grad_norm": 0.028445927426218987, | |
| "learning_rate": 0.00013345631393593828, | |
| "loss": 0.7016, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 1.6772522146080562, | |
| "grad_norm": 0.029764369130134583, | |
| "learning_rate": 0.00013335569344289787, | |
| "loss": 0.6983, | |
| "step": 10035 | |
| }, | |
| { | |
| "epoch": 1.679759318067859, | |
| "grad_norm": 0.029188336804509163, | |
| "learning_rate": 0.00013325507294985746, | |
| "loss": 0.7158, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 1.6822664215276617, | |
| "grad_norm": 0.028241556137800217, | |
| "learning_rate": 0.00013315445245681704, | |
| "loss": 0.6923, | |
| "step": 10065 | |
| }, | |
| { | |
| "epoch": 1.6847735249874645, | |
| "grad_norm": 0.02920147404074669, | |
| "learning_rate": 0.00013305383196377663, | |
| "loss": 0.7157, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 1.6872806284472672, | |
| "grad_norm": 0.027919236570596695, | |
| "learning_rate": 0.0001329532114707362, | |
| "loss": 0.6984, | |
| "step": 10095 | |
| }, | |
| { | |
| "epoch": 1.68978773190707, | |
| "grad_norm": 0.0279484074562788, | |
| "learning_rate": 0.0001328525909776958, | |
| "loss": 0.688, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 1.6922948353668728, | |
| "grad_norm": 0.02801922895014286, | |
| "learning_rate": 0.00013275197048465538, | |
| "loss": 0.7077, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 1.6948019388266755, | |
| "grad_norm": 0.02875382825732231, | |
| "learning_rate": 0.00013265134999161497, | |
| "loss": 0.7097, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 1.6973090422864785, | |
| "grad_norm": 0.028978591784834862, | |
| "learning_rate": 0.00013255072949857455, | |
| "loss": 0.6921, | |
| "step": 10155 | |
| }, | |
| { | |
| "epoch": 1.699816145746281, | |
| "grad_norm": 0.028557538986206055, | |
| "learning_rate": 0.00013245010900553414, | |
| "loss": 0.7124, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 1.702323249206084, | |
| "grad_norm": 0.02763993851840496, | |
| "learning_rate": 0.00013234948851249372, | |
| "loss": 0.6998, | |
| "step": 10185 | |
| }, | |
| { | |
| "epoch": 1.7048303526658866, | |
| "grad_norm": 0.029599042609333992, | |
| "learning_rate": 0.0001322488680194533, | |
| "loss": 0.7116, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.7073374561256895, | |
| "grad_norm": 0.028568753972649574, | |
| "learning_rate": 0.0001321482475264129, | |
| "loss": 0.6927, | |
| "step": 10215 | |
| }, | |
| { | |
| "epoch": 1.709844559585492, | |
| "grad_norm": 0.028803616762161255, | |
| "learning_rate": 0.00013204762703337248, | |
| "loss": 0.7, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 1.712351663045295, | |
| "grad_norm": 0.028020154684782028, | |
| "learning_rate": 0.00013194700654033204, | |
| "loss": 0.7024, | |
| "step": 10245 | |
| }, | |
| { | |
| "epoch": 1.7148587665050978, | |
| "grad_norm": 0.029931314289569855, | |
| "learning_rate": 0.00013184638604729165, | |
| "loss": 0.6996, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 1.7173658699649006, | |
| "grad_norm": 0.028297219425439835, | |
| "learning_rate": 0.0001317457655542512, | |
| "loss": 0.7055, | |
| "step": 10275 | |
| }, | |
| { | |
| "epoch": 1.7198729734247034, | |
| "grad_norm": 0.02956199459731579, | |
| "learning_rate": 0.00013164514506121082, | |
| "loss": 0.6975, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 1.7223800768845061, | |
| "grad_norm": 0.027763094753026962, | |
| "learning_rate": 0.00013154452456817038, | |
| "loss": 0.7072, | |
| "step": 10305 | |
| }, | |
| { | |
| "epoch": 1.7248871803443089, | |
| "grad_norm": 0.027571503072977066, | |
| "learning_rate": 0.00013144390407512997, | |
| "loss": 0.7001, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 1.7273942838041116, | |
| "grad_norm": 0.028334425762295723, | |
| "learning_rate": 0.00013134328358208955, | |
| "loss": 0.7059, | |
| "step": 10335 | |
| }, | |
| { | |
| "epoch": 1.7299013872639144, | |
| "grad_norm": 0.027847876772284508, | |
| "learning_rate": 0.00013124266308904914, | |
| "loss": 0.6956, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 1.7324084907237172, | |
| "grad_norm": 0.027983665466308594, | |
| "learning_rate": 0.00013114204259600872, | |
| "loss": 0.716, | |
| "step": 10365 | |
| }, | |
| { | |
| "epoch": 1.73491559418352, | |
| "grad_norm": 0.028772972524166107, | |
| "learning_rate": 0.0001310414221029683, | |
| "loss": 0.7052, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 1.7374226976433227, | |
| "grad_norm": 0.028679322451353073, | |
| "learning_rate": 0.0001309408016099279, | |
| "loss": 0.6948, | |
| "step": 10395 | |
| }, | |
| { | |
| "epoch": 1.7399298011031257, | |
| "grad_norm": 0.02946317568421364, | |
| "learning_rate": 0.00013084018111688748, | |
| "loss": 0.7048, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 1.7424369045629282, | |
| "grad_norm": 0.0287346001714468, | |
| "learning_rate": 0.00013073956062384707, | |
| "loss": 0.7047, | |
| "step": 10425 | |
| }, | |
| { | |
| "epoch": 1.7449440080227312, | |
| "grad_norm": 0.02862308919429779, | |
| "learning_rate": 0.00013063894013080665, | |
| "loss": 0.6886, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 1.7474511114825337, | |
| "grad_norm": 0.0288804080337286, | |
| "learning_rate": 0.00013053831963776624, | |
| "loss": 0.706, | |
| "step": 10455 | |
| }, | |
| { | |
| "epoch": 1.7499582149423367, | |
| "grad_norm": 0.0278554018586874, | |
| "learning_rate": 0.00013043769914472582, | |
| "loss": 0.7169, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 1.7524653184021393, | |
| "grad_norm": 0.02842450514435768, | |
| "learning_rate": 0.0001303370786516854, | |
| "loss": 0.7028, | |
| "step": 10485 | |
| }, | |
| { | |
| "epoch": 1.7549724218619422, | |
| "grad_norm": 0.02780633233487606, | |
| "learning_rate": 0.000130236458158645, | |
| "loss": 0.6974, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.7574795253217448, | |
| "grad_norm": 0.028826531022787094, | |
| "learning_rate": 0.00013013583766560458, | |
| "loss": 0.7024, | |
| "step": 10515 | |
| }, | |
| { | |
| "epoch": 1.7599866287815478, | |
| "grad_norm": 0.028399532660841942, | |
| "learning_rate": 0.00013003521717256414, | |
| "loss": 0.7029, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 1.7624937322413505, | |
| "grad_norm": 0.029726563021540642, | |
| "learning_rate": 0.00012993459667952375, | |
| "loss": 0.7033, | |
| "step": 10545 | |
| }, | |
| { | |
| "epoch": 1.7650008357011533, | |
| "grad_norm": 0.028318284079432487, | |
| "learning_rate": 0.0001298339761864833, | |
| "loss": 0.7102, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 1.767507939160956, | |
| "grad_norm": 0.02865464985370636, | |
| "learning_rate": 0.00012973335569344292, | |
| "loss": 0.7079, | |
| "step": 10575 | |
| }, | |
| { | |
| "epoch": 1.7700150426207588, | |
| "grad_norm": 0.029711904004216194, | |
| "learning_rate": 0.00012963273520040248, | |
| "loss": 0.7003, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 1.7725221460805616, | |
| "grad_norm": 0.02868981659412384, | |
| "learning_rate": 0.00012953211470736206, | |
| "loss": 0.7071, | |
| "step": 10605 | |
| }, | |
| { | |
| "epoch": 1.7750292495403643, | |
| "grad_norm": 0.03023667074739933, | |
| "learning_rate": 0.00012943149421432165, | |
| "loss": 0.6988, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 1.777536353000167, | |
| "grad_norm": 0.02855963073670864, | |
| "learning_rate": 0.00012933087372128123, | |
| "loss": 0.7001, | |
| "step": 10635 | |
| }, | |
| { | |
| "epoch": 1.7800434564599699, | |
| "grad_norm": 0.02811777964234352, | |
| "learning_rate": 0.00012923025322824082, | |
| "loss": 0.6982, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 1.7825505599197728, | |
| "grad_norm": 0.029220616444945335, | |
| "learning_rate": 0.0001291296327352004, | |
| "loss": 0.7123, | |
| "step": 10665 | |
| }, | |
| { | |
| "epoch": 1.7850576633795754, | |
| "grad_norm": 0.02945820614695549, | |
| "learning_rate": 0.00012902901224216, | |
| "loss": 0.702, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 1.7875647668393784, | |
| "grad_norm": 0.02915896289050579, | |
| "learning_rate": 0.00012892839174911958, | |
| "loss": 0.6996, | |
| "step": 10695 | |
| }, | |
| { | |
| "epoch": 1.790071870299181, | |
| "grad_norm": 0.028102731332182884, | |
| "learning_rate": 0.00012882777125607916, | |
| "loss": 0.6931, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 1.792578973758984, | |
| "grad_norm": 0.028598302975296974, | |
| "learning_rate": 0.00012872715076303875, | |
| "loss": 0.7049, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 1.7950860772187864, | |
| "grad_norm": 0.02882864698767662, | |
| "learning_rate": 0.00012862653026999833, | |
| "loss": 0.6894, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 1.7975931806785894, | |
| "grad_norm": 0.02864612452685833, | |
| "learning_rate": 0.00012852590977695792, | |
| "loss": 0.6959, | |
| "step": 10755 | |
| }, | |
| { | |
| "epoch": 1.800100284138392, | |
| "grad_norm": 0.02791963331401348, | |
| "learning_rate": 0.0001284252892839175, | |
| "loss": 0.699, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 1.802607387598195, | |
| "grad_norm": 0.029228495433926582, | |
| "learning_rate": 0.0001283246687908771, | |
| "loss": 0.7014, | |
| "step": 10785 | |
| }, | |
| { | |
| "epoch": 1.8051144910579977, | |
| "grad_norm": 0.028694583103060722, | |
| "learning_rate": 0.00012822404829783667, | |
| "loss": 0.6981, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 1.8076215945178005, | |
| "grad_norm": 0.028723234310746193, | |
| "learning_rate": 0.00012812342780479626, | |
| "loss": 0.6935, | |
| "step": 10815 | |
| }, | |
| { | |
| "epoch": 1.8101286979776032, | |
| "grad_norm": 0.02791297808289528, | |
| "learning_rate": 0.00012802280731175585, | |
| "loss": 0.7023, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 1.812635801437406, | |
| "grad_norm": 0.028474239632487297, | |
| "learning_rate": 0.0001279221868187154, | |
| "loss": 0.6966, | |
| "step": 10845 | |
| }, | |
| { | |
| "epoch": 1.8151429048972088, | |
| "grad_norm": 0.028216082602739334, | |
| "learning_rate": 0.00012782156632567502, | |
| "loss": 0.7146, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 1.8176500083570115, | |
| "grad_norm": 0.02868053875863552, | |
| "learning_rate": 0.00012772094583263458, | |
| "loss": 0.7018, | |
| "step": 10875 | |
| }, | |
| { | |
| "epoch": 1.8201571118168143, | |
| "grad_norm": 0.029623722657561302, | |
| "learning_rate": 0.00012762032533959416, | |
| "loss": 0.7024, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 1.822664215276617, | |
| "grad_norm": 0.029195398092269897, | |
| "learning_rate": 0.00012751970484655375, | |
| "loss": 0.7056, | |
| "step": 10905 | |
| }, | |
| { | |
| "epoch": 1.8251713187364198, | |
| "grad_norm": 0.02803465723991394, | |
| "learning_rate": 0.00012741908435351333, | |
| "loss": 0.7013, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 1.8276784221962226, | |
| "grad_norm": 0.02818216383457184, | |
| "learning_rate": 0.00012731846386047292, | |
| "loss": 0.7052, | |
| "step": 10935 | |
| }, | |
| { | |
| "epoch": 1.8301855256560255, | |
| "grad_norm": 0.029034661129117012, | |
| "learning_rate": 0.0001272178433674325, | |
| "loss": 0.6918, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 1.832692629115828, | |
| "grad_norm": 0.028653794899582863, | |
| "learning_rate": 0.0001271172228743921, | |
| "loss": 0.708, | |
| "step": 10965 | |
| }, | |
| { | |
| "epoch": 1.835199732575631, | |
| "grad_norm": 0.02844145894050598, | |
| "learning_rate": 0.00012701660238135167, | |
| "loss": 0.7065, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 1.8377068360354336, | |
| "grad_norm": 0.02880460023880005, | |
| "learning_rate": 0.00012691598188831126, | |
| "loss": 0.6931, | |
| "step": 10995 | |
| }, | |
| { | |
| "epoch": 1.8402139394952366, | |
| "grad_norm": 0.02845979668200016, | |
| "learning_rate": 0.00012681536139527084, | |
| "loss": 0.6924, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 1.8427210429550391, | |
| "grad_norm": 0.02805483527481556, | |
| "learning_rate": 0.00012671474090223043, | |
| "loss": 0.7064, | |
| "step": 11025 | |
| }, | |
| { | |
| "epoch": 1.8452281464148421, | |
| "grad_norm": 0.029036138206720352, | |
| "learning_rate": 0.00012661412040919002, | |
| "loss": 0.7061, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 1.8477352498746449, | |
| "grad_norm": 0.028865808621048927, | |
| "learning_rate": 0.0001265134999161496, | |
| "loss": 0.7088, | |
| "step": 11055 | |
| }, | |
| { | |
| "epoch": 1.8502423533344476, | |
| "grad_norm": 0.028568295761942863, | |
| "learning_rate": 0.00012641287942310919, | |
| "loss": 0.7032, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 1.8527494567942504, | |
| "grad_norm": 0.02971578575670719, | |
| "learning_rate": 0.00012631225893006877, | |
| "loss": 0.703, | |
| "step": 11085 | |
| }, | |
| { | |
| "epoch": 1.8552565602540532, | |
| "grad_norm": 0.029128948226571083, | |
| "learning_rate": 0.00012621163843702836, | |
| "loss": 0.6964, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 1.857763663713856, | |
| "grad_norm": 0.028951995074748993, | |
| "learning_rate": 0.00012611101794398794, | |
| "loss": 0.6998, | |
| "step": 11115 | |
| }, | |
| { | |
| "epoch": 1.8602707671736587, | |
| "grad_norm": 0.029678482562303543, | |
| "learning_rate": 0.00012601039745094753, | |
| "loss": 0.6778, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 1.8627778706334615, | |
| "grad_norm": 0.029598036780953407, | |
| "learning_rate": 0.0001259097769579071, | |
| "loss": 0.694, | |
| "step": 11145 | |
| }, | |
| { | |
| "epoch": 1.8652849740932642, | |
| "grad_norm": 0.02879234589636326, | |
| "learning_rate": 0.00012580915646486667, | |
| "loss": 0.7085, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 1.867792077553067, | |
| "grad_norm": 0.029246920719742775, | |
| "learning_rate": 0.00012570853597182626, | |
| "loss": 0.6932, | |
| "step": 11175 | |
| }, | |
| { | |
| "epoch": 1.8702991810128697, | |
| "grad_norm": 0.030359363183379173, | |
| "learning_rate": 0.00012560791547878584, | |
| "loss": 0.7027, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 1.8728062844726727, | |
| "grad_norm": 0.02991410344839096, | |
| "learning_rate": 0.00012550729498574543, | |
| "loss": 0.6842, | |
| "step": 11205 | |
| }, | |
| { | |
| "epoch": 1.8753133879324753, | |
| "grad_norm": 0.028199173510074615, | |
| "learning_rate": 0.00012540667449270501, | |
| "loss": 0.6998, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 1.8778204913922782, | |
| "grad_norm": 0.028087392449378967, | |
| "learning_rate": 0.0001253060539996646, | |
| "loss": 0.697, | |
| "step": 11235 | |
| }, | |
| { | |
| "epoch": 1.8803275948520808, | |
| "grad_norm": 0.02853637933731079, | |
| "learning_rate": 0.00012520543350662418, | |
| "loss": 0.6874, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 1.8828346983118838, | |
| "grad_norm": 0.028400765731930733, | |
| "learning_rate": 0.00012510481301358377, | |
| "loss": 0.6881, | |
| "step": 11265 | |
| }, | |
| { | |
| "epoch": 1.8853418017716863, | |
| "grad_norm": 0.02928781695663929, | |
| "learning_rate": 0.00012500419252054336, | |
| "loss": 0.6951, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 1.8878489052314893, | |
| "grad_norm": 0.028838330879807472, | |
| "learning_rate": 0.00012490357202750294, | |
| "loss": 0.6857, | |
| "step": 11295 | |
| }, | |
| { | |
| "epoch": 1.8903560086912918, | |
| "grad_norm": 0.0293565783649683, | |
| "learning_rate": 0.00012480295153446253, | |
| "loss": 0.693, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 1.8928631121510948, | |
| "grad_norm": 0.02845110557973385, | |
| "learning_rate": 0.0001247023310414221, | |
| "loss": 0.6999, | |
| "step": 11325 | |
| }, | |
| { | |
| "epoch": 1.8953702156108976, | |
| "grad_norm": 0.029096076264977455, | |
| "learning_rate": 0.0001246017105483817, | |
| "loss": 0.6841, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 1.8978773190707003, | |
| "grad_norm": 0.029120532795786858, | |
| "learning_rate": 0.00012450109005534128, | |
| "loss": 0.7009, | |
| "step": 11355 | |
| }, | |
| { | |
| "epoch": 1.900384422530503, | |
| "grad_norm": 0.027919389307498932, | |
| "learning_rate": 0.00012440046956230087, | |
| "loss": 0.708, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 1.9028915259903059, | |
| "grad_norm": 0.02887488156557083, | |
| "learning_rate": 0.00012429984906926045, | |
| "loss": 0.7048, | |
| "step": 11385 | |
| }, | |
| { | |
| "epoch": 1.9053986294501086, | |
| "grad_norm": 0.028664030134677887, | |
| "learning_rate": 0.00012419922857622004, | |
| "loss": 0.7003, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 1.9079057329099114, | |
| "grad_norm": 0.028661739081144333, | |
| "learning_rate": 0.00012409860808317962, | |
| "loss": 0.7009, | |
| "step": 11415 | |
| }, | |
| { | |
| "epoch": 1.9104128363697142, | |
| "grad_norm": 0.02937045879662037, | |
| "learning_rate": 0.0001239979875901392, | |
| "loss": 0.6935, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 1.912919939829517, | |
| "grad_norm": 0.030395416542887688, | |
| "learning_rate": 0.0001238973670970988, | |
| "loss": 0.6808, | |
| "step": 11445 | |
| }, | |
| { | |
| "epoch": 1.91542704328932, | |
| "grad_norm": 0.030018294230103493, | |
| "learning_rate": 0.00012379674660405835, | |
| "loss": 0.6931, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 1.9179341467491224, | |
| "grad_norm": 0.029583923518657684, | |
| "learning_rate": 0.00012369612611101794, | |
| "loss": 0.6844, | |
| "step": 11475 | |
| }, | |
| { | |
| "epoch": 1.9204412502089254, | |
| "grad_norm": 0.028469126671552658, | |
| "learning_rate": 0.00012359550561797752, | |
| "loss": 0.7, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 1.922948353668728, | |
| "grad_norm": 0.029069840908050537, | |
| "learning_rate": 0.0001234948851249371, | |
| "loss": 0.6769, | |
| "step": 11505 | |
| }, | |
| { | |
| "epoch": 1.925455457128531, | |
| "grad_norm": 0.03039330244064331, | |
| "learning_rate": 0.0001233942646318967, | |
| "loss": 0.701, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 1.9279625605883335, | |
| "grad_norm": 0.029704933986067772, | |
| "learning_rate": 0.00012329364413885628, | |
| "loss": 0.6955, | |
| "step": 11535 | |
| }, | |
| { | |
| "epoch": 1.9304696640481365, | |
| "grad_norm": 0.02861003205180168, | |
| "learning_rate": 0.00012319302364581587, | |
| "loss": 0.7029, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 1.932976767507939, | |
| "grad_norm": 0.028516478836536407, | |
| "learning_rate": 0.00012309240315277545, | |
| "loss": 0.685, | |
| "step": 11565 | |
| }, | |
| { | |
| "epoch": 1.935483870967742, | |
| "grad_norm": 0.02939150668680668, | |
| "learning_rate": 0.00012299178265973504, | |
| "loss": 0.6862, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 1.9379909744275448, | |
| "grad_norm": 0.029078399762511253, | |
| "learning_rate": 0.00012289116216669462, | |
| "loss": 0.6915, | |
| "step": 11595 | |
| }, | |
| { | |
| "epoch": 1.9404980778873475, | |
| "grad_norm": 0.02967904321849346, | |
| "learning_rate": 0.0001227905416736542, | |
| "loss": 0.7078, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 1.9430051813471503, | |
| "grad_norm": 0.02986898459494114, | |
| "learning_rate": 0.0001226899211806138, | |
| "loss": 0.6962, | |
| "step": 11625 | |
| }, | |
| { | |
| "epoch": 1.945512284806953, | |
| "grad_norm": 0.029141373932361603, | |
| "learning_rate": 0.00012258930068757338, | |
| "loss": 0.6898, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 1.9480193882667558, | |
| "grad_norm": 0.02856113389134407, | |
| "learning_rate": 0.00012248868019453296, | |
| "loss": 0.6932, | |
| "step": 11655 | |
| }, | |
| { | |
| "epoch": 1.9505264917265586, | |
| "grad_norm": 0.02906043641269207, | |
| "learning_rate": 0.00012238805970149255, | |
| "loss": 0.6947, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 1.9530335951863613, | |
| "grad_norm": 0.028559362515807152, | |
| "learning_rate": 0.00012228743920845214, | |
| "loss": 0.6959, | |
| "step": 11685 | |
| }, | |
| { | |
| "epoch": 1.955540698646164, | |
| "grad_norm": 0.029632238671183586, | |
| "learning_rate": 0.00012218681871541172, | |
| "loss": 0.7044, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 1.958047802105967, | |
| "grad_norm": 0.028845706954598427, | |
| "learning_rate": 0.0001220861982223713, | |
| "loss": 0.6845, | |
| "step": 11715 | |
| }, | |
| { | |
| "epoch": 1.9605549055657696, | |
| "grad_norm": 0.029171636328101158, | |
| "learning_rate": 0.00012198557772933088, | |
| "loss": 0.7044, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 1.9630620090255726, | |
| "grad_norm": 0.030526766553521156, | |
| "learning_rate": 0.00012188495723629045, | |
| "loss": 0.6881, | |
| "step": 11745 | |
| }, | |
| { | |
| "epoch": 1.9655691124853751, | |
| "grad_norm": 0.029202323406934738, | |
| "learning_rate": 0.00012178433674325005, | |
| "loss": 0.6853, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 1.9680762159451781, | |
| "grad_norm": 0.028741231188178062, | |
| "learning_rate": 0.00012168371625020962, | |
| "loss": 0.7085, | |
| "step": 11775 | |
| }, | |
| { | |
| "epoch": 1.9705833194049807, | |
| "grad_norm": 0.029565809294581413, | |
| "learning_rate": 0.00012158309575716922, | |
| "loss": 0.6951, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 1.9730904228647836, | |
| "grad_norm": 0.029546387493610382, | |
| "learning_rate": 0.00012148247526412879, | |
| "loss": 0.6961, | |
| "step": 11805 | |
| }, | |
| { | |
| "epoch": 1.9755975263245862, | |
| "grad_norm": 0.029062774032354355, | |
| "learning_rate": 0.00012138185477108839, | |
| "loss": 0.6933, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 1.9781046297843892, | |
| "grad_norm": 0.028955336660146713, | |
| "learning_rate": 0.00012128123427804796, | |
| "loss": 0.6898, | |
| "step": 11835 | |
| }, | |
| { | |
| "epoch": 1.980611733244192, | |
| "grad_norm": 0.031218407675623894, | |
| "learning_rate": 0.00012118061378500756, | |
| "loss": 0.689, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 1.9831188367039947, | |
| "grad_norm": 0.030403736978769302, | |
| "learning_rate": 0.00012107999329196713, | |
| "loss": 0.6981, | |
| "step": 11865 | |
| }, | |
| { | |
| "epoch": 1.9856259401637975, | |
| "grad_norm": 0.030305424705147743, | |
| "learning_rate": 0.00012097937279892673, | |
| "loss": 0.6987, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 1.9881330436236002, | |
| "grad_norm": 0.029590345919132233, | |
| "learning_rate": 0.0001208787523058863, | |
| "loss": 0.6984, | |
| "step": 11895 | |
| }, | |
| { | |
| "epoch": 1.990640147083403, | |
| "grad_norm": 0.04296644404530525, | |
| "learning_rate": 0.00012077813181284589, | |
| "loss": 0.7018, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 1.9931472505432057, | |
| "grad_norm": 0.029970306903123856, | |
| "learning_rate": 0.00012067751131980548, | |
| "loss": 0.6887, | |
| "step": 11925 | |
| }, | |
| { | |
| "epoch": 1.9956543540030085, | |
| "grad_norm": 0.02884749509394169, | |
| "learning_rate": 0.00012057689082676506, | |
| "loss": 0.7004, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 1.9981614574628113, | |
| "grad_norm": 0.030533695593476295, | |
| "learning_rate": 0.00012047627033372463, | |
| "loss": 0.6883, | |
| "step": 11955 | |
| }, | |
| { | |
| "epoch": 2.0006685609226142, | |
| "grad_norm": 0.029126284644007683, | |
| "learning_rate": 0.00012037564984068423, | |
| "loss": 0.6984, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 2.003175664382417, | |
| "grad_norm": 0.029292147606611252, | |
| "learning_rate": 0.0001202750293476438, | |
| "loss": 0.6894, | |
| "step": 11985 | |
| }, | |
| { | |
| "epoch": 2.0056827678422198, | |
| "grad_norm": 0.029509389773011208, | |
| "learning_rate": 0.0001201744088546034, | |
| "loss": 0.6823, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.0081898713020223, | |
| "grad_norm": 0.02902618609368801, | |
| "learning_rate": 0.00012007378836156298, | |
| "loss": 0.6763, | |
| "step": 12015 | |
| }, | |
| { | |
| "epoch": 2.0106969747618253, | |
| "grad_norm": 0.028685985133051872, | |
| "learning_rate": 0.00011997316786852255, | |
| "loss": 0.6903, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 2.013204078221628, | |
| "grad_norm": 0.029849760234355927, | |
| "learning_rate": 0.00011987254737548215, | |
| "loss": 0.6886, | |
| "step": 12045 | |
| }, | |
| { | |
| "epoch": 2.015711181681431, | |
| "grad_norm": 0.030097436159849167, | |
| "learning_rate": 0.00011977192688244172, | |
| "loss": 0.6868, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 2.0182182851412334, | |
| "grad_norm": 0.02963315322995186, | |
| "learning_rate": 0.00011967130638940132, | |
| "loss": 0.6856, | |
| "step": 12075 | |
| }, | |
| { | |
| "epoch": 2.0207253886010363, | |
| "grad_norm": 0.030087383463978767, | |
| "learning_rate": 0.00011957068589636089, | |
| "loss": 0.6886, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 2.023232492060839, | |
| "grad_norm": 0.029318705201148987, | |
| "learning_rate": 0.00011947006540332049, | |
| "loss": 0.6734, | |
| "step": 12105 | |
| }, | |
| { | |
| "epoch": 2.025739595520642, | |
| "grad_norm": 0.029196394607424736, | |
| "learning_rate": 0.00011936944491028006, | |
| "loss": 0.674, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 2.0282466989804444, | |
| "grad_norm": 0.029127739369869232, | |
| "learning_rate": 0.00011926882441723966, | |
| "loss": 0.6875, | |
| "step": 12135 | |
| }, | |
| { | |
| "epoch": 2.0307538024402474, | |
| "grad_norm": 0.029445838183164597, | |
| "learning_rate": 0.00011916820392419923, | |
| "loss": 0.6869, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 2.03326090590005, | |
| "grad_norm": 0.029497170820832253, | |
| "learning_rate": 0.00011906758343115883, | |
| "loss": 0.6717, | |
| "step": 12165 | |
| }, | |
| { | |
| "epoch": 2.035768009359853, | |
| "grad_norm": 0.028793711215257645, | |
| "learning_rate": 0.0001189669629381184, | |
| "loss": 0.682, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 2.038275112819656, | |
| "grad_norm": 0.029894977807998657, | |
| "learning_rate": 0.00011886634244507799, | |
| "loss": 0.6821, | |
| "step": 12195 | |
| }, | |
| { | |
| "epoch": 2.0407822162794584, | |
| "grad_norm": 0.028813883662223816, | |
| "learning_rate": 0.00011876572195203757, | |
| "loss": 0.6678, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 2.0432893197392614, | |
| "grad_norm": 0.029816757887601852, | |
| "learning_rate": 0.00011866510145899716, | |
| "loss": 0.693, | |
| "step": 12225 | |
| }, | |
| { | |
| "epoch": 2.045796423199064, | |
| "grad_norm": 0.03083239123225212, | |
| "learning_rate": 0.00011856448096595673, | |
| "loss": 0.681, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 2.048303526658867, | |
| "grad_norm": 0.029679182916879654, | |
| "learning_rate": 0.00011846386047291633, | |
| "loss": 0.6742, | |
| "step": 12255 | |
| }, | |
| { | |
| "epoch": 2.0508106301186695, | |
| "grad_norm": 0.03096550703048706, | |
| "learning_rate": 0.0001183632399798759, | |
| "loss": 0.6836, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 2.0533177335784725, | |
| "grad_norm": 0.030012456700205803, | |
| "learning_rate": 0.0001182626194868355, | |
| "loss": 0.6819, | |
| "step": 12285 | |
| }, | |
| { | |
| "epoch": 2.055824837038275, | |
| "grad_norm": 0.029759397730231285, | |
| "learning_rate": 0.00011816199899379507, | |
| "loss": 0.6781, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.058331940498078, | |
| "grad_norm": 0.030046438798308372, | |
| "learning_rate": 0.00011806137850075464, | |
| "loss": 0.6787, | |
| "step": 12315 | |
| }, | |
| { | |
| "epoch": 2.0608390439578805, | |
| "grad_norm": 0.02959163673222065, | |
| "learning_rate": 0.00011796075800771424, | |
| "loss": 0.6828, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 2.0633461474176835, | |
| "grad_norm": 0.02911483868956566, | |
| "learning_rate": 0.00011786013751467382, | |
| "loss": 0.682, | |
| "step": 12345 | |
| }, | |
| { | |
| "epoch": 2.065853250877486, | |
| "grad_norm": 0.04046880826354027, | |
| "learning_rate": 0.00011775951702163341, | |
| "loss": 0.6852, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 2.068360354337289, | |
| "grad_norm": 0.030412757769227028, | |
| "learning_rate": 0.00011765889652859299, | |
| "loss": 0.6783, | |
| "step": 12375 | |
| }, | |
| { | |
| "epoch": 2.0708674577970916, | |
| "grad_norm": 0.029883218929171562, | |
| "learning_rate": 0.00011755827603555259, | |
| "loss": 0.6774, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 2.0733745612568946, | |
| "grad_norm": 0.029417937621474266, | |
| "learning_rate": 0.00011745765554251216, | |
| "loss": 0.691, | |
| "step": 12405 | |
| }, | |
| { | |
| "epoch": 2.075881664716697, | |
| "grad_norm": 0.03051302768290043, | |
| "learning_rate": 0.00011735703504947176, | |
| "loss": 0.6871, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 2.0783887681765, | |
| "grad_norm": 0.030459176748991013, | |
| "learning_rate": 0.00011725641455643133, | |
| "loss": 0.6892, | |
| "step": 12435 | |
| }, | |
| { | |
| "epoch": 2.0808958716363026, | |
| "grad_norm": 0.030476195737719536, | |
| "learning_rate": 0.00011715579406339093, | |
| "loss": 0.6875, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 2.0834029750961056, | |
| "grad_norm": 0.02982410229742527, | |
| "learning_rate": 0.0001170551735703505, | |
| "loss": 0.6623, | |
| "step": 12465 | |
| }, | |
| { | |
| "epoch": 2.0859100785559086, | |
| "grad_norm": 0.030465099960565567, | |
| "learning_rate": 0.0001169545530773101, | |
| "loss": 0.6841, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 2.088417182015711, | |
| "grad_norm": 0.029227489605545998, | |
| "learning_rate": 0.00011685393258426967, | |
| "loss": 0.6777, | |
| "step": 12495 | |
| }, | |
| { | |
| "epoch": 2.090924285475514, | |
| "grad_norm": 0.029344556853175163, | |
| "learning_rate": 0.00011675331209122926, | |
| "loss": 0.6823, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 2.0934313889353167, | |
| "grad_norm": 0.030551349744200706, | |
| "learning_rate": 0.00011665269159818884, | |
| "loss": 0.6872, | |
| "step": 12525 | |
| }, | |
| { | |
| "epoch": 2.0959384923951196, | |
| "grad_norm": 0.03063136897981167, | |
| "learning_rate": 0.00011655207110514843, | |
| "loss": 0.6767, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 2.098445595854922, | |
| "grad_norm": 0.02986333705484867, | |
| "learning_rate": 0.000116451450612108, | |
| "loss": 0.6941, | |
| "step": 12555 | |
| }, | |
| { | |
| "epoch": 2.100952699314725, | |
| "grad_norm": 0.030152348801493645, | |
| "learning_rate": 0.0001163508301190676, | |
| "loss": 0.6832, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 2.1034598027745277, | |
| "grad_norm": 0.029383687302470207, | |
| "learning_rate": 0.00011625020962602717, | |
| "loss": 0.6676, | |
| "step": 12585 | |
| }, | |
| { | |
| "epoch": 2.1059669062343307, | |
| "grad_norm": 0.03019135817885399, | |
| "learning_rate": 0.00011614958913298674, | |
| "loss": 0.6735, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.1084740096941332, | |
| "grad_norm": 0.030429605394601822, | |
| "learning_rate": 0.00011604896863994634, | |
| "loss": 0.6837, | |
| "step": 12615 | |
| }, | |
| { | |
| "epoch": 2.110981113153936, | |
| "grad_norm": 0.031370870769023895, | |
| "learning_rate": 0.00011594834814690591, | |
| "loss": 0.6753, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 2.1134882166137388, | |
| "grad_norm": 0.030195990577340126, | |
| "learning_rate": 0.00011584772765386551, | |
| "loss": 0.669, | |
| "step": 12645 | |
| }, | |
| { | |
| "epoch": 2.1159953200735417, | |
| "grad_norm": 0.03015013597905636, | |
| "learning_rate": 0.00011574710716082508, | |
| "loss": 0.6868, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 2.1185024235333443, | |
| "grad_norm": 0.030749835073947906, | |
| "learning_rate": 0.00011564648666778468, | |
| "loss": 0.6664, | |
| "step": 12675 | |
| }, | |
| { | |
| "epoch": 2.1210095269931473, | |
| "grad_norm": 0.03003542125225067, | |
| "learning_rate": 0.00011554586617474425, | |
| "loss": 0.6884, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 2.12351663045295, | |
| "grad_norm": 0.02948312647640705, | |
| "learning_rate": 0.00011544524568170385, | |
| "loss": 0.686, | |
| "step": 12705 | |
| }, | |
| { | |
| "epoch": 2.126023733912753, | |
| "grad_norm": 0.03116905875504017, | |
| "learning_rate": 0.00011534462518866342, | |
| "loss": 0.6917, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 2.1285308373725558, | |
| "grad_norm": 0.03057217039167881, | |
| "learning_rate": 0.00011524400469562302, | |
| "loss": 0.6893, | |
| "step": 12735 | |
| }, | |
| { | |
| "epoch": 2.1310379408323583, | |
| "grad_norm": 0.03055824153125286, | |
| "learning_rate": 0.0001151433842025826, | |
| "loss": 0.6749, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 2.1335450442921613, | |
| "grad_norm": 0.030194489285349846, | |
| "learning_rate": 0.0001150427637095422, | |
| "loss": 0.6841, | |
| "step": 12765 | |
| }, | |
| { | |
| "epoch": 2.136052147751964, | |
| "grad_norm": 0.030030904337763786, | |
| "learning_rate": 0.00011494214321650177, | |
| "loss": 0.6755, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 2.138559251211767, | |
| "grad_norm": 0.030531438067555428, | |
| "learning_rate": 0.00011484152272346137, | |
| "loss": 0.6885, | |
| "step": 12795 | |
| }, | |
| { | |
| "epoch": 2.1410663546715694, | |
| "grad_norm": 0.031014693900942802, | |
| "learning_rate": 0.00011474090223042094, | |
| "loss": 0.6872, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 2.1435734581313723, | |
| "grad_norm": 0.03255138173699379, | |
| "learning_rate": 0.00011464028173738052, | |
| "loss": 0.6811, | |
| "step": 12825 | |
| }, | |
| { | |
| "epoch": 2.146080561591175, | |
| "grad_norm": 0.02984030731022358, | |
| "learning_rate": 0.00011453966124434011, | |
| "loss": 0.684, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 2.148587665050978, | |
| "grad_norm": 0.03038971871137619, | |
| "learning_rate": 0.0001144390407512997, | |
| "loss": 0.6901, | |
| "step": 12855 | |
| }, | |
| { | |
| "epoch": 2.1510947685107804, | |
| "grad_norm": 0.03030613623559475, | |
| "learning_rate": 0.00011433842025825927, | |
| "loss": 0.6815, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 2.1536018719705834, | |
| "grad_norm": 0.03107587993144989, | |
| "learning_rate": 0.00011423779976521885, | |
| "loss": 0.6773, | |
| "step": 12885 | |
| }, | |
| { | |
| "epoch": 2.156108975430386, | |
| "grad_norm": 0.030311092734336853, | |
| "learning_rate": 0.00011413717927217844, | |
| "loss": 0.6859, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.158616078890189, | |
| "grad_norm": 0.03004043735563755, | |
| "learning_rate": 0.00011403655877913801, | |
| "loss": 0.677, | |
| "step": 12915 | |
| }, | |
| { | |
| "epoch": 2.1611231823499915, | |
| "grad_norm": 0.02978183701634407, | |
| "learning_rate": 0.00011393593828609761, | |
| "loss": 0.686, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 2.1636302858097944, | |
| "grad_norm": 0.030549898743629456, | |
| "learning_rate": 0.00011383531779305718, | |
| "loss": 0.6875, | |
| "step": 12945 | |
| }, | |
| { | |
| "epoch": 2.166137389269597, | |
| "grad_norm": 0.030601589009165764, | |
| "learning_rate": 0.00011373469730001678, | |
| "loss": 0.6817, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 2.1686444927294, | |
| "grad_norm": 0.030580811202526093, | |
| "learning_rate": 0.00011363407680697635, | |
| "loss": 0.6887, | |
| "step": 12975 | |
| }, | |
| { | |
| "epoch": 2.171151596189203, | |
| "grad_norm": 0.030157998204231262, | |
| "learning_rate": 0.00011353345631393595, | |
| "loss": 0.693, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 2.1736586996490055, | |
| "grad_norm": 0.03086373209953308, | |
| "learning_rate": 0.00011343283582089552, | |
| "loss": 0.6962, | |
| "step": 13005 | |
| }, | |
| { | |
| "epoch": 2.1761658031088085, | |
| "grad_norm": 0.02979792095720768, | |
| "learning_rate": 0.00011333221532785512, | |
| "loss": 0.6756, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 2.178672906568611, | |
| "grad_norm": 0.03019995242357254, | |
| "learning_rate": 0.00011323159483481469, | |
| "loss": 0.6951, | |
| "step": 13035 | |
| }, | |
| { | |
| "epoch": 2.181180010028414, | |
| "grad_norm": 0.030209194868803024, | |
| "learning_rate": 0.00011313097434177429, | |
| "loss": 0.6904, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 2.1836871134882165, | |
| "grad_norm": 0.030985839664936066, | |
| "learning_rate": 0.00011303035384873386, | |
| "loss": 0.6808, | |
| "step": 13065 | |
| }, | |
| { | |
| "epoch": 2.1861942169480195, | |
| "grad_norm": 0.03027096390724182, | |
| "learning_rate": 0.00011292973335569346, | |
| "loss": 0.6892, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 2.188701320407822, | |
| "grad_norm": 0.03128921985626221, | |
| "learning_rate": 0.00011282911286265303, | |
| "loss": 0.6841, | |
| "step": 13095 | |
| }, | |
| { | |
| "epoch": 2.191208423867625, | |
| "grad_norm": 0.030639823526144028, | |
| "learning_rate": 0.00011272849236961262, | |
| "loss": 0.6708, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 2.1937155273274276, | |
| "grad_norm": 0.030816158279776573, | |
| "learning_rate": 0.0001126278718765722, | |
| "loss": 0.6792, | |
| "step": 13125 | |
| }, | |
| { | |
| "epoch": 2.1962226307872306, | |
| "grad_norm": 0.03019116260111332, | |
| "learning_rate": 0.00011252725138353179, | |
| "loss": 0.6777, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 2.198729734247033, | |
| "grad_norm": 0.030292050912976265, | |
| "learning_rate": 0.00011242663089049136, | |
| "loss": 0.6999, | |
| "step": 13155 | |
| }, | |
| { | |
| "epoch": 2.201236837706836, | |
| "grad_norm": 0.029916753992438316, | |
| "learning_rate": 0.00011232601039745095, | |
| "loss": 0.6784, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 2.2037439411666386, | |
| "grad_norm": 0.029692910611629486, | |
| "learning_rate": 0.00011222538990441053, | |
| "loss": 0.6921, | |
| "step": 13185 | |
| }, | |
| { | |
| "epoch": 2.2062510446264416, | |
| "grad_norm": 0.030788224190473557, | |
| "learning_rate": 0.0001121247694113701, | |
| "loss": 0.6714, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 2.208758148086244, | |
| "grad_norm": 0.031961727887392044, | |
| "learning_rate": 0.0001120241489183297, | |
| "loss": 0.6818, | |
| "step": 13215 | |
| }, | |
| { | |
| "epoch": 2.211265251546047, | |
| "grad_norm": 0.030589012429118156, | |
| "learning_rate": 0.00011192352842528928, | |
| "loss": 0.6863, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 2.21377235500585, | |
| "grad_norm": 0.03072304092347622, | |
| "learning_rate": 0.00011182290793224888, | |
| "loss": 0.6854, | |
| "step": 13245 | |
| }, | |
| { | |
| "epoch": 2.2162794584656527, | |
| "grad_norm": 0.030577028170228004, | |
| "learning_rate": 0.00011172228743920845, | |
| "loss": 0.6781, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 2.2187865619254556, | |
| "grad_norm": 0.030161473900079727, | |
| "learning_rate": 0.00011162166694616805, | |
| "loss": 0.6824, | |
| "step": 13275 | |
| }, | |
| { | |
| "epoch": 2.221293665385258, | |
| "grad_norm": 0.030237851664423943, | |
| "learning_rate": 0.00011152104645312762, | |
| "loss": 0.6808, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 2.223800768845061, | |
| "grad_norm": 0.030910607427358627, | |
| "learning_rate": 0.00011142042596008722, | |
| "loss": 0.6819, | |
| "step": 13305 | |
| }, | |
| { | |
| "epoch": 2.2263078723048637, | |
| "grad_norm": 0.03041113168001175, | |
| "learning_rate": 0.00011131980546704679, | |
| "loss": 0.6784, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 2.2288149757646667, | |
| "grad_norm": 0.0322742834687233, | |
| "learning_rate": 0.00011121918497400639, | |
| "loss": 0.6695, | |
| "step": 13335 | |
| }, | |
| { | |
| "epoch": 2.2313220792244692, | |
| "grad_norm": 0.03125980496406555, | |
| "learning_rate": 0.00011111856448096596, | |
| "loss": 0.681, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 2.233829182684272, | |
| "grad_norm": 0.030773991718888283, | |
| "learning_rate": 0.00011101794398792556, | |
| "loss": 0.6867, | |
| "step": 13365 | |
| }, | |
| { | |
| "epoch": 2.2363362861440748, | |
| "grad_norm": 0.03200787305831909, | |
| "learning_rate": 0.00011091732349488513, | |
| "loss": 0.691, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 2.2388433896038777, | |
| "grad_norm": 0.03116571344435215, | |
| "learning_rate": 0.00011081670300184473, | |
| "loss": 0.671, | |
| "step": 13395 | |
| }, | |
| { | |
| "epoch": 2.2413504930636803, | |
| "grad_norm": 0.031088994815945625, | |
| "learning_rate": 0.0001107160825088043, | |
| "loss": 0.6726, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 2.2438575965234833, | |
| "grad_norm": 0.03130762279033661, | |
| "learning_rate": 0.00011061546201576389, | |
| "loss": 0.6948, | |
| "step": 13425 | |
| }, | |
| { | |
| "epoch": 2.246364699983286, | |
| "grad_norm": 0.03147103264927864, | |
| "learning_rate": 0.00011051484152272347, | |
| "loss": 0.6778, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 2.248871803443089, | |
| "grad_norm": 0.02998683787882328, | |
| "learning_rate": 0.00011041422102968304, | |
| "loss": 0.6996, | |
| "step": 13455 | |
| }, | |
| { | |
| "epoch": 2.2513789069028913, | |
| "grad_norm": 0.03249230980873108, | |
| "learning_rate": 0.00011031360053664263, | |
| "loss": 0.6949, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 2.2538860103626943, | |
| "grad_norm": 0.030694512650370598, | |
| "learning_rate": 0.00011021298004360222, | |
| "loss": 0.6806, | |
| "step": 13485 | |
| }, | |
| { | |
| "epoch": 2.2563931138224973, | |
| "grad_norm": 0.0317358560860157, | |
| "learning_rate": 0.0001101123595505618, | |
| "loss": 0.6844, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.2589002172823, | |
| "grad_norm": 0.029508093371987343, | |
| "learning_rate": 0.00011001173905752137, | |
| "loss": 0.6723, | |
| "step": 13515 | |
| }, | |
| { | |
| "epoch": 2.2614073207421024, | |
| "grad_norm": 0.03101976215839386, | |
| "learning_rate": 0.00010991111856448097, | |
| "loss": 0.6689, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 2.2639144242019054, | |
| "grad_norm": 0.030808012932538986, | |
| "learning_rate": 0.00010981049807144054, | |
| "loss": 0.6701, | |
| "step": 13545 | |
| }, | |
| { | |
| "epoch": 2.2664215276617083, | |
| "grad_norm": 0.03057938627898693, | |
| "learning_rate": 0.00010970987757840014, | |
| "loss": 0.684, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 2.268928631121511, | |
| "grad_norm": 0.03127751499414444, | |
| "learning_rate": 0.00010960925708535971, | |
| "loss": 0.6776, | |
| "step": 13575 | |
| }, | |
| { | |
| "epoch": 2.271435734581314, | |
| "grad_norm": 0.02989344857633114, | |
| "learning_rate": 0.00010950863659231931, | |
| "loss": 0.6889, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 2.2739428380411164, | |
| "grad_norm": 0.03043249435722828, | |
| "learning_rate": 0.00010940801609927889, | |
| "loss": 0.6794, | |
| "step": 13605 | |
| }, | |
| { | |
| "epoch": 2.2764499415009194, | |
| "grad_norm": 0.030408738180994987, | |
| "learning_rate": 0.00010930739560623848, | |
| "loss": 0.6815, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 2.278957044960722, | |
| "grad_norm": 0.030735976994037628, | |
| "learning_rate": 0.00010920677511319806, | |
| "loss": 0.6809, | |
| "step": 13635 | |
| }, | |
| { | |
| "epoch": 2.281464148420525, | |
| "grad_norm": 0.0312831737101078, | |
| "learning_rate": 0.00010910615462015766, | |
| "loss": 0.6788, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 2.2839712518803275, | |
| "grad_norm": 0.030336899682879448, | |
| "learning_rate": 0.00010900553412711723, | |
| "loss": 0.6737, | |
| "step": 13665 | |
| }, | |
| { | |
| "epoch": 2.2864783553401304, | |
| "grad_norm": 0.030938081443309784, | |
| "learning_rate": 0.00010890491363407683, | |
| "loss": 0.6803, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 2.288985458799933, | |
| "grad_norm": 0.02994300052523613, | |
| "learning_rate": 0.0001088042931410364, | |
| "loss": 0.6714, | |
| "step": 13695 | |
| }, | |
| { | |
| "epoch": 2.291492562259736, | |
| "grad_norm": 0.03124346025288105, | |
| "learning_rate": 0.000108703672647996, | |
| "loss": 0.678, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 2.2939996657195385, | |
| "grad_norm": 0.030526146292686462, | |
| "learning_rate": 0.00010860305215495557, | |
| "loss": 0.6815, | |
| "step": 13725 | |
| }, | |
| { | |
| "epoch": 2.2965067691793415, | |
| "grad_norm": 0.03184838965535164, | |
| "learning_rate": 0.00010850243166191514, | |
| "loss": 0.6768, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 2.2990138726391445, | |
| "grad_norm": 0.03009560890495777, | |
| "learning_rate": 0.00010840181116887474, | |
| "loss": 0.6768, | |
| "step": 13755 | |
| }, | |
| { | |
| "epoch": 2.301520976098947, | |
| "grad_norm": 0.029740184545516968, | |
| "learning_rate": 0.00010830119067583431, | |
| "loss": 0.681, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 2.3040280795587496, | |
| "grad_norm": 0.030534571036696434, | |
| "learning_rate": 0.0001082005701827939, | |
| "loss": 0.6739, | |
| "step": 13785 | |
| }, | |
| { | |
| "epoch": 2.3065351830185525, | |
| "grad_norm": 0.030200140550732613, | |
| "learning_rate": 0.00010809994968975348, | |
| "loss": 0.6695, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 2.3090422864783555, | |
| "grad_norm": 0.031782638281583786, | |
| "learning_rate": 0.00010799932919671307, | |
| "loss": 0.6866, | |
| "step": 13815 | |
| }, | |
| { | |
| "epoch": 2.311549389938158, | |
| "grad_norm": 0.03087507374584675, | |
| "learning_rate": 0.00010789870870367264, | |
| "loss": 0.6717, | |
| "step": 13830 | |
| }, | |
| { | |
| "epoch": 2.314056493397961, | |
| "grad_norm": 0.030710799619555473, | |
| "learning_rate": 0.00010779808821063224, | |
| "loss": 0.6882, | |
| "step": 13845 | |
| }, | |
| { | |
| "epoch": 2.3165635968577636, | |
| "grad_norm": 0.030561743304133415, | |
| "learning_rate": 0.00010769746771759181, | |
| "loss": 0.6814, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 2.3190707003175666, | |
| "grad_norm": 0.030251817777752876, | |
| "learning_rate": 0.00010759684722455141, | |
| "loss": 0.6747, | |
| "step": 13875 | |
| }, | |
| { | |
| "epoch": 2.321577803777369, | |
| "grad_norm": 0.030898461118340492, | |
| "learning_rate": 0.00010749622673151098, | |
| "loss": 0.6678, | |
| "step": 13890 | |
| }, | |
| { | |
| "epoch": 2.324084907237172, | |
| "grad_norm": 0.031910572201013565, | |
| "learning_rate": 0.00010739560623847058, | |
| "loss": 0.6873, | |
| "step": 13905 | |
| }, | |
| { | |
| "epoch": 2.3265920106969746, | |
| "grad_norm": 0.031096691265702248, | |
| "learning_rate": 0.00010729498574543015, | |
| "loss": 0.6761, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 2.3290991141567776, | |
| "grad_norm": 0.030930999666452408, | |
| "learning_rate": 0.00010719436525238975, | |
| "loss": 0.6842, | |
| "step": 13935 | |
| }, | |
| { | |
| "epoch": 2.33160621761658, | |
| "grad_norm": 0.030477695167064667, | |
| "learning_rate": 0.00010709374475934932, | |
| "loss": 0.6784, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 2.334113321076383, | |
| "grad_norm": 0.03102184645831585, | |
| "learning_rate": 0.00010699312426630892, | |
| "loss": 0.679, | |
| "step": 13965 | |
| }, | |
| { | |
| "epoch": 2.3366204245361857, | |
| "grad_norm": 0.02999734878540039, | |
| "learning_rate": 0.0001068925037732685, | |
| "loss": 0.6786, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 2.3391275279959887, | |
| "grad_norm": 0.030323563143610954, | |
| "learning_rate": 0.0001067918832802281, | |
| "loss": 0.6825, | |
| "step": 13995 | |
| }, | |
| { | |
| "epoch": 2.3416346314557916, | |
| "grad_norm": 0.030984263867139816, | |
| "learning_rate": 0.00010669126278718767, | |
| "loss": 0.6798, | |
| "step": 14010 | |
| }, | |
| { | |
| "epoch": 2.344141734915594, | |
| "grad_norm": 0.03151758387684822, | |
| "learning_rate": 0.00010659064229414724, | |
| "loss": 0.6821, | |
| "step": 14025 | |
| }, | |
| { | |
| "epoch": 2.3466488383753967, | |
| "grad_norm": 0.03008199669420719, | |
| "learning_rate": 0.00010649002180110684, | |
| "loss": 0.6778, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 2.3491559418351997, | |
| "grad_norm": 0.030592739582061768, | |
| "learning_rate": 0.00010638940130806641, | |
| "loss": 0.6629, | |
| "step": 14055 | |
| }, | |
| { | |
| "epoch": 2.3516630452950027, | |
| "grad_norm": 0.030223028734326363, | |
| "learning_rate": 0.000106288780815026, | |
| "loss": 0.6807, | |
| "step": 14070 | |
| }, | |
| { | |
| "epoch": 2.3541701487548052, | |
| "grad_norm": 0.03019655868411064, | |
| "learning_rate": 0.00010618816032198558, | |
| "loss": 0.6875, | |
| "step": 14085 | |
| }, | |
| { | |
| "epoch": 2.356677252214608, | |
| "grad_norm": 0.03179163858294487, | |
| "learning_rate": 0.00010608753982894517, | |
| "loss": 0.6744, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 2.3591843556744108, | |
| "grad_norm": 0.030132126063108444, | |
| "learning_rate": 0.00010598691933590474, | |
| "loss": 0.6768, | |
| "step": 14115 | |
| }, | |
| { | |
| "epoch": 2.3616914591342137, | |
| "grad_norm": 0.03125820681452751, | |
| "learning_rate": 0.00010588629884286434, | |
| "loss": 0.6734, | |
| "step": 14130 | |
| }, | |
| { | |
| "epoch": 2.3641985625940163, | |
| "grad_norm": 0.03128393739461899, | |
| "learning_rate": 0.00010578567834982391, | |
| "loss": 0.6643, | |
| "step": 14145 | |
| }, | |
| { | |
| "epoch": 2.3667056660538193, | |
| "grad_norm": 0.031101234257221222, | |
| "learning_rate": 0.00010568505785678351, | |
| "loss": 0.6937, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 2.369212769513622, | |
| "grad_norm": 0.03127965331077576, | |
| "learning_rate": 0.00010558443736374308, | |
| "loss": 0.6837, | |
| "step": 14175 | |
| }, | |
| { | |
| "epoch": 2.371719872973425, | |
| "grad_norm": 0.03142804279923439, | |
| "learning_rate": 0.00010548381687070268, | |
| "loss": 0.6643, | |
| "step": 14190 | |
| }, | |
| { | |
| "epoch": 2.3742269764332273, | |
| "grad_norm": 0.03196566551923752, | |
| "learning_rate": 0.00010538319637766225, | |
| "loss": 0.6737, | |
| "step": 14205 | |
| }, | |
| { | |
| "epoch": 2.3767340798930303, | |
| "grad_norm": 0.03105044923722744, | |
| "learning_rate": 0.00010528257588462185, | |
| "loss": 0.6733, | |
| "step": 14220 | |
| }, | |
| { | |
| "epoch": 2.379241183352833, | |
| "grad_norm": 0.030758565291762352, | |
| "learning_rate": 0.00010518195539158142, | |
| "loss": 0.6959, | |
| "step": 14235 | |
| }, | |
| { | |
| "epoch": 2.381748286812636, | |
| "grad_norm": 0.03046661615371704, | |
| "learning_rate": 0.00010508133489854102, | |
| "loss": 0.6958, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 2.384255390272439, | |
| "grad_norm": 0.03125166893005371, | |
| "learning_rate": 0.00010498071440550059, | |
| "loss": 0.6763, | |
| "step": 14265 | |
| }, | |
| { | |
| "epoch": 2.3867624937322414, | |
| "grad_norm": 0.031636305153369904, | |
| "learning_rate": 0.00010488009391246019, | |
| "loss": 0.6794, | |
| "step": 14280 | |
| }, | |
| { | |
| "epoch": 2.389269597192044, | |
| "grad_norm": 0.030563022941350937, | |
| "learning_rate": 0.00010477947341941976, | |
| "loss": 0.6874, | |
| "step": 14295 | |
| }, | |
| { | |
| "epoch": 2.391776700651847, | |
| "grad_norm": 0.03061690181493759, | |
| "learning_rate": 0.00010467885292637933, | |
| "loss": 0.6782, | |
| "step": 14310 | |
| }, | |
| { | |
| "epoch": 2.39428380411165, | |
| "grad_norm": 0.0308393444865942, | |
| "learning_rate": 0.00010457823243333893, | |
| "loss": 0.6777, | |
| "step": 14325 | |
| }, | |
| { | |
| "epoch": 2.3967909075714524, | |
| "grad_norm": 0.030834507197141647, | |
| "learning_rate": 0.0001044776119402985, | |
| "loss": 0.6854, | |
| "step": 14340 | |
| }, | |
| { | |
| "epoch": 2.3992980110312554, | |
| "grad_norm": 0.031078575178980827, | |
| "learning_rate": 0.0001043769914472581, | |
| "loss": 0.6844, | |
| "step": 14355 | |
| }, | |
| { | |
| "epoch": 2.401805114491058, | |
| "grad_norm": 0.030426884070038795, | |
| "learning_rate": 0.00010427637095421768, | |
| "loss": 0.6701, | |
| "step": 14370 | |
| }, | |
| { | |
| "epoch": 2.404312217950861, | |
| "grad_norm": 0.03103550709784031, | |
| "learning_rate": 0.00010417575046117726, | |
| "loss": 0.685, | |
| "step": 14385 | |
| }, | |
| { | |
| "epoch": 2.4068193214106635, | |
| "grad_norm": 0.030895834788680077, | |
| "learning_rate": 0.00010407512996813685, | |
| "loss": 0.6798, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 2.4093264248704664, | |
| "grad_norm": 0.029942205175757408, | |
| "learning_rate": 0.00010397450947509643, | |
| "loss": 0.6848, | |
| "step": 14415 | |
| }, | |
| { | |
| "epoch": 2.411833528330269, | |
| "grad_norm": 0.03145187348127365, | |
| "learning_rate": 0.000103873888982056, | |
| "loss": 0.6745, | |
| "step": 14430 | |
| }, | |
| { | |
| "epoch": 2.414340631790072, | |
| "grad_norm": 0.03102920390665531, | |
| "learning_rate": 0.0001037732684890156, | |
| "loss": 0.6963, | |
| "step": 14445 | |
| }, | |
| { | |
| "epoch": 2.4168477352498745, | |
| "grad_norm": 0.030479585751891136, | |
| "learning_rate": 0.00010367264799597518, | |
| "loss": 0.6779, | |
| "step": 14460 | |
| }, | |
| { | |
| "epoch": 2.4193548387096775, | |
| "grad_norm": 0.0313333161175251, | |
| "learning_rate": 0.00010357202750293477, | |
| "loss": 0.6675, | |
| "step": 14475 | |
| }, | |
| { | |
| "epoch": 2.42186194216948, | |
| "grad_norm": 0.031193213537335396, | |
| "learning_rate": 0.00010347140700989435, | |
| "loss": 0.6709, | |
| "step": 14490 | |
| }, | |
| { | |
| "epoch": 2.424369045629283, | |
| "grad_norm": 0.031854551285505295, | |
| "learning_rate": 0.00010337078651685395, | |
| "loss": 0.6832, | |
| "step": 14505 | |
| }, | |
| { | |
| "epoch": 2.4268761490890856, | |
| "grad_norm": 0.03131631389260292, | |
| "learning_rate": 0.00010327016602381352, | |
| "loss": 0.6831, | |
| "step": 14520 | |
| }, | |
| { | |
| "epoch": 2.4293832525488885, | |
| "grad_norm": 0.030897963792085648, | |
| "learning_rate": 0.00010316954553077312, | |
| "loss": 0.6779, | |
| "step": 14535 | |
| }, | |
| { | |
| "epoch": 2.431890356008691, | |
| "grad_norm": 0.030229298397898674, | |
| "learning_rate": 0.00010306892503773269, | |
| "loss": 0.6928, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 2.434397459468494, | |
| "grad_norm": 0.03158511593937874, | |
| "learning_rate": 0.00010296830454469229, | |
| "loss": 0.6812, | |
| "step": 14565 | |
| }, | |
| { | |
| "epoch": 2.436904562928297, | |
| "grad_norm": 0.03185586631298065, | |
| "learning_rate": 0.00010286768405165186, | |
| "loss": 0.6707, | |
| "step": 14580 | |
| }, | |
| { | |
| "epoch": 2.4394116663880996, | |
| "grad_norm": 0.03139151632785797, | |
| "learning_rate": 0.00010276706355861143, | |
| "loss": 0.6814, | |
| "step": 14595 | |
| }, | |
| { | |
| "epoch": 2.4419187698479026, | |
| "grad_norm": 0.03182042017579079, | |
| "learning_rate": 0.00010266644306557103, | |
| "loss": 0.6663, | |
| "step": 14610 | |
| }, | |
| { | |
| "epoch": 2.444425873307705, | |
| "grad_norm": 0.030850499868392944, | |
| "learning_rate": 0.0001025658225725306, | |
| "loss": 0.6937, | |
| "step": 14625 | |
| }, | |
| { | |
| "epoch": 2.446932976767508, | |
| "grad_norm": 0.032495591789484024, | |
| "learning_rate": 0.0001024652020794902, | |
| "loss": 0.6588, | |
| "step": 14640 | |
| }, | |
| { | |
| "epoch": 2.4494400802273106, | |
| "grad_norm": 0.03162992000579834, | |
| "learning_rate": 0.00010236458158644977, | |
| "loss": 0.6848, | |
| "step": 14655 | |
| }, | |
| { | |
| "epoch": 2.4519471836871136, | |
| "grad_norm": 0.031871598213911057, | |
| "learning_rate": 0.00010226396109340937, | |
| "loss": 0.6743, | |
| "step": 14670 | |
| }, | |
| { | |
| "epoch": 2.454454287146916, | |
| "grad_norm": 0.031383831053972244, | |
| "learning_rate": 0.00010216334060036894, | |
| "loss": 0.6861, | |
| "step": 14685 | |
| }, | |
| { | |
| "epoch": 2.456961390606719, | |
| "grad_norm": 0.03176445513963699, | |
| "learning_rate": 0.00010206272010732853, | |
| "loss": 0.6702, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 2.4594684940665217, | |
| "grad_norm": 0.03109871782362461, | |
| "learning_rate": 0.00010196209961428812, | |
| "loss": 0.6776, | |
| "step": 14715 | |
| }, | |
| { | |
| "epoch": 2.4619755975263247, | |
| "grad_norm": 0.031003376469016075, | |
| "learning_rate": 0.0001018614791212477, | |
| "loss": 0.688, | |
| "step": 14730 | |
| }, | |
| { | |
| "epoch": 2.464482700986127, | |
| "grad_norm": 0.031020162627100945, | |
| "learning_rate": 0.00010176085862820727, | |
| "loss": 0.6713, | |
| "step": 14745 | |
| }, | |
| { | |
| "epoch": 2.46698980444593, | |
| "grad_norm": 0.031086094677448273, | |
| "learning_rate": 0.00010166023813516687, | |
| "loss": 0.6769, | |
| "step": 14760 | |
| }, | |
| { | |
| "epoch": 2.4694969079057327, | |
| "grad_norm": 0.03022875264286995, | |
| "learning_rate": 0.00010155961764212644, | |
| "loss": 0.6807, | |
| "step": 14775 | |
| }, | |
| { | |
| "epoch": 2.4720040113655357, | |
| "grad_norm": 0.030896877869963646, | |
| "learning_rate": 0.00010145899714908604, | |
| "loss": 0.6927, | |
| "step": 14790 | |
| }, | |
| { | |
| "epoch": 2.4745111148253383, | |
| "grad_norm": 0.031297486275434494, | |
| "learning_rate": 0.00010135837665604561, | |
| "loss": 0.6827, | |
| "step": 14805 | |
| }, | |
| { | |
| "epoch": 2.4770182182851412, | |
| "grad_norm": 0.03127811476588249, | |
| "learning_rate": 0.00010125775616300521, | |
| "loss": 0.6962, | |
| "step": 14820 | |
| }, | |
| { | |
| "epoch": 2.479525321744944, | |
| "grad_norm": 0.030049098655581474, | |
| "learning_rate": 0.00010115713566996479, | |
| "loss": 0.6666, | |
| "step": 14835 | |
| }, | |
| { | |
| "epoch": 2.4820324252047468, | |
| "grad_norm": 0.031142529100179672, | |
| "learning_rate": 0.00010105651517692438, | |
| "loss": 0.6787, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 2.4845395286645497, | |
| "grad_norm": 0.031707145273685455, | |
| "learning_rate": 0.00010095589468388396, | |
| "loss": 0.6741, | |
| "step": 14865 | |
| }, | |
| { | |
| "epoch": 2.4870466321243523, | |
| "grad_norm": 0.03133350983262062, | |
| "learning_rate": 0.00010085527419084353, | |
| "loss": 0.6695, | |
| "step": 14880 | |
| }, | |
| { | |
| "epoch": 2.4895537355841553, | |
| "grad_norm": 0.031642328947782516, | |
| "learning_rate": 0.00010075465369780313, | |
| "loss": 0.6853, | |
| "step": 14895 | |
| }, | |
| { | |
| "epoch": 2.492060839043958, | |
| "grad_norm": 0.03161296248435974, | |
| "learning_rate": 0.0001006540332047627, | |
| "loss": 0.673, | |
| "step": 14910 | |
| }, | |
| { | |
| "epoch": 2.494567942503761, | |
| "grad_norm": 0.03102605603635311, | |
| "learning_rate": 0.0001005534127117223, | |
| "loss": 0.6819, | |
| "step": 14925 | |
| }, | |
| { | |
| "epoch": 2.4970750459635633, | |
| "grad_norm": 0.031027935445308685, | |
| "learning_rate": 0.00010045279221868187, | |
| "loss": 0.6835, | |
| "step": 14940 | |
| }, | |
| { | |
| "epoch": 2.4995821494233663, | |
| "grad_norm": 0.031037239357829094, | |
| "learning_rate": 0.00010035217172564147, | |
| "loss": 0.6695, | |
| "step": 14955 | |
| }, | |
| { | |
| "epoch": 2.502089252883169, | |
| "grad_norm": 0.030962081626057625, | |
| "learning_rate": 0.00010025155123260104, | |
| "loss": 0.6808, | |
| "step": 14970 | |
| }, | |
| { | |
| "epoch": 2.504596356342972, | |
| "grad_norm": 0.030871711671352386, | |
| "learning_rate": 0.00010015093073956063, | |
| "loss": 0.6799, | |
| "step": 14985 | |
| }, | |
| { | |
| "epoch": 2.5071034598027744, | |
| "grad_norm": 0.03209908306598663, | |
| "learning_rate": 0.00010005031024652021, | |
| "loss": 0.6785, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.5096105632625774, | |
| "grad_norm": 0.031665463000535965, | |
| "learning_rate": 9.99496897534798e-05, | |
| "loss": 0.6871, | |
| "step": 15015 | |
| }, | |
| { | |
| "epoch": 2.5121176667223803, | |
| "grad_norm": 0.031626634299755096, | |
| "learning_rate": 9.984906926043938e-05, | |
| "loss": 0.6706, | |
| "step": 15030 | |
| }, | |
| { | |
| "epoch": 2.514624770182183, | |
| "grad_norm": 0.03143932297825813, | |
| "learning_rate": 9.974844876739895e-05, | |
| "loss": 0.6776, | |
| "step": 15045 | |
| }, | |
| { | |
| "epoch": 2.5171318736419854, | |
| "grad_norm": 0.03138510882854462, | |
| "learning_rate": 9.964782827435854e-05, | |
| "loss": 0.6832, | |
| "step": 15060 | |
| }, | |
| { | |
| "epoch": 2.5196389771017884, | |
| "grad_norm": 0.030731745064258575, | |
| "learning_rate": 9.954720778131813e-05, | |
| "loss": 0.6728, | |
| "step": 15075 | |
| }, | |
| { | |
| "epoch": 2.5221460805615914, | |
| "grad_norm": 0.03058742918074131, | |
| "learning_rate": 9.944658728827771e-05, | |
| "loss": 0.6916, | |
| "step": 15090 | |
| }, | |
| { | |
| "epoch": 2.524653184021394, | |
| "grad_norm": 0.030874596908688545, | |
| "learning_rate": 9.93459667952373e-05, | |
| "loss": 0.6692, | |
| "step": 15105 | |
| }, | |
| { | |
| "epoch": 2.5271602874811965, | |
| "grad_norm": 0.03069966472685337, | |
| "learning_rate": 9.924534630219688e-05, | |
| "loss": 0.6836, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 2.5296673909409995, | |
| "grad_norm": 0.031031129881739616, | |
| "learning_rate": 9.914472580915647e-05, | |
| "loss": 0.6806, | |
| "step": 15135 | |
| }, | |
| { | |
| "epoch": 2.5321744944008024, | |
| "grad_norm": 0.03190414234995842, | |
| "learning_rate": 9.904410531611605e-05, | |
| "loss": 0.677, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 2.534681597860605, | |
| "grad_norm": 0.03230069950222969, | |
| "learning_rate": 9.894348482307564e-05, | |
| "loss": 0.6761, | |
| "step": 15165 | |
| }, | |
| { | |
| "epoch": 2.537188701320408, | |
| "grad_norm": 0.03053051233291626, | |
| "learning_rate": 9.884286433003522e-05, | |
| "loss": 0.6805, | |
| "step": 15180 | |
| }, | |
| { | |
| "epoch": 2.5396958047802105, | |
| "grad_norm": 0.03064662776887417, | |
| "learning_rate": 9.874224383699481e-05, | |
| "loss": 0.6854, | |
| "step": 15195 | |
| }, | |
| { | |
| "epoch": 2.5422029082400135, | |
| "grad_norm": 0.03142537549138069, | |
| "learning_rate": 9.86416233439544e-05, | |
| "loss": 0.6693, | |
| "step": 15210 | |
| }, | |
| { | |
| "epoch": 2.544710011699816, | |
| "grad_norm": 0.031185530126094818, | |
| "learning_rate": 9.854100285091398e-05, | |
| "loss": 0.6626, | |
| "step": 15225 | |
| }, | |
| { | |
| "epoch": 2.547217115159619, | |
| "grad_norm": 0.03198733925819397, | |
| "learning_rate": 9.844038235787357e-05, | |
| "loss": 0.6847, | |
| "step": 15240 | |
| }, | |
| { | |
| "epoch": 2.5497242186194216, | |
| "grad_norm": 0.03293673321604729, | |
| "learning_rate": 9.833976186483315e-05, | |
| "loss": 0.6792, | |
| "step": 15255 | |
| }, | |
| { | |
| "epoch": 2.5522313220792245, | |
| "grad_norm": 0.03125865384936333, | |
| "learning_rate": 9.823914137179274e-05, | |
| "loss": 0.6728, | |
| "step": 15270 | |
| }, | |
| { | |
| "epoch": 2.554738425539027, | |
| "grad_norm": 0.0312894769012928, | |
| "learning_rate": 9.813852087875232e-05, | |
| "loss": 0.6748, | |
| "step": 15285 | |
| }, | |
| { | |
| "epoch": 2.55724552899883, | |
| "grad_norm": 0.03170843422412872, | |
| "learning_rate": 9.80379003857119e-05, | |
| "loss": 0.6674, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 2.5597526324586326, | |
| "grad_norm": 0.031321533024311066, | |
| "learning_rate": 9.793727989267148e-05, | |
| "loss": 0.6797, | |
| "step": 15315 | |
| }, | |
| { | |
| "epoch": 2.5622597359184356, | |
| "grad_norm": 0.031243357807397842, | |
| "learning_rate": 9.783665939963107e-05, | |
| "loss": 0.6752, | |
| "step": 15330 | |
| }, | |
| { | |
| "epoch": 2.5647668393782386, | |
| "grad_norm": 0.03241657465696335, | |
| "learning_rate": 9.773603890659064e-05, | |
| "loss": 0.6851, | |
| "step": 15345 | |
| }, | |
| { | |
| "epoch": 2.567273942838041, | |
| "grad_norm": 0.032917000353336334, | |
| "learning_rate": 9.763541841355022e-05, | |
| "loss": 0.6858, | |
| "step": 15360 | |
| }, | |
| { | |
| "epoch": 2.5697810462978437, | |
| "grad_norm": 0.03208984062075615, | |
| "learning_rate": 9.753479792050981e-05, | |
| "loss": 0.6684, | |
| "step": 15375 | |
| }, | |
| { | |
| "epoch": 2.5722881497576466, | |
| "grad_norm": 0.03123905509710312, | |
| "learning_rate": 9.74341774274694e-05, | |
| "loss": 0.6789, | |
| "step": 15390 | |
| }, | |
| { | |
| "epoch": 2.5747952532174496, | |
| "grad_norm": 0.030513722449541092, | |
| "learning_rate": 9.733355693442898e-05, | |
| "loss": 0.6875, | |
| "step": 15405 | |
| }, | |
| { | |
| "epoch": 2.577302356677252, | |
| "grad_norm": 0.03204507753252983, | |
| "learning_rate": 9.723293644138856e-05, | |
| "loss": 0.6742, | |
| "step": 15420 | |
| }, | |
| { | |
| "epoch": 2.579809460137055, | |
| "grad_norm": 0.031124508008360863, | |
| "learning_rate": 9.713231594834815e-05, | |
| "loss": 0.6839, | |
| "step": 15435 | |
| }, | |
| { | |
| "epoch": 2.5823165635968577, | |
| "grad_norm": 0.03063870221376419, | |
| "learning_rate": 9.703169545530774e-05, | |
| "loss": 0.6736, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 2.5848236670566607, | |
| "grad_norm": 0.030677396804094315, | |
| "learning_rate": 9.693107496226732e-05, | |
| "loss": 0.6844, | |
| "step": 15465 | |
| }, | |
| { | |
| "epoch": 2.587330770516463, | |
| "grad_norm": 0.03137551248073578, | |
| "learning_rate": 9.68304544692269e-05, | |
| "loss": 0.6763, | |
| "step": 15480 | |
| }, | |
| { | |
| "epoch": 2.589837873976266, | |
| "grad_norm": 0.030652204528450966, | |
| "learning_rate": 9.672983397618649e-05, | |
| "loss": 0.6676, | |
| "step": 15495 | |
| }, | |
| { | |
| "epoch": 2.5923449774360687, | |
| "grad_norm": 0.03098338656127453, | |
| "learning_rate": 9.662921348314608e-05, | |
| "loss": 0.681, | |
| "step": 15510 | |
| }, | |
| { | |
| "epoch": 2.5948520808958717, | |
| "grad_norm": 0.030911816284060478, | |
| "learning_rate": 9.652859299010566e-05, | |
| "loss": 0.675, | |
| "step": 15525 | |
| }, | |
| { | |
| "epoch": 2.5973591843556743, | |
| "grad_norm": 0.03055042400956154, | |
| "learning_rate": 9.642797249706525e-05, | |
| "loss": 0.6789, | |
| "step": 15540 | |
| }, | |
| { | |
| "epoch": 2.5998662878154772, | |
| "grad_norm": 0.03084755130112171, | |
| "learning_rate": 9.632735200402483e-05, | |
| "loss": 0.6728, | |
| "step": 15555 | |
| }, | |
| { | |
| "epoch": 2.60237339127528, | |
| "grad_norm": 0.03066328726708889, | |
| "learning_rate": 9.622673151098442e-05, | |
| "loss": 0.693, | |
| "step": 15570 | |
| }, | |
| { | |
| "epoch": 2.6048804947350828, | |
| "grad_norm": 0.03215918317437172, | |
| "learning_rate": 9.612611101794399e-05, | |
| "loss": 0.6832, | |
| "step": 15585 | |
| }, | |
| { | |
| "epoch": 2.6073875981948857, | |
| "grad_norm": 0.03187975287437439, | |
| "learning_rate": 9.602549052490358e-05, | |
| "loss": 0.6799, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 2.6098947016546883, | |
| "grad_norm": 0.03179864585399628, | |
| "learning_rate": 9.592487003186316e-05, | |
| "loss": 0.6865, | |
| "step": 15615 | |
| }, | |
| { | |
| "epoch": 2.612401805114491, | |
| "grad_norm": 0.032180171459913254, | |
| "learning_rate": 9.582424953882275e-05, | |
| "loss": 0.664, | |
| "step": 15630 | |
| }, | |
| { | |
| "epoch": 2.614908908574294, | |
| "grad_norm": 0.03252346068620682, | |
| "learning_rate": 9.572362904578232e-05, | |
| "loss": 0.6686, | |
| "step": 15645 | |
| }, | |
| { | |
| "epoch": 2.617416012034097, | |
| "grad_norm": 0.03194168955087662, | |
| "learning_rate": 9.56230085527419e-05, | |
| "loss": 0.6711, | |
| "step": 15660 | |
| }, | |
| { | |
| "epoch": 2.6199231154938993, | |
| "grad_norm": 0.03153575584292412, | |
| "learning_rate": 9.552238805970149e-05, | |
| "loss": 0.6787, | |
| "step": 15675 | |
| }, | |
| { | |
| "epoch": 2.6224302189537023, | |
| "grad_norm": 0.03099830634891987, | |
| "learning_rate": 9.542176756666108e-05, | |
| "loss": 0.6638, | |
| "step": 15690 | |
| }, | |
| { | |
| "epoch": 2.624937322413505, | |
| "grad_norm": 0.032073475420475006, | |
| "learning_rate": 9.532114707362066e-05, | |
| "loss": 0.6867, | |
| "step": 15705 | |
| }, | |
| { | |
| "epoch": 2.627444425873308, | |
| "grad_norm": 0.03117840364575386, | |
| "learning_rate": 9.522052658058025e-05, | |
| "loss": 0.6751, | |
| "step": 15720 | |
| }, | |
| { | |
| "epoch": 2.6299515293331104, | |
| "grad_norm": 0.031706538051366806, | |
| "learning_rate": 9.511990608753983e-05, | |
| "loss": 0.6751, | |
| "step": 15735 | |
| }, | |
| { | |
| "epoch": 2.6324586327929134, | |
| "grad_norm": 0.0310919638723135, | |
| "learning_rate": 9.501928559449942e-05, | |
| "loss": 0.6818, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 2.634965736252716, | |
| "grad_norm": 0.032505493611097336, | |
| "learning_rate": 9.4918665101459e-05, | |
| "loss": 0.6762, | |
| "step": 15765 | |
| }, | |
| { | |
| "epoch": 2.637472839712519, | |
| "grad_norm": 0.03129402920603752, | |
| "learning_rate": 9.481804460841859e-05, | |
| "loss": 0.6774, | |
| "step": 15780 | |
| }, | |
| { | |
| "epoch": 2.6399799431723214, | |
| "grad_norm": 0.030791781842708588, | |
| "learning_rate": 9.471742411537817e-05, | |
| "loss": 0.6791, | |
| "step": 15795 | |
| }, | |
| { | |
| "epoch": 2.6424870466321244, | |
| "grad_norm": 0.030598165467381477, | |
| "learning_rate": 9.461680362233776e-05, | |
| "loss": 0.6751, | |
| "step": 15810 | |
| }, | |
| { | |
| "epoch": 2.644994150091927, | |
| "grad_norm": 0.03157910704612732, | |
| "learning_rate": 9.451618312929734e-05, | |
| "loss": 0.6661, | |
| "step": 15825 | |
| }, | |
| { | |
| "epoch": 2.64750125355173, | |
| "grad_norm": 0.031462252140045166, | |
| "learning_rate": 9.441556263625693e-05, | |
| "loss": 0.6783, | |
| "step": 15840 | |
| }, | |
| { | |
| "epoch": 2.650008357011533, | |
| "grad_norm": 0.031676456332206726, | |
| "learning_rate": 9.431494214321652e-05, | |
| "loss": 0.6838, | |
| "step": 15855 | |
| }, | |
| { | |
| "epoch": 2.6525154604713355, | |
| "grad_norm": 0.031083036214113235, | |
| "learning_rate": 9.421432165017609e-05, | |
| "loss": 0.678, | |
| "step": 15870 | |
| }, | |
| { | |
| "epoch": 2.655022563931138, | |
| "grad_norm": 0.03105340152978897, | |
| "learning_rate": 9.411370115713567e-05, | |
| "loss": 0.6786, | |
| "step": 15885 | |
| }, | |
| { | |
| "epoch": 2.657529667390941, | |
| "grad_norm": 0.03212074562907219, | |
| "learning_rate": 9.401308066409526e-05, | |
| "loss": 0.6724, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 2.660036770850744, | |
| "grad_norm": 0.03203478455543518, | |
| "learning_rate": 9.391246017105484e-05, | |
| "loss": 0.6664, | |
| "step": 15915 | |
| }, | |
| { | |
| "epoch": 2.6625438743105465, | |
| "grad_norm": 0.03217902034521103, | |
| "learning_rate": 9.381183967801443e-05, | |
| "loss": 0.6668, | |
| "step": 15930 | |
| }, | |
| { | |
| "epoch": 2.665050977770349, | |
| "grad_norm": 0.032049164175987244, | |
| "learning_rate": 9.371121918497402e-05, | |
| "loss": 0.6633, | |
| "step": 15945 | |
| }, | |
| { | |
| "epoch": 2.667558081230152, | |
| "grad_norm": 0.03231196105480194, | |
| "learning_rate": 9.361059869193359e-05, | |
| "loss": 0.6759, | |
| "step": 15960 | |
| }, | |
| { | |
| "epoch": 2.670065184689955, | |
| "grad_norm": 0.03290446102619171, | |
| "learning_rate": 9.350997819889317e-05, | |
| "loss": 0.6686, | |
| "step": 15975 | |
| }, | |
| { | |
| "epoch": 2.6725722881497576, | |
| "grad_norm": 0.03090088628232479, | |
| "learning_rate": 9.340935770585276e-05, | |
| "loss": 0.6841, | |
| "step": 15990 | |
| }, | |
| { | |
| "epoch": 2.6750793916095605, | |
| "grad_norm": 0.031320635229349136, | |
| "learning_rate": 9.330873721281234e-05, | |
| "loss": 0.6697, | |
| "step": 16005 | |
| }, | |
| { | |
| "epoch": 2.677586495069363, | |
| "grad_norm": 0.03119390271604061, | |
| "learning_rate": 9.320811671977193e-05, | |
| "loss": 0.679, | |
| "step": 16020 | |
| }, | |
| { | |
| "epoch": 2.680093598529166, | |
| "grad_norm": 0.031817544251680374, | |
| "learning_rate": 9.310749622673151e-05, | |
| "loss": 0.68, | |
| "step": 16035 | |
| }, | |
| { | |
| "epoch": 2.6826007019889686, | |
| "grad_norm": 0.030589740723371506, | |
| "learning_rate": 9.30068757336911e-05, | |
| "loss": 0.6881, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 2.6851078054487716, | |
| "grad_norm": 0.031363166868686676, | |
| "learning_rate": 9.290625524065069e-05, | |
| "loss": 0.6755, | |
| "step": 16065 | |
| }, | |
| { | |
| "epoch": 2.687614908908574, | |
| "grad_norm": 0.03159747272729874, | |
| "learning_rate": 9.280563474761027e-05, | |
| "loss": 0.682, | |
| "step": 16080 | |
| }, | |
| { | |
| "epoch": 2.690122012368377, | |
| "grad_norm": 0.03237079828977585, | |
| "learning_rate": 9.270501425456986e-05, | |
| "loss": 0.6808, | |
| "step": 16095 | |
| }, | |
| { | |
| "epoch": 2.69262911582818, | |
| "grad_norm": 0.031845077872276306, | |
| "learning_rate": 9.260439376152944e-05, | |
| "loss": 0.6764, | |
| "step": 16110 | |
| }, | |
| { | |
| "epoch": 2.6951362192879826, | |
| "grad_norm": 0.031239351257681847, | |
| "learning_rate": 9.250377326848903e-05, | |
| "loss": 0.6649, | |
| "step": 16125 | |
| }, | |
| { | |
| "epoch": 2.697643322747785, | |
| "grad_norm": 0.031146762892603874, | |
| "learning_rate": 9.240315277544861e-05, | |
| "loss": 0.6953, | |
| "step": 16140 | |
| }, | |
| { | |
| "epoch": 2.700150426207588, | |
| "grad_norm": 0.0323052816092968, | |
| "learning_rate": 9.230253228240818e-05, | |
| "loss": 0.672, | |
| "step": 16155 | |
| }, | |
| { | |
| "epoch": 2.702657529667391, | |
| "grad_norm": 0.031430117785930634, | |
| "learning_rate": 9.220191178936777e-05, | |
| "loss": 0.6796, | |
| "step": 16170 | |
| }, | |
| { | |
| "epoch": 2.7051646331271937, | |
| "grad_norm": 0.03176365792751312, | |
| "learning_rate": 9.210129129632736e-05, | |
| "loss": 0.6716, | |
| "step": 16185 | |
| }, | |
| { | |
| "epoch": 2.7076717365869962, | |
| "grad_norm": 0.031570978462696075, | |
| "learning_rate": 9.200067080328694e-05, | |
| "loss": 0.6779, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 2.710178840046799, | |
| "grad_norm": 0.031726591289043427, | |
| "learning_rate": 9.190005031024653e-05, | |
| "loss": 0.6739, | |
| "step": 16215 | |
| }, | |
| { | |
| "epoch": 2.712685943506602, | |
| "grad_norm": 0.03140697255730629, | |
| "learning_rate": 9.179942981720611e-05, | |
| "loss": 0.6795, | |
| "step": 16230 | |
| }, | |
| { | |
| "epoch": 2.7151930469664047, | |
| "grad_norm": 0.03162944316864014, | |
| "learning_rate": 9.16988093241657e-05, | |
| "loss": 0.6772, | |
| "step": 16245 | |
| }, | |
| { | |
| "epoch": 2.7177001504262077, | |
| "grad_norm": 0.03275005519390106, | |
| "learning_rate": 9.159818883112527e-05, | |
| "loss": 0.6726, | |
| "step": 16260 | |
| }, | |
| { | |
| "epoch": 2.7202072538860103, | |
| "grad_norm": 0.0312725305557251, | |
| "learning_rate": 9.149756833808485e-05, | |
| "loss": 0.6795, | |
| "step": 16275 | |
| }, | |
| { | |
| "epoch": 2.7227143573458132, | |
| "grad_norm": 0.03128618001937866, | |
| "learning_rate": 9.139694784504444e-05, | |
| "loss": 0.6867, | |
| "step": 16290 | |
| }, | |
| { | |
| "epoch": 2.725221460805616, | |
| "grad_norm": 0.03184065595269203, | |
| "learning_rate": 9.129632735200403e-05, | |
| "loss": 0.6737, | |
| "step": 16305 | |
| }, | |
| { | |
| "epoch": 2.7277285642654188, | |
| "grad_norm": 0.03144819289445877, | |
| "learning_rate": 9.119570685896361e-05, | |
| "loss": 0.6684, | |
| "step": 16320 | |
| }, | |
| { | |
| "epoch": 2.7302356677252213, | |
| "grad_norm": 0.03168636932969093, | |
| "learning_rate": 9.10950863659232e-05, | |
| "loss": 0.6759, | |
| "step": 16335 | |
| }, | |
| { | |
| "epoch": 2.7327427711850243, | |
| "grad_norm": 0.03160136938095093, | |
| "learning_rate": 9.099446587288278e-05, | |
| "loss": 0.67, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 2.7352498746448273, | |
| "grad_norm": 0.032716233283281326, | |
| "learning_rate": 9.089384537984237e-05, | |
| "loss": 0.6775, | |
| "step": 16365 | |
| }, | |
| { | |
| "epoch": 2.73775697810463, | |
| "grad_norm": 0.033191412687301636, | |
| "learning_rate": 9.079322488680195e-05, | |
| "loss": 0.6882, | |
| "step": 16380 | |
| }, | |
| { | |
| "epoch": 2.7402640815644324, | |
| "grad_norm": 0.03207962587475777, | |
| "learning_rate": 9.069260439376154e-05, | |
| "loss": 0.6784, | |
| "step": 16395 | |
| }, | |
| { | |
| "epoch": 2.7427711850242353, | |
| "grad_norm": 0.031515009701251984, | |
| "learning_rate": 9.059198390072112e-05, | |
| "loss": 0.6741, | |
| "step": 16410 | |
| }, | |
| { | |
| "epoch": 2.7452782884840383, | |
| "grad_norm": 0.03187147155404091, | |
| "learning_rate": 9.049136340768071e-05, | |
| "loss": 0.6662, | |
| "step": 16425 | |
| }, | |
| { | |
| "epoch": 2.747785391943841, | |
| "grad_norm": 0.03254789486527443, | |
| "learning_rate": 9.039074291464028e-05, | |
| "loss": 0.6879, | |
| "step": 16440 | |
| }, | |
| { | |
| "epoch": 2.7502924954036434, | |
| "grad_norm": 0.03185366839170456, | |
| "learning_rate": 9.029012242159987e-05, | |
| "loss": 0.6781, | |
| "step": 16455 | |
| }, | |
| { | |
| "epoch": 2.7527995988634464, | |
| "grad_norm": 0.03274752199649811, | |
| "learning_rate": 9.018950192855945e-05, | |
| "loss": 0.6779, | |
| "step": 16470 | |
| }, | |
| { | |
| "epoch": 2.7553067023232494, | |
| "grad_norm": 0.030197665095329285, | |
| "learning_rate": 9.008888143551904e-05, | |
| "loss": 0.6732, | |
| "step": 16485 | |
| }, | |
| { | |
| "epoch": 2.757813805783052, | |
| "grad_norm": 0.03070506826043129, | |
| "learning_rate": 8.998826094247862e-05, | |
| "loss": 0.6713, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.760320909242855, | |
| "grad_norm": 0.03231901675462723, | |
| "learning_rate": 8.988764044943821e-05, | |
| "loss": 0.6743, | |
| "step": 16515 | |
| }, | |
| { | |
| "epoch": 2.7628280127026574, | |
| "grad_norm": 0.031823549419641495, | |
| "learning_rate": 8.97870199563978e-05, | |
| "loss": 0.6738, | |
| "step": 16530 | |
| }, | |
| { | |
| "epoch": 2.7653351161624604, | |
| "grad_norm": 0.03237045556306839, | |
| "learning_rate": 8.968639946335738e-05, | |
| "loss": 0.6844, | |
| "step": 16545 | |
| }, | |
| { | |
| "epoch": 2.767842219622263, | |
| "grad_norm": 0.033365171402692795, | |
| "learning_rate": 8.958577897031695e-05, | |
| "loss": 0.6778, | |
| "step": 16560 | |
| }, | |
| { | |
| "epoch": 2.770349323082066, | |
| "grad_norm": 0.03203690052032471, | |
| "learning_rate": 8.948515847727654e-05, | |
| "loss": 0.6781, | |
| "step": 16575 | |
| }, | |
| { | |
| "epoch": 2.7728564265418685, | |
| "grad_norm": 0.0312359556555748, | |
| "learning_rate": 8.938453798423612e-05, | |
| "loss": 0.6863, | |
| "step": 16590 | |
| }, | |
| { | |
| "epoch": 2.7753635300016715, | |
| "grad_norm": 0.03242425248026848, | |
| "learning_rate": 8.928391749119571e-05, | |
| "loss": 0.6754, | |
| "step": 16605 | |
| }, | |
| { | |
| "epoch": 2.7778706334614744, | |
| "grad_norm": 0.03151217848062515, | |
| "learning_rate": 8.91832969981553e-05, | |
| "loss": 0.6799, | |
| "step": 16620 | |
| }, | |
| { | |
| "epoch": 2.780377736921277, | |
| "grad_norm": 0.032228607684373856, | |
| "learning_rate": 8.908267650511488e-05, | |
| "loss": 0.6709, | |
| "step": 16635 | |
| }, | |
| { | |
| "epoch": 2.7828848403810795, | |
| "grad_norm": 0.03263266757130623, | |
| "learning_rate": 8.898205601207446e-05, | |
| "loss": 0.6833, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 2.7853919438408825, | |
| "grad_norm": 0.030848173424601555, | |
| "learning_rate": 8.888143551903405e-05, | |
| "loss": 0.6802, | |
| "step": 16665 | |
| }, | |
| { | |
| "epoch": 2.7878990473006855, | |
| "grad_norm": 0.03234275057911873, | |
| "learning_rate": 8.878081502599364e-05, | |
| "loss": 0.6716, | |
| "step": 16680 | |
| }, | |
| { | |
| "epoch": 2.790406150760488, | |
| "grad_norm": 0.03131961077451706, | |
| "learning_rate": 8.868019453295322e-05, | |
| "loss": 0.6813, | |
| "step": 16695 | |
| }, | |
| { | |
| "epoch": 2.7929132542202906, | |
| "grad_norm": 0.03362729772925377, | |
| "learning_rate": 8.85795740399128e-05, | |
| "loss": 0.677, | |
| "step": 16710 | |
| }, | |
| { | |
| "epoch": 2.7954203576800936, | |
| "grad_norm": 0.03228291869163513, | |
| "learning_rate": 8.847895354687238e-05, | |
| "loss": 0.6737, | |
| "step": 16725 | |
| }, | |
| { | |
| "epoch": 2.7979274611398965, | |
| "grad_norm": 0.031786005944013596, | |
| "learning_rate": 8.837833305383196e-05, | |
| "loss": 0.6859, | |
| "step": 16740 | |
| }, | |
| { | |
| "epoch": 2.800434564599699, | |
| "grad_norm": 0.03170496225357056, | |
| "learning_rate": 8.827771256079155e-05, | |
| "loss": 0.6756, | |
| "step": 16755 | |
| }, | |
| { | |
| "epoch": 2.802941668059502, | |
| "grad_norm": 0.033506058156490326, | |
| "learning_rate": 8.817709206775113e-05, | |
| "loss": 0.6758, | |
| "step": 16770 | |
| }, | |
| { | |
| "epoch": 2.8054487715193046, | |
| "grad_norm": 0.032467689365148544, | |
| "learning_rate": 8.807647157471072e-05, | |
| "loss": 0.6745, | |
| "step": 16785 | |
| }, | |
| { | |
| "epoch": 2.8079558749791076, | |
| "grad_norm": 0.033489979803562164, | |
| "learning_rate": 8.79758510816703e-05, | |
| "loss": 0.6676, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 2.81046297843891, | |
| "grad_norm": 0.032214514911174774, | |
| "learning_rate": 8.787523058862989e-05, | |
| "loss": 0.6675, | |
| "step": 16815 | |
| }, | |
| { | |
| "epoch": 2.812970081898713, | |
| "grad_norm": 0.03187641128897667, | |
| "learning_rate": 8.777461009558948e-05, | |
| "loss": 0.6863, | |
| "step": 16830 | |
| }, | |
| { | |
| "epoch": 2.8154771853585157, | |
| "grad_norm": 0.031782276928424835, | |
| "learning_rate": 8.767398960254906e-05, | |
| "loss": 0.6782, | |
| "step": 16845 | |
| }, | |
| { | |
| "epoch": 2.8179842888183186, | |
| "grad_norm": 0.031185677275061607, | |
| "learning_rate": 8.757336910950865e-05, | |
| "loss": 0.6769, | |
| "step": 16860 | |
| }, | |
| { | |
| "epoch": 2.820491392278121, | |
| "grad_norm": 0.03163639456033707, | |
| "learning_rate": 8.747274861646822e-05, | |
| "loss": 0.6744, | |
| "step": 16875 | |
| }, | |
| { | |
| "epoch": 2.822998495737924, | |
| "grad_norm": 0.031712062656879425, | |
| "learning_rate": 8.73721281234278e-05, | |
| "loss": 0.6741, | |
| "step": 16890 | |
| }, | |
| { | |
| "epoch": 2.8255055991977267, | |
| "grad_norm": 0.03253958374261856, | |
| "learning_rate": 8.727150763038739e-05, | |
| "loss": 0.6844, | |
| "step": 16905 | |
| }, | |
| { | |
| "epoch": 2.8280127026575297, | |
| "grad_norm": 0.03280916064977646, | |
| "learning_rate": 8.717088713734698e-05, | |
| "loss": 0.6782, | |
| "step": 16920 | |
| }, | |
| { | |
| "epoch": 2.8305198061173327, | |
| "grad_norm": 0.03310822695493698, | |
| "learning_rate": 8.707026664430656e-05, | |
| "loss": 0.6704, | |
| "step": 16935 | |
| }, | |
| { | |
| "epoch": 2.833026909577135, | |
| "grad_norm": 0.031092172488570213, | |
| "learning_rate": 8.696964615126615e-05, | |
| "loss": 0.676, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 2.8355340130369378, | |
| "grad_norm": 0.0315091609954834, | |
| "learning_rate": 8.686902565822573e-05, | |
| "loss": 0.6658, | |
| "step": 16965 | |
| }, | |
| { | |
| "epoch": 2.8380411164967407, | |
| "grad_norm": 0.030993249267339706, | |
| "learning_rate": 8.676840516518532e-05, | |
| "loss": 0.6771, | |
| "step": 16980 | |
| }, | |
| { | |
| "epoch": 2.8405482199565437, | |
| "grad_norm": 0.03143613040447235, | |
| "learning_rate": 8.66677846721449e-05, | |
| "loss": 0.6887, | |
| "step": 16995 | |
| }, | |
| { | |
| "epoch": 2.8430553234163463, | |
| "grad_norm": 0.03253776207566261, | |
| "learning_rate": 8.656716417910447e-05, | |
| "loss": 0.6684, | |
| "step": 17010 | |
| }, | |
| { | |
| "epoch": 2.8455624268761492, | |
| "grad_norm": 0.03285781666636467, | |
| "learning_rate": 8.646654368606406e-05, | |
| "loss": 0.6827, | |
| "step": 17025 | |
| }, | |
| { | |
| "epoch": 2.848069530335952, | |
| "grad_norm": 0.03159667178988457, | |
| "learning_rate": 8.636592319302365e-05, | |
| "loss": 0.6703, | |
| "step": 17040 | |
| }, | |
| { | |
| "epoch": 2.8505766337957548, | |
| "grad_norm": 0.0313105471432209, | |
| "learning_rate": 8.626530269998323e-05, | |
| "loss": 0.6754, | |
| "step": 17055 | |
| }, | |
| { | |
| "epoch": 2.8530837372555573, | |
| "grad_norm": 0.0318642258644104, | |
| "learning_rate": 8.616468220694282e-05, | |
| "loss": 0.6697, | |
| "step": 17070 | |
| }, | |
| { | |
| "epoch": 2.8555908407153603, | |
| "grad_norm": 0.03128768131136894, | |
| "learning_rate": 8.60640617139024e-05, | |
| "loss": 0.6681, | |
| "step": 17085 | |
| }, | |
| { | |
| "epoch": 2.858097944175163, | |
| "grad_norm": 0.03178677707910538, | |
| "learning_rate": 8.596344122086199e-05, | |
| "loss": 0.6903, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 2.860605047634966, | |
| "grad_norm": 0.03252077102661133, | |
| "learning_rate": 8.586282072782157e-05, | |
| "loss": 0.6779, | |
| "step": 17115 | |
| }, | |
| { | |
| "epoch": 2.8631121510947684, | |
| "grad_norm": 0.032303210347890854, | |
| "learning_rate": 8.576220023478116e-05, | |
| "loss": 0.6683, | |
| "step": 17130 | |
| }, | |
| { | |
| "epoch": 2.8656192545545713, | |
| "grad_norm": 0.031926706433296204, | |
| "learning_rate": 8.566157974174074e-05, | |
| "loss": 0.6769, | |
| "step": 17145 | |
| }, | |
| { | |
| "epoch": 2.868126358014374, | |
| "grad_norm": 0.032100748270750046, | |
| "learning_rate": 8.556095924870033e-05, | |
| "loss": 0.6829, | |
| "step": 17160 | |
| }, | |
| { | |
| "epoch": 2.870633461474177, | |
| "grad_norm": 0.032134201377630234, | |
| "learning_rate": 8.54603387556599e-05, | |
| "loss": 0.6838, | |
| "step": 17175 | |
| }, | |
| { | |
| "epoch": 2.87314056493398, | |
| "grad_norm": 0.03254568204283714, | |
| "learning_rate": 8.535971826261949e-05, | |
| "loss": 0.6905, | |
| "step": 17190 | |
| }, | |
| { | |
| "epoch": 2.8756476683937824, | |
| "grad_norm": 0.03155257925391197, | |
| "learning_rate": 8.525909776957907e-05, | |
| "loss": 0.6799, | |
| "step": 17205 | |
| }, | |
| { | |
| "epoch": 2.878154771853585, | |
| "grad_norm": 0.03186199814081192, | |
| "learning_rate": 8.515847727653866e-05, | |
| "loss": 0.6889, | |
| "step": 17220 | |
| }, | |
| { | |
| "epoch": 2.880661875313388, | |
| "grad_norm": 0.03130493685603142, | |
| "learning_rate": 8.505785678349824e-05, | |
| "loss": 0.6713, | |
| "step": 17235 | |
| }, | |
| { | |
| "epoch": 2.883168978773191, | |
| "grad_norm": 0.032139863818883896, | |
| "learning_rate": 8.495723629045783e-05, | |
| "loss": 0.6695, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 2.8856760822329934, | |
| "grad_norm": 0.03158256411552429, | |
| "learning_rate": 8.485661579741741e-05, | |
| "loss": 0.6696, | |
| "step": 17265 | |
| }, | |
| { | |
| "epoch": 2.8881831856927964, | |
| "grad_norm": 0.03188103437423706, | |
| "learning_rate": 8.4755995304377e-05, | |
| "loss": 0.6631, | |
| "step": 17280 | |
| }, | |
| { | |
| "epoch": 2.890690289152599, | |
| "grad_norm": 0.032000407576560974, | |
| "learning_rate": 8.465537481133657e-05, | |
| "loss": 0.6736, | |
| "step": 17295 | |
| }, | |
| { | |
| "epoch": 2.893197392612402, | |
| "grad_norm": 0.0315023809671402, | |
| "learning_rate": 8.455475431829616e-05, | |
| "loss": 0.6765, | |
| "step": 17310 | |
| }, | |
| { | |
| "epoch": 2.8957044960722045, | |
| "grad_norm": 0.031286027282476425, | |
| "learning_rate": 8.445413382525574e-05, | |
| "loss": 0.6718, | |
| "step": 17325 | |
| }, | |
| { | |
| "epoch": 2.8982115995320075, | |
| "grad_norm": 0.031370267271995544, | |
| "learning_rate": 8.435351333221533e-05, | |
| "loss": 0.6742, | |
| "step": 17340 | |
| }, | |
| { | |
| "epoch": 2.90071870299181, | |
| "grad_norm": 0.032615795731544495, | |
| "learning_rate": 8.425289283917491e-05, | |
| "loss": 0.6756, | |
| "step": 17355 | |
| }, | |
| { | |
| "epoch": 2.903225806451613, | |
| "grad_norm": 0.03155631199479103, | |
| "learning_rate": 8.41522723461345e-05, | |
| "loss": 0.6872, | |
| "step": 17370 | |
| }, | |
| { | |
| "epoch": 2.9057329099114155, | |
| "grad_norm": 0.03252144530415535, | |
| "learning_rate": 8.405165185309408e-05, | |
| "loss": 0.6744, | |
| "step": 17385 | |
| }, | |
| { | |
| "epoch": 2.9082400133712185, | |
| "grad_norm": 0.034432001411914825, | |
| "learning_rate": 8.395103136005367e-05, | |
| "loss": 0.6824, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 2.910747116831021, | |
| "grad_norm": 0.033232349902391434, | |
| "learning_rate": 8.385041086701326e-05, | |
| "loss": 0.6837, | |
| "step": 17415 | |
| }, | |
| { | |
| "epoch": 2.913254220290824, | |
| "grad_norm": 0.03268026188015938, | |
| "learning_rate": 8.374979037397284e-05, | |
| "loss": 0.6858, | |
| "step": 17430 | |
| }, | |
| { | |
| "epoch": 2.915761323750627, | |
| "grad_norm": 0.03303677961230278, | |
| "learning_rate": 8.364916988093243e-05, | |
| "loss": 0.6793, | |
| "step": 17445 | |
| }, | |
| { | |
| "epoch": 2.9182684272104296, | |
| "grad_norm": 0.03122582472860813, | |
| "learning_rate": 8.354854938789201e-05, | |
| "loss": 0.6728, | |
| "step": 17460 | |
| }, | |
| { | |
| "epoch": 2.920775530670232, | |
| "grad_norm": 0.03166414424777031, | |
| "learning_rate": 8.34479288948516e-05, | |
| "loss": 0.6747, | |
| "step": 17475 | |
| }, | |
| { | |
| "epoch": 2.923282634130035, | |
| "grad_norm": 0.031359609216451645, | |
| "learning_rate": 8.334730840181117e-05, | |
| "loss": 0.6644, | |
| "step": 17490 | |
| }, | |
| { | |
| "epoch": 2.925789737589838, | |
| "grad_norm": 0.032412488013505936, | |
| "learning_rate": 8.324668790877075e-05, | |
| "loss": 0.6654, | |
| "step": 17505 | |
| }, | |
| { | |
| "epoch": 2.9282968410496406, | |
| "grad_norm": 0.030932830646634102, | |
| "learning_rate": 8.314606741573034e-05, | |
| "loss": 0.6783, | |
| "step": 17520 | |
| }, | |
| { | |
| "epoch": 2.930803944509443, | |
| "grad_norm": 0.03237373009324074, | |
| "learning_rate": 8.304544692268993e-05, | |
| "loss": 0.6835, | |
| "step": 17535 | |
| }, | |
| { | |
| "epoch": 2.933311047969246, | |
| "grad_norm": 0.030989525839686394, | |
| "learning_rate": 8.294482642964951e-05, | |
| "loss": 0.6774, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 2.935818151429049, | |
| "grad_norm": 0.031636081635951996, | |
| "learning_rate": 8.28442059366091e-05, | |
| "loss": 0.6845, | |
| "step": 17565 | |
| }, | |
| { | |
| "epoch": 2.9383252548888517, | |
| "grad_norm": 0.032494526356458664, | |
| "learning_rate": 8.274358544356867e-05, | |
| "loss": 0.6761, | |
| "step": 17580 | |
| }, | |
| { | |
| "epoch": 2.9408323583486546, | |
| "grad_norm": 0.03214458376169205, | |
| "learning_rate": 8.264296495052825e-05, | |
| "loss": 0.6837, | |
| "step": 17595 | |
| }, | |
| { | |
| "epoch": 2.943339461808457, | |
| "grad_norm": 0.03136483207345009, | |
| "learning_rate": 8.254234445748784e-05, | |
| "loss": 0.6642, | |
| "step": 17610 | |
| }, | |
| { | |
| "epoch": 2.94584656526826, | |
| "grad_norm": 0.032171837985515594, | |
| "learning_rate": 8.244172396444742e-05, | |
| "loss": 0.6667, | |
| "step": 17625 | |
| }, | |
| { | |
| "epoch": 2.9483536687280627, | |
| "grad_norm": 0.032955460250377655, | |
| "learning_rate": 8.234110347140701e-05, | |
| "loss": 0.6789, | |
| "step": 17640 | |
| }, | |
| { | |
| "epoch": 2.9508607721878657, | |
| "grad_norm": 0.03368501737713814, | |
| "learning_rate": 8.22404829783666e-05, | |
| "loss": 0.6711, | |
| "step": 17655 | |
| }, | |
| { | |
| "epoch": 2.9533678756476682, | |
| "grad_norm": 0.03181430697441101, | |
| "learning_rate": 8.213986248532618e-05, | |
| "loss": 0.6751, | |
| "step": 17670 | |
| }, | |
| { | |
| "epoch": 2.955874979107471, | |
| "grad_norm": 0.03176905959844589, | |
| "learning_rate": 8.203924199228577e-05, | |
| "loss": 0.6713, | |
| "step": 17685 | |
| }, | |
| { | |
| "epoch": 2.958382082567274, | |
| "grad_norm": 0.03155883401632309, | |
| "learning_rate": 8.193862149924535e-05, | |
| "loss": 0.6721, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 2.9608891860270767, | |
| "grad_norm": 0.03286443278193474, | |
| "learning_rate": 8.183800100620494e-05, | |
| "loss": 0.6669, | |
| "step": 17715 | |
| }, | |
| { | |
| "epoch": 2.9633962894868793, | |
| "grad_norm": 0.03203749656677246, | |
| "learning_rate": 8.173738051316452e-05, | |
| "loss": 0.6821, | |
| "step": 17730 | |
| }, | |
| { | |
| "epoch": 2.9659033929466823, | |
| "grad_norm": 0.03260009363293648, | |
| "learning_rate": 8.163676002012411e-05, | |
| "loss": 0.6785, | |
| "step": 17745 | |
| }, | |
| { | |
| "epoch": 2.9684104964064852, | |
| "grad_norm": 0.034267883747816086, | |
| "learning_rate": 8.15361395270837e-05, | |
| "loss": 0.6699, | |
| "step": 17760 | |
| }, | |
| { | |
| "epoch": 2.970917599866288, | |
| "grad_norm": 0.03159933537244797, | |
| "learning_rate": 8.143551903404328e-05, | |
| "loss": 0.6736, | |
| "step": 17775 | |
| }, | |
| { | |
| "epoch": 2.9734247033260903, | |
| "grad_norm": 0.03262174129486084, | |
| "learning_rate": 8.133489854100285e-05, | |
| "loss": 0.6775, | |
| "step": 17790 | |
| }, | |
| { | |
| "epoch": 2.9759318067858933, | |
| "grad_norm": 0.03155225142836571, | |
| "learning_rate": 8.123427804796244e-05, | |
| "loss": 0.6767, | |
| "step": 17805 | |
| }, | |
| { | |
| "epoch": 2.9784389102456963, | |
| "grad_norm": 0.03178201615810394, | |
| "learning_rate": 8.113365755492202e-05, | |
| "loss": 0.6741, | |
| "step": 17820 | |
| }, | |
| { | |
| "epoch": 2.980946013705499, | |
| "grad_norm": 0.03577803075313568, | |
| "learning_rate": 8.103303706188161e-05, | |
| "loss": 0.6763, | |
| "step": 17835 | |
| }, | |
| { | |
| "epoch": 2.983453117165302, | |
| "grad_norm": 0.03188909962773323, | |
| "learning_rate": 8.093241656884119e-05, | |
| "loss": 0.6685, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 2.9859602206251044, | |
| "grad_norm": 0.03267040103673935, | |
| "learning_rate": 8.083179607580078e-05, | |
| "loss": 0.6716, | |
| "step": 17865 | |
| }, | |
| { | |
| "epoch": 2.9884673240849073, | |
| "grad_norm": 0.03385018929839134, | |
| "learning_rate": 8.073117558276035e-05, | |
| "loss": 0.679, | |
| "step": 17880 | |
| }, | |
| { | |
| "epoch": 2.99097442754471, | |
| "grad_norm": 0.030928703024983406, | |
| "learning_rate": 8.063055508971994e-05, | |
| "loss": 0.681, | |
| "step": 17895 | |
| }, | |
| { | |
| "epoch": 2.993481531004513, | |
| "grad_norm": 0.032874446362257004, | |
| "learning_rate": 8.052993459667952e-05, | |
| "loss": 0.6637, | |
| "step": 17910 | |
| }, | |
| { | |
| "epoch": 2.9959886344643154, | |
| "grad_norm": 0.033117108047008514, | |
| "learning_rate": 8.04293141036391e-05, | |
| "loss": 0.663, | |
| "step": 17925 | |
| }, | |
| { | |
| "epoch": 2.9984957379241184, | |
| "grad_norm": 0.03306007385253906, | |
| "learning_rate": 8.032869361059869e-05, | |
| "loss": 0.6693, | |
| "step": 17940 | |
| }, | |
| { | |
| "epoch": 3.001002841383921, | |
| "grad_norm": 0.0331704318523407, | |
| "learning_rate": 8.022807311755828e-05, | |
| "loss": 0.6718, | |
| "step": 17955 | |
| }, | |
| { | |
| "epoch": 3.003509944843724, | |
| "grad_norm": 0.03156450018286705, | |
| "learning_rate": 8.012745262451786e-05, | |
| "loss": 0.6553, | |
| "step": 17970 | |
| }, | |
| { | |
| "epoch": 3.0060170483035265, | |
| "grad_norm": 0.033207476139068604, | |
| "learning_rate": 8.002683213147745e-05, | |
| "loss": 0.6632, | |
| "step": 17985 | |
| }, | |
| { | |
| "epoch": 3.0085241517633294, | |
| "grad_norm": 0.03195161744952202, | |
| "learning_rate": 7.992621163843703e-05, | |
| "loss": 0.6705, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 3.0110312552231324, | |
| "grad_norm": 0.03295569121837616, | |
| "learning_rate": 7.982559114539662e-05, | |
| "loss": 0.6648, | |
| "step": 18015 | |
| }, | |
| { | |
| "epoch": 3.013538358682935, | |
| "grad_norm": 0.031562697142362595, | |
| "learning_rate": 7.97249706523562e-05, | |
| "loss": 0.6576, | |
| "step": 18030 | |
| }, | |
| { | |
| "epoch": 3.016045462142738, | |
| "grad_norm": 0.031858429312705994, | |
| "learning_rate": 7.962435015931579e-05, | |
| "loss": 0.6528, | |
| "step": 18045 | |
| }, | |
| { | |
| "epoch": 3.0185525656025405, | |
| "grad_norm": 0.03421582654118538, | |
| "learning_rate": 7.952372966627538e-05, | |
| "loss": 0.6714, | |
| "step": 18060 | |
| }, | |
| { | |
| "epoch": 3.0210596690623435, | |
| "grad_norm": 0.03287555277347565, | |
| "learning_rate": 7.942310917323496e-05, | |
| "loss": 0.6788, | |
| "step": 18075 | |
| }, | |
| { | |
| "epoch": 3.023566772522146, | |
| "grad_norm": 0.03256657347083092, | |
| "learning_rate": 7.932248868019453e-05, | |
| "loss": 0.6485, | |
| "step": 18090 | |
| }, | |
| { | |
| "epoch": 3.026073875981949, | |
| "grad_norm": 0.03333086147904396, | |
| "learning_rate": 7.922186818715412e-05, | |
| "loss": 0.6678, | |
| "step": 18105 | |
| }, | |
| { | |
| "epoch": 3.0285809794417515, | |
| "grad_norm": 0.031958311796188354, | |
| "learning_rate": 7.91212476941137e-05, | |
| "loss": 0.666, | |
| "step": 18120 | |
| }, | |
| { | |
| "epoch": 3.0310880829015545, | |
| "grad_norm": 0.033307287842035294, | |
| "learning_rate": 7.902062720107329e-05, | |
| "loss": 0.6738, | |
| "step": 18135 | |
| }, | |
| { | |
| "epoch": 3.033595186361357, | |
| "grad_norm": 0.031850751489400864, | |
| "learning_rate": 7.892000670803288e-05, | |
| "loss": 0.6615, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 3.03610228982116, | |
| "grad_norm": 0.031476084142923355, | |
| "learning_rate": 7.881938621499245e-05, | |
| "loss": 0.6597, | |
| "step": 18165 | |
| }, | |
| { | |
| "epoch": 3.0386093932809626, | |
| "grad_norm": 0.032822057604789734, | |
| "learning_rate": 7.871876572195203e-05, | |
| "loss": 0.649, | |
| "step": 18180 | |
| }, | |
| { | |
| "epoch": 3.0411164967407656, | |
| "grad_norm": 0.032040949910879135, | |
| "learning_rate": 7.861814522891162e-05, | |
| "loss": 0.6708, | |
| "step": 18195 | |
| }, | |
| { | |
| "epoch": 3.043623600200568, | |
| "grad_norm": 0.03377379849553108, | |
| "learning_rate": 7.85175247358712e-05, | |
| "loss": 0.6705, | |
| "step": 18210 | |
| }, | |
| { | |
| "epoch": 3.046130703660371, | |
| "grad_norm": 0.03224708139896393, | |
| "learning_rate": 7.841690424283079e-05, | |
| "loss": 0.6685, | |
| "step": 18225 | |
| }, | |
| { | |
| "epoch": 3.0486378071201736, | |
| "grad_norm": 0.03286907821893692, | |
| "learning_rate": 7.831628374979037e-05, | |
| "loss": 0.6785, | |
| "step": 18240 | |
| }, | |
| { | |
| "epoch": 3.0511449105799766, | |
| "grad_norm": 0.033475641161203384, | |
| "learning_rate": 7.821566325674996e-05, | |
| "loss": 0.6581, | |
| "step": 18255 | |
| }, | |
| { | |
| "epoch": 3.0536520140397796, | |
| "grad_norm": 0.03226190432906151, | |
| "learning_rate": 7.811504276370955e-05, | |
| "loss": 0.6679, | |
| "step": 18270 | |
| }, | |
| { | |
| "epoch": 3.056159117499582, | |
| "grad_norm": 0.03244561329483986, | |
| "learning_rate": 7.801442227066913e-05, | |
| "loss": 0.6616, | |
| "step": 18285 | |
| }, | |
| { | |
| "epoch": 3.058666220959385, | |
| "grad_norm": 0.03348153084516525, | |
| "learning_rate": 7.791380177762872e-05, | |
| "loss": 0.667, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 3.0611733244191877, | |
| "grad_norm": 0.0332234688103199, | |
| "learning_rate": 7.78131812845883e-05, | |
| "loss": 0.6632, | |
| "step": 18315 | |
| }, | |
| { | |
| "epoch": 3.0636804278789906, | |
| "grad_norm": 0.0322151854634285, | |
| "learning_rate": 7.771256079154789e-05, | |
| "loss": 0.6657, | |
| "step": 18330 | |
| }, | |
| { | |
| "epoch": 3.066187531338793, | |
| "grad_norm": 0.033220209181308746, | |
| "learning_rate": 7.761194029850747e-05, | |
| "loss": 0.6694, | |
| "step": 18345 | |
| }, | |
| { | |
| "epoch": 3.068694634798596, | |
| "grad_norm": 0.03340331092476845, | |
| "learning_rate": 7.751131980546706e-05, | |
| "loss": 0.6557, | |
| "step": 18360 | |
| }, | |
| { | |
| "epoch": 3.0712017382583987, | |
| "grad_norm": 0.03259367495775223, | |
| "learning_rate": 7.741069931242664e-05, | |
| "loss": 0.6595, | |
| "step": 18375 | |
| }, | |
| { | |
| "epoch": 3.0737088417182017, | |
| "grad_norm": 0.03241262957453728, | |
| "learning_rate": 7.731007881938623e-05, | |
| "loss": 0.6579, | |
| "step": 18390 | |
| }, | |
| { | |
| "epoch": 3.0762159451780042, | |
| "grad_norm": 0.0317726731300354, | |
| "learning_rate": 7.72094583263458e-05, | |
| "loss": 0.6595, | |
| "step": 18405 | |
| }, | |
| { | |
| "epoch": 3.078723048637807, | |
| "grad_norm": 0.0336771234869957, | |
| "learning_rate": 7.710883783330539e-05, | |
| "loss": 0.6563, | |
| "step": 18420 | |
| }, | |
| { | |
| "epoch": 3.0812301520976098, | |
| "grad_norm": 0.033014651387929916, | |
| "learning_rate": 7.700821734026497e-05, | |
| "loss": 0.6689, | |
| "step": 18435 | |
| }, | |
| { | |
| "epoch": 3.0837372555574127, | |
| "grad_norm": 0.03290229290723801, | |
| "learning_rate": 7.690759684722454e-05, | |
| "loss": 0.6482, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 3.0862443590172153, | |
| "grad_norm": 0.033278971910476685, | |
| "learning_rate": 7.680697635418413e-05, | |
| "loss": 0.6532, | |
| "step": 18465 | |
| }, | |
| { | |
| "epoch": 3.0887514624770183, | |
| "grad_norm": 0.03247194364666939, | |
| "learning_rate": 7.670635586114371e-05, | |
| "loss": 0.6455, | |
| "step": 18480 | |
| }, | |
| { | |
| "epoch": 3.091258565936821, | |
| "grad_norm": 0.03357178345322609, | |
| "learning_rate": 7.66057353681033e-05, | |
| "loss": 0.6735, | |
| "step": 18495 | |
| }, | |
| { | |
| "epoch": 3.093765669396624, | |
| "grad_norm": 0.033263131976127625, | |
| "learning_rate": 7.650511487506289e-05, | |
| "loss": 0.661, | |
| "step": 18510 | |
| }, | |
| { | |
| "epoch": 3.0962727728564268, | |
| "grad_norm": 0.03202125430107117, | |
| "learning_rate": 7.640449438202247e-05, | |
| "loss": 0.6573, | |
| "step": 18525 | |
| }, | |
| { | |
| "epoch": 3.0987798763162293, | |
| "grad_norm": 0.033367644995450974, | |
| "learning_rate": 7.630387388898206e-05, | |
| "loss": 0.6536, | |
| "step": 18540 | |
| }, | |
| { | |
| "epoch": 3.1012869797760323, | |
| "grad_norm": 0.03215374797582626, | |
| "learning_rate": 7.620325339594164e-05, | |
| "loss": 0.6677, | |
| "step": 18555 | |
| }, | |
| { | |
| "epoch": 3.103794083235835, | |
| "grad_norm": 0.03222740814089775, | |
| "learning_rate": 7.610263290290123e-05, | |
| "loss": 0.6451, | |
| "step": 18570 | |
| }, | |
| { | |
| "epoch": 3.106301186695638, | |
| "grad_norm": 0.032317865639925, | |
| "learning_rate": 7.600201240986081e-05, | |
| "loss": 0.6477, | |
| "step": 18585 | |
| }, | |
| { | |
| "epoch": 3.1088082901554404, | |
| "grad_norm": 0.03358441963791847, | |
| "learning_rate": 7.59013919168204e-05, | |
| "loss": 0.6586, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 3.1113153936152433, | |
| "grad_norm": 0.03255462273955345, | |
| "learning_rate": 7.580077142377998e-05, | |
| "loss": 0.6626, | |
| "step": 18615 | |
| }, | |
| { | |
| "epoch": 3.113822497075046, | |
| "grad_norm": 0.032852429896593094, | |
| "learning_rate": 7.570015093073957e-05, | |
| "loss": 0.6663, | |
| "step": 18630 | |
| }, | |
| { | |
| "epoch": 3.116329600534849, | |
| "grad_norm": 0.033199895173311234, | |
| "learning_rate": 7.559953043769915e-05, | |
| "loss": 0.6647, | |
| "step": 18645 | |
| }, | |
| { | |
| "epoch": 3.1188367039946514, | |
| "grad_norm": 0.03370612487196922, | |
| "learning_rate": 7.549890994465874e-05, | |
| "loss": 0.664, | |
| "step": 18660 | |
| }, | |
| { | |
| "epoch": 3.1213438074544544, | |
| "grad_norm": 0.03296181559562683, | |
| "learning_rate": 7.539828945161833e-05, | |
| "loss": 0.6509, | |
| "step": 18675 | |
| }, | |
| { | |
| "epoch": 3.123850910914257, | |
| "grad_norm": 0.03318094462156296, | |
| "learning_rate": 7.529766895857791e-05, | |
| "loss": 0.6782, | |
| "step": 18690 | |
| }, | |
| { | |
| "epoch": 3.12635801437406, | |
| "grad_norm": 0.03345433250069618, | |
| "learning_rate": 7.519704846553748e-05, | |
| "loss": 0.6615, | |
| "step": 18705 | |
| }, | |
| { | |
| "epoch": 3.1288651178338625, | |
| "grad_norm": 0.03299123793840408, | |
| "learning_rate": 7.509642797249707e-05, | |
| "loss": 0.6567, | |
| "step": 18720 | |
| }, | |
| { | |
| "epoch": 3.1313722212936654, | |
| "grad_norm": 0.033207185566425323, | |
| "learning_rate": 7.499580747945665e-05, | |
| "loss": 0.6657, | |
| "step": 18735 | |
| }, | |
| { | |
| "epoch": 3.133879324753468, | |
| "grad_norm": 0.03186638280749321, | |
| "learning_rate": 7.489518698641623e-05, | |
| "loss": 0.6651, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 3.136386428213271, | |
| "grad_norm": 0.03253698721528053, | |
| "learning_rate": 7.479456649337581e-05, | |
| "loss": 0.6419, | |
| "step": 18765 | |
| }, | |
| { | |
| "epoch": 3.138893531673074, | |
| "grad_norm": 0.03357692062854767, | |
| "learning_rate": 7.46939460003354e-05, | |
| "loss": 0.6591, | |
| "step": 18780 | |
| }, | |
| { | |
| "epoch": 3.1414006351328765, | |
| "grad_norm": 0.03315422683954239, | |
| "learning_rate": 7.459332550729498e-05, | |
| "loss": 0.6542, | |
| "step": 18795 | |
| }, | |
| { | |
| "epoch": 3.143907738592679, | |
| "grad_norm": 0.03280475363135338, | |
| "learning_rate": 7.449270501425457e-05, | |
| "loss": 0.6488, | |
| "step": 18810 | |
| }, | |
| { | |
| "epoch": 3.146414842052482, | |
| "grad_norm": 0.032748933881521225, | |
| "learning_rate": 7.439208452121415e-05, | |
| "loss": 0.6448, | |
| "step": 18825 | |
| }, | |
| { | |
| "epoch": 3.148921945512285, | |
| "grad_norm": 0.03311785310506821, | |
| "learning_rate": 7.429146402817374e-05, | |
| "loss": 0.6798, | |
| "step": 18840 | |
| }, | |
| { | |
| "epoch": 3.1514290489720875, | |
| "grad_norm": 0.034732621163129807, | |
| "learning_rate": 7.419084353513332e-05, | |
| "loss": 0.6582, | |
| "step": 18855 | |
| }, | |
| { | |
| "epoch": 3.1539361524318905, | |
| "grad_norm": 0.03301689773797989, | |
| "learning_rate": 7.409022304209291e-05, | |
| "loss": 0.6545, | |
| "step": 18870 | |
| }, | |
| { | |
| "epoch": 3.156443255891693, | |
| "grad_norm": 0.03281566500663757, | |
| "learning_rate": 7.39896025490525e-05, | |
| "loss": 0.6566, | |
| "step": 18885 | |
| }, | |
| { | |
| "epoch": 3.158950359351496, | |
| "grad_norm": 0.034080591052770615, | |
| "learning_rate": 7.388898205601208e-05, | |
| "loss": 0.668, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 3.1614574628112986, | |
| "grad_norm": 0.032824501395225525, | |
| "learning_rate": 7.378836156297167e-05, | |
| "loss": 0.6654, | |
| "step": 18915 | |
| }, | |
| { | |
| "epoch": 3.1639645662711016, | |
| "grad_norm": 0.03173629939556122, | |
| "learning_rate": 7.368774106993125e-05, | |
| "loss": 0.6692, | |
| "step": 18930 | |
| }, | |
| { | |
| "epoch": 3.166471669730904, | |
| "grad_norm": 0.03352899104356766, | |
| "learning_rate": 7.358712057689084e-05, | |
| "loss": 0.6511, | |
| "step": 18945 | |
| }, | |
| { | |
| "epoch": 3.168978773190707, | |
| "grad_norm": 0.033694177865982056, | |
| "learning_rate": 7.348650008385042e-05, | |
| "loss": 0.6541, | |
| "step": 18960 | |
| }, | |
| { | |
| "epoch": 3.1714858766505096, | |
| "grad_norm": 0.033731088042259216, | |
| "learning_rate": 7.338587959081001e-05, | |
| "loss": 0.6731, | |
| "step": 18975 | |
| }, | |
| { | |
| "epoch": 3.1739929801103126, | |
| "grad_norm": 0.03405210003256798, | |
| "learning_rate": 7.32852590977696e-05, | |
| "loss": 0.6664, | |
| "step": 18990 | |
| }, | |
| { | |
| "epoch": 3.176500083570115, | |
| "grad_norm": 0.03264220058917999, | |
| "learning_rate": 7.318463860472917e-05, | |
| "loss": 0.6638, | |
| "step": 19005 | |
| }, | |
| { | |
| "epoch": 3.179007187029918, | |
| "grad_norm": 0.03331288322806358, | |
| "learning_rate": 7.308401811168875e-05, | |
| "loss": 0.6601, | |
| "step": 19020 | |
| }, | |
| { | |
| "epoch": 3.1815142904897207, | |
| "grad_norm": 0.032871656119823456, | |
| "learning_rate": 7.298339761864834e-05, | |
| "loss": 0.6582, | |
| "step": 19035 | |
| }, | |
| { | |
| "epoch": 3.1840213939495237, | |
| "grad_norm": 0.033308811485767365, | |
| "learning_rate": 7.288277712560792e-05, | |
| "loss": 0.6526, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 3.186528497409326, | |
| "grad_norm": 0.034691642969846725, | |
| "learning_rate": 7.27821566325675e-05, | |
| "loss": 0.6562, | |
| "step": 19065 | |
| }, | |
| { | |
| "epoch": 3.189035600869129, | |
| "grad_norm": 0.03305482491850853, | |
| "learning_rate": 7.268153613952708e-05, | |
| "loss": 0.6647, | |
| "step": 19080 | |
| }, | |
| { | |
| "epoch": 3.191542704328932, | |
| "grad_norm": 0.03342653810977936, | |
| "learning_rate": 7.258091564648666e-05, | |
| "loss": 0.6547, | |
| "step": 19095 | |
| }, | |
| { | |
| "epoch": 3.1940498077887347, | |
| "grad_norm": 0.032857514917850494, | |
| "learning_rate": 7.248029515344625e-05, | |
| "loss": 0.6675, | |
| "step": 19110 | |
| }, | |
| { | |
| "epoch": 3.1965569112485377, | |
| "grad_norm": 0.033686548471450806, | |
| "learning_rate": 7.237967466040584e-05, | |
| "loss": 0.665, | |
| "step": 19125 | |
| }, | |
| { | |
| "epoch": 3.1990640147083402, | |
| "grad_norm": 0.0347943976521492, | |
| "learning_rate": 7.227905416736542e-05, | |
| "loss": 0.6663, | |
| "step": 19140 | |
| }, | |
| { | |
| "epoch": 3.201571118168143, | |
| "grad_norm": 0.0345291905105114, | |
| "learning_rate": 7.2178433674325e-05, | |
| "loss": 0.6712, | |
| "step": 19155 | |
| }, | |
| { | |
| "epoch": 3.2040782216279458, | |
| "grad_norm": 0.03408714756369591, | |
| "learning_rate": 7.207781318128459e-05, | |
| "loss": 0.675, | |
| "step": 19170 | |
| }, | |
| { | |
| "epoch": 3.2065853250877487, | |
| "grad_norm": 0.03278841823339462, | |
| "learning_rate": 7.197719268824418e-05, | |
| "loss": 0.6694, | |
| "step": 19185 | |
| }, | |
| { | |
| "epoch": 3.2090924285475513, | |
| "grad_norm": 0.032465532422065735, | |
| "learning_rate": 7.187657219520376e-05, | |
| "loss": 0.6606, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 3.2115995320073543, | |
| "grad_norm": 0.03292189538478851, | |
| "learning_rate": 7.177595170216335e-05, | |
| "loss": 0.6602, | |
| "step": 19215 | |
| }, | |
| { | |
| "epoch": 3.214106635467157, | |
| "grad_norm": 0.03233910724520683, | |
| "learning_rate": 7.167533120912293e-05, | |
| "loss": 0.6736, | |
| "step": 19230 | |
| }, | |
| { | |
| "epoch": 3.21661373892696, | |
| "grad_norm": 0.03283598646521568, | |
| "learning_rate": 7.157471071608252e-05, | |
| "loss": 0.6561, | |
| "step": 19245 | |
| }, | |
| { | |
| "epoch": 3.2191208423867623, | |
| "grad_norm": 0.03332465514540672, | |
| "learning_rate": 7.14740902230421e-05, | |
| "loss": 0.6666, | |
| "step": 19260 | |
| }, | |
| { | |
| "epoch": 3.2216279458465653, | |
| "grad_norm": 0.033038314431905746, | |
| "learning_rate": 7.137346973000169e-05, | |
| "loss": 0.6615, | |
| "step": 19275 | |
| }, | |
| { | |
| "epoch": 3.224135049306368, | |
| "grad_norm": 0.033269450068473816, | |
| "learning_rate": 7.127284923696128e-05, | |
| "loss": 0.6429, | |
| "step": 19290 | |
| }, | |
| { | |
| "epoch": 3.226642152766171, | |
| "grad_norm": 0.03383258357644081, | |
| "learning_rate": 7.117222874392085e-05, | |
| "loss": 0.6564, | |
| "step": 19305 | |
| }, | |
| { | |
| "epoch": 3.2291492562259734, | |
| "grad_norm": 0.03422423452138901, | |
| "learning_rate": 7.107160825088043e-05, | |
| "loss": 0.6626, | |
| "step": 19320 | |
| }, | |
| { | |
| "epoch": 3.2316563596857764, | |
| "grad_norm": 0.03328223526477814, | |
| "learning_rate": 7.097098775784002e-05, | |
| "loss": 0.6662, | |
| "step": 19335 | |
| }, | |
| { | |
| "epoch": 3.2341634631455793, | |
| "grad_norm": 0.06695165485143661, | |
| "learning_rate": 7.08703672647996e-05, | |
| "loss": 0.6638, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 3.236670566605382, | |
| "grad_norm": 0.034399405121803284, | |
| "learning_rate": 7.076974677175918e-05, | |
| "loss": 0.6696, | |
| "step": 19365 | |
| }, | |
| { | |
| "epoch": 3.239177670065185, | |
| "grad_norm": 0.03373480588197708, | |
| "learning_rate": 7.066912627871876e-05, | |
| "loss": 0.6567, | |
| "step": 19380 | |
| }, | |
| { | |
| "epoch": 3.2416847735249874, | |
| "grad_norm": 0.03468296676874161, | |
| "learning_rate": 7.056850578567835e-05, | |
| "loss": 0.6693, | |
| "step": 19395 | |
| }, | |
| { | |
| "epoch": 3.2441918769847904, | |
| "grad_norm": 0.03372135013341904, | |
| "learning_rate": 7.046788529263793e-05, | |
| "loss": 0.6598, | |
| "step": 19410 | |
| }, | |
| { | |
| "epoch": 3.246698980444593, | |
| "grad_norm": 0.033345624804496765, | |
| "learning_rate": 7.036726479959752e-05, | |
| "loss": 0.6618, | |
| "step": 19425 | |
| }, | |
| { | |
| "epoch": 3.249206083904396, | |
| "grad_norm": 0.033639825880527496, | |
| "learning_rate": 7.02666443065571e-05, | |
| "loss": 0.675, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 3.2517131873641985, | |
| "grad_norm": 0.032892145216464996, | |
| "learning_rate": 7.016602381351669e-05, | |
| "loss": 0.6725, | |
| "step": 19455 | |
| }, | |
| { | |
| "epoch": 3.2542202908240014, | |
| "grad_norm": 0.03500252589583397, | |
| "learning_rate": 7.006540332047627e-05, | |
| "loss": 0.6648, | |
| "step": 19470 | |
| }, | |
| { | |
| "epoch": 3.256727394283804, | |
| "grad_norm": 0.033187173306941986, | |
| "learning_rate": 6.996478282743586e-05, | |
| "loss": 0.6574, | |
| "step": 19485 | |
| }, | |
| { | |
| "epoch": 3.259234497743607, | |
| "grad_norm": 0.033664412796497345, | |
| "learning_rate": 6.986416233439545e-05, | |
| "loss": 0.6555, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 3.2617416012034095, | |
| "grad_norm": 0.031998805701732635, | |
| "learning_rate": 6.976354184135503e-05, | |
| "loss": 0.6621, | |
| "step": 19515 | |
| }, | |
| { | |
| "epoch": 3.2642487046632125, | |
| "grad_norm": 0.03370664268732071, | |
| "learning_rate": 6.966292134831462e-05, | |
| "loss": 0.6575, | |
| "step": 19530 | |
| }, | |
| { | |
| "epoch": 3.266755808123015, | |
| "grad_norm": 0.03247015178203583, | |
| "learning_rate": 6.95623008552742e-05, | |
| "loss": 0.6704, | |
| "step": 19545 | |
| }, | |
| { | |
| "epoch": 3.269262911582818, | |
| "grad_norm": 0.03311055153608322, | |
| "learning_rate": 6.946168036223379e-05, | |
| "loss": 0.6616, | |
| "step": 19560 | |
| }, | |
| { | |
| "epoch": 3.2717700150426205, | |
| "grad_norm": 0.0337141677737236, | |
| "learning_rate": 6.936105986919337e-05, | |
| "loss": 0.6755, | |
| "step": 19575 | |
| }, | |
| { | |
| "epoch": 3.2742771185024235, | |
| "grad_norm": 0.03368309885263443, | |
| "learning_rate": 6.926043937615294e-05, | |
| "loss": 0.6641, | |
| "step": 19590 | |
| }, | |
| { | |
| "epoch": 3.2767842219622265, | |
| "grad_norm": 0.035854946821928024, | |
| "learning_rate": 6.915981888311253e-05, | |
| "loss": 0.6417, | |
| "step": 19605 | |
| }, | |
| { | |
| "epoch": 3.279291325422029, | |
| "grad_norm": 0.03423641249537468, | |
| "learning_rate": 6.905919839007212e-05, | |
| "loss": 0.6617, | |
| "step": 19620 | |
| }, | |
| { | |
| "epoch": 3.281798428881832, | |
| "grad_norm": 0.033775344491004944, | |
| "learning_rate": 6.89585778970317e-05, | |
| "loss": 0.6636, | |
| "step": 19635 | |
| }, | |
| { | |
| "epoch": 3.2843055323416346, | |
| "grad_norm": 0.03380570188164711, | |
| "learning_rate": 6.885795740399129e-05, | |
| "loss": 0.6803, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 3.2868126358014376, | |
| "grad_norm": 0.03310622647404671, | |
| "learning_rate": 6.875733691095086e-05, | |
| "loss": 0.6565, | |
| "step": 19665 | |
| }, | |
| { | |
| "epoch": 3.28931973926124, | |
| "grad_norm": 0.033229805529117584, | |
| "learning_rate": 6.865671641791044e-05, | |
| "loss": 0.6745, | |
| "step": 19680 | |
| }, | |
| { | |
| "epoch": 3.291826842721043, | |
| "grad_norm": 0.032646749168634415, | |
| "learning_rate": 6.855609592487003e-05, | |
| "loss": 0.6621, | |
| "step": 19695 | |
| }, | |
| { | |
| "epoch": 3.2943339461808456, | |
| "grad_norm": 0.03353268280625343, | |
| "learning_rate": 6.845547543182961e-05, | |
| "loss": 0.6591, | |
| "step": 19710 | |
| }, | |
| { | |
| "epoch": 3.2968410496406486, | |
| "grad_norm": 0.03348655626177788, | |
| "learning_rate": 6.83548549387892e-05, | |
| "loss": 0.6595, | |
| "step": 19725 | |
| }, | |
| { | |
| "epoch": 3.299348153100451, | |
| "grad_norm": 0.034753601998090744, | |
| "learning_rate": 6.825423444574879e-05, | |
| "loss": 0.6577, | |
| "step": 19740 | |
| }, | |
| { | |
| "epoch": 3.301855256560254, | |
| "grad_norm": 0.03206339105963707, | |
| "learning_rate": 6.815361395270837e-05, | |
| "loss": 0.6532, | |
| "step": 19755 | |
| }, | |
| { | |
| "epoch": 3.3043623600200567, | |
| "grad_norm": 0.03383897244930267, | |
| "learning_rate": 6.805299345966796e-05, | |
| "loss": 0.6582, | |
| "step": 19770 | |
| }, | |
| { | |
| "epoch": 3.3068694634798597, | |
| "grad_norm": 0.03341525420546532, | |
| "learning_rate": 6.795237296662754e-05, | |
| "loss": 0.655, | |
| "step": 19785 | |
| }, | |
| { | |
| "epoch": 3.309376566939662, | |
| "grad_norm": 0.033895961940288544, | |
| "learning_rate": 6.785175247358713e-05, | |
| "loss": 0.6635, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 3.311883670399465, | |
| "grad_norm": 1.8668147325515747, | |
| "learning_rate": 6.775113198054671e-05, | |
| "loss": 0.6677, | |
| "step": 19815 | |
| }, | |
| { | |
| "epoch": 3.3143907738592677, | |
| "grad_norm": 0.03255158290266991, | |
| "learning_rate": 6.76505114875063e-05, | |
| "loss": 0.6563, | |
| "step": 19830 | |
| }, | |
| { | |
| "epoch": 3.3168978773190707, | |
| "grad_norm": 0.03239602968096733, | |
| "learning_rate": 6.754989099446588e-05, | |
| "loss": 0.6564, | |
| "step": 19845 | |
| }, | |
| { | |
| "epoch": 3.3194049807788737, | |
| "grad_norm": 0.032781895250082016, | |
| "learning_rate": 6.744927050142547e-05, | |
| "loss": 0.6686, | |
| "step": 19860 | |
| }, | |
| { | |
| "epoch": 3.3219120842386762, | |
| "grad_norm": 0.03344454988837242, | |
| "learning_rate": 6.734865000838504e-05, | |
| "loss": 0.6584, | |
| "step": 19875 | |
| }, | |
| { | |
| "epoch": 3.324419187698479, | |
| "grad_norm": 0.0336369127035141, | |
| "learning_rate": 6.724802951534463e-05, | |
| "loss": 0.6561, | |
| "step": 19890 | |
| }, | |
| { | |
| "epoch": 3.3269262911582818, | |
| "grad_norm": 0.0323907732963562, | |
| "learning_rate": 6.714740902230421e-05, | |
| "loss": 0.6587, | |
| "step": 19905 | |
| }, | |
| { | |
| "epoch": 3.3294333946180847, | |
| "grad_norm": 0.03296393156051636, | |
| "learning_rate": 6.70467885292638e-05, | |
| "loss": 0.6666, | |
| "step": 19920 | |
| }, | |
| { | |
| "epoch": 3.3319404980778873, | |
| "grad_norm": 0.032257601618766785, | |
| "learning_rate": 6.694616803622338e-05, | |
| "loss": 0.6488, | |
| "step": 19935 | |
| }, | |
| { | |
| "epoch": 3.3344476015376903, | |
| "grad_norm": 0.03307221084833145, | |
| "learning_rate": 6.684554754318297e-05, | |
| "loss": 0.6546, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 3.336954704997493, | |
| "grad_norm": 0.03298574313521385, | |
| "learning_rate": 6.674492705014255e-05, | |
| "loss": 0.6795, | |
| "step": 19965 | |
| }, | |
| { | |
| "epoch": 3.339461808457296, | |
| "grad_norm": 0.0329146534204483, | |
| "learning_rate": 6.664430655710213e-05, | |
| "loss": 0.6571, | |
| "step": 19980 | |
| }, | |
| { | |
| "epoch": 3.3419689119170983, | |
| "grad_norm": 0.03403447940945625, | |
| "learning_rate": 6.654368606406171e-05, | |
| "loss": 0.6778, | |
| "step": 19995 | |
| }, | |
| { | |
| "epoch": 3.3444760153769013, | |
| "grad_norm": 0.03246279060840607, | |
| "learning_rate": 6.64430655710213e-05, | |
| "loss": 0.6612, | |
| "step": 20010 | |
| }, | |
| { | |
| "epoch": 3.346983118836704, | |
| "grad_norm": 0.03411612659692764, | |
| "learning_rate": 6.634244507798088e-05, | |
| "loss": 0.6478, | |
| "step": 20025 | |
| }, | |
| { | |
| "epoch": 3.349490222296507, | |
| "grad_norm": 0.033288851380348206, | |
| "learning_rate": 6.624182458494047e-05, | |
| "loss": 0.6551, | |
| "step": 20040 | |
| }, | |
| { | |
| "epoch": 3.3519973257563094, | |
| "grad_norm": 0.03258313983678818, | |
| "learning_rate": 6.614120409190005e-05, | |
| "loss": 0.6761, | |
| "step": 20055 | |
| }, | |
| { | |
| "epoch": 3.3545044292161124, | |
| "grad_norm": 0.033909596502780914, | |
| "learning_rate": 6.604058359885964e-05, | |
| "loss": 0.6707, | |
| "step": 20070 | |
| }, | |
| { | |
| "epoch": 3.357011532675915, | |
| "grad_norm": 0.033043161034584045, | |
| "learning_rate": 6.593996310581922e-05, | |
| "loss": 0.6549, | |
| "step": 20085 | |
| }, | |
| { | |
| "epoch": 3.359518636135718, | |
| "grad_norm": 0.03325843811035156, | |
| "learning_rate": 6.583934261277881e-05, | |
| "loss": 0.6655, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 3.362025739595521, | |
| "grad_norm": 0.033593397587537766, | |
| "learning_rate": 6.57387221197384e-05, | |
| "loss": 0.659, | |
| "step": 20115 | |
| }, | |
| { | |
| "epoch": 3.3645328430553234, | |
| "grad_norm": 0.032497063279151917, | |
| "learning_rate": 6.563810162669798e-05, | |
| "loss": 0.6728, | |
| "step": 20130 | |
| }, | |
| { | |
| "epoch": 3.367039946515126, | |
| "grad_norm": 0.03400912135839462, | |
| "learning_rate": 6.553748113365757e-05, | |
| "loss": 0.6701, | |
| "step": 20145 | |
| }, | |
| { | |
| "epoch": 3.369547049974929, | |
| "grad_norm": 0.033511847257614136, | |
| "learning_rate": 6.543686064061714e-05, | |
| "loss": 0.6522, | |
| "step": 20160 | |
| }, | |
| { | |
| "epoch": 3.372054153434732, | |
| "grad_norm": 0.034270040690898895, | |
| "learning_rate": 6.533624014757672e-05, | |
| "loss": 0.6509, | |
| "step": 20175 | |
| }, | |
| { | |
| "epoch": 3.3745612568945345, | |
| "grad_norm": 0.03471988067030907, | |
| "learning_rate": 6.523561965453631e-05, | |
| "loss": 0.657, | |
| "step": 20190 | |
| }, | |
| { | |
| "epoch": 3.3770683603543374, | |
| "grad_norm": 0.033193349838256836, | |
| "learning_rate": 6.51349991614959e-05, | |
| "loss": 0.6602, | |
| "step": 20205 | |
| }, | |
| { | |
| "epoch": 3.37957546381414, | |
| "grad_norm": 0.03341618552803993, | |
| "learning_rate": 6.503437866845548e-05, | |
| "loss": 0.6594, | |
| "step": 20220 | |
| }, | |
| { | |
| "epoch": 3.382082567273943, | |
| "grad_norm": 0.03393018990755081, | |
| "learning_rate": 6.493375817541507e-05, | |
| "loss": 0.6692, | |
| "step": 20235 | |
| }, | |
| { | |
| "epoch": 3.3845896707337455, | |
| "grad_norm": 0.032490186393260956, | |
| "learning_rate": 6.483313768237465e-05, | |
| "loss": 0.645, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 3.3870967741935485, | |
| "grad_norm": 0.03293507918715477, | |
| "learning_rate": 6.473251718933424e-05, | |
| "loss": 0.6414, | |
| "step": 20265 | |
| }, | |
| { | |
| "epoch": 3.389603877653351, | |
| "grad_norm": 0.033456623554229736, | |
| "learning_rate": 6.463189669629381e-05, | |
| "loss": 0.6547, | |
| "step": 20280 | |
| }, | |
| { | |
| "epoch": 3.392110981113154, | |
| "grad_norm": 0.03404277190566063, | |
| "learning_rate": 6.45312762032534e-05, | |
| "loss": 0.6605, | |
| "step": 20295 | |
| }, | |
| { | |
| "epoch": 3.3946180845729566, | |
| "grad_norm": 0.03427689149975777, | |
| "learning_rate": 6.443065571021298e-05, | |
| "loss": 0.6594, | |
| "step": 20310 | |
| }, | |
| { | |
| "epoch": 3.3971251880327595, | |
| "grad_norm": 0.033457666635513306, | |
| "learning_rate": 6.433003521717256e-05, | |
| "loss": 0.6778, | |
| "step": 20325 | |
| }, | |
| { | |
| "epoch": 3.399632291492562, | |
| "grad_norm": 0.034244317561388016, | |
| "learning_rate": 6.422941472413215e-05, | |
| "loss": 0.6742, | |
| "step": 20340 | |
| }, | |
| { | |
| "epoch": 3.402139394952365, | |
| "grad_norm": 0.03270183503627777, | |
| "learning_rate": 6.412879423109174e-05, | |
| "loss": 0.6435, | |
| "step": 20355 | |
| }, | |
| { | |
| "epoch": 3.404646498412168, | |
| "grad_norm": 0.034196462482213974, | |
| "learning_rate": 6.402817373805132e-05, | |
| "loss": 0.648, | |
| "step": 20370 | |
| }, | |
| { | |
| "epoch": 3.4071536018719706, | |
| "grad_norm": 0.032824840396642685, | |
| "learning_rate": 6.39275532450109e-05, | |
| "loss": 0.6767, | |
| "step": 20385 | |
| }, | |
| { | |
| "epoch": 3.409660705331773, | |
| "grad_norm": 0.03537704795598984, | |
| "learning_rate": 6.382693275197049e-05, | |
| "loss": 0.6721, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 3.412167808791576, | |
| "grad_norm": 0.03542947396636009, | |
| "learning_rate": 6.372631225893008e-05, | |
| "loss": 0.6612, | |
| "step": 20415 | |
| }, | |
| { | |
| "epoch": 3.414674912251379, | |
| "grad_norm": 0.033524878323078156, | |
| "learning_rate": 6.362569176588966e-05, | |
| "loss": 0.6709, | |
| "step": 20430 | |
| }, | |
| { | |
| "epoch": 3.4171820157111816, | |
| "grad_norm": 0.03377537056803703, | |
| "learning_rate": 6.352507127284923e-05, | |
| "loss": 0.6711, | |
| "step": 20445 | |
| }, | |
| { | |
| "epoch": 3.4196891191709846, | |
| "grad_norm": 0.033485304564237595, | |
| "learning_rate": 6.342445077980882e-05, | |
| "loss": 0.6622, | |
| "step": 20460 | |
| }, | |
| { | |
| "epoch": 3.422196222630787, | |
| "grad_norm": 0.033119190484285355, | |
| "learning_rate": 6.33238302867684e-05, | |
| "loss": 0.66, | |
| "step": 20475 | |
| }, | |
| { | |
| "epoch": 3.42470332609059, | |
| "grad_norm": 0.0347219854593277, | |
| "learning_rate": 6.322320979372799e-05, | |
| "loss": 0.6538, | |
| "step": 20490 | |
| }, | |
| { | |
| "epoch": 3.4272104295503927, | |
| "grad_norm": 0.033375516533851624, | |
| "learning_rate": 6.312258930068758e-05, | |
| "loss": 0.6721, | |
| "step": 20505 | |
| }, | |
| { | |
| "epoch": 3.4297175330101957, | |
| "grad_norm": 0.03268874064087868, | |
| "learning_rate": 6.302196880764716e-05, | |
| "loss": 0.6608, | |
| "step": 20520 | |
| }, | |
| { | |
| "epoch": 3.432224636469998, | |
| "grad_norm": 0.03364133462309837, | |
| "learning_rate": 6.292134831460675e-05, | |
| "loss": 0.6673, | |
| "step": 20535 | |
| }, | |
| { | |
| "epoch": 3.434731739929801, | |
| "grad_norm": 0.03426344320178032, | |
| "learning_rate": 6.282072782156633e-05, | |
| "loss": 0.6637, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 3.4372388433896037, | |
| "grad_norm": 0.03355192393064499, | |
| "learning_rate": 6.272010732852592e-05, | |
| "loss": 0.6781, | |
| "step": 20565 | |
| }, | |
| { | |
| "epoch": 3.4397459468494067, | |
| "grad_norm": 0.03330032154917717, | |
| "learning_rate": 6.261948683548549e-05, | |
| "loss": 0.6693, | |
| "step": 20580 | |
| }, | |
| { | |
| "epoch": 3.4422530503092093, | |
| "grad_norm": 0.03315750136971474, | |
| "learning_rate": 6.251886634244508e-05, | |
| "loss": 0.66, | |
| "step": 20595 | |
| }, | |
| { | |
| "epoch": 3.4447601537690122, | |
| "grad_norm": 0.033580485731363297, | |
| "learning_rate": 6.241824584940466e-05, | |
| "loss": 0.6749, | |
| "step": 20610 | |
| }, | |
| { | |
| "epoch": 3.447267257228815, | |
| "grad_norm": 0.03357269614934921, | |
| "learning_rate": 6.231762535636425e-05, | |
| "loss": 0.6563, | |
| "step": 20625 | |
| }, | |
| { | |
| "epoch": 3.4497743606886178, | |
| "grad_norm": 0.03285966068506241, | |
| "learning_rate": 6.221700486332383e-05, | |
| "loss": 0.6615, | |
| "step": 20640 | |
| }, | |
| { | |
| "epoch": 3.4522814641484203, | |
| "grad_norm": 0.034152496606111526, | |
| "learning_rate": 6.211638437028342e-05, | |
| "loss": 0.666, | |
| "step": 20655 | |
| }, | |
| { | |
| "epoch": 3.4547885676082233, | |
| "grad_norm": 0.03442816436290741, | |
| "learning_rate": 6.2015763877243e-05, | |
| "loss": 0.6637, | |
| "step": 20670 | |
| }, | |
| { | |
| "epoch": 3.4572956710680263, | |
| "grad_norm": 0.03422121703624725, | |
| "learning_rate": 6.191514338420259e-05, | |
| "loss": 0.661, | |
| "step": 20685 | |
| }, | |
| { | |
| "epoch": 3.459802774527829, | |
| "grad_norm": 0.035803407430648804, | |
| "learning_rate": 6.181452289116217e-05, | |
| "loss": 0.6659, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 3.462309877987632, | |
| "grad_norm": 0.03371883183717728, | |
| "learning_rate": 6.171390239812176e-05, | |
| "loss": 0.6659, | |
| "step": 20715 | |
| }, | |
| { | |
| "epoch": 3.4648169814474343, | |
| "grad_norm": 0.034014519304037094, | |
| "learning_rate": 6.161328190508133e-05, | |
| "loss": 0.662, | |
| "step": 20730 | |
| }, | |
| { | |
| "epoch": 3.4673240849072373, | |
| "grad_norm": 0.032825078815221786, | |
| "learning_rate": 6.151266141204092e-05, | |
| "loss": 0.6645, | |
| "step": 20745 | |
| }, | |
| { | |
| "epoch": 3.46983118836704, | |
| "grad_norm": 0.033502623438835144, | |
| "learning_rate": 6.14120409190005e-05, | |
| "loss": 0.6611, | |
| "step": 20760 | |
| }, | |
| { | |
| "epoch": 3.472338291826843, | |
| "grad_norm": 0.03605775162577629, | |
| "learning_rate": 6.131142042596009e-05, | |
| "loss": 0.6781, | |
| "step": 20775 | |
| }, | |
| { | |
| "epoch": 3.4748453952866454, | |
| "grad_norm": 0.03367803990840912, | |
| "learning_rate": 6.121079993291967e-05, | |
| "loss": 0.6547, | |
| "step": 20790 | |
| }, | |
| { | |
| "epoch": 3.4773524987464484, | |
| "grad_norm": 0.034281060099601746, | |
| "learning_rate": 6.111017943987926e-05, | |
| "loss": 0.6607, | |
| "step": 20805 | |
| }, | |
| { | |
| "epoch": 3.479859602206251, | |
| "grad_norm": 0.032581839710474014, | |
| "learning_rate": 6.100955894683884e-05, | |
| "loss": 0.6753, | |
| "step": 20820 | |
| }, | |
| { | |
| "epoch": 3.482366705666054, | |
| "grad_norm": 0.03385984152555466, | |
| "learning_rate": 6.090893845379842e-05, | |
| "loss": 0.6582, | |
| "step": 20835 | |
| }, | |
| { | |
| "epoch": 3.4848738091258564, | |
| "grad_norm": 0.03409432992339134, | |
| "learning_rate": 6.080831796075801e-05, | |
| "loss": 0.6541, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 3.4873809125856594, | |
| "grad_norm": 0.032626084983348846, | |
| "learning_rate": 6.0707697467717594e-05, | |
| "loss": 0.6625, | |
| "step": 20865 | |
| }, | |
| { | |
| "epoch": 3.489888016045462, | |
| "grad_norm": 0.034788578748703, | |
| "learning_rate": 6.060707697467718e-05, | |
| "loss": 0.6539, | |
| "step": 20880 | |
| }, | |
| { | |
| "epoch": 3.492395119505265, | |
| "grad_norm": 0.034189485013484955, | |
| "learning_rate": 6.0506456481636765e-05, | |
| "loss": 0.6595, | |
| "step": 20895 | |
| }, | |
| { | |
| "epoch": 3.4949022229650675, | |
| "grad_norm": 0.03473382443189621, | |
| "learning_rate": 6.040583598859635e-05, | |
| "loss": 0.6565, | |
| "step": 20910 | |
| }, | |
| { | |
| "epoch": 3.4974093264248705, | |
| "grad_norm": 0.0336168147623539, | |
| "learning_rate": 6.0305215495555936e-05, | |
| "loss": 0.6584, | |
| "step": 20925 | |
| }, | |
| { | |
| "epoch": 3.4999164298846734, | |
| "grad_norm": 0.0335552953183651, | |
| "learning_rate": 6.020459500251552e-05, | |
| "loss": 0.6557, | |
| "step": 20940 | |
| }, | |
| { | |
| "epoch": 3.502423533344476, | |
| "grad_norm": 0.03281300142407417, | |
| "learning_rate": 6.01039745094751e-05, | |
| "loss": 0.668, | |
| "step": 20955 | |
| }, | |
| { | |
| "epoch": 3.5049306368042785, | |
| "grad_norm": 0.03366611897945404, | |
| "learning_rate": 6.0003354016434685e-05, | |
| "loss": 0.6621, | |
| "step": 20970 | |
| }, | |
| { | |
| "epoch": 3.5074377402640815, | |
| "grad_norm": 0.034198787063360214, | |
| "learning_rate": 5.990273352339427e-05, | |
| "loss": 0.6619, | |
| "step": 20985 | |
| }, | |
| { | |
| "epoch": 3.5099448437238845, | |
| "grad_norm": 0.03419259935617447, | |
| "learning_rate": 5.9802113030353856e-05, | |
| "loss": 0.6737, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 3.5266588667892362, | |
| "grad_norm": 0.03425095975399017, | |
| "learning_rate": 5.913130974341775e-05, | |
| "loss": 0.665, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 3.543372889854588, | |
| "grad_norm": 0.033783555030822754, | |
| "learning_rate": 5.846050645648163e-05, | |
| "loss": 0.6577, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 3.5600869129199397, | |
| "grad_norm": 0.03537527099251747, | |
| "learning_rate": 5.778970316954553e-05, | |
| "loss": 0.6635, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 3.5768009359852915, | |
| "grad_norm": 0.03478403761982918, | |
| "learning_rate": 5.7118899882609426e-05, | |
| "loss": 0.6584, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 3.593514959050643, | |
| "grad_norm": 0.034150175750255585, | |
| "learning_rate": 5.644809659567332e-05, | |
| "loss": 0.6596, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 3.6102289821159954, | |
| "grad_norm": 0.034265898168087006, | |
| "learning_rate": 5.5777293308737214e-05, | |
| "loss": 0.6574, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 3.626943005181347, | |
| "grad_norm": 0.03564199060201645, | |
| "learning_rate": 5.510649002180111e-05, | |
| "loss": 0.6549, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 3.643657028246699, | |
| "grad_norm": 0.034864045679569244, | |
| "learning_rate": 5.4435686734865e-05, | |
| "loss": 0.6656, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 3.6603710513120506, | |
| "grad_norm": 0.03352364897727966, | |
| "learning_rate": 5.3764883447928896e-05, | |
| "loss": 0.6622, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 3.677085074377403, | |
| "grad_norm": 0.034029681235551834, | |
| "learning_rate": 5.309408016099279e-05, | |
| "loss": 0.6542, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 3.6937990974427546, | |
| "grad_norm": 0.03352760896086693, | |
| "learning_rate": 5.2423276874056684e-05, | |
| "loss": 0.6664, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 3.7105131205081063, | |
| "grad_norm": 0.03489440679550171, | |
| "learning_rate": 5.175247358712058e-05, | |
| "loss": 0.6629, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 3.727227143573458, | |
| "grad_norm": 0.03430229052901268, | |
| "learning_rate": 5.108167030018447e-05, | |
| "loss": 0.6574, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 3.74394116663881, | |
| "grad_norm": 0.033648181706666946, | |
| "learning_rate": 5.0410867013248366e-05, | |
| "loss": 0.6568, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 3.7606551897041616, | |
| "grad_norm": 0.033668212592601776, | |
| "learning_rate": 4.974006372631227e-05, | |
| "loss": 0.6564, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 3.7773692127695138, | |
| "grad_norm": 0.03772876039147377, | |
| "learning_rate": 4.906926043937616e-05, | |
| "loss": 0.6619, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 3.7940832358348655, | |
| "grad_norm": 0.03430061787366867, | |
| "learning_rate": 4.839845715244005e-05, | |
| "loss": 0.6603, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 3.8107972589002173, | |
| "grad_norm": 0.035929903388023376, | |
| "learning_rate": 4.772765386550394e-05, | |
| "loss": 0.6559, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 3.827511281965569, | |
| "grad_norm": 0.03500952944159508, | |
| "learning_rate": 4.7056850578567837e-05, | |
| "loss": 0.6579, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 3.844225305030921, | |
| "grad_norm": 0.03469489514827728, | |
| "learning_rate": 4.638604729163173e-05, | |
| "loss": 0.6558, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 3.860939328096273, | |
| "grad_norm": 0.03354435786604881, | |
| "learning_rate": 4.5715244004695625e-05, | |
| "loss": 0.6623, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 3.8776533511616247, | |
| "grad_norm": 0.03471764177083969, | |
| "learning_rate": 4.504444071775952e-05, | |
| "loss": 0.6648, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 3.8943673742269764, | |
| "grad_norm": 0.03438182920217514, | |
| "learning_rate": 4.437363743082341e-05, | |
| "loss": 0.6606, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 3.911081397292328, | |
| "grad_norm": 0.03417756408452988, | |
| "learning_rate": 4.370283414388731e-05, | |
| "loss": 0.6626, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 3.92779542035768, | |
| "grad_norm": 0.03406790643930435, | |
| "learning_rate": 4.30320308569512e-05, | |
| "loss": 0.6609, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 3.9445094434230317, | |
| "grad_norm": 0.035032719373703, | |
| "learning_rate": 4.2361227570015095e-05, | |
| "loss": 0.6579, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 3.961223466488384, | |
| "grad_norm": 0.03397015482187271, | |
| "learning_rate": 4.169042428307899e-05, | |
| "loss": 0.6557, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 3.9779374895537356, | |
| "grad_norm": 0.03334665298461914, | |
| "learning_rate": 4.101962099614288e-05, | |
| "loss": 0.6606, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 3.9946515126190874, | |
| "grad_norm": 0.033896464854478836, | |
| "learning_rate": 4.034881770920678e-05, | |
| "loss": 0.6584, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 4.0113655356844395, | |
| "grad_norm": 0.03485192731022835, | |
| "learning_rate": 3.967801442227067e-05, | |
| "loss": 0.6492, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 4.028079558749791, | |
| "grad_norm": 0.0341389924287796, | |
| "learning_rate": 3.9007211135334565e-05, | |
| "loss": 0.6461, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 4.044793581815143, | |
| "grad_norm": 0.03344714641571045, | |
| "learning_rate": 3.833640784839846e-05, | |
| "loss": 0.6475, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 4.061507604880495, | |
| "grad_norm": 0.03415651619434357, | |
| "learning_rate": 3.7665604561462354e-05, | |
| "loss": 0.6533, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 4.0782216279458465, | |
| "grad_norm": 0.03357802331447601, | |
| "learning_rate": 3.699480127452625e-05, | |
| "loss": 0.6513, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 4.094935651011198, | |
| "grad_norm": 0.03374486416578293, | |
| "learning_rate": 3.632399798759014e-05, | |
| "loss": 0.6456, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 4.11164967407655, | |
| "grad_norm": 0.03407549113035202, | |
| "learning_rate": 3.5653194700654036e-05, | |
| "loss": 0.651, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 4.128363697141902, | |
| "grad_norm": 0.03402148187160492, | |
| "learning_rate": 3.498239141371793e-05, | |
| "loss": 0.6461, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 4.1450777202072535, | |
| "grad_norm": 0.03708890080451965, | |
| "learning_rate": 3.4311588126781824e-05, | |
| "loss": 0.652, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 4.161791743272605, | |
| "grad_norm": 0.034347113221883774, | |
| "learning_rate": 3.364078483984572e-05, | |
| "loss": 0.6509, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 4.178505766337958, | |
| "grad_norm": 0.0340665765106678, | |
| "learning_rate": 3.296998155290961e-05, | |
| "loss": 0.6451, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 4.19521978940331, | |
| "grad_norm": 0.035704102367162704, | |
| "learning_rate": 3.2299178265973506e-05, | |
| "loss": 0.6507, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 4.211933812468661, | |
| "grad_norm": 0.03486304730176926, | |
| "learning_rate": 3.16283749790374e-05, | |
| "loss": 0.6547, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 4.228647835534013, | |
| "grad_norm": 0.03282959759235382, | |
| "learning_rate": 3.0957571692101294e-05, | |
| "loss": 0.6523, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 4.245361858599365, | |
| "grad_norm": 0.035729847848415375, | |
| "learning_rate": 3.0286768405165188e-05, | |
| "loss": 0.6503, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 4.262075881664717, | |
| "grad_norm": 0.03634531795978546, | |
| "learning_rate": 2.9615965118229082e-05, | |
| "loss": 0.6493, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 4.278789904730068, | |
| "grad_norm": 0.033321358263492584, | |
| "learning_rate": 2.8945161831292976e-05, | |
| "loss": 0.6492, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 4.29550392779542, | |
| "grad_norm": 0.03541552275419235, | |
| "learning_rate": 2.8274358544356867e-05, | |
| "loss": 0.6466, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 4.312217950860772, | |
| "grad_norm": 0.03528020158410072, | |
| "learning_rate": 2.760355525742076e-05, | |
| "loss": 0.6549, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 4.328931973926124, | |
| "grad_norm": 0.03419233486056328, | |
| "learning_rate": 2.6932751970484655e-05, | |
| "loss": 0.6502, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 4.345645996991476, | |
| "grad_norm": 0.03410422429442406, | |
| "learning_rate": 2.626194868354855e-05, | |
| "loss": 0.656, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 4.362360020056828, | |
| "grad_norm": 0.033916935324668884, | |
| "learning_rate": 2.5591145396612443e-05, | |
| "loss": 0.643, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 4.37907404312218, | |
| "grad_norm": 0.034409794956445694, | |
| "learning_rate": 2.492034210967634e-05, | |
| "loss": 0.6445, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 4.3957880661875315, | |
| "grad_norm": 0.03435683995485306, | |
| "learning_rate": 2.4249538822740235e-05, | |
| "loss": 0.6493, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 4.412502089252883, | |
| "grad_norm": 0.03483356907963753, | |
| "learning_rate": 2.357873553580413e-05, | |
| "loss": 0.649, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 4.429216112318235, | |
| "grad_norm": 0.0342116504907608, | |
| "learning_rate": 2.290793224886802e-05, | |
| "loss": 0.6516, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 4.445930135383587, | |
| "grad_norm": 0.035094503313302994, | |
| "learning_rate": 2.2237128961931914e-05, | |
| "loss": 0.6509, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 4.4626441584489385, | |
| "grad_norm": 0.035515137016773224, | |
| "learning_rate": 2.1566325674995808e-05, | |
| "loss": 0.6491, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 4.47935818151429, | |
| "grad_norm": 0.033778801560401917, | |
| "learning_rate": 2.0895522388059702e-05, | |
| "loss": 0.6516, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 4.496072204579642, | |
| "grad_norm": 0.03408665210008621, | |
| "learning_rate": 2.0224719101123596e-05, | |
| "loss": 0.6482, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 4.512786227644995, | |
| "grad_norm": 0.03422163799405098, | |
| "learning_rate": 1.955391581418749e-05, | |
| "loss": 0.6488, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 4.529500250710346, | |
| "grad_norm": 0.03455764427781105, | |
| "learning_rate": 1.8883112527251384e-05, | |
| "loss": 0.6473, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 4.546214273775698, | |
| "grad_norm": 0.033868152648210526, | |
| "learning_rate": 1.8212309240315278e-05, | |
| "loss": 0.6508, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 4.56292829684105, | |
| "grad_norm": 0.03507550060749054, | |
| "learning_rate": 1.7541505953379172e-05, | |
| "loss": 0.6468, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 4.579642319906402, | |
| "grad_norm": 0.03554074466228485, | |
| "learning_rate": 1.6870702666443066e-05, | |
| "loss": 0.6456, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 4.596356342971753, | |
| "grad_norm": 0.03559200465679169, | |
| "learning_rate": 1.619989937950696e-05, | |
| "loss": 0.6544, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 4.613070366037105, | |
| "grad_norm": 0.03546106070280075, | |
| "learning_rate": 1.5529096092570854e-05, | |
| "loss": 0.646, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 4.629784389102457, | |
| "grad_norm": 0.03442246466875076, | |
| "learning_rate": 1.4858292805634748e-05, | |
| "loss": 0.6523, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 4.646498412167809, | |
| "grad_norm": 0.03322317451238632, | |
| "learning_rate": 1.4187489518698644e-05, | |
| "loss": 0.6483, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 4.66321243523316, | |
| "grad_norm": 0.0362270288169384, | |
| "learning_rate": 1.3516686231762538e-05, | |
| "loss": 0.649, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 4.679926458298512, | |
| "grad_norm": 0.03510970249772072, | |
| "learning_rate": 1.2845882944826429e-05, | |
| "loss": 0.6461, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 4.696640481363865, | |
| "grad_norm": 0.03399231657385826, | |
| "learning_rate": 1.2175079657890325e-05, | |
| "loss": 0.6491, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 4.713354504429216, | |
| "grad_norm": 0.03436035290360451, | |
| "learning_rate": 1.1504276370954219e-05, | |
| "loss": 0.6499, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 4.730068527494568, | |
| "grad_norm": 0.034751422703266144, | |
| "learning_rate": 1.0833473084018113e-05, | |
| "loss": 0.6476, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 4.74678255055992, | |
| "grad_norm": 0.034067828208208084, | |
| "learning_rate": 1.0162669797082005e-05, | |
| "loss": 0.6463, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 4.763496573625272, | |
| "grad_norm": 0.03397444635629654, | |
| "learning_rate": 9.4918665101459e-06, | |
| "loss": 0.6487, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 4.780210596690623, | |
| "grad_norm": 0.03437269851565361, | |
| "learning_rate": 8.821063223209793e-06, | |
| "loss": 0.6477, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 4.796924619755975, | |
| "grad_norm": 0.034697502851486206, | |
| "learning_rate": 8.150259936273687e-06, | |
| "loss": 0.6488, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 4.813638642821327, | |
| "grad_norm": 0.03559542074799538, | |
| "learning_rate": 7.479456649337582e-06, | |
| "loss": 0.6492, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 4.830352665886679, | |
| "grad_norm": 0.03439110890030861, | |
| "learning_rate": 6.808653362401476e-06, | |
| "loss": 0.6462, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 4.84706668895203, | |
| "grad_norm": 0.03447462245821953, | |
| "learning_rate": 6.13785007546537e-06, | |
| "loss": 0.6499, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 4.863780712017382, | |
| "grad_norm": 0.03418246656656265, | |
| "learning_rate": 5.467046788529264e-06, | |
| "loss": 0.6492, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 4.880494735082735, | |
| "grad_norm": 0.03533853963017464, | |
| "learning_rate": 4.796243501593159e-06, | |
| "loss": 0.6513, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 4.8972087581480865, | |
| "grad_norm": 0.03379116207361221, | |
| "learning_rate": 4.125440214657052e-06, | |
| "loss": 0.6491, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 4.913922781213438, | |
| "grad_norm": 0.03501541167497635, | |
| "learning_rate": 3.454636927720946e-06, | |
| "loss": 0.6444, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 4.93063680427879, | |
| "grad_norm": 0.03520382195711136, | |
| "learning_rate": 2.78383364078484e-06, | |
| "loss": 0.6499, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 4.809879753006175, | |
| "grad_norm": 0.03432910144329071, | |
| "learning_rate": 7.629605477665471e-06, | |
| "loss": 0.6493, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 4.826129346766331, | |
| "grad_norm": 0.03554558381438255, | |
| "learning_rate": 6.9775024453863715e-06, | |
| "loss": 0.6423, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 4.842378940526487, | |
| "grad_norm": 0.03414788842201233, | |
| "learning_rate": 6.3253994131072716e-06, | |
| "loss": 0.6475, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 4.858628534286643, | |
| "grad_norm": 0.03513456508517265, | |
| "learning_rate": 5.673296380828172e-06, | |
| "loss": 0.6474, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 4.874878128046799, | |
| "grad_norm": 0.03595611825585365, | |
| "learning_rate": 5.021193348549072e-06, | |
| "loss": 0.6485, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 4.891127721806955, | |
| "grad_norm": 0.035549987107515335, | |
| "learning_rate": 4.369090316269971e-06, | |
| "loss": 0.649, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 4.9073773155671105, | |
| "grad_norm": 0.03475033864378929, | |
| "learning_rate": 3.7169872839908704e-06, | |
| "loss": 0.6468, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 4.923626909327266, | |
| "grad_norm": 0.03467612341046333, | |
| "learning_rate": 3.0648842517117705e-06, | |
| "loss": 0.6404, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 4.939876503087422, | |
| "grad_norm": 0.03498971089720726, | |
| "learning_rate": 2.4127812194326705e-06, | |
| "loss": 0.6544, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 4.956126096847579, | |
| "grad_norm": 0.03452787175774574, | |
| "learning_rate": 1.7606781871535704e-06, | |
| "loss": 0.6491, | |
| "step": 30500 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 30770, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.120160458945331e+19, | |
| "train_batch_size": 5, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |