diff --git a/.gitattributes b/.gitattributes index b73104383dff24f809472d298f5f3ab0cb1ddecc..10b07e4cc20168a4a53996dcb92c931ec8d9c7b8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -60,3 +60,4 @@ mn-coding-uv-1.5e6-32k-ds-r1-1.5b-svd_muon-ulr1.5e-6-vlr1.5e-6-none-muonadamlr1e coding-uv-1e6-16k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_261/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text +mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_0.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f98333d7412e03568b47c3bf459e6057c677846 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:126d1254aa344c228075e7cff0eb7be73c8b1a500c416e22a1051b9b47e79ccb +size 15149 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_1.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c049cfc205263971c362a87f390cfb1b2fde29a --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cf203bc1134d9f8fe894798a8088576b12ff03cb7f967683e633d5770d5ca5 +size 15149 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_14.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_14.pt new file mode 100644 index 0000000000000000000000000000000000000000..57a6b064b8c36861daa51cef95d99eef2d8e72e9 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cc750b9ed2a87e7ecd13080d992680c8bf353e1a846fb0a51896d85d439f697 +size 15157 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_18.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_18.pt new file mode 100644 index 0000000000000000000000000000000000000000..131c33695a7a277bc5afeb86fbe1dd308217aab9 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf8d04b6f7ff1153d1589cc6963f85ea844fd76bfa2791fa38acb63395a629c +size 15157 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_2.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..417372afcbe5bb43c4c1c978865b267f60350e16 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce0f02c2676b8cc88e0ef6e2d9bbb78f334fa284fe19fe267a4577679f86993 +size 15149 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_22.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_22.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cc67cedfa5e88ea5866366d5b611db321423d9b --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8993d59f63b7730800fda0dcb1002f4153222c9059e4263bc9254e4c44496de +size 15157 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_24.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_24.pt new file mode 100644 index 0000000000000000000000000000000000000000..7735d8806dc0f199e2e7727d0135c05eafea9e89 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f1e0984f271337823d611d8cf38385beacf0de088fab37ce3f7775fa0cbef19 +size 15157 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_27.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_27.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d6cf13dafafa9979df4a5d3297ce98a8e2d0ad7 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a51dea332c4f1308b6dbf532e8c5602ec574bdc5cba52253fd32e8902989be +size 15157 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_3.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..44682346bd07eb66dfaaa2c6feedac679b01e61b --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59c84cf65729883c977a4c39cf8682dd813f203cad33a575f9123aac5a75ef88 +size 15149 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_30.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_30.pt new file mode 100644 index 0000000000000000000000000000000000000000..06a961946ee06da7686bcf540ec1d4c021769531 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d821971f0623e679168c7d1d87fea11be38966997fbe2426201e19e6375e2ba1 +size 15157 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_5.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_5.pt new file mode 100644 index 0000000000000000000000000000000000000000..21002b76a0ff93e9480ce2acd77888f79219eb31 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/extra_state_world_size_32_rank_5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d2fa768adb1357952a37236bc256ab1eecdbaca5f84815a9320a07cea3a836b +size 15149 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/fsdp_config.json b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/fsdp_config.json new file mode 100644 index 0000000000000000000000000000000000000000..504c844e502dfb07beb71c2d5b170c5bfae2dbd3 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/fsdp_config.json @@ -0,0 +1,4 @@ +{ + "FSDP_version": 2, + "world_size": 32 +} \ No newline at end of file diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/chat_template.jinja b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..c2066bd7391c270626e39c9d7124f00360126412 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/chat_template.jinja @@ -0,0 +1 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\n'}}{% endif %} \ No newline at end of file diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/config.json b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/config.json new file mode 100644 index 0000000000000000000000000000000000000000..88c9c17d727c3cac04bf4222ac9ea3779f41ff3f --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/config.json @@ -0,0 +1,60 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151646, + "dtype": "float32", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "max_window_layers": 21, + "model_type": "qwen2", + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000, + "sliding_window": null, + "tie_word_embeddings": false, + "transformers_version": "4.57.6", + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/generation_config.json b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c2d8e16ea86adf9674a298ad1a1bb8f0c6c2d4e --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 151646, + "do_sample": true, + "eos_token_id": 151643, + "temperature": 0.6, + "top_p": 0.95, + "transformers_version": "4.57.6" +} diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/special_tokens_map.json b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/tokenizer.json b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/tokenizer_config.json b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d252dd4e5764106823080946500c02a8ed8c90c9 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/huggingface/tokenizer_config.json @@ -0,0 +1,194 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151649": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 16384, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_0.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..5087b06b6ffeba2cf3839cc96dfe49db51ab217f --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:379ba705741ae9447031c47b7b2573f095a4bab87a723105e3e2e891a203c604 +size 222310341 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_1.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..105381594b064a93e4ffb4b792ebdbd218bd49b3 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdb6d2db0e82b527b89f8148ded64c9ce32a35dec709dd27a346654c8570950b +size 222310341 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_26.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_26.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab8fb47b3fce2c4f7e3a741dc010459ebef84ac0 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91c99295858b570ffd7fec14d21072772d04a1324988ee5051566a8eedc67966 +size 222310687 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_4.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_4.pt new file mode 100644 index 0000000000000000000000000000000000000000..45b8b25751ba26d34429ae0e27d1363d7cf9f201 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6cd33782316a7b35614b1cbc9a9435d553c08232a091c2c6b403cc84279d042 +size 222310341 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_9.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_9.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2821fd3b899f66c34628ee94ead48fba6f75533 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/model_world_size_32_rank_9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:308fdb779e0ba0759ac8b59f9d7d739a6e17443ddb8c9e6bde221f24b020c376 +size 222310341 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_0.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a80e2bf92ce915ae51d2905a593f7a6180ff2da --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a1b7cd8ebc71c4d534920b2e94e29b3b54e639286b7b3a4b35d4a5b3b1f2979 +size 736712895 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_1.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e3d762dad44017ad4518e0a06ff942253003dd8 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc815f1ca66d08ec1761724bd2c1ca9950d1878fca213666786e0540936f4144 +size 736712895 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_10.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_10.pt new file mode 100644 index 0000000000000000000000000000000000000000..99289c341066ba82aa258a1797f61816655fffa4 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d2cfd9182e031e938a58d1b6a95f74ab1d6b33a0af9d5b5c4c4b848143fbb0e +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_11.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_11.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd4a3a451aa6b13864207d227c8161807b407eee --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46454d8f52bc3246590bc100428dee0daf74f7666d444a630e8404706ba01c0d +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_12.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_12.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4b2935683f5cf95cedf5014e789d11f6f17e5bb --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f4a5b412965dc322082c05f809d57257d3f7f4c14b4b36e4236aaef2e5caba8 +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_13.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_13.pt new file mode 100644 index 0000000000000000000000000000000000000000..3eff2400ff307868432057345926e28acf3d2b8d --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:675216adc7a4bad5b9c5d78679bf4efb2712b6e4158a718dd273636b371db4cf +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_14.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_14.pt new file mode 100644 index 0000000000000000000000000000000000000000..dea9aaee21c502bdabfde0e5247ac0fe56dbab99 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:097bf1aa1fb8728f0568994a9b415038e5fe346a5bc06373ba41a22f9d63a0e8 +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_15.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_15.pt new file mode 100644 index 0000000000000000000000000000000000000000..29fa471bd79c278e603a3056208b61ec78068d3a --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2833cdc4301668a154912587a927b3139424b418aaa5935da6eeb563abcbba32 +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_16.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_16.pt new file mode 100644 index 0000000000000000000000000000000000000000..65f257a76c35eff007bc427d93b6a45810f07f91 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb9e3b22e96041d5a97838989b9b8d8c62347be323769bc9c250587a04a5de8 +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_18.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_18.pt new file mode 100644 index 0000000000000000000000000000000000000000..be901a4b18f649290b4843823a20aa8c4fdd6af9 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a98a23c82591642bb651dc06ce062e9e9c88b0eb18911bee68ebaf83489421 +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_19.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_19.pt new file mode 100644 index 0000000000000000000000000000000000000000..6181f4bc3c3a84cfbc2631130afea8c659e63c3f --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeb46e8523dea4b47c2ab6c57391d9eceb0922bc55768372b624175e37897eea +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_2.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..91850a3e69e476bb10b45c59623b0b62110903b3 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ae01e04f8f41c6d1cbf88939f9810e51360918159d5ce617180e84baff0b7ed +size 736712895 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_21.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_21.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e3571a114b4cacbaf57d63e897d110a1aea5a2a --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c620343c9e6b4c7bbaac2bccb21fcea61eb12262d02613f0177ce46ac36bd5 +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_22.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_22.pt new file mode 100644 index 0000000000000000000000000000000000000000..98d7631a8af5beadfe7b03e867daccf419cc2686 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3903739688c18bb05d5b22f149044f4e320217a217ef0f54ad745f7b40622277 +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_23.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_23.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f7bd1e98b49f08f0039e1b29c3c6c106bbc6284 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_23.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97cdc6d37b9f3184f15cb49d7e963f49fa411954e863c77521f3ce3d6258a52b +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_25.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_25.pt new file mode 100644 index 0000000000000000000000000000000000000000..87a1589f533dea4afd76e18dfd01e1e14d6b0137 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_25.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9da493f0f393425c80af98067b4eb195cff2c8e0b720520bb4ac73ce2d982ad +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_26.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_26.pt new file mode 100644 index 0000000000000000000000000000000000000000..84484c45d7e234d012a9826e4db2ffdcfb56f525 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ba26517071a5c6d80e227d32fa21770a5990328db4aae60e6bdd0cba4a8034 +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_27.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_27.pt new file mode 100644 index 0000000000000000000000000000000000000000..4410a0ec1b26440cbab0a3d0c4b454feb45f7ad3 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3bc325be1b8d96d2e1be1b74c9302cf967612772acf9ac357830bae9a767c33 +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_28.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_28.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e30c1a0238406062db6e3342d55534e6134be0f --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a767ecc87a62cbae88f60f98114bd0c9d398c2f408a1419f1886652c36396a0 +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_3.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..d021c62c58dd2f218f8244505311a7cc350e17ba --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0af63529ded31a5a2fd88e6b08588d813148b077d222b957a468216b4e9565ef +size 736712895 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_30.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_30.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fbf94f7aed7c0a876c0a9df576c87857f1a8f0d --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf1266e2e43927b48f85a68c33cad8dd693959849704dbadc0fc7aa1e80594b +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_31.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_31.pt new file mode 100644 index 0000000000000000000000000000000000000000..85e965e37f68d528a64f21d050564ee9f3ba51e9 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_31.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:798890cb4c6290d70a0c8754fde0c09b71a3ffbac7b9dbf2de463569107b15c7 +size 736714952 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_4.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_4.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba64e06d43e20ce75d06ca35e23bbd29ca6319e7 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c80e5eac971a93695baa16cb73c71734a2cdd15c089b6997caaf43580eba4db +size 736712895 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_5.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_5.pt new file mode 100644 index 0000000000000000000000000000000000000000..26d6bc3e38062b895ac3aacbbc33c924d1e7b84a --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8e43d6532939bbd604295f41aaa66eecbba822d6a528d8e90ad1ad823245a90 +size 736712895 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_6.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_6.pt new file mode 100644 index 0000000000000000000000000000000000000000..e704bb49e9fdacb76c6a369d774c1b8a3caf02f3 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b90b967b224b767e26a939c494154c4f687db5194908024db894ab381892e13 +size 736712895 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_8.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_8.pt new file mode 100644 index 0000000000000000000000000000000000000000..486e6449ecba54e38502fd2c0f85ea8ea5d2b133 --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/actor/optim_world_size_32_rank_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fea9ffb67dac57a7cbb60b4735c8aeaa22fb4b09c27351475100b741aa3af26 +size 736712895 diff --git a/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/data.pt b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/data.pt new file mode 100644 index 0000000000000000000000000000000000000000..9bdaad0d1c2f8520638bd2e1826c19e0a139ab1d --- /dev/null +++ b/mn-coding-uv-2e6-32k-ds-r1-1.5b-svd_muon-ulr2e-6-vlr2e-6-none-muonadamlr1e-6/global_step_160/data.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cdc293557822ad4c9d734d3f26ed4e76969c355b5e91b6ff2bb8d39089eb5c4 +size 1947