diff --git a/.gitattributes b/.gitattributes index fdaa0bf0ccc26376ecf811498bbb935d966845e7..b73104383dff24f809472d298f5f3ab0cb1ddecc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -59,3 +59,4 @@ coding-uv-1.5e6-32k-ds-r1-1.5b-svd_muon-ulr1.5e-6-vlr1.5e-6-none-muonadamlr1e-6/ mn-coding-uv-1.5e6-32k-ds-r1-1.5b-svd_muon-ulr1.5e-6-vlr1.5e-6-none-muonadamlr1e-6/global_step_240/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text coding-uv-1e6-16k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_261/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text +mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_0.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..15f6267d24e35cb4685af4ab57ead7719cb8deb7 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23e56b931028a6834b99743f1a26c2fe37c7acf21baf8e9cd716c3e8f6a51dc2 +size 15149 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_11.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_11.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4ec1b41ed0709f86817b50792ae7e85d99e1179 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3f1a862a74dec08afcd371254bffab2c60e6a628ae70667d8e02d68e8ffc29d +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_12.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_12.pt new file mode 100644 index 0000000000000000000000000000000000000000..9474165e8ec04411b08500c0f3efeebd9f630004 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:808e69b26852f7b750e7d696987a22fab31ec197b3ca73e775aa56d046c4cc49 +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_13.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_13.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4a3e3448259b143c9a60554c1eb76d0797157aa --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c73657e8d333f9910af83c554295276377e2f19d6c64f261a9d5fb484157ecd +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_14.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_14.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa2d1448c5598f67c15a58aba56cfaa2a0216ca4 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11c4f685710ef1b660467402ab865138d9fddc20fb5f8239200e9719e9b684b +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_15.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_15.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0d5b56f29767c9766fbc3c301a2c02b87c8e546 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb501d9a07025fe6167873a894db71817a9965c1c550bf3ad4018be49cb68879 +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_17.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_17.pt new file mode 100644 index 0000000000000000000000000000000000000000..a96855642a82ea8a8201fa6de41437e42276623f --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2d274b4ffda0444ca2931c09323ef4a20bcedc336bfef4f8ceea219c64c0b43 +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_18.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_18.pt new file mode 100644 index 0000000000000000000000000000000000000000..f358fa2a159ef543f4694769e5e527893e7b2688 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dff377556b6deea6687765c739b1baf6352c6c7586ad721faaee2b50306a78ec +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_19.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_19.pt new file mode 100644 index 0000000000000000000000000000000000000000..093f2c127f74b8ab9aa5c99fe7c74738ccbd93bb --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13c6525d77b4d2fddf4f2c8f415f4552a4e6411e351e72642602f8e2fd36102a +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_2.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..a69e009527c860acbd90cf8a7465ae9ddfc959b6 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99b56de3050d58223238c87c2e4330fcd610b2966b6666d2e3dda31bdd176f37 +size 15149 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_21.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_21.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4272c1b438193d87a9d101c839a9d2b1185dbc4 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:966086a6fbcc1f73d6a329074f38e099598cb9f934b36cc9ab512d312c60ae4f +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_22.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_22.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac556299165ff3627b5b5fef0df436632889d5ee --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c967bd34abe9ba48a243455cc87afa7a54b1d50ef61ee8cdd3ae93731643c54d +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_24.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_24.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f6e94ae232403faacce28f1b62221aabc3f27ac --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b997dc77771fa33a67078f0aab2603d408619c1058e0266e7f38c3e82a136878 +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_26.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_26.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ba01e6b9989c4e748844c1a31d9dfbd4c5e1843 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e8763f10e59c3c27bf489feae366b54f8376b48bef2ce3bf2fdbaa0c02c662 +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_27.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_27.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb5890d1822bfb374c4dbf87f22ce8a53c9ba329 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe25ee531308c7fcbb4b61cca6602010ab9396c49409be99d4530acef21352e2 +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_3.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3b1337f96b85d54dcac9776ec3dbe0aad66a030 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07db091493d2a2ae6729ed2123164c6e6ad621b5900304790957a58f69f9d66 +size 15149 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_30.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_30.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb8aaab2d748ddcbbf1a343a9bd55dba398e6c65 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad5d047a1268a05bcb3535b79aac232a4dd163652b6fc9db5dac753ed8fee6cd +size 15157 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_4.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_4.pt new file mode 100644 index 0000000000000000000000000000000000000000..dab05e2ed50dc7102b66b40e34570c00866b2fa6 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fde66e69e26e91ae22b2868ff82a64fba634e29888096ef7c5ca550743bb8ccb +size 15149 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_8.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_8.pt new file mode 100644 index 0000000000000000000000000000000000000000..db58b522070d7c3fe9db763d5d9fa2fbb1cc0245 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a83885b4c3639b53651c4fb3a76eb3187b410e954a869e9c05a2c9d65a6b5872 +size 15149 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_9.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_9.pt new file mode 100644 index 0000000000000000000000000000000000000000..8576eef208964d198009b0c674ca8a914c99c8a1 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155338d90afbb780d82ea9fe0e91084cf5f16197e5711b42e61b4311481420b5 +size 15149 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/fsdp_config.json b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/fsdp_config.json new file mode 100644 index 0000000000000000000000000000000000000000..504c844e502dfb07beb71c2d5b170c5bfae2dbd3 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/fsdp_config.json @@ -0,0 +1,4 @@ +{ + "FSDP_version": 2, + "world_size": 32 +} \ No newline at end of file diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/chat_template.jinja b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..c2066bd7391c270626e39c9d7124f00360126412 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/chat_template.jinja @@ -0,0 +1 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\n'}}{% endif %} \ No newline at end of file diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/config.json b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/config.json new file mode 100644 index 0000000000000000000000000000000000000000..88c9c17d727c3cac04bf4222ac9ea3779f41ff3f --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/config.json @@ -0,0 +1,60 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151646, + "dtype": "float32", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "max_window_layers": 21, + "model_type": "qwen2", + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000, + "sliding_window": null, + "tie_word_embeddings": false, + "transformers_version": "4.57.6", + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/generation_config.json b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c2d8e16ea86adf9674a298ad1a1bb8f0c6c2d4e --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 151646, + "do_sample": true, + "eos_token_id": 151643, + "temperature": 0.6, + "top_p": 0.95, + "transformers_version": "4.57.6" +} diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/special_tokens_map.json b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer_config.json b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d252dd4e5764106823080946500c02a8ed8c90c9 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer_config.json @@ -0,0 +1,194 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151649": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 16384, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_19.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_19.pt new file mode 100644 index 0000000000000000000000000000000000000000..077e4fbeaef46d827dda27ca1fc1db39f9c2e53f --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a27736086b2770701b26187e5d3bf47559f5b90cee8249b5ee04b1a65c4b0cf4 +size 222310687 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_22.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_22.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2ce4fe0727d7da7430428a82c1efd486224153e --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b464968ac1fd1510c90765a8084a758ae09cd1f212076b5ec9649bcabf41a25 +size 222310687 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_26.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_26.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbc86a73d63ea808d46ca88dbc3a53229bd9f4a6 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3842d166ebd5dcaf70787e30507f3aa926b5367405df69e7d4d5675aff49d738 +size 222310687 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_7.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_7.pt new file mode 100644 index 0000000000000000000000000000000000000000..5835fcb2998eec674cc165da5dcf6de65e2e47bf --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b44e682175bd1db54f61f434f5b8cc331ce1aa1b484b235e9eb5d0955c190c71 +size 222310341 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_1.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc50d9b6bd87a0808edd59af1742e79028ecc651 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5cc22416574d03f45a76250ca02c5482aa7f446100c0681c6c96c825cedf066 +size 736724479 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_11.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_11.pt new file mode 100644 index 0000000000000000000000000000000000000000..738cdcda6fd2e3bfa21aeae366fe6f44119594cd --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ed799ac4817e5523a6b12adcb42eedc62a5b345e7996380a647f1f5accedb4 +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_14.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_14.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec84cf7ec709d0df252a87a0a09dc53829ddbc13 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:514f602184f76d069e7cf10bbe32a8c19d32563f43f490e7cfc0ddab7851e0d2 +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_18.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_18.pt new file mode 100644 index 0000000000000000000000000000000000000000..2da55872a0bd377442ee99905c800fc75a6f2021 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8066b3e087456e296d2203e3cce0f7fda24071834e322bca9607230e0345930b +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_19.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_19.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6548df260eee2a0b3fe2a11620e88b8090902a1 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:507a2f771ca4f678f2a2cb516908fb00c5da870f8a5015cbccae7a6edfb14f29 +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_2.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a804209a253423cdb5f093000bff62f65c68e71 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00583030573c56262ba359f9c920aa66d48af6c11cec68ac9a08bfef60a63cd9 +size 736724479 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_21.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_21.pt new file mode 100644 index 0000000000000000000000000000000000000000..63ab02a8b6890fb00b203221e5cfe6b937c8fba3 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31aaa46867473cb9e4615fda3d3ccecff7158d265aa375c3d8d0be1388a1217e +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_22.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_22.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2c5984c839bc646bf3c46628f8cfbb02e3e88db --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4fcc3d7ad3a692224d31c26a387a6e3b36694a3ec9bd306c64c544865f91012 +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_23.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_23.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cfa641ff23fff07427254a6a6d731504946a6be --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_23.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6591de5b25df8a9a2bb447430a924f5bdeb9bf9f594c0dd19234fb80191c4bd +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_25.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_25.pt new file mode 100644 index 0000000000000000000000000000000000000000..9645fc076044bb36bdfddb0520bb842c13b8e747 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_25.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de211b5d514589fdf44c1eefce5bb9a1344ad8a7c90d67a53f5d1b240c6e58c1 +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_27.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_27.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c6346df0439f20828f4f4d22fd94a0fecd595a6 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddebd42e82116aeece0c396d362542364f7e499b805b31e432576c92a2a9afd8 +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_28.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_28.pt new file mode 100644 index 0000000000000000000000000000000000000000..b186f89650d235ae740205e47f9abc54126fc80a --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94862fb068f8bef5836976f959a41b7c97380ff732a679ba201dcbe6f77dfe0e +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_29.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_29.pt new file mode 100644 index 0000000000000000000000000000000000000000..eee1634c261abf5bf080e72b25cded3c96fd9a7c --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_29.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2426af80ef27b1230db25605ecd297aab4e1d75528dd9ee5f6c1e12a5dd1d059 +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_3.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..64d8ab0eaa23d03ac9b4c49ab1d8140f8fdcd88e --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a55c5616500366f540d8f37c39edbc87b70c9146d7bf8c0477cdc6f4a53c015 +size 736724479 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_30.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_30.pt new file mode 100644 index 0000000000000000000000000000000000000000..059707f27627b753700386f412f6a3129fc209b5 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c6798237c181712e2763450e40a4f26b6b06acb1a442de420a13e439e9e5279 +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_31.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_31.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3c1093f42ea22e266d84b197aab16a90b6d25ed --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_31.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b050f0f1997c9e357c32f1c3ae3076ce9265be5533a5cfc584fabb182ba0fc5 +size 736726536 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_5.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_5.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8c9ea4e88ee0fafb7ae0b1ca30607acf2804de1 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f64ceab048fdd09a4a62d71dfa6a23f4fe68f9b7d7d1654cb7f2ce3a443240d2 +size 736724479 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_9.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_9.pt new file mode 100644 index 0000000000000000000000000000000000000000..8495218210bc26cce7e6ce2967d6ecd73ddc1ee8 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:232edf047de0839d2ada996be968206b9aa4b1a98466c34586292ead8da1c1d4 +size 736724479 diff --git a/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/data.pt b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/data.pt new file mode 100644 index 0000000000000000000000000000000000000000..08143a5fc3d1f6ebea6a208f91a73f3e7bb8a063 --- /dev/null +++ b/mn-coding-uv-1e6-32k-ds-r1-1.5b-svd_muon-ulr1e-6-vlr1e-6-none-muonadamlr1e-6/global_step_280/data.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96905533eef0a2d2ae0a662d9adfe350d86d012fbab3d2f29edc86188248a168 +size 1947