diff --git a/.gitattributes b/.gitattributes index 1025883d219bf7e33f308e2d090b8302aa2fca8d..a1db4d86ae78543e155dc554a458e57132bf8fae 100644 --- a/.gitattributes +++ b/.gitattributes @@ -47,3 +47,4 @@ mn-coding-adam-3e6-32k-ds-r1-1.5b-adamw-lr3e-6/global_step_340/actor/huggingface mn-coding-adam-5e6-32k-ds-r1-1.5b-adamw-lr5e-6/global_step_240/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text coding-muon-1e4-16k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_240/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text +mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_0.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4c218fff316d44c6e88f1164f5a69494fb3da97 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d1c31ce5c2a9611422c2e6fe37a5581689eb82c2511ead86047ec0915bed4e2 +size 15149 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_1.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..f804fcf24204672e79c4d318cc7b2c5476d12366 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e660672dc5bc33314822438d20224504f8bd70b35ef8a25868881eeef62b12c +size 15149 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_10.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_10.pt new file mode 100644 index 0000000000000000000000000000000000000000..be8a424d3ceb5e31c2f818e2897e0441619cb65e --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c3efb99e0f500d25a2a6afec7ade994c714d28f0b8229148eda1440abbe840e +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_12.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_12.pt new file mode 100644 index 0000000000000000000000000000000000000000..56654357c6ecb691f65953f8ab2e8485607ec872 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a2d42010cd1d44c1c8d6e5790cca277703e21f86ec0281330e7c3f84756f718 +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_13.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_13.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1ad8b9fb36db50b3c0bf883f5a280f8d61efe00 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78624a4e28b0cbd3c9dda6179baad0548434fb09fa802dde80fee371cd5ea033 +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_14.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_14.pt new file mode 100644 index 0000000000000000000000000000000000000000..78b1fb68cecc27411e8b59bdc7f3888cfd34db88 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6488cd96e6eb54fd2a456af272f8f75edd622bb27192345e97399965f9915254 +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_15.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_15.pt new file mode 100644 index 0000000000000000000000000000000000000000..44f80a0edd72efb92e0017b77014032bb9a05910 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd4ba8532c8d0763976cbdf050d89e7ee72be2db0d23ac6b8e7151acc5e8a2a8 +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_16.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_16.pt new file mode 100644 index 0000000000000000000000000000000000000000..05ae2c52ae9e469a602f226b7564aa247c310863 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fa50d92350d9cdbec8e76fc91fac5a101c32e7622c04ecbe305491af6b8c556 +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_18.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_18.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a3d3ff733a67985baffb0d0e08cc61a2e6317fe --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c81689e07d6053c543a68c6bb7b017e9893ec9fd788e43c414b1220a2c9ba825 +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_2.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc3995450b294e1be3dbffa4b946d7c2815b0e3c --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97e28b2e62d74f39ab3c9b656ff08d2f42be333f7e661ee13861e8b5c22135b0 +size 15149 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_20.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_20.pt new file mode 100644 index 0000000000000000000000000000000000000000..26f98f98622e30ba29dfc12b2e382500fe6d459d --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b67ddee4030d12956d6204281658439939339516bef4aef4cd4c20230b6305 +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_21.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_21.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcf3caab2490d8b0594f5fd11bf99f81ca8906b5 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1178a96590517883599098bc0f59c4669cfd5ea4624a05b2be1b43c70bf7c8d4 +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_22.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_22.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7647be6020c43f13fcc535e793c2ff80bf4ca5c --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6884296a4dc6b9aee722c127e086354704ca555282a251e720ed7dacb38c02c +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_23.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_23.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d7362d2872ecc15bf0f30459039ceba0b916c01 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_23.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11b81969f7cedc2c445ee117137ae9e1a27160d7a6d229b9d432c312b9d4d426 +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_24.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_24.pt new file mode 100644 index 0000000000000000000000000000000000000000..117e2eab7a8b9184d74450041856ba46dac0a538 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9d4029ae306fcbc97b5cc1dc2831e6ad3440cc3d9e71d0c40197c59bdadebfb +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_26.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_26.pt new file mode 100644 index 0000000000000000000000000000000000000000..c10ead499cce1719ed56035a48e5f42d93433532 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:363e32c87d779c25264b80aa14b31a144af3c0fd31dfaad3c48e17fc0b3b2b78 +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_29.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_29.pt new file mode 100644 index 0000000000000000000000000000000000000000..9846454509d80b2930c10ab9889f5a050d15ff5e --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_29.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12d1d057764764b2c4cdeb03147d92f8b5a48832dc697882be7795aa533db38e +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_3.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7b79c2c17ceece0ed94e772c924f9d67195b01d --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4810576ab640951a79937401963f2b083473d89e48c297816ec86c1ef62c1e55 +size 15149 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_30.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_30.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b1e6df542f1e28b8e50ef14f2d549be6b6b6ade --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380a435104488e79324186c43c9e1b464d5cce25b60355964de5756ae12000c6 +size 15157 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_6.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_6.pt new file mode 100644 index 0000000000000000000000000000000000000000..7610cb067892a08b1c0c80bb007ab943134a4499 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da0399b7e214f3f287b8f26e5f72a6986a4a7c2cbb29907b684666a347cd1b06 +size 15149 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_7.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_7.pt new file mode 100644 index 0000000000000000000000000000000000000000..ddc29cf37372bc5c943161112dc1a8597d46236d --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d0bc3399f0826cb660fd1d73153eaa5e392a155222631094b7c348bcbed82a3 +size 15149 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_8.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_8.pt new file mode 100644 index 0000000000000000000000000000000000000000..29d9281f37e73e77aea7f58b3a7237a9940308fe --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3339f1a57a227117799e996f43393d6bd5f63d49e1ac717827c2e42accf75bfa +size 15149 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/fsdp_config.json b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/fsdp_config.json new file mode 100644 index 0000000000000000000000000000000000000000..504c844e502dfb07beb71c2d5b170c5bfae2dbd3 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/fsdp_config.json @@ -0,0 +1,4 @@ +{ + "FSDP_version": 2, + "world_size": 32 +} \ No newline at end of file diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/chat_template.jinja b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..c2066bd7391c270626e39c9d7124f00360126412 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/chat_template.jinja @@ -0,0 +1 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\n'}}{% endif %} \ No newline at end of file diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/config.json b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/config.json new file mode 100644 index 0000000000000000000000000000000000000000..88c9c17d727c3cac04bf4222ac9ea3779f41ff3f --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/config.json @@ -0,0 +1,60 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151646, + "dtype": "float32", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "max_window_layers": 21, + "model_type": "qwen2", + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000, + "sliding_window": null, + "tie_word_embeddings": false, + "transformers_version": "4.57.6", + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/generation_config.json b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c2d8e16ea86adf9674a298ad1a1bb8f0c6c2d4e --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 151646, + "do_sample": true, + "eos_token_id": 151643, + "temperature": 0.6, + "top_p": 0.95, + "transformers_version": "4.57.6" +} diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/special_tokens_map.json b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer_config.json b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d252dd4e5764106823080946500c02a8ed8c90c9 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer_config.json @@ -0,0 +1,194 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151649": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 16384, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_13.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_13.pt new file mode 100644 index 0000000000000000000000000000000000000000..72c7ad34e067f3de889408243799e448d5b60df0 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892cfc6d56461e0bb8dbedadfbccf91dbacdebefe858e58501e3448989ec50f5 +size 222310687 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_16.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_16.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e6f0634a817928232189f1150709addafc2f8e1 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b42845e23e8f738eb189818e2ce2bfda64e9df63a8f543da086de285fb857f +size 222310687 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_2.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82b7de8de2d88f711f5665042b5ca015540cba0 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efea37dad2b71ac241873eb91df1eadb6ee0eb148379d9cfd617c99d871dd178 +size 222310341 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_30.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_30.pt new file mode 100644 index 0000000000000000000000000000000000000000..043485a1c10ab2da596daa53b0b1a4637af13468 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3011dfac6ef25205fcd9e0afcf8df98f4fe1f6f4567276bf9a9e46eb6b84776 +size 222310687 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_9.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_9.pt new file mode 100644 index 0000000000000000000000000000000000000000..089398ae47c2664f366b7c3c1e383f19455025e4 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe7b21d5cc110be19918763264ee697502790409bc45710b14ecc92d35dc2b5 +size 222310341 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_0.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3860f07ddfcd946978a4c013004b835af35b0255 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6167e5e68ad9868482ef89a8978cfa404fabe3f4a1a799c19a89ee0fa6a30bca +size 280698884 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_10.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_10.pt new file mode 100644 index 0000000000000000000000000000000000000000..72102ccd09d1521bbb40801d567691bb61052942 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529ecf4652ce5d6e4b3b0046cdd91781d7eade4ff36d3eecbc11db669710d79f +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_11.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_11.pt new file mode 100644 index 0000000000000000000000000000000000000000..7495c1f8e034dd684b7391df6045b3aec51dec09 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e1f976d51c029f2582d184605e111f35868963ea548e175a7cab8ac560cc2e3 +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_14.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_14.pt new file mode 100644 index 0000000000000000000000000000000000000000..66d4b49d0fa95eea76ef96ae013815ab4a03ccfe --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:459eccfe682e9d49b7dc7542daf91ea6cfffdeb769fb5f34cd8d59799371dcbf +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_15.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_15.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e501a0523c1556a242d8e81a0f35563fbbc6f8f --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dae175736c4c83b8735c8dd1bfe64a67cefa7ad07ac9d32588a34e0801d61b58 +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_16.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_16.pt new file mode 100644 index 0000000000000000000000000000000000000000..078d42aa568558b76d1ed592dde1b9a00ffd9052 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd35a1a9fc381bc2b53cfed8494eb20bc4594ec76ff34c58fd4ac007fa60727a +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_17.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_17.pt new file mode 100644 index 0000000000000000000000000000000000000000..9cb261cbbd4cb9a275c4aec871a47c4ea83594d6 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d5cfb2b8af12e7a921d7394bbb74dab4c7e26d0f36d1f82eb034bd8b07b714a +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_18.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_18.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb74808680087de71d55ddf04ad8b98ba1ca89e4 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa95ff7004ebd2085490c7b9d15df4359cd62c10a4ba9d7c24aef0c6b5470ba0 +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_21.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_21.pt new file mode 100644 index 0000000000000000000000000000000000000000..8810fc31fca921ee90dd857faa24f3dd559e1012 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d045bf7db3cdf0c8faf5327be663d61caf0f5351fd23d064423ad09d870c85a9 +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_25.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_25.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bca651c565e934396e74faa54569a8d3355262b --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_25.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92f3f49b42e26e24477e15a5434320bb328cdedd7dabeb13ad86597ef5f96ca7 +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_27.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_27.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa6f647fd9ede3e28ab7fff75c366af8404efd9a --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f14c3a3d07e787d9de059d9ec570392708f8c48cd1d91a3b8820610a0ee7ee27 +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_28.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_28.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ab2fa4688cd2e512aef91199d8bd51e17b2ce07 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61c0e3dded2127bf79c6bdf17235e9c1539bc773665595f617621da3053698b4 +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_29.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_29.pt new file mode 100644 index 0000000000000000000000000000000000000000..e85b9211cf71783fe1593fe4f7aa8a0d53a4b92e --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_29.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad06477ad5e6bc3ee265366185d2b969b340a061ef827fb64ac4bd16335cfe9 +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_31.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_31.pt new file mode 100644 index 0000000000000000000000000000000000000000..3550e1ad2efbed1f5478d5906816498a56d85c10 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_31.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e021927fea2a165c6146cfdf3ad8441080ca3601f55d2166ef8da9b90502f88 +size 280699373 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_7.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_7.pt new file mode 100644 index 0000000000000000000000000000000000000000..4bb276586849627190ff9da8134285c252290b29 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd42ea7aeda8c93be0ce4901d3eb1bb6ab96b057c3efd5136f79682763240c7 +size 280698884 diff --git a/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/data.pt b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/data.pt new file mode 100644 index 0000000000000000000000000000000000000000..54e2a2a9ab0bae04a6c8f2837b54976210965b03 --- /dev/null +++ b/mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/data.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c375a220c159d721c3210cc0768d6af6ddeae6debb148ae7a6b8a7468850a1e6 +size 1947