diff --git a/.gitattributes b/.gitattributes index a506ce53db372ed18b2415d00528e35cd1c4ccc4..cae9f65b25c172829f896349ca42b66eaf3c56be 100644 --- a/.gitattributes +++ b/.gitattributes @@ -50,3 +50,4 @@ coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/glob mn-coding-muon-1e4-32k-ds-r1-1.5b-muon-muonlr1e-4-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text coding-muon-1e5-16k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_80/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_300/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text +mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_0.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5a09d46589b38cc477d6b1c3773d36e10124bb4 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d759248aa9bc6f291a0d9a1aaa1db75927ad42d549429b093738d02fbd6f85d4 +size 15149 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_1.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf2e472ebf726d4161c9fa0c75b33d27a2d94b62 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a04725148a648d5cc2a378348a5fe70e3028734d3dddb28eaaa9d4f12ee84d38 +size 15149 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_11.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_11.pt new file mode 100644 index 0000000000000000000000000000000000000000..44992da82afd3e0d8d9d7b8880354729dd7e40f2 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb45cd754d4832e7e4e3ff369b61079c50bec251a177492dc94c08e4abc685e0 +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_14.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_14.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e4c03f9e00fa34e49b45455e1b5b56ca47b15dc --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe42a703804951ae2c25e673230b805868b0d14e05aaa7756064ea455f56192 +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_17.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_17.pt new file mode 100644 index 0000000000000000000000000000000000000000..86728148edb5e7647ee4cae8f92e6390ff36a785 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf694e41f5732b200620235bb6d76187f269a2eb32016505ea5107a57d50e69b +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_18.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_18.pt new file mode 100644 index 0000000000000000000000000000000000000000..a54ba9c351ca16e09d84fa317bc27c9438174c09 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:631ed726829b424aab44b67aad5195a417f3a071662e57686b51c86c8bfdfd9d +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_19.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_19.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0a57e0606e6738c0d9a46890b1206b1584d8297 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c556c439542ebfa9ff753bb51e40b02f3eac8eb1013cd6e05266857867ddc10a +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_2.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..586886b7ac7234123c279bc85f2fd3e589c96e33 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2738f8ae40d4822149d0580e68f1c4785ae5b882414c5f8a8329c122059e51ba +size 15149 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_20.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_20.pt new file mode 100644 index 0000000000000000000000000000000000000000..28dfc55fbe09b273e1d90cbd0c2c29bb9b59f3e5 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bb746b329c4a092a00be9f42392a4e1ae12f22acc288a676e14f2b03d642449 +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_21.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_21.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3479633d982950c80966f4f55bdb0e741d9fda9 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:232204a26aa68eedc6b398e948f9261afdae997dd31ecb44f494cf655c4cc2f8 +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_22.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_22.pt new file mode 100644 index 0000000000000000000000000000000000000000..6003bd640e12585d362e5beb249af4b5ca17935f --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f95b1d9914853e2a9a12f90926f0a2b2939057844a6c1b9609bdf690078b6dbd +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_24.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_24.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e014a7ef1a206fe52f5db2a60c60e39285f373b --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18763b6534750225818be403090085715ff60577b184efdc50b59922ea765f94 +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_25.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_25.pt new file mode 100644 index 0000000000000000000000000000000000000000..2da71bfa76d0179129ca793a0ad1615213f73f67 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_25.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9607ee672bfae25ffd0b2d9bf53e7c894589d72aa56fdf6a3ce2861d1900f5d4 +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_26.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_26.pt new file mode 100644 index 0000000000000000000000000000000000000000..40310cb88e9461e2ff0835cbff56b18d7edca42f --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29635668d2f7ecc3e66bea17b32492cc1caf58896c98a66ba2bbe2c1beff8ddb +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_27.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_27.pt new file mode 100644 index 0000000000000000000000000000000000000000..338c91697c03ba17414ed3fef112528b922be5f2 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c0f97cf56308871e9211ae18a574413a9b2da9cda0cbf81f5f5da793af58f26 +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_28.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_28.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d138b3c73851cc067c3e57f231527b960a84d3b --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65154bc02a2ba6dc924954929e418361ec835e7092228d2c1a58c59649c63c6d +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_29.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_29.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0a97afc5de87590d48130ffc13dbf4975627433 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_29.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c51c1b8a33cac2de4f6073000ceaadcd383b842830b01b4f0aa7caa89815689 +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_3.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..2627162fb45dd78d34bbc0ce3ec9c7a1cb4d7b53 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85faebc18cdc4813e87a8881bd8c662aa70ce45a00503204e058e6511ecc15ea +size 15149 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_30.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_30.pt new file mode 100644 index 0000000000000000000000000000000000000000..88f03d1be7b8709f52fd85b5b0d7a100253de40d --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88652e33dcf17946e05130906fdc3bb04bba52620ca3b4495b9528817b70b441 +size 15157 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_4.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_4.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d8b46a216e7340d5ae7db3bacec89aefaef851a --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f839f3246199210f53e4fe6aa22ccf1464752944dbe54da4df23c8d82d16d75 +size 15149 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_5.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_5.pt new file mode 100644 index 0000000000000000000000000000000000000000..1baaaf78815649106e08ca9dad3b46c3be474fd1 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6808e3294c603ab13d4a5976236f9ceb7ef3bc0038eabb3a2e08a8fee8893f7b +size 15149 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_6.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_6.pt new file mode 100644 index 0000000000000000000000000000000000000000..098ab481952b6fe59956c53cac4967e30932ddc9 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42b47609421822bebecbb81800c6d4bf544111808b7c5358a300920d273b072a +size 15149 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_7.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_7.pt new file mode 100644 index 0000000000000000000000000000000000000000..dfa4f599ef351c397d03577c3109d2203b8a7cc8 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6a5feafe0ae5a95cfbfc9b773a9b93d84228edb2d5a4d18b8e5b777ec6546e +size 15149 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_8.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_8.pt new file mode 100644 index 0000000000000000000000000000000000000000..72d7f9bfa37b146a4d1d351958bb9ff520c197bc --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae4f1f602cc94f4757def13e2bd781ffaac556eabdc5afd68ea54c84aac6d0c7 +size 15149 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_9.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_9.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbc4ede41b45e81df082cd0c6add4696c935e565 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/extra_state_world_size_32_rank_9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653f0ff91c0abc457531ac4b4f1de8973be53212fa429a154b56c561aa82ddc0 +size 15149 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/fsdp_config.json b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/fsdp_config.json new file mode 100644 index 0000000000000000000000000000000000000000..504c844e502dfb07beb71c2d5b170c5bfae2dbd3 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/fsdp_config.json @@ -0,0 +1,4 @@ +{ + "FSDP_version": 2, + "world_size": 32 +} \ No newline at end of file diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/chat_template.jinja b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..c2066bd7391c270626e39c9d7124f00360126412 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/chat_template.jinja @@ -0,0 +1 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\n'}}{% endif %} \ No newline at end of file diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/config.json b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/config.json new file mode 100644 index 0000000000000000000000000000000000000000..88c9c17d727c3cac04bf4222ac9ea3779f41ff3f --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/config.json @@ -0,0 +1,60 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151646, + "dtype": "float32", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "max_window_layers": 21, + "model_type": "qwen2", + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000, + "sliding_window": null, + "tie_word_embeddings": false, + "transformers_version": "4.57.6", + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/generation_config.json b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4c2d8e16ea86adf9674a298ad1a1bb8f0c6c2d4e --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 151646, + "do_sample": true, + "eos_token_id": 151643, + "temperature": 0.6, + "top_p": 0.95, + "transformers_version": "4.57.6" +} diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/special_tokens_map.json b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer_config.json b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d252dd4e5764106823080946500c02a8ed8c90c9 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/huggingface/tokenizer_config.json @@ -0,0 +1,194 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151649": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 16384, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_0.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ec00c18b3356ae24e007e2141e00536fe7d73d2 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60dcd4d7e23007efd7b7ca8ebe4904fbd5ccbe399d6b27b40084e79f8065b404 +size 222310341 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_17.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_17.pt new file mode 100644 index 0000000000000000000000000000000000000000..aedf5caa6f58472be680e221924c5a7d0cb34986 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a026d660359d84ba801ace235c7d436ee7549ccf5a6eb048305a29b38d9874a +size 222310687 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_29.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_29.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82d56b0172fb3dd30caa776254846d22677520d --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_29.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eb9e0eee2e14da5730a7cfea6c3d5d7c700a5dc76dd92905927cc13649156b1 +size 222310687 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_31.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_31.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d1667c6d4386f314d01b985314753aa23223b9a --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_31.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ddc7d1184491b8400dea2dc12b2a1af2235c0b852e6fcaf9a1205c44d54b950 +size 222310687 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_6.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_6.pt new file mode 100644 index 0000000000000000000000000000000000000000..607a0a22ed39620122a7f0e079ac963275596dc9 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/model_world_size_32_rank_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c6afea4fe811f8240b6c9a40411bee0b8131c065ae44c8369940da2ae92b490 +size 222310341 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_11.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_11.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ebd1b81fa10a294ee13eae4389640b7833eae68 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6743e5a0d1ebaeb389cfbc0f49acfc7078401db344536a029b767232f3606a77 +size 280699373 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_14.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_14.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ff34831c00baf22d35e199faa7a65384e85d37c --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5108fabf9289973a572ad79a85c99dcb3ae2c13e7778f6eedec086811ea5ebee +size 280699373 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_17.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_17.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad77b391c2ecda668b57ea6f477fc3c313db6d2f --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d03a0fe958c841ecfc0c2d115874e76b61da259fbe94b0779fa18f341837b29f +size 280699373 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_19.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_19.pt new file mode 100644 index 0000000000000000000000000000000000000000..54a72ad0bf2155c76446b0a36fb917bfba4ee2a0 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11b773c13756af4d0dc35878660e01294e37b7327adb8a715dc7f177773c46b +size 280699373 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_21.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_21.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffb46cab3587239e5a0bc32c195712f9c968ff13 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e609aa6b9d472e1e76bc4fa10790633e7cd77396194ee92f20a81774d95942b1 +size 280699373 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_23.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_23.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5951457eaa561cedf323ea3c67bed87dc5c2ae5 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_23.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:872c95a19238bc029bdceac7d11a86134983dcba8dd9bdade8ba88d9a109bf9d +size 280699373 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_24.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_24.pt new file mode 100644 index 0000000000000000000000000000000000000000..e81ca972e11a89b6b2bad7494f4f457bb37c75e0 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f0f502070c770b0ed82dbd10243e1a72cdfb438e86c35b8b0c172166c4f5cb9 +size 280699373 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_28.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_28.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a869f41be43e547355755ca3a50686eba1ab280 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccff5f0a8914b6dddf65ed932f23486caff98ad375c5f61e982d111e8620d692 +size 280699373 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_31.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_31.pt new file mode 100644 index 0000000000000000000000000000000000000000..fae11fd5a9f2c33182ca9a286ad3055535f3bcef --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_31.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d5da80fd166f9b6f4d70e000444e80d036b0397b41b237b8d860ef1dca3e309 +size 280699373 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_4.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_4.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0b55d5df2e353fc2aa3b31a107ceab4e53e8aa0 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59436f7a985be6461b84b5f958dc09c68fac7acd6a895abf3344d22998ee3ebc +size 280698884 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_6.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_6.pt new file mode 100644 index 0000000000000000000000000000000000000000..50764e196efe8a57b20cf92e23fbf8b109ebde05 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b717e523377d4b9538a758d0bce852beb6a0362ab0b1767244286be4558b1793 +size 280698884 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_9.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_9.pt new file mode 100644 index 0000000000000000000000000000000000000000..408c8f7f5ac569e8148b61c9cc93dca1eaa22471 --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/actor/optim_world_size_32_rank_9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b898b2e570209428d9eb63140bcaacbfde59059bef90cf11afbc67ebaf36053 +size 280698884 diff --git a/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/data.pt b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/data.pt new file mode 100644 index 0000000000000000000000000000000000000000..942c2b5d3942da719d456a54ea5e51d687fba59f --- /dev/null +++ b/mn-coding-muon-1e5-32k-ds-r1-1.5b-muon-muonlr1e-5-spectral_norm-muonadamlr1e-6/global_step_280/data.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:203d80ec6301db396f39e3dcb2bb338ceeb1e3132ab65d9e27aa7cb77a9f4e89 +size 1947