{ "model_name": "HuggingFaceTB/SmolLM2-360M-Instruct", "total_params": 361821120, "config": { "vocab_size": 49152, "hidden_size": 960, "intermediate_size": 2560, "num_hidden_layers": 32, "num_attention_heads": 15, "num_kv_heads": 5, "head_dim": 64, "max_position_embeddings": 8192, "rms_norm_eps": 1e-05, "rope_theta": 100000, "tie_word_embeddings": true }, "tokenization": { "test_input": "47 + 86", "token_ids": [ 36, 39, 1232, 216, 40, 38 ], "num_tokens": 6, "tokens": [ "4", "7", " +", " ", "8", "6" ] }, "hidden_states": { "num_outputs": 33, "shape": [ 1, 6, 960 ] }, "param_groups": { "Embedding": { "count": 1, "total": 47185920 }, "Layer 0 - Attention": { "count": 4, "total": 2457600 }, "Layer 0 - MLP": { "count": 3, "total": 7372800 }, "Layer 0 - Norms": { "count": 2, "total": 1920 }, "Layer 1 - Attention": { "count": 4, "total": 2457600 }, "Layer 1 - MLP": { "count": 3, "total": 7372800 }, "Layer 1 - Norms": { "count": 2, "total": 1920 }, "Layer 2 - Attention": { "count": 4, "total": 2457600 }, "Layer 2 - MLP": { "count": 3, "total": 7372800 }, "Layer 2 - Norms": { "count": 2, "total": 1920 }, "Layer 3 - Attention": { "count": 4, "total": 2457600 }, "Layer 3 - MLP": { "count": 3, "total": 7372800 }, "Layer 3 - Norms": { "count": 2, "total": 1920 }, "Layer 4 - Attention": { "count": 4, "total": 2457600 }, "Layer 4 - MLP": { "count": 3, "total": 7372800 }, "Layer 4 - Norms": { "count": 2, "total": 1920 }, "Layer 5 - Attention": { "count": 4, "total": 2457600 }, "Layer 5 - MLP": { "count": 3, "total": 7372800 }, "Layer 5 - Norms": { "count": 2, "total": 1920 }, "Layer 6 - Attention": { "count": 4, "total": 2457600 }, "Layer 6 - MLP": { "count": 3, "total": 7372800 }, "Layer 6 - Norms": { "count": 2, "total": 1920 }, "Layer 7 - Attention": { "count": 4, "total": 2457600 }, "Layer 7 - MLP": { "count": 3, "total": 7372800 }, "Layer 7 - Norms": { "count": 2, "total": 1920 }, "Layer 8 - Attention": { "count": 4, "total": 2457600 }, "Layer 8 - MLP": { "count": 3, "total": 7372800 }, "Layer 8 - Norms": { "count": 2, "total": 1920 }, "Layer 9 - Attention": { "count": 4, "total": 2457600 }, "Layer 9 - MLP": { "count": 3, "total": 7372800 }, "Layer 9 - Norms": { "count": 2, "total": 1920 }, "Layer 10 - Attention": { "count": 4, "total": 2457600 }, "Layer 10 - MLP": { "count": 3, "total": 7372800 }, "Layer 10 - Norms": { "count": 2, "total": 1920 }, "Layer 11 - Attention": { "count": 4, "total": 2457600 }, "Layer 11 - MLP": { "count": 3, "total": 7372800 }, "Layer 11 - Norms": { "count": 2, "total": 1920 }, "Layer 12 - Attention": { "count": 4, "total": 2457600 }, "Layer 12 - MLP": { "count": 3, "total": 7372800 }, "Layer 12 - Norms": { "count": 2, "total": 1920 }, "Layer 13 - Attention": { "count": 4, "total": 2457600 }, "Layer 13 - MLP": { "count": 3, "total": 7372800 }, "Layer 13 - Norms": { "count": 2, "total": 1920 }, "Layer 14 - Attention": { "count": 4, "total": 2457600 }, "Layer 14 - MLP": { "count": 3, "total": 7372800 }, "Layer 14 - Norms": { "count": 2, "total": 1920 }, "Layer 15 - Attention": { "count": 4, "total": 2457600 }, "Layer 15 - MLP": { "count": 3, "total": 7372800 }, "Layer 15 - Norms": { "count": 2, "total": 1920 }, "Layer 16 - Attention": { "count": 4, "total": 2457600 }, "Layer 16 - MLP": { "count": 3, "total": 7372800 }, "Layer 16 - Norms": { "count": 2, "total": 1920 }, "Layer 17 - Attention": { "count": 4, "total": 2457600 }, "Layer 17 - MLP": { "count": 3, "total": 7372800 }, "Layer 17 - Norms": { "count": 2, "total": 1920 }, "Layer 18 - Attention": { "count": 4, "total": 2457600 }, "Layer 18 - MLP": { "count": 3, "total": 7372800 }, "Layer 18 - Norms": { "count": 2, "total": 1920 }, "Layer 19 - Attention": { "count": 4, "total": 2457600 }, "Layer 19 - MLP": { "count": 3, "total": 7372800 }, "Layer 19 - Norms": { "count": 2, "total": 1920 }, "Layer 20 - Attention": { "count": 4, "total": 2457600 }, "Layer 20 - MLP": { "count": 3, "total": 7372800 }, "Layer 20 - Norms": { "count": 2, "total": 1920 }, "Layer 21 - Attention": { "count": 4, "total": 2457600 }, "Layer 21 - MLP": { "count": 3, "total": 7372800 }, "Layer 21 - Norms": { "count": 2, "total": 1920 }, "Layer 22 - Attention": { "count": 4, "total": 2457600 }, "Layer 22 - MLP": { "count": 3, "total": 7372800 }, "Layer 22 - Norms": { "count": 2, "total": 1920 }, "Layer 23 - Attention": { "count": 4, "total": 2457600 }, "Layer 23 - MLP": { "count": 3, "total": 7372800 }, "Layer 23 - Norms": { "count": 2, "total": 1920 }, "Layer 24 - Attention": { "count": 4, "total": 2457600 }, "Layer 24 - MLP": { "count": 3, "total": 7372800 }, "Layer 24 - Norms": { "count": 2, "total": 1920 }, "Layer 25 - Attention": { "count": 4, "total": 2457600 }, "Layer 25 - MLP": { "count": 3, "total": 7372800 }, "Layer 25 - Norms": { "count": 2, "total": 1920 }, "Layer 26 - Attention": { "count": 4, "total": 2457600 }, "Layer 26 - MLP": { "count": 3, "total": 7372800 }, "Layer 26 - Norms": { "count": 2, "total": 1920 }, "Layer 27 - Attention": { "count": 4, "total": 2457600 }, "Layer 27 - MLP": { "count": 3, "total": 7372800 }, "Layer 27 - Norms": { "count": 2, "total": 1920 }, "Layer 28 - Attention": { "count": 4, "total": 2457600 }, "Layer 28 - MLP": { "count": 3, "total": 7372800 }, "Layer 28 - Norms": { "count": 2, "total": 1920 }, "Layer 29 - Attention": { "count": 4, "total": 2457600 }, "Layer 29 - MLP": { "count": 3, "total": 7372800 }, "Layer 29 - Norms": { "count": 2, "total": 1920 }, "Layer 30 - Attention": { "count": 4, "total": 2457600 }, "Layer 30 - MLP": { "count": 3, "total": 7372800 }, "Layer 30 - Norms": { "count": 2, "total": 1920 }, "Layer 31 - Attention": { "count": 4, "total": 2457600 }, "Layer 31 - MLP": { "count": 3, "total": 7372800 }, "Layer 31 - Norms": { "count": 2, "total": 1920 }, "Final Norm": { "count": 1, "total": 960 } } }