{ "architectures": [ "BitMambaStudent" ], "hidden_size": 768, "num_hidden_layers": 12, "vocab_size": 32768, "d_state": 16, "expand": 2, "model_type": "bitmamba" }