{
  "architectures": [
    "Lfm2ForCausalLM"
  ],
  "block_auto_adjust_ff_dim": true,
  "block_ffn_dim_multiplier": 1.0,
  "block_multiple_of": 256,
  "bos_token_id": 1,
  "conv_L_cache": 3,
  "conv_bias": false,
  "dtype": "bfloat16",
  "eos_token_id": 2,
  "full_attn_idxs": [
    0,
    1,
    2,
    3,
    4,
    5,
    6,
    7
  ],
  "hidden_size": 512,
  "initializer_range": 0.02,
  "intermediate_size": 2048,
  "layer_types": [
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention"
  ],
  "max_position_embeddings": 2048,
  "model_type": "lfm2",
  "norm_eps": 1e-05,
  "num_attention_heads": 8,
  "num_hidden_layers": 8,
  "num_key_value_heads": 4,
  "pad_token_id": 0,
  "quantization_config": {
    "autoround_version": "0.13.0",
    "bits": 4,
    "block_name_to_quantize": "model.layers",
    "data_type": "int",
    "group_size": 128,
    "low_gpu_mem_usage": true,
    "packing_format": "auto_round:auto_gptq",
    "quant_method": "auto-round",
    "seqlen": 1024,
    "sym": true
  },
  "rope_parameters": {
    "rope_theta": 1000000.0,
    "rope_type": "default"
  },
  "tie_word_embeddings": true,
  "transformers_version": "5.10.2",
  "use_cache": true,
  "vocab_size": 50257
}