{
  "bits": 4,
  "data_type": "int",
  "group_size": 128,
  "sym": true,
  "low_gpu_mem_usage": true,
  "autoround_version": "0.13.1",
  "block_name_to_quantize": "model.layers",
  "quant_method": "auto-round",
  "packing_format": "auto_round:auto_gptq",
  "extra_config": {
    ".*model\\.layers\\.(?:[1-9]|[1-3][0-9]|4[0-8])\\.mlp\\.gate.*": {
      "bits": 16,
      "data_type": "float"
    }
  }
}