File size: 1,150 Bytes
030ce34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
{
  "mode": "stitch-tensors",
  "precision": "5.0",
  "alpha_method": "hybrid",
  "model_name_or_path": "Qwen/Qwen3.6-35B-A3B",
  "precision_config": "/mnt/data/machine/krishnateja95/HIGGS_repo/HIGGS_runs/Qwen3_6_30b/qwen_3_6_30b_configs/optimal_precision_5.0_bits_mode_hybrid.json",
  "checkpoint_dirs": {
    "FP16": "/mnt/data/machine/krishnateja95/HIGGS_repo/HIGGS_runs/Qwen3_6_30b/qwen3_6_30b_quantized_layers/checkpoints/fp16",
    "FP8_Dynamic": "/mnt/data/machine/krishnateja95/HIGGS_repo/HIGGS_runs/Qwen3_6_30b/qwen3_6_30b_quantized_layers/checkpoints/fp8_dynamic",
    "NVFP4": "/mnt/data/machine/krishnateja95/HIGGS_repo/HIGGS_runs/Qwen3_6_30b/qwen3_6_30b_quantized_layers/checkpoints/nvfp4"
  },
  "detected_layer_prefix": "model.language_model.layers.",
  "has_shared_expert": true,
  "has_visual_tower": true,
  "has_mtp": false,
  "has_linear_attn": true,
  "num_layer_assignments": 60,
  "regex_target_counts": {
    "FP8_Dynamic": 4,
    "NVFP4": 2,
    "FP16_ignore": 2
  },
  "selected_tensor_counts": {
    "FP16": 825,
    "FP8_Dynamic": 7750,
    "NVFP4": 107940
  },
  "num_output_tensors": 116515,
  "max_shard_size_gb": 5.0
}