{
  "model_type": "sortformer",
  "num_speakers": 4,
  "ats_weight": 0.5,
  "pil_weight": 0.5,
  "dtype": "float16",
  "fc_encoder_config": {
    "model_type": "sortformer_fc_encoder",
    "hidden_size": 512,
    "num_hidden_layers": 17,
    "num_attention_heads": 8,
    "num_key_value_heads": 8,
    "intermediate_size": 2048,
    "hidden_act": "silu",
    "num_mel_bins": 128,
    "conv_kernel_size": 9,
    "subsampling_factor": 8,
    "subsampling_conv_channels": 256,
    "subsampling_conv_kernel_size": 3,
    "subsampling_conv_stride": 2,
    "max_position_embeddings": 5000,
    "attention_bias": true,
    "scale_input": true
  },
  "tf_encoder_config": {
    "model_type": "sortformer_tf_encoder",
    "d_model": 192,
    "encoder_layers": 18,
    "encoder_attention_heads": 8,
    "encoder_ffn_dim": 768,
    "activation_function": "relu",
    "max_source_positions": 1500,
    "k_proj_bias": true
  },
  "modules_config": {
    "model_type": "sortformer_modules",
    "num_speakers": 4,
    "fc_d_model": 512,
    "tf_d_model": 192,
    "subsampling_factor": 8,
    "chunk_len": 188,
    "fifo_len": 0,
    "spkcache_len": 188,
    "spkcache_update_period": 188,
    "chunk_left_context": 1,
    "chunk_right_context": 1,
    "spkcache_sil_frames_per_spk": 3,
    "causal_attn_rc": 7,
    "scores_boost_latest": 0.05,
    "sil_threshold": 0.2,
    "pred_score_threshold": 0.25,
    "strong_boost_rate": 0.75,
    "weak_boost_rate": 1.5,
    "min_pos_scores_rate": 0.5,
    "max_index": 99999,
    "use_aosc": true
  },
  "processor_config": {
    "feature_size": 128,
    "sampling_rate": 16000,
    "hop_length": 160,
    "n_fft": 512,
    "win_length": 400,
    "preemphasis": 0.97
  }
}