File size: 964 Bytes
cba4934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
{
  "config": {
    "project": "coconut-qwen3",
    "name": "vq-on-latents-qwen3-0.6b-K4096-d64",
    "save_dir": "/dlabscratch/dlabscratch1/Chengheng/mnlp/vq_on_latents/qwen3_0_6b/K_4096_d_64",
    "latents_path": "/dlabscratch/dlabscratch1/Chengheng/mnlp/vq_on_latents/latent_embeddings_qwen3_0_6b_fp32/latents_train.npz",
    "wandb": true,
    "use_fsq": false,
    "vq_latent_dim": 64,
    "vq_num_embeddings": 4096,
    "vq_commitment_cost": 1.0,
    "vq_decay": 0.999,
    "vq_threshold_ema_dead_code": 0,
    "vq_use_cosine_sim": false,
    "vq_orthogonal_reg_weight": 0,
    "vq_orthogonal_reg_max_codes": 128,
    "vq_sample_codebook_temp": 0.0,
    "vq_lib_kmeans_init": false,
    "kmeans_init_from_dataset": true,
    "warmup_steps": 1000,
    "per_position": false,
    "commitment_weight": 0.1,
    "batch_size": 256,
    "num_epochs": 10,
    "lr": 0.0005,
    "weight_decay": 0.01,
    "seed": 0,
    "log_every": 50
  },
  "per_position": false
}