Chengheng
/

VQ-VAE-Coconut-Qwen3-0.6B

+{
+  "config": {
+    "project": "coconut-qwen3",
+    "name": "vq-on-latents-qwen3-0.6b-K4096-d64",
+    "save_dir": "/dlabscratch/dlabscratch1/Chengheng/mnlp/vq_on_latents/qwen3_0_6b/K_4096_d_64",
+    "latents_path": "/dlabscratch/dlabscratch1/Chengheng/mnlp/vq_on_latents/latent_embeddings_qwen3_0_6b_fp32/latents_train.npz",
+    "wandb": true,
+    "use_fsq": false,
+    "vq_latent_dim": 64,
+    "vq_num_embeddings": 4096,
+    "vq_commitment_cost": 1.0,
+    "vq_decay": 0.999,
+    "vq_threshold_ema_dead_code": 0,
+    "vq_use_cosine_sim": false,
+    "vq_orthogonal_reg_weight": 0,
+    "vq_orthogonal_reg_max_codes": 128,
+    "vq_sample_codebook_temp": 0.0,
+    "vq_lib_kmeans_init": false,
+    "kmeans_init_from_dataset": true,
+    "warmup_steps": 1000,
+    "per_position": false,
+    "commitment_weight": 0.1,
+    "batch_size": 256,
+    "num_epochs": 10,
+    "lr": 0.0005,
+    "weight_decay": 0.01,
+    "seed": 0,
+    "log_every": 50
+  },
+  "per_position": false
+}