{
  "config": {
    "project": "coconut-qwen3",
    "name": "vq-on-latents-qwen3-0.6b-K4096-d64",
    "save_dir": "/dlabscratch/dlabscratch1/Chengheng/mnlp/vq_on_latents/qwen3_0_6b/K_4096_d_64",
    "latents_path": "/dlabscratch/dlabscratch1/Chengheng/mnlp/vq_on_latents/latent_embeddings_qwen3_0_6b_fp32/latents_train.npz",
    "wandb": true,
    "use_fsq": false,
    "vq_latent_dim": 64,
    "vq_num_embeddings": 4096,
    "vq_commitment_cost": 1.0,
    "vq_decay": 0.999,
    "vq_threshold_ema_dead_code": 0,
    "vq_use_cosine_sim": false,
    "vq_orthogonal_reg_weight": 0,
    "vq_orthogonal_reg_max_codes": 128,
    "vq_sample_codebook_temp": 0.0,
    "vq_lib_kmeans_init": false,
    "kmeans_init_from_dataset": true,
    "warmup_steps": 1000,
    "per_position": false,
    "commitment_weight": 0.1,
    "batch_size": 256,
    "num_epochs": 10,
    "lr": 0.0005,
    "weight_decay": 0.01,
    "seed": 0,
    "log_every": 50
  },
  "per_position": false
}