| { | |
| "config": { | |
| "project": "coconut-qwen3", | |
| "name": "vq-on-latents-qwen3-0.6b-K4096-d64", | |
| "save_dir": "/dlabscratch/dlabscratch1/Chengheng/mnlp/vq_on_latents/qwen3_0_6b/K_4096_d_64", | |
| "latents_path": "/dlabscratch/dlabscratch1/Chengheng/mnlp/vq_on_latents/latent_embeddings_qwen3_0_6b_fp32/latents_train.npz", | |
| "wandb": true, | |
| "use_fsq": false, | |
| "vq_latent_dim": 64, | |
| "vq_num_embeddings": 4096, | |
| "vq_commitment_cost": 1.0, | |
| "vq_decay": 0.999, | |
| "vq_threshold_ema_dead_code": 0, | |
| "vq_use_cosine_sim": false, | |
| "vq_orthogonal_reg_weight": 0, | |
| "vq_orthogonal_reg_max_codes": 128, | |
| "vq_sample_codebook_temp": 0.0, | |
| "vq_lib_kmeans_init": false, | |
| "kmeans_init_from_dataset": true, | |
| "warmup_steps": 1000, | |
| "per_position": false, | |
| "commitment_weight": 0.1, | |
| "batch_size": 256, | |
| "num_epochs": 10, | |
| "lr": 0.0005, | |
| "weight_decay": 0.01, | |
| "seed": 0, | |
| "log_every": 50 | |
| }, | |
| "per_position": false | |
| } |