VQ-VAE-Coconut-Qwen3-0.6B / manifest.json
Chengheng's picture
Upload manifest.json with huggingface_hub
cba4934 verified
raw
history blame
964 Bytes
{
"config": {
"project": "coconut-qwen3",
"name": "vq-on-latents-qwen3-0.6b-K4096-d64",
"save_dir": "/dlabscratch/dlabscratch1/Chengheng/mnlp/vq_on_latents/qwen3_0_6b/K_4096_d_64",
"latents_path": "/dlabscratch/dlabscratch1/Chengheng/mnlp/vq_on_latents/latent_embeddings_qwen3_0_6b_fp32/latents_train.npz",
"wandb": true,
"use_fsq": false,
"vq_latent_dim": 64,
"vq_num_embeddings": 4096,
"vq_commitment_cost": 1.0,
"vq_decay": 0.999,
"vq_threshold_ema_dead_code": 0,
"vq_use_cosine_sim": false,
"vq_orthogonal_reg_weight": 0,
"vq_orthogonal_reg_max_codes": 128,
"vq_sample_codebook_temp": 0.0,
"vq_lib_kmeans_init": false,
"kmeans_init_from_dataset": true,
"warmup_steps": 1000,
"per_position": false,
"commitment_weight": 0.1,
"batch_size": 256,
"num_epochs": 10,
"lr": 0.0005,
"weight_decay": 0.01,
"seed": 0,
"log_every": 50
},
"per_position": false
}