File size: 1,667 Bytes
0f31e57 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | {
"activation": "gelu",
"adam_betas": "0.9,0.95",
"arch": "perceiver",
"args_from": "configs/base.json",
"aug_drop": 0.0,
"aug_flip": true,
"aug_jitter": 0.0,
"aug_rotate": true,
"batch_size": 64,
"behind_emb_dim": 8,
"cache_dir": "hf://usm3d/s23dr-2026-sampled_4096_v2:train",
"conf_clamp_min": null,
"conf_head_wd": 0.1,
"conf_mode": "sinkhorn",
"conf_weight": 0.1,
"cooldown_start": 0,
"cooldown_steps": 0,
"cosine_decay": false,
"cpu": false,
"cross_attn_interval": 4,
"decoder_input_xattn": false,
"decoder_layers": 3,
"deterministic": false,
"dropout": 0.1,
"ema_decay": 0.0,
"encoder_layers": 4,
"endpoint_warmup": 0,
"endpoint_weight": 0.0,
"ff": 1024,
"git_dirty": true,
"git_sha": "5b37dfc70c392936631b59d0bab24f20e4a2b0d9",
"hidden": 256,
"kv_heads_cross": 2,
"kv_heads_self": 2,
"latent_layers": 7,
"latent_tokens": 256,
"learnable_fourier": false,
"length_floor": 0.0,
"lr": 3e-05,
"num_heads": 4,
"out_dir": "runs/validate_155_compiled",
"pre_encoder_layers": 0,
"qk_norm": true,
"qk_norm_type": "l2",
"resume": "runs/validate_155_compiled/20260408_173614_64c7_4670/checkpoints/step125000.pt",
"rms_norm": true,
"seed": 353,
"segment_conf": true,
"segment_param": "midpoint_dir_len",
"segments": 64,
"seq_len": 4096,
"sinkhorn_dustbin": 0.3,
"sinkhorn_eps": 0.1,
"sinkhorn_eps_schedule": "none",
"sinkhorn_eps_start": null,
"sinkhorn_iters": 20,
"sinkhorn_weight": 1.0,
"steps": 135000,
"val_cache_dir": "",
"varifold_cross_only": false,
"varifold_weight": 0.0,
"vote_features": true,
"warmup": 10000,
"weight_decay": 0.01
}
|