{ "activation": "swiglu", "bias": false, "dim_feedforward": 3072, "dpt_features": 128, "dpt_out_channels": [ 96, 192, 384, 768 ], "dpt_out_layers": null, "dropout": 0.0, "gaussian_dim": 14, "gaussian_encoder_norm_type": "rms_norm", "include_alpha": false, "latent_dim": 768, "norm_first": true, "norm_type": "rms_norm", "num_heads": 6, "num_layers": 12, "num_register_tokens": 16, "patch_size": 8, "pe_type": "rope", "pos_dim": 3, "pos_pe_num_freqs": 12, "qk_norm": true, "rope_double_max_freq": false, "turn_to_cam_coord": true, "use_dpt_decoder": true, "use_ldr": false, "vdir_num_freqs": 0, "vdir_pe_type": "nerf", "view_indep_qk_norm": true, "view_transformer_ffn_hidden_dim": 3072, "view_transformer_include_self_attn": true, "view_transformer_latent_dim": 768, "view_transformer_n_heads": 6, "view_transformer_n_layers": 6, "view_transformer_use_swin_attn": false }