{ "format": "openvision_video_vit_reconstruction_export", "source_model": "models/video_vit.py", "source_unified_model": "models/unified_tokenizer.py", "source_config": "configs/unified_video_vit.py", "source_workdir": "gs://data_us_central1_a/experiment/zlt/openvision4//unified_3d_vit/pre_unified_B16_decvit_B1_textdec_L_bz_2_recon_1_lowdim_True_routeakl_False_1e-4_lpips_1_gram_5_contrastive_False_caption_True_capw_1__unified_3d_vit_fp32_20000_res_128_v159/ft/ft_unified_B16_decvit_B1_textdec_L_bz_2_recon_1_lowdim_True_routeakl_True_1e-4_lpips_1_gram_20_contrastive_False_caption_True_capw_1__unified_3d_vit_fp32_2000_res_256_v159/mix/mix_unified_B16_decvit_B1_textdec_L_bz_1_recon_1_lowdim_True_routeakl_True_1e-4_lpips_1_gram_20_contrastive_False_caption_True_capw_1_mixvideo_imgbs_4096_vidbs_256__unified_3d_vit_fp32_1000_res_256_v159", "restored_step": 294386, "init_image_shape": [ 256, 1, 256, 256, 3 ], "encoder": { "variant": "B/16", "patch_size": [ 4, 16, 16 ], "width": 768, "depth": 12, "mlp_dim": 3072, "num_heads": 12, "pool_type": "gap", "post_norm": false, "emb_head_bias": false, "pad_value": 0.0, "in_channels": 3, "patch_dim": 3072, "posemb": "rope3d", "use_dense_general": false, "naflex_supported": false }, "decoder": { "variant": "B/1", "patch_size": [ 1, 1, 1 ], "width": 768, "depth": 12, "mlp_dim": 3072, "num_heads": 12, "pool_type": "gap", "post_norm": false, "emb_head_bias": false, "pad_value": 0.0, "in_channels": 768, "patch_dim": 768, "posemb": "rope3d", "use_dense_general": false, "naflex_supported": false }, "lowdim": { "use_lowdim_image": true, "use_image_lowdim_kl_regularization": true, "image_token_bottleneck_dim": 32, "paths": { "Dense_0": "/home/letianzhang/jax2hf/unified_3d_vit_v159_mix_256/dense/top_dense0.pt", "image_lowdim_down": "/home/letianzhang/jax2hf/unified_3d_vit_v159_mix_256/dense/image_lowdim_down.pt", "image_lowdim_down_logvar": "/home/letianzhang/jax2hf/unified_3d_vit_v159_mix_256/dense/image_lowdim_down_logvar.pt", "image_lowdim_up": "/home/letianzhang/jax2hf/unified_3d_vit_v159_mix_256/dense/image_lowdim_up.pt" } }, "naflex_supported": false, "weights": { "encoder": "encoder/video_vit_pytorch_model.bin", "decoder": "decoder/video_vit_pytorch_model.bin", "dense_dir": "dense" } }