{ "assets": [ { "metadata": {}, "name": "droid", "safetensors": "droid.safetensors", "safetensors_sha256": "6a79773c4f21c2d896479a992e677e890517ecbd2b35ba8ae1777d9f865050b1", "source": { "filename": "droid.pth", "gdrive_id": "1PpqVt1H4maBa_GbPJp4NwxRsd9jk-elh", "source": "princeton-vl/DROID-SLAM" }, "source_sha256": "46476ef64cde45a97504910d6f3de2eef7b398ec1c6e4e668815c29076024526", "tensor_count": 102, "tensors": { "module.cnet.conv1.bias": { "dtype": "float32", "shape": [ 32 ] }, "module.cnet.conv1.weight": { "dtype": "float32", "shape": [ 32, 3, 7, 7 ] }, "module.cnet.conv2.bias": { "dtype": "float32", "shape": [ 256 ] }, "module.cnet.conv2.weight": { "dtype": "float32", "shape": [ 256, 128, 1, 1 ] }, "module.cnet.layer1.0.conv1.bias": { "dtype": "float32", "shape": [ 32 ] }, "module.cnet.layer1.0.conv1.weight": { "dtype": "float32", "shape": [ 32, 32, 3, 3 ] }, "module.cnet.layer1.0.conv2.bias": { "dtype": "float32", "shape": [ 32 ] }, "module.cnet.layer1.0.conv2.weight": { "dtype": "float32", "shape": [ 32, 32, 3, 3 ] }, "module.cnet.layer1.1.conv1.bias": { "dtype": "float32", "shape": [ 32 ] }, "module.cnet.layer1.1.conv1.weight": { "dtype": "float32", "shape": [ 32, 32, 3, 3 ] }, "module.cnet.layer1.1.conv2.bias": { "dtype": "float32", "shape": [ 32 ] }, "module.cnet.layer1.1.conv2.weight": { "dtype": "float32", "shape": [ 32, 32, 3, 3 ] }, "module.cnet.layer2.0.conv1.bias": { "dtype": "float32", "shape": [ 64 ] }, "module.cnet.layer2.0.conv1.weight": { "dtype": "float32", "shape": [ 64, 32, 3, 3 ] }, "module.cnet.layer2.0.conv2.bias": { "dtype": "float32", "shape": [ 64 ] }, "module.cnet.layer2.0.conv2.weight": { "dtype": "float32", "shape": [ 64, 64, 3, 3 ] }, "module.cnet.layer2.0.downsample.0.bias": { "dtype": "float32", "shape": [ 64 ] }, "module.cnet.layer2.0.downsample.0.weight": { "dtype": "float32", "shape": [ 64, 32, 1, 1 ] }, "module.cnet.layer2.1.conv1.bias": { "dtype": "float32", "shape": [ 64 ] }, "module.cnet.layer2.1.conv1.weight": { "dtype": "float32", "shape": [ 64, 64, 3, 3 ] }, "module.cnet.layer2.1.conv2.bias": { "dtype": "float32", "shape": [ 64 ] }, "module.cnet.layer2.1.conv2.weight": { "dtype": "float32", "shape": [ 64, 64, 3, 3 ] }, "module.cnet.layer3.0.conv1.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.cnet.layer3.0.conv1.weight": { "dtype": "float32", "shape": [ 128, 64, 3, 3 ] }, "module.cnet.layer3.0.conv2.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.cnet.layer3.0.conv2.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "module.cnet.layer3.0.downsample.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.cnet.layer3.0.downsample.0.weight": { "dtype": "float32", "shape": [ 128, 64, 1, 1 ] }, "module.cnet.layer3.1.conv1.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.cnet.layer3.1.conv1.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "module.cnet.layer3.1.conv2.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.cnet.layer3.1.conv2.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "module.fnet.conv1.bias": { "dtype": "float32", "shape": [ 32 ] }, "module.fnet.conv1.weight": { "dtype": "float32", "shape": [ 32, 3, 7, 7 ] }, "module.fnet.conv2.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.fnet.conv2.weight": { "dtype": "float32", "shape": [ 128, 128, 1, 1 ] }, "module.fnet.layer1.0.conv1.bias": { "dtype": "float32", "shape": [ 32 ] }, "module.fnet.layer1.0.conv1.weight": { "dtype": "float32", "shape": [ 32, 32, 3, 3 ] }, "module.fnet.layer1.0.conv2.bias": { "dtype": "float32", "shape": [ 32 ] }, "module.fnet.layer1.0.conv2.weight": { "dtype": "float32", "shape": [ 32, 32, 3, 3 ] }, "module.fnet.layer1.1.conv1.bias": { "dtype": "float32", "shape": [ 32 ] }, "module.fnet.layer1.1.conv1.weight": { "dtype": "float32", "shape": [ 32, 32, 3, 3 ] }, "module.fnet.layer1.1.conv2.bias": { "dtype": "float32", "shape": [ 32 ] }, "module.fnet.layer1.1.conv2.weight": { "dtype": "float32", "shape": [ 32, 32, 3, 3 ] }, "module.fnet.layer2.0.conv1.bias": { "dtype": "float32", "shape": [ 64 ] }, "module.fnet.layer2.0.conv1.weight": { "dtype": "float32", "shape": [ 64, 32, 3, 3 ] }, "module.fnet.layer2.0.conv2.bias": { "dtype": "float32", "shape": [ 64 ] }, "module.fnet.layer2.0.conv2.weight": { "dtype": "float32", "shape": [ 64, 64, 3, 3 ] }, "module.fnet.layer2.0.downsample.0.bias": { "dtype": "float32", "shape": [ 64 ] }, "module.fnet.layer2.0.downsample.0.weight": { "dtype": "float32", "shape": [ 64, 32, 1, 1 ] }, "module.fnet.layer2.1.conv1.bias": { "dtype": "float32", "shape": [ 64 ] }, "module.fnet.layer2.1.conv1.weight": { "dtype": "float32", "shape": [ 64, 64, 3, 3 ] }, "module.fnet.layer2.1.conv2.bias": { "dtype": "float32", "shape": [ 64 ] }, "module.fnet.layer2.1.conv2.weight": { "dtype": "float32", "shape": [ 64, 64, 3, 3 ] }, "module.fnet.layer3.0.conv1.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.fnet.layer3.0.conv1.weight": { "dtype": "float32", "shape": [ 128, 64, 3, 3 ] }, "module.fnet.layer3.0.conv2.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.fnet.layer3.0.conv2.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "module.fnet.layer3.0.downsample.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.fnet.layer3.0.downsample.0.weight": { "dtype": "float32", "shape": [ 128, 64, 1, 1 ] }, "module.fnet.layer3.1.conv1.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.fnet.layer3.1.conv1.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "module.fnet.layer3.1.conv2.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.fnet.layer3.1.conv2.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "module.update.agg.conv1.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.agg.conv1.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "module.update.agg.conv2.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.agg.conv2.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "module.update.agg.eta.0.bias": { "dtype": "float32", "shape": [ 1 ] }, "module.update.agg.eta.0.weight": { "dtype": "float32", "shape": [ 1, 128, 3, 3 ] }, "module.update.agg.upmask.0.bias": { "dtype": "float32", "shape": [ 576 ] }, "module.update.agg.upmask.0.weight": { "dtype": "float32", "shape": [ 576, 128, 1, 1 ] }, "module.update.corr_encoder.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.corr_encoder.0.weight": { "dtype": "float32", "shape": [ 128, 196, 1, 1 ] }, "module.update.corr_encoder.2.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.corr_encoder.2.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "module.update.delta.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.delta.0.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "module.update.delta.2.bias": { "dtype": "float32", "shape": [ 3 ] }, "module.update.delta.2.weight": { "dtype": "float32", "shape": [ 3, 128, 3, 3 ] }, "module.update.flow_encoder.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.flow_encoder.0.weight": { "dtype": "float32", "shape": [ 128, 4, 7, 7 ] }, "module.update.flow_encoder.2.bias": { "dtype": "float32", "shape": [ 64 ] }, "module.update.flow_encoder.2.weight": { "dtype": "float32", "shape": [ 64, 128, 3, 3 ] }, "module.update.gru.convq.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.gru.convq.weight": { "dtype": "float32", "shape": [ 128, 448, 3, 3 ] }, "module.update.gru.convq_glo.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.gru.convq_glo.weight": { "dtype": "float32", "shape": [ 128, 128, 1, 1 ] }, "module.update.gru.convr.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.gru.convr.weight": { "dtype": "float32", "shape": [ 128, 448, 3, 3 ] }, "module.update.gru.convr_glo.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.gru.convr_glo.weight": { "dtype": "float32", "shape": [ 128, 128, 1, 1 ] }, "module.update.gru.convz.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.gru.convz.weight": { "dtype": "float32", "shape": [ 128, 448, 3, 3 ] }, "module.update.gru.convz_glo.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.gru.convz_glo.weight": { "dtype": "float32", "shape": [ 128, 128, 1, 1 ] }, "module.update.gru.w.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.gru.w.weight": { "dtype": "float32", "shape": [ 128, 128, 1, 1 ] }, "module.update.weight.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "module.update.weight.0.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "module.update.weight.2.bias": { "dtype": "float32", "shape": [ 3 ] }, "module.update.weight.2.weight": { "dtype": "float32", "shape": [ 3, 128, 3, 3 ] } } }, { "metadata": {}, "name": "metric3d", "safetensors": "metric3d.safetensors", "safetensors_sha256": "07f829a274e32f3a0617834ce37689cf6fa621caa02ff177298a30847dc1d8d5", "source": { "filename": "metric_depth_vit_large_800k.pth", "repo_id": "JUGGHM/Metric3D", "repo_type": "model" }, "source_sha256": "15328ffc42b528b95f188687418f6f03b3f123eb34ccdbd686c112abbea6d972", "tensor_count": 529, "tensors": { "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.conv1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.conv1.weight": { "dtype": "float32", "shape": [ 128, 256, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.conv2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.conv2.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.downsample.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.downsample.0.weight": { "dtype": "float32", "shape": [ 128, 256, 1, 1 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.downsample.1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.downsample.1.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.norm1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.norm1.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.norm2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.norm2.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.norm3.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.0.norm3.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.0.1.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.conv1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.conv1.weight": { "dtype": "float32", "shape": [ 128, 256, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.conv2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.conv2.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.downsample.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.downsample.0.weight": { "dtype": "float32", "shape": [ 128, 256, 1, 1 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.downsample.1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.downsample.1.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.norm1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.norm1.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.norm2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.norm2.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.norm3.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.0.norm3.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs04.1.1.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.conv1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.conv1.weight": { "dtype": "float32", "shape": [ 128, 512, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.conv2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.conv2.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.downsample.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.downsample.0.weight": { "dtype": "float32", "shape": [ 128, 512, 1, 1 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.downsample.1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.downsample.1.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.norm1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.norm1.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.norm2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.norm2.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.norm3.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.0.norm3.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.0.1.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.conv1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.conv1.weight": { "dtype": "float32", "shape": [ 128, 512, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.conv2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.conv2.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.downsample.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.downsample.0.weight": { "dtype": "float32", "shape": [ 128, 512, 1, 1 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.downsample.1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.downsample.1.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.norm1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.norm1.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.norm2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.norm2.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.norm3.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.0.norm3.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs08.1.1.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.conv1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.conv1.weight": { "dtype": "float32", "shape": [ 128, 1024, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.conv2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.conv2.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.downsample.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.downsample.0.weight": { "dtype": "float32", "shape": [ 128, 1024, 1, 1 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.downsample.1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.downsample.1.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.norm1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.norm1.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.norm2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.norm2.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.norm3.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.0.norm3.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.0.1.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.conv1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.conv1.weight": { "dtype": "float32", "shape": [ 128, 1024, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.conv2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.conv2.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.downsample.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.downsample.0.weight": { "dtype": "float32", "shape": [ 128, 1024, 1, 1 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.downsample.1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.downsample.1.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.norm1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.norm1.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.norm2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.norm2.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.norm3.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.0.norm3.weight": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.1.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.context_feature_encoder.outputs16.1.1.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_zqr_convs.0.bias": { "dtype": "float32", "shape": [ 384 ] }, "model_state_dict.depth_model.decoder.context_zqr_convs.0.weight": { "dtype": "float32", "shape": [ 384, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_zqr_convs.1.bias": { "dtype": "float32", "shape": [ 384 ] }, "model_state_dict.depth_model.decoder.context_zqr_convs.1.weight": { "dtype": "float32", "shape": [ 384, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.context_zqr_convs.2.bias": { "dtype": "float32", "shape": [ 384 ] }, "model_state_dict.depth_model.decoder.context_zqr_convs.2.weight": { "dtype": "float32", "shape": [ 384, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_1.out_conv.bias": { "dtype": "float32", "shape": [ 258 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_1.out_conv.weight": { "dtype": "float32", "shape": [ 258, 512, 1, 1 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_1.way_branch.conv1.bias": { "dtype": "float32", "shape": [ 512 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_1.way_branch.conv1.weight": { "dtype": "float32", "shape": [ 512, 512, 3, 3 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_1.way_branch.conv2.bias": { "dtype": "float32", "shape": [ 512 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_1.way_branch.conv2.weight": { "dtype": "float32", "shape": [ 512, 512, 3, 3 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_1.way_trunk.conv1.bias": { "dtype": "float32", "shape": [ 512 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_1.way_trunk.conv1.weight": { "dtype": "float32", "shape": [ 512, 512, 3, 3 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_1.way_trunk.conv2.bias": { "dtype": "float32", "shape": [ 512 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_1.way_trunk.conv2.weight": { "dtype": "float32", "shape": [ 512, 512, 3, 3 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_2.out_conv.bias": { "dtype": "float32", "shape": [ 512 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_2.out_conv.weight": { "dtype": "float32", "shape": [ 512, 1024, 1, 1 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_2.way_branch.conv1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_2.way_branch.conv1.weight": { "dtype": "float32", "shape": [ 1024, 1024, 3, 3 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_2.way_branch.conv2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_2.way_branch.conv2.weight": { "dtype": "float32", "shape": [ 1024, 1024, 3, 3 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_2.way_trunk.conv1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_2.way_trunk.conv1.weight": { "dtype": "float32", "shape": [ 1024, 1024, 3, 3 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_2.way_trunk.conv2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_2.way_trunk.conv2.weight": { "dtype": "float32", "shape": [ 1024, 1024, 3, 3 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_3.out_conv.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_3.out_conv.weight": { "dtype": "float32", "shape": [ 1024, 1024, 1, 1 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_3.way_trunk.conv1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_3.way_trunk.conv1.weight": { "dtype": "float32", "shape": [ 1024, 1024, 3, 3 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_3.way_trunk.conv2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.decoder.decoder_mono.upconv_3.way_trunk.conv2.weight": { "dtype": "float32", "shape": [ 1024, 1024, 3, 3 ] }, "model_state_dict.depth_model.decoder.depth_regressor.0.bias": { "dtype": "float32", "shape": [ 256 ] }, "model_state_dict.depth_model.decoder.depth_regressor.0.weight": { "dtype": "float32", "shape": [ 256, 256, 3, 3 ] }, "model_state_dict.depth_model.decoder.depth_regressor.2.bias": { "dtype": "float32", "shape": [ 256 ] }, "model_state_dict.depth_model.decoder.depth_regressor.2.weight": { "dtype": "float32", "shape": [ 256, 256, 1, 1 ] }, "model_state_dict.depth_model.decoder.normal_predictor.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.normal_predictor.0.weight": { "dtype": "float32", "shape": [ 128, 256, 3, 3 ] }, "model_state_dict.depth_model.decoder.normal_predictor.2.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.normal_predictor.2.weight": { "dtype": "float32", "shape": [ 128, 128, 1, 1 ] }, "model_state_dict.depth_model.decoder.normal_predictor.4.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.normal_predictor.4.weight": { "dtype": "float32", "shape": [ 128, 128, 1, 1 ] }, "model_state_dict.depth_model.decoder.normal_predictor.6.bias": { "dtype": "float32", "shape": [ 3 ] }, "model_state_dict.depth_model.decoder.normal_predictor.6.weight": { "dtype": "float32", "shape": [ 3, 128, 1, 1 ] }, "model_state_dict.depth_model.decoder.token2feature.read_0.readoper.project_learn.weight": { "dtype": "float32", "shape": [ 1024, 5120 ] }, "model_state_dict.depth_model.decoder.token2feature.read_0.readoper.project_patch.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.decoder.token2feature.read_0.readoper.project_patch.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.decoder.token2feature.read_0.sample.0.bias": { "dtype": "float32", "shape": [ 256 ] }, "model_state_dict.depth_model.decoder.token2feature.read_0.sample.0.weight": { "dtype": "float32", "shape": [ 256, 1024, 1, 1 ] }, "model_state_dict.depth_model.decoder.token2feature.read_1.readoper.project_learn.weight": { "dtype": "float32", "shape": [ 1024, 5120 ] }, "model_state_dict.depth_model.decoder.token2feature.read_1.readoper.project_patch.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.decoder.token2feature.read_1.readoper.project_patch.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.decoder.token2feature.read_1.sample.bias": { "dtype": "float32", "shape": [ 512 ] }, "model_state_dict.depth_model.decoder.token2feature.read_1.sample.weight": { "dtype": "float32", "shape": [ 1024, 512, 2, 2 ] }, "model_state_dict.depth_model.decoder.token2feature.read_2.readoper.project_learn.weight": { "dtype": "float32", "shape": [ 1024, 5120 ] }, "model_state_dict.depth_model.decoder.token2feature.read_2.readoper.project_patch.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.decoder.token2feature.read_2.readoper.project_patch.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.decoder.token2feature.read_3.readoper.project_learn.weight": { "dtype": "float32", "shape": [ 1024, 5120 ] }, "model_state_dict.depth_model.decoder.token2feature.read_3.readoper.project_patch.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.decoder.token2feature.read_3.readoper.project_patch.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.decoder.update_block.flow_head.conv1d.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.update_block.flow_head.conv1d.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.flow_head.conv1n.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.update_block.flow_head.conv1n.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.flow_head.conv2d.bias": { "dtype": "float32", "shape": [ 2 ] }, "model_state_dict.depth_model.decoder.update_block.flow_head.conv2d.weight": { "dtype": "float32", "shape": [ 2, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.flow_head.conv2n.bias": { "dtype": "float32", "shape": [ 4 ] }, "model_state_dict.depth_model.decoder.update_block.flow_head.conv2n.weight": { "dtype": "float32", "shape": [ 4, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.gru08.convq.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.update_block.gru08.convq.weight": { "dtype": "float32", "shape": [ 128, 262, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.gru08.convr.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.update_block.gru08.convr.weight": { "dtype": "float32", "shape": [ 128, 262, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.gru08.convz.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.update_block.gru08.convz.weight": { "dtype": "float32", "shape": [ 128, 262, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.gru16.convq.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.update_block.gru16.convq.weight": { "dtype": "float32", "shape": [ 128, 384, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.gru16.convr.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.update_block.gru16.convr.weight": { "dtype": "float32", "shape": [ 128, 384, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.gru16.convz.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.update_block.gru16.convz.weight": { "dtype": "float32", "shape": [ 128, 384, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.gru32.convq.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.update_block.gru32.convq.weight": { "dtype": "float32", "shape": [ 128, 256, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.gru32.convr.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.update_block.gru32.convr.weight": { "dtype": "float32", "shape": [ 128, 256, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.gru32.convz.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.update_block.gru32.convz.weight": { "dtype": "float32", "shape": [ 128, 256, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.mask.0.bias": { "dtype": "float32", "shape": [ 128 ] }, "model_state_dict.depth_model.decoder.update_block.mask.0.weight": { "dtype": "float32", "shape": [ 128, 128, 3, 3 ] }, "model_state_dict.depth_model.decoder.update_block.mask.2.bias": { "dtype": "float32", "shape": [ 144 ] }, "model_state_dict.depth_model.decoder.update_block.mask.2.weight": { "dtype": "float32", "shape": [ 144, 128, 1, 1 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.0.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.1.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.10.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.11.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.12.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.13.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.14.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.15.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.16.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.17.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.18.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.19.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.2.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.20.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.21.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.22.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.23.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.3.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.4.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.5.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.6.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.7.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.8.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.attn.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.attn.proj.weight": { "dtype": "float32", "shape": [ 1024, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.attn.qkv.bias": { "dtype": "float32", "shape": [ 3072 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.attn.qkv.weight": { "dtype": "float32", "shape": [ 3072, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.ls1.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.ls2.gamma": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.mlp.fc1.bias": { "dtype": "float32", "shape": [ 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.mlp.fc1.weight": { "dtype": "float32", "shape": [ 4096, 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.mlp.fc2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.mlp.fc2.weight": { "dtype": "float32", "shape": [ 1024, 4096 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.norm1.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.norm1.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.norm2.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.blocks.0.9.norm2.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.cls_token": { "dtype": "float32", "shape": [ 1, 1, 1024 ] }, "model_state_dict.depth_model.encoder.norm.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.norm.weight": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.patch_embed.proj.bias": { "dtype": "float32", "shape": [ 1024 ] }, "model_state_dict.depth_model.encoder.patch_embed.proj.weight": { "dtype": "float32", "shape": [ 1024, 3, 14, 14 ] }, "model_state_dict.depth_model.encoder.pos_embed": { "dtype": "float32", "shape": [ 1, 1370, 1024 ] }, "model_state_dict.depth_model.encoder.register_tokens": { "dtype": "float32", "shape": [ 1, 4, 1024 ] } } }, { "metadata": { "J_regressor": { "kind": "csc_matrix", "shape": [ 16, 778 ] }, "bs_style": { "kind": "str", "repr": "'lbs'" }, "bs_type": { "kind": "str", "repr": "'lrotmin'" } }, "name": "MANO_RIGHT", "safetensors": "MANO_RIGHT.safetensors", "safetensors_sha256": "8842f74bf128f8f495dc199747d70c37805b712a99101f46e082acd51b1b726c", "source": { "filename": "assets/data/mano/MANO_RIGHT.pkl", "license_provenance": "public Hugging Face mirror, not official MANO distribution", "repo_id": "menyifang/MIMO_VidDecomp", "repo_type": "model" }, "source_sha256": "45d60aa3b27ef9107a7afd4e00808f307fd91111e1cfa35afd5c4a62de264767", "tensor_count": 13, "tensors": { "J": { "dtype": "float64", "shape": [ 16, 3 ] }, "J_regressor.data": { "dtype": "float64", "shape": [ 1896 ] }, "J_regressor.indices": { "dtype": "int32", "shape": [ 1896 ] }, "J_regressor.indptr": { "dtype": "int32", "shape": [ 779 ] }, "f": { "dtype": "uint32", "shape": [ 1538, 3 ] }, "hands_coeffs": { "dtype": "float64", "shape": [ 1554, 45 ] }, "hands_components": { "dtype": "float64", "shape": [ 45, 45 ] }, "hands_mean": { "dtype": "float64", "shape": [ 45 ] }, "kintree_table": { "dtype": "int64", "shape": [ 2, 16 ] }, "posedirs": { "dtype": "float64", "shape": [ 778, 3, 135 ] }, "shapedirs": { "dtype": "float64", "shape": [ 778, 3, 10 ] }, "v_template": { "dtype": "float64", "shape": [ 778, 3 ] }, "weights": { "dtype": "float64", "shape": [ 778, 16 ] } } }, { "metadata": { "J_regressor": { "kind": "csc_matrix", "shape": [ 16, 778 ] }, "bs_style": { "kind": "str", "repr": "'lbs'" }, "bs_type": { "kind": "str", "repr": "'lrotmin'" } }, "name": "MANO_LEFT", "safetensors": "MANO_LEFT.safetensors", "safetensors_sha256": "2a0afc8861560b30cc4f7a38eb1605e42086fa0f493c406254528e7a4691c589", "source": { "filename": "assets/data/mano/MANO_LEFT.pkl", "license_provenance": "public Hugging Face mirror, not official MANO distribution", "repo_id": "menyifang/MIMO_VidDecomp", "repo_type": "model" }, "source_sha256": "c4022f7083f2ca7c78b2b3d595abbab52debd32b09d372b16923a801f0ea6a30", "tensor_count": 13, "tensors": { "J": { "dtype": "float64", "shape": [ 16, 3 ] }, "J_regressor.data": { "dtype": "float64", "shape": [ 1896 ] }, "J_regressor.indices": { "dtype": "int32", "shape": [ 1896 ] }, "J_regressor.indptr": { "dtype": "int32", "shape": [ 779 ] }, "f": { "dtype": "uint32", "shape": [ 1538, 3 ] }, "hands_coeffs": { "dtype": "float64", "shape": [ 1554, 45 ] }, "hands_components": { "dtype": "float64", "shape": [ 45, 45 ] }, "hands_mean": { "dtype": "float64", "shape": [ 45 ] }, "kintree_table": { "dtype": "int64", "shape": [ 2, 16 ] }, "posedirs": { "dtype": "float64", "shape": [ 778, 3, 135 ] }, "shapedirs": { "dtype": "float64", "shape": [ 778, 3, 10 ] }, "v_template": { "dtype": "float64", "shape": [ 778, 3 ] }, "weights": { "dtype": "float64", "shape": [ 778, 16 ] } } } ], "format": "hawor-safetensors-v1", "notes": [ "MANO conversion only includes user-provided licensed MANO files.", "HaWoR runtime loaders still need to be patched before these safetensors can replace original files." ] }