#!/bin/bash
# organize_data.sh
# Moves large data files into the HuggingFace repo structure.
# Run this ONCE before uploading to HuggingFace.
#
# What this moves:
#   images/          (7.8GB, 53K files)     → data/images/
#   motions_tokens/  (338MB, 41,971 .pt)    → data/motions_tokens/
#   motions_raw/     (4.8GB, 41,971 .pt)    → data/motions_raw/
#
# Usage: ./organize_data.sh
#   Interactive: prints the plan and waits for ENTER before moving anything.
#   A source directory that does not exist is skipped, not treated as an error.

# Strict mode is enabled before any command runs so that a failure while
# resolving SCRIPT_DIR below is not silently ignored.
set -euo pipefail

# Source paths (edit if needed):
readonly SRC_IMAGES="/Users/bytedance/Downloads/MotionVLA/motionvla/data/vimogen_full/images"
readonly SRC_TOKENS="/Users/bytedance/Downloads/MotionVLA/motionvla/data/vimogen_full/motions_dsfast_v4"
# NOTE(review): the header describes motions_raw as .pt files, but this path is
# named "in_the_wild_video" — confirm it is the intended source directory.
readonly SRC_RAW="/Users/bytedance/Downloads/MotionVLA/motionvla/data/vimogen_full/in_the_wild_video"

# Destination (relative to this script's directory):
SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"
readonly SCRIPT_DIR
readonly DST_IMAGES="$SCRIPT_DIR/data/images"
readonly DST_TOKENS="$SCRIPT_DIR/data/motions_tokens"
readonly DST_RAW="$SCRIPT_DIR/data/motions_raw"

#######################################
# Move every non-hidden entry directly inside a directory into another one.
# Arguments: $1 - source directory
#            $2 - destination directory (created if missing)
# Returns:   non-zero if mkdir, find, or any mv invocation fails
#######################################
move_contents() {
  local src=$1
  local dst=$2

  mkdir -p -- "$dst"

  # A plain `mv "$src"/* "$dst"/` puts tens of thousands of absolute paths on
  # one command line and fails with "Argument list too long". `-exec ... {} +`
  # lets find split the entries into batches that fit the system limit.
  # `! -name '.*'` keeps the same selection as the `*` glob (dotfiles such as
  # .DS_Store stay behind), and an empty source directory is simply a no-op.
  find "$src" -mindepth 1 -maxdepth 1 ! -name '.*' \
    -exec sh -c 'dst=$1; shift; mv -- "$@" "$dst"/' sh "$dst" {} +
}

#######################################
# Count the non-hidden entries directly inside a directory.
# Arguments: $1 - directory to inspect
# Outputs:   the entry count on stdout, without padding
#######################################
count_entries() {
  local dir=$1
  local count

  # Count NUL terminators rather than parsing `ls`, so names containing
  # newlines are counted once.
  count=$(find "$dir" -mindepth 1 -maxdepth 1 ! -name '.*' -print0 \
    | tr -cd '\0' \
    | wc -c)

  # BSD wc pads its output with spaces; strip them.
  printf '%s\n' "${count//[[:space:]]/}"
}

#######################################
# Run one move step, or report that its source directory is missing.
# Arguments: $1 - step tag, e.g. "1/3"
#            $2 - human-readable description for the progress line
#            $3 - source directory
#            $4 - destination directory
#######################################
move_step() {
  local step=$1
  local label=$2
  local src=$3
  local dst=$4

  if [[ -d "$src" ]]; then
    echo "[$step] Moving $label ..."
    move_contents "$src" "$dst"
    echo " Done: $(count_entries "$dst") files"
  else
    echo "[$step] SKIP: $src not found"
  fi
}

#######################################
# Print the plan, wait for confirmation, run the three moves, print a summary.
#######################################
main() {
  echo "=================================================="
  echo " MotionVLA Data Organizer"
  echo "=================================================="
  echo ""
  echo "Source images : $SRC_IMAGES"
  echo "Source tokens : $SRC_TOKENS"
  echo "Source raw : $SRC_RAW"
  echo ""
  echo "Destination : $SCRIPT_DIR/data/"
  echo ""
  echo "Press ENTER to continue, Ctrl+C to cancel..."
  # EOF on stdin (e.g. a non-interactive run) counts as "not confirmed":
  # abort with a message instead of moving data without consent.
  if ! read -r _; then
    echo "Aborted: no confirmation received on stdin." >&2
    exit 1
  fi

  # Step 1: Move images
  move_step "1/3" "images (7.8GB)" "$SRC_IMAGES" "$DST_IMAGES"

  # Step 2: Move motion tokens (v4, Qwen vocab space)
  move_step "2/3" "motion tokens (338MB)" "$SRC_TOKENS" "$DST_TOKENS"

  # Step 3: Move raw 276-dim motions
  move_step "3/3" "raw 276-dim motions (4.8GB)" "$SRC_RAW" "$DST_RAW"

  echo ""
  echo "=================================================="
  echo " Data organization complete!"
  echo " Total size:"
  # data/ does not exist when every step was skipped; that must not abort the
  # script before the next-steps hint is printed.
  du -sh "$SCRIPT_DIR/data/" 2>/dev/null || true
  echo "=================================================="
  echo ""
  echo "Next steps:"
  # NOTE(review): both repo ids below start with "/", so the account or
  # organisation name appears to be missing — fill it in before running them.
  echo " 1. Upload to HuggingFace:"
  echo " huggingface-cli upload /MotionVLA-Dataset . --repo-type dataset"
  echo " 2. Upload model checkpoints:"
  echo " huggingface-cli upload /MotionVLA checkpoints/ --repo-type model"
}

main "$@"