"""Converters: real robot datasets → eval harness CSV format.

Usage:
    python convert_to_eval_csv.py --dataset pusht    --out pusht_eval.csv
    python convert_to_eval_csv.py --dataset franka   --out franka_eval.csv
    python convert_to_eval_csv.py --dataset humanoid --out humanoid_eval.csv
    python convert_to_eval_csv.py --dataset aloha    --out aloha_eval.csv
    python convert_to_eval_csv.py --dataset compare  --out comparison_eval.csv

Output CSV columns (in this order):
    episode_id, policy_name, frame_id, timestamp, success,
    state_0 ... state_N, action_0 ... action_M
"""

import argparse

import numpy as np
import pandas as pd

try:
    from datasets import load_dataset
except ImportError:  # keep the pure-pandas helpers importable without HF datasets
    load_dataset = None


# ── helpers ───────────────────────────────────────────────────────────────────

def episode_success(group, reward_col="next.reward", done_col="next.done"):
    """Infer episode success from the reward signal or the done flag.

    Args:
        group: DataFrame holding the frames of a single episode.
        reward_col: Name of the per-frame reward column.
        done_col: Name of the per-frame "done" column.

    Returns:
        1 or 0. If ``reward_col`` exists, 1 iff its maximum is > 0.
        Otherwise the last value of ``done_col`` as an int, or 1 when
        neither column is present.
    """
    if reward_col in group.columns:
        # NOTE(review): a reward column that is present but all-zero yields 0
        # and never reaches the done-flag fallback — confirm this is intended
        # for datasets that ship a placeholder reward column.
        return int(group[reward_col].max() > 0)
    # fallback: episode completed normally = success
    if done_col in group.columns:
        return int(group[done_col].iloc[-1])
    return 1


def frames_to_eval_df(df, policy_name, state_col, action_col,
                      max_episodes=None):
    """Convert an already-loaded per-frame DataFrame to eval-harness rows.

    Args:
        df: DataFrame with an ``episode_index`` column, a ``state_col`` column
            of per-frame vectors and, optionally, ``action_col`` and
            ``timestamp`` columns.
        policy_name: Value written to the ``policy_name`` column.
        state_col: Column holding the per-frame state vector.
        action_col: Column holding the per-frame action vector. When the
            column is absent, the state vectors are emitted as actions.
        max_episodes: Keep only the first N episodes (by sorted episode
            index). ``None`` or 0 keeps all episodes.

    Returns:
        DataFrame with one row per frame and columns episode_id, policy_name,
        frame_id, timestamp, success, state_*, action_*.
    """
    ep_ids = sorted(df["episode_index"].unique())
    if max_episodes:
        ep_ids = ep_ids[:max_episodes]

    has_timestamp = "timestamp" in df.columns
    has_action = action_col in df.columns
    # Group once instead of re-scanning the whole frame with a boolean mask
    # for every episode.
    by_episode = df.groupby("episode_index", sort=False)

    rows = []
    for ei in ep_ids:
        grp = by_episode.get_group(ei).reset_index(drop=True)
        success = episode_success(grp)
        states = np.vstack(grp[state_col].values)
        actions = np.vstack(grp[action_col].values) if has_action else states
        timestamps = grp["timestamp"].to_numpy() if has_timestamp else None

        for fi, (s, a) in enumerate(zip(states, actions)):
            row = {
                "episode_id": int(ei),
                "policy_name": policy_name,
                "frame_id": fi,
                # Fall back to the frame index when no timestamp is recorded.
                "timestamp": round(timestamps[fi], 4) if has_timestamp else fi,
                "success": success,
            }
            for i, v in enumerate(s):
                row[f"state_{i}"] = round(float(v), 6)
            for i, v in enumerate(a):
                row[f"action_{i}"] = round(float(v), 6)
            rows.append(row)

    return pd.DataFrame(rows)


def to_eval_csv(hf_dataset_name, policy_name, state_col, action_col,
                max_episodes=None, out_path=None):
    """Load a Hugging Face dataset and convert it to the eval CSV layout.

    Args:
        hf_dataset_name: Dataset identifier passed to ``load_dataset``
            (the ``train`` split is used).
        policy_name: Value written to the ``policy_name`` column.
        state_col: Column holding the per-frame state vector.
        action_col: Column holding the per-frame action vector.
        max_episodes: Keep only the first N episodes; ``None``/0 keeps all.
        out_path: If given, the result is also written to this CSV path.

    Returns:
        The converted DataFrame (one row per frame).

    Raises:
        ImportError: If the ``datasets`` package is not installed.
    """
    if load_dataset is None:
        raise ImportError(
            "The 'datasets' package is required to load "
            f"{hf_dataset_name!r}; install it with `pip install datasets`."
        )

    print(f"Loading {hf_dataset_name} …")
    ds = load_dataset(hf_dataset_name, split="train")
    out = frames_to_eval_df(ds.to_pandas(), policy_name, state_col, action_col,
                            max_episodes=max_episodes)

    if out_path:
        out.to_csv(out_path, index=False)
        # An empty result has no columns at all, hence the guard.
        n_episodes = out["episode_id"].nunique() if len(out) else 0
        print(f"Saved {n_episodes} episodes ({len(out):,} frames) → {out_path}")
    return out


# ── dataset-specific converters ───────────────────────────────────────────────

DATASETS = {
    # Real tabletop push-T (Columbia / CAIRLAB)
    # Robot: custom delta robot, 2-DOF end-effector + contact sensors
    # Task:  push a T-shaped block to a goal region
    # State: 8-dim (EE pos/vel + block pose estimate)
    "pusht": dict(
        hf="lerobot/columbia_cairlab_pusht_real",
        label="Push-T (Columbia real robot)",
        state_col="observation.state",
        action_col="action",
        max_eps=40,
        note="2-DOF delta robot, tabletop push task, 136 episodes total",
    ),
    # Franka Panda free-play dataset (NYU)
    # Robot: 7-DOF Franka Emika Panda — the most common research arm
    # Task:  unstructured manipulation play (no fixed goal)
    # State: 13-dim (7 joint pos + 6 EE pose)
    "franka": dict(
        hf="lerobot/nyu_franka_play_dataset",
        label="Franka Panda Play (NYU)",
        state_col="observation.state",
        action_col="action",
        max_eps=50,
        note="7-DOF Franka Panda, 456 episodes of free-play manipulation",
    ),
    # Unitree H1 humanoid — warehouse task
    # Robot: full-size humanoid, 19-DOF state, 40-DOF action
    # Task:  pick and place in warehouse setting
    # No reward signal — we treat episode completion as success
    "humanoid": dict(
        hf="lerobot/unitreeh1_warehouse",
        label="Unitree H1 Humanoid (warehouse)",
        state_col="observation.state",
        action_col="action",
        max_eps=24,
        note="19-DOF humanoid state, 40-DOF action, 24 episodes",
    ),
    # ALOHA bimanual static (cups open) — same as demo tab
    "aloha": dict(
        hf="lerobot/aloha_static_cups_open",
        label="ALOHA Bimanual (cups open)",
        state_col="observation.state",
        action_col="action",
        max_eps=50,
        note="14-DOF bimanual ALOHA, 50 episodes, cup-opening task",
    ),
}


# ── multi-policy comparison helper ───────────────────────────────────────────

def make_comparison_csv(datasets_and_names: list[tuple[str, str]],
                        max_eps_each: int = 20,
                        out_path: str = "comparison_eval.csv"):
    """Combine multiple datasets as different 'policies' for A/B comparison.

    Args:
        datasets_and_names: list of (dataset_key, policy_label); each key must
            exist in ``DATASETS``.
        max_eps_each: Maximum number of episodes taken from each dataset.
        out_path: Destination CSV path.

    Returns:
        The concatenated DataFrame that was written to ``out_path``.

    Raises:
        ValueError: If ``datasets_and_names`` is empty.
        KeyError: If a dataset key is not in ``DATASETS``.

    Example:
        make_comparison_csv([("pusht","Push-T"), ("franka","Franka"), ("aloha","ALOHA")])
    """
    if not datasets_and_names:
        raise ValueError("datasets_and_names must contain at least one entry")

    dfs = []
    for key, label in datasets_and_names:
        cfg = DATASETS[key]
        dfs.append(
            to_eval_csv(cfg["hf"], label, cfg["state_col"], cfg["action_col"],
                        max_episodes=max_eps_each)
        )

    # Align state/action columns across datasets (fill missing with 0)
    out = pd.concat(dfs, ignore_index=True).fillna(0.0)
    out.to_csv(out_path, index=False)

    # Episode ids restart at 0 in every dataset, so count (policy, episode)
    # pairs rather than bare episode ids.
    total_episodes = len(out[["policy_name", "episode_id"]].drop_duplicates())

    print(f"\nSaved multi-policy comparison CSV → {out_path}")
    print(f"Policies: {out['policy_name'].unique().tolist()}")
    print(f"Total episodes: {total_episodes}")
    print(f"Total frames: {len(out):,}")
    return out


# ── CLI ───────────────────────────────────────────────────────────────────────

def main(argv=None):
    """Parse command-line arguments and run the requested conversion.

    Args:
        argv: Argument list to parse; ``None`` uses ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset",
                        choices=list(DATASETS.keys()) + ["compare"],
                        default="pusht",
                        help="Dataset to convert")
    parser.add_argument("--out", default=None, help="Output CSV path")
    parser.add_argument("--max-eps", type=int, default=None,
                        help="Max episodes to convert (default: the "
                             "per-dataset preset, or 15 per dataset "
                             "for 'compare')")
    args = parser.parse_args(argv)

    if args.dataset == "compare":
        out = args.out or "comparison_eval.csv"
        make_comparison_csv(
            [("pusht", "Push-T"), ("franka", "Franka"),
             ("humanoid", "H1-Humanoid")],
            max_eps_each=args.max_eps or 15,
            out_path=out,
        )
    else:
        cfg = DATASETS[args.dataset]
        out = args.out or f"{args.dataset}_eval.csv"
        print(f"\n{cfg['label']}")
        print(f"Note: {cfg['note']}\n")
        to_eval_csv(cfg["hf"], cfg["label"], cfg["state_col"],
                    cfg["action_col"],
                    max_episodes=args.max_eps or cfg["max_eps"],
                    out_path=out)

    print("\nDone. Upload the CSV to the HuggingFace Space:")
    print(" https://huggingface.co/spaces/ShubhamRasal/robot-policy-eval")


if __name__ == "__main__":
    main()