robot-policy-eval / convert_to_eval_csv.py
Shubham-Rasal
add real robot dataset converters and sample CSVs
07c7d4a
"""
Converters: real robot datasets → eval harness CSV format.
Usage:
python convert_to_eval_csv.py --dataset pusht --out pusht_eval.csv
python convert_to_eval_csv.py --dataset franka --out franka_eval.csv
python convert_to_eval_csv.py --dataset humanoid --out humanoid_eval.csv
python convert_to_eval_csv.py --dataset aloha --out aloha_eval.csv
Output CSV columns:
episode_id, policy_name, frame_id, timestamp,
state_0 ... state_N, action_0 ... action_N, success
"""
import argparse
import numpy as np
import pandas as pd
from datasets import load_dataset
# ── helpers ───────────────────────────────────────────────────────────────────
def episode_success(group, reward_col="next.reward", done_col="next.done"):
    """Derive a 0/1 success flag for a single episode.

    Args:
        group: DataFrame holding the frames of one episode.
        reward_col: Name of the per-frame reward column.
        done_col: Name of the per-frame "done" flag column.

    Returns:
        1 or 0. When ``reward_col`` is present, the episode counts as a
        success iff its largest reward is strictly positive. Otherwise the
        ``done_col`` value of the last frame is used, and when that column
        is missing too the episode is assumed successful.
    """
    available = group.columns

    # The reward signal takes priority whenever the column exists.
    if reward_col in available:
        peak_reward = group[reward_col].max()
        return int(peak_reward > 0)

    # Neither signal is available: treat the episode as successful.
    if done_col not in available:
        return 1

    # Fallback: an episode whose final frame is flagged done is a success.
    final_done = group[done_col].iloc[-1]
    return int(final_done)
def to_eval_csv(hf_dataset_name, policy_name, state_col, action_col,
                max_episodes=None, out_path=None):
    """Convert a Hugging Face dataset into the eval-harness frame table.

    The ``train`` split is loaded, grouped by ``episode_index`` and flattened
    into one row per frame with the columns::

        episode_id, policy_name, frame_id, timestamp,
        state_0 ... state_N, action_0 ... action_N, success

    Args:
        hf_dataset_name: Dataset identifier passed to ``load_dataset``.
        policy_name: Value written to the ``policy_name`` column of every row.
        state_col: Column holding the per-frame state vector.
        action_col: Column holding the per-frame action vector. When the
            dataset has no such column, the state vector is reused as the
            action.
        max_episodes: Keep only the first ``max_episodes`` episodes (by sorted
            episode index). ``None`` or ``0`` keeps every episode.
        out_path: When given, the table is also written there as CSV.

    Returns:
        The resulting ``pandas.DataFrame`` (one row per frame).
    """
    print(f"Loading {hf_dataset_name} …")
    ds = load_dataset(hf_dataset_name, split="train")
    df = ds.to_pandas()

    ep_ids = sorted(df["episode_index"].unique())
    if max_episodes:
        ep_ids = ep_ids[:max_episodes]

    # Column presence is identical for every episode, so check it once.
    has_actions = action_col in df.columns
    has_timestamps = "timestamp" in df.columns

    # A single groupby pass replaces re-scanning the whole frame with a
    # boolean mask for every episode. sort=True yields episodes in ascending
    # index order, and rows keep their original order within each group.
    selected = df[df["episode_index"].isin(ep_ids)]

    rows = []
    for ei, grp in selected.groupby("episode_index", sort=True):
        success = episode_success(grp)
        states = np.vstack(grp[state_col].values)
        actions = np.vstack(grp[action_col].values) if has_actions else states
        # Pull timestamps out once instead of one .iloc lookup per frame.
        timestamps = grp["timestamp"].to_numpy() if has_timestamps else None

        for fi, (s, a) in enumerate(zip(states, actions)):
            row = {
                "episode_id": int(ei),
                "policy_name": policy_name,
                "frame_id": fi,
                # Cast to a Python float before rounding, same as the
                # state/action values; fall back to the frame index when the
                # dataset carries no timestamps.
                "timestamp": round(float(timestamps[fi]), 4) if has_timestamps else fi,
            }
            for i, v in enumerate(s):
                row[f"state_{i}"] = round(float(v), 6)
            for i, v in enumerate(a):
                row[f"action_{i}"] = round(float(v), 6)
            # Inserted last so the column order matches the documented layout.
            row["success"] = success
            rows.append(row)

    out = pd.DataFrame(rows)
    if out_path:
        out.to_csv(out_path, index=False)
        print(f"Saved {len(ep_ids)} episodes ({len(out):,} frames) → {out_path}")
    return out
# ── dataset-specific converters ───────────────────────────────────────────────
# Registry of supported datasets, keyed by the CLI --dataset name.
# Every entry carries: hf (dataset id), label (policy name used in the CSV),
# state_col / action_col (source columns), max_eps (default episode cap) and
# note (free-text description printed by the CLI).
DATASETS = {
    # Real tabletop push-T (Columbia / CAIRLAB)
    #   Robot: custom delta robot, 2-DOF end-effector + contact sensors
    #   Task:  push a T-shaped block to a goal region
    #   State: 8-dim (EE pos/vel + block pose estimate)
    "pusht": {
        "hf": "lerobot/columbia_cairlab_pusht_real",
        "label": "Push-T (Columbia real robot)",
        "state_col": "observation.state",
        "action_col": "action",
        "max_eps": 40,
        "note": "2-DOF delta robot, tabletop push task, 136 episodes total",
    },
    # Franka Panda free-play dataset (NYU)
    #   Robot: 7-DOF Franka Emika Panda — the most common research arm
    #   Task:  unstructured manipulation play (no fixed goal)
    #   State: 13-dim (7 joint pos + 6 EE pose)
    "franka": {
        "hf": "lerobot/nyu_franka_play_dataset",
        "label": "Franka Panda Play (NYU)",
        "state_col": "observation.state",
        "action_col": "action",
        "max_eps": 50,
        "note": "7-DOF Franka Panda, 456 episodes of free-play manipulation",
    },
    # Unitree H1 humanoid — warehouse task
    #   Robot: full-size humanoid, 19-DOF state, 40-DOF action
    #   Task:  pick and place in warehouse setting
    #   No reward signal — we treat episode completion as success
    "humanoid": {
        "hf": "lerobot/unitreeh1_warehouse",
        "label": "Unitree H1 Humanoid (warehouse)",
        "state_col": "observation.state",
        "action_col": "action",
        "max_eps": 24,
        "note": "19-DOF humanoid state, 40-DOF action, 24 episodes",
    },
    # ALOHA bimanual static (cups open) — same as demo tab
    "aloha": {
        "hf": "lerobot/aloha_static_cups_open",
        "label": "ALOHA Bimanual (cups open)",
        "state_col": "observation.state",
        "action_col": "action",
        "max_eps": 50,
        "note": "14-DOF bimanual ALOHA, 50 episodes, cup-opening task",
    },
}
# ── multi-policy comparison helper ───────────────────────────────────────────
def _ordered_eval_columns(columns):
    """Order columns as: metadata, state_0..N, action_0..N, then success."""
    columns = list(columns)

    def numbered(prefix):
        # Sort numerically so that e.g. state_10 follows state_9, not state_1.
        matches = [c for c in columns
                   if c.startswith(prefix) and c[len(prefix):].isdigit()]
        return sorted(matches, key=lambda c: int(c[len(prefix):]))

    states = numbered("state_")
    actions = numbered("action_")
    tail = ["success"] if "success" in columns else []
    placed = set(states) | set(actions) | set(tail)
    head = [c for c in columns if c not in placed]
    return head + states + actions + tail


def make_comparison_csv(datasets_and_names: list[tuple[str, str]],
                        max_eps_each: int = 20,
                        out_path: str = "comparison_eval.csv"):
    """Combine multiple datasets as different 'policies' for A/B comparison.

    Args:
        datasets_and_names: list of (dataset_key, policy_label); each key must
            exist in ``DATASETS``.
        max_eps_each: Maximum number of episodes taken from each dataset.
        out_path: Destination of the combined CSV.

    Returns:
        The combined ``pandas.DataFrame``. Datasets with fewer state/action
        dimensions are zero-padded to the widest one.

    Raises:
        ValueError: If ``datasets_and_names`` is empty.
        KeyError: If a dataset key is not present in ``DATASETS``.

    Example:
        make_comparison_csv([("pusht","Push-T"), ("franka","Franka"), ("aloha","ALOHA")])
    """
    if not datasets_and_names:
        raise ValueError(
            "datasets_and_names must contain at least one "
            "(dataset_key, policy_label) pair"
        )

    dfs = []
    for key, label in datasets_and_names:
        cfg = DATASETS[key]
        dfs.append(to_eval_csv(cfg["hf"], label, cfg["state_col"], cfg["action_col"],
                               max_episodes=max_eps_each))

    # Align state/action columns across datasets: columns a dataset lacks are
    # padded with 0 (fillna also zeroes any NaN already present in the data).
    out = pd.concat(dfs, ignore_index=True).fillna(0.0)
    # concat appends unseen columns in encounter order, which interleaves
    # state_* and action_* columns; restore the documented layout.
    out = out[_ordered_eval_columns(out.columns)]
    out.to_csv(out_path, index=False)

    # Episode ids restart for every dataset, so an episode is only unique in
    # combination with its policy name.
    n_episodes = len(out[["policy_name", "episode_id"]].drop_duplicates())

    print(f"\nSaved multi-policy comparison CSV → {out_path}")
    print(f"Policies: {out['policy_name'].unique().tolist()}")
    print(f"Total episodes: {n_episodes}")
    print(f"Total frames: {len(out):,}")
    return out
# ── CLI ───────────────────────────────────────────────────────────────────────
def main():
    """Parse command-line arguments and run the requested conversion.

    ``--dataset compare`` builds a combined multi-policy CSV from the pusht,
    franka and humanoid datasets; any other choice converts that single
    dataset. The output path defaults to ``comparison_eval.csv`` or
    ``<dataset>_eval.csv`` respectively.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", choices=list(DATASETS.keys()) + ["compare"],
                        default="pusht", help="Dataset to convert")
    parser.add_argument("--out", default=None, help="Output CSV path")
    parser.add_argument("--max-eps", type=int, default=None,
                        help="Max episodes to convert (default: the dataset's "
                             "configured max_eps, or 15 per dataset for 'compare')")
    args = parser.parse_args()

    # A zero or negative cap would otherwise be silently reinterpreted
    # (0 falls back to the default, negatives slice off trailing episodes).
    if args.max_eps is not None and args.max_eps <= 0:
        parser.error("--max-eps must be a positive integer")

    if args.dataset == "compare":
        out = args.out or "comparison_eval.csv"
        make_comparison_csv(
            [("pusht", "Push-T"), ("franka", "Franka"), ("humanoid", "H1-Humanoid")],
            max_eps_each=args.max_eps or 15,
            out_path=out,
        )
    else:
        cfg = DATASETS[args.dataset]
        out = args.out or f"{args.dataset}_eval.csv"
        print(f"\n{cfg['label']}")
        print(f"Note: {cfg['note']}\n")
        to_eval_csv(cfg["hf"], cfg["label"], cfg["state_col"], cfg["action_col"],
                    max_episodes=args.max_eps or cfg["max_eps"], out_path=out)

    print("\nDone. Upload the CSV to the HuggingFace Space:")
    print(" https://huggingface.co/spaces/ShubhamRasal/robot-policy-eval")


if __name__ == "__main__":
    main()