Spaces:

ShubhamRasal
/

robot-policy-eval

Sleeping

Shubham-Rasal Claude Sonnet 4.6 committed 9 days ago

Commit

654b826

1 Parent(s): 317e86c

Rewrite PhAIL loader: static.json-driven episode discovery

Path structure is sample/inference/<batch>/<episode>/<signal>.parquet,
not sample/inference/<model>/... — the model name lives in static.json.

New approach:
- Find all static.json files under sample/inference/
- Load each for model name + outcome (success label)
- Load robot_state.q.parquet (7-DOF joint positions) as states
- Load robot_commands.pose.parquet as actions (falls back to the states if that file cannot be downloaded or read)
- Build policy_data keyed by actual model name from metadata

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show

app.py +55 -82

app.py CHANGED Viewed

@@ -90,42 +90,15 @@ PHAIL_POLICIES = {
     "smolvla": "SmolVLA",
 }
-def _pick_joint_cols(ep):
-    """Return (state_cols, action_cols) from a PhAIL episode dataframe."""
-    cols = ep.columns.tolist()
-    # Print once for debugging (visible in Space logs)
-    print(f"[PhAIL] columns ({len(cols)}): {cols[:20]}")
-    keywords_state  = ["joint_pos", "q_pos", "position", "state", "obs"]
-    keywords_action = ["joint_cmd", "q_cmd", "command", "action", "target"]
-    def match(kws):
-        return [c for c in cols if any(k in c.lower() for k in kws)]
-    sc = match(keywords_state)
-    ac = match(keywords_action)
-    # Avoid overlap: if both lists share columns, prefer more specific
-    if sc and ac and set(sc) & set(ac):
-        ac = [c for c in ac if c not in sc]
-    # Absolute fallback: split numeric columns in half
-    if not sc:
-        num = ep.select_dtypes(include=[np.number]).columns.tolist()
-        mid = max(1, len(num) // 2)
-        sc = num[:mid]
-        ac = num[mid:] if not ac else ac
-    if not ac:
-        ac = sc  # use state as action proxy (effort will be ~0)
-    return sc, ac
 def load_phail_sample(progress=None):
     """
-    Download the 20-episode stratified sample from phail-anon/phail-v1.0.
-    Returns policy_data dict ready for run_analysis().
     """
     if "phail" in _cache:
         return _cache["phail"]
@@ -134,82 +107,82 @@ def load_phail_sample(progress=None):
         if progress is not None:
             progress(frac, desc=desc)
-    _prog(0.05, "Listing PhAIL sample files on HuggingFace Hub…")
     all_files = list(list_repo_files("phail-anon/phail-v1.0", repo_type="dataset"))
-    sample_parquets = sorted([f for f in all_files
-                               if f.startswith("sample/inference/") and f.endswith(".parquet")])
-    print(f"[PhAIL] found {len(sample_parquets)} sample parquets")
-    if not sample_parquets:
-        raise ValueError("No parquet files found under sample/inference/ in phail-anon/phail-v1.0. "
-                         f"All files: {all_files[:30]}")
-    _prog(0.15, f"Found {len(sample_parquets)} episodes — downloading…")
-    policy_data = {label: {"trials": [], "speeds": [], "efforts": [], "zs": []}
-                   for label in PHAIL_POLICIES.values()}
-    col_schema_logged = False
-    for i, fpath in enumerate(sample_parquets):
-        _prog(0.15 + 0.7 * (i / len(sample_parquets)),
-              f"Episode {i+1}/{len(sample_parquets)}…")
-        parts = fpath.split("/")
-        # path: sample/inference/<model>/batch_X/episode_Y/something.parquet
-        model_key = parts[2] if len(parts) > 2 else None
-        label = PHAIL_POLICIES.get(model_key)
-        if label is None:
-            print(f"[PhAIL] skip {fpath} — model_key={model_key!r} not in PHAIL_POLICIES")
             continue
         try:
-            local = hf_hub_download(repo_id="phail-anon/phail-v1.0",
-                                     filename=fpath, repo_type="dataset")
-            ep = pd.read_parquet(local)
         except Exception as exc:
-            print(f"[PhAIL] failed to load {fpath}: {exc}")
             continue
-        if not col_schema_logged:
-            print(f"[PhAIL] full column list: {ep.columns.tolist()}")
-            col_schema_logged = True
-        sc, ac = _pick_joint_cols(ep)
-        states  = ep[sc].values.astype(float)
-        actions = ep[ac].values.astype(float)
         if len(states) < 4:
-            print(f"[PhAIL] skip {fpath} — only {len(states)} rows")
             continue
         speed, effort, z = extract_episode(states, actions)
-        # Load success label from static.json in the same episode directory
-        success = 0
-        try:
-            static_path = "/".join(parts[:-1]) + "/static.json"
-            meta_local  = hf_hub_download(repo_id="phail-anon/phail-v1.0",
-                                           filename=static_path, repo_type="dataset")
-            with open(meta_local) as f:
-                meta = json.load(f)
-            outcome = meta.get("eval", {}).get("outcome", "")
-            success  = 1 if outcome == "Success" else 0
-        except Exception as exc:
-            print(f"[PhAIL] no static.json for {fpath}: {exc}")
         policy_data[label]["trials"].append(success)
         policy_data[label]["speeds"].append(speed)
         policy_data[label]["efforts"].append(effort)
         policy_data[label]["zs"].append(z)
-    policy_data = {k: v for k, v in policy_data.items() if v["trials"]}
-    print(f"[PhAIL] loaded policies: { {k: len(v['trials']) for k, v in policy_data.items()} }")
     if not policy_data:
-        raise ValueError(
-            "PhAIL: no episodes loaded. Check Space logs for column names and path structure.")
     _prog(0.95, "Finalising…")
     _cache["phail"] = policy_data

     "smolvla": "SmolVLA",
 }
 def load_phail_sample(progress=None):
     """
+    Load PhAIL sample — path structure:
+      sample/inference/<batch_id>/<episode_id>/<signal>.parquet
+      sample/inference/<batch_id>/<episode_id>/static.json  ← model name + outcome
+    Per-episode signals used:
+      robot_state.q.parquet       — 7-DOF Franka joint positions (states)
+      robot_commands.pose.parquet — commanded EE pose (actions proxy)
     """
     if "phail" in _cache:
         return _cache["phail"]
         if progress is not None:
             progress(frac, desc=desc)
+    _prog(0.05, "Listing PhAIL sample files…")
     all_files = list(list_repo_files("phail-anon/phail-v1.0", repo_type="dataset"))
+    # Collect episode dirs that have static.json
+    static_files = sorted([f for f in all_files
+                            if f.startswith("sample/inference/") and f.endswith("/static.json")])
+    print(f"[PhAIL] found {len(static_files)} episodes (static.json)")
+    if not static_files:
+        raise ValueError("No static.json files found under sample/inference/")
+    _prog(0.1, f"Found {len(static_files)} episodes — loading…")
+    policy_data = {}  # built dynamically from actual model names in static.json
+    for i, sf in enumerate(static_files):
+        _prog(0.1 + 0.8 * (i / len(static_files)), f"Episode {i+1}/{len(static_files)}…")
+        ep_dir = sf[: -len("/static.json")]  # e.g. sample/inference/000.../000.../
+        # ── load metadata ──────────────────────────────────────────────────────
+        try:
+            meta_local = hf_hub_download(repo_id="phail-anon/phail-v1.0",
+                                          filename=sf, repo_type="dataset")
+            with open(meta_local) as f:
+                meta = json.load(f)
+        except Exception as exc:
+            print(f"[PhAIL] skip {sf}: {exc}")
             continue
+        model   = meta.get("model", meta.get("source", "unknown"))
+        outcome = meta.get("eval", {}).get("outcome", meta.get("outcome", ""))
+        success = 1 if outcome == "Success" else 0
+        # Map model key → display label
+        label = PHAIL_POLICIES.get(model, model)
+        # ── load joint positions (states) ──────────────────────────────────────
+        q_path = ep_dir + "/robot_state.q.parquet"
         try:
+            q_local = hf_hub_download(repo_id="phail-anon/phail-v1.0",
+                                       filename=q_path, repo_type="dataset")
+            q_df = pd.read_parquet(q_local)
         except Exception as exc:
+            print(f"[PhAIL] no robot_state.q for {ep_dir}: {exc}")
             continue
+        # ── load commands (actions proxy) ──────────────────────────────────────
+        cmd_path = ep_dir + "/robot_commands.pose.parquet"
+        try:
+            cmd_local = hf_hub_download(repo_id="phail-anon/phail-v1.0",
+                                         filename=cmd_path, repo_type="dataset")
+            cmd_df = pd.read_parquet(cmd_local)
+        except Exception:
+            cmd_df = q_df  # fall back to state = action (effort ≈ 0)
+        states  = q_df.select_dtypes(include=[np.number]).values.astype(float)
+        actions = cmd_df.select_dtypes(include=[np.number]).values.astype(float)
         if len(states) < 4:
+            print(f"[PhAIL] skip {ep_dir} — only {len(states)} rows")
             continue
         speed, effort, z = extract_episode(states, actions)
+        if label not in policy_data:
+            policy_data[label] = {"trials": [], "speeds": [], "efforts": [], "zs": []}
         policy_data[label]["trials"].append(success)
         policy_data[label]["speeds"].append(speed)
         policy_data[label]["efforts"].append(effort)
         policy_data[label]["zs"].append(z)
+    print(f"[PhAIL] loaded: { {k: len(v['trials']) for k, v in policy_data.items()} }")
     if not policy_data:
+        raise ValueError("PhAIL: no episodes loaded — check logs for path/schema details.")
     _prog(0.95, "Finalising…")
     _cache["phail"] = policy_data