Shubham-Rasal Claude Sonnet 4.6 committed on
Commit
6156e81
·
1 Parent(s): 9459552

Add PhAIL tab: 4 autonomous VLA policies head-to-head on Franka

Browse files

Adds a new first tab that loads the 20-episode stratified sample
from phail-anon/phail-v1.0 — ACT, GR00T N1.6, π0.5, SmolVLA —
running autonomously on a Franka Research 3 bin-to-bin pick-and-place task.
No GPU required; scores pre-recorded rollout parquets via the
existing Bayesian + SPARC + STL pipeline.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +157 -2
  2. requirements.txt +2 -0
app.py CHANGED
@@ -12,8 +12,9 @@ from plotly.subplots import make_subplots
12
  from scipy.fft import rfft, rfftfreq
13
  from scipy import stats
14
  from datasets import load_dataset
 
15
  import gradio as gr
16
- import io, json
17
 
18
  # ── constants ────────────────────────────────────────────────────────────────
19
  PALETTE = ["#60A5FA", "#FB923C", "#F87171", "#34D399", "#A78BFA"]
@@ -81,6 +82,115 @@ def ci_lower(s, n, N=100_000):
81
  # ── load ALOHA demo data ──────────────────────────────────────────────────────
82
  _cache = {}
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  def load_aloha():
85
  if "aloha" in _cache:
86
  return _cache["aloha"]
@@ -492,7 +602,52 @@ def build_ui():
492
 
493
  with gr.Tabs():
494
 
495
- # ── TAB 1: DEMO DATA ──────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
  with gr.Tab("🦾 Try with Real ALOHA Data"):
497
  gr.Markdown("""
498
  **Dataset**: [`lerobot/aloha_static_cups_open`](https://huggingface.co/datasets/lerobot/aloha_static_cups_open)
 
12
  from scipy.fft import rfft, rfftfreq
13
  from scipy import stats
14
  from datasets import load_dataset
15
+ from huggingface_hub import hf_hub_download, list_repo_files
16
  import gradio as gr
17
+ import io, json, os, tempfile
18
 
19
  # ── constants ────────────────────────────────────────────────────────────────
20
  PALETTE = ["#60A5FA", "#FB923C", "#F87171", "#34D399", "#A78BFA"]
 
82
  # ── load ALOHA demo data ──────────────────────────────────────────────────────
83
  _cache = {}
84
 
85
# ── load PhAIL sample (4 autonomous VLA policies on Franka) ──────────────────
# Maps the <model> directory name found under sample/inference/ in the dataset
# repo to the display label used as the key of the policy_data dict.
PHAIL_POLICIES = {
    "act": "ACT",
    "groot": "GR00T N1.6",
    "openpi": "π0.5",
    "smolvla": "SmolVLA",
}
92
+
93
_PHAIL_REPO_ID = "phail-anon/phail-v1.0"


def _phail_joint_columns(ep):
    """Return (state_cols, action_cols) column-name lists for one episode frame."""
    action_cols = [c for c in ep.columns if "joint_command" in c or "q_cmd" in c]
    commanded = set(action_cols)
    # "q_" is a substring of "q_cmd", so commanded columns have to be excluded
    # explicitly or they end up inside the measured-state matrix.
    # NOTE(review): "q_" would also match names such as "dq_*" — confirm the
    # real column schema of the PhAIL parquets.
    state_cols = [c for c in ep.columns
                  if ("joint_position" in c or "q_" in c) and c not in commanded]

    # Fallback: use all numeric columns if named columns absent
    # (first half as states, second half as actions).
    if not state_cols:
        numeric = ep.select_dtypes(include=[np.number]).columns.tolist()
        mid = len(numeric) // 2
        state_cols = numeric[:mid] or numeric
        action_cols = numeric[mid:] or numeric

    return state_cols, action_cols


def _phail_episode_success(episode_dir):
    """Return 1 when <episode_dir>/static.json has eval.outcome == "Success", else 0.

    The lookup is best-effort: if the metadata file cannot be downloaded or
    parsed, the episode is counted as a failure (0) and a warning is logged
    instead of the error being swallowed silently.
    """
    import logging  # local import: keeps this block independent of file-level imports

    try:
        meta_local = hf_hub_download(
            repo_id=_PHAIL_REPO_ID,
            filename=f"{episode_dir}/static.json",
            repo_type="dataset")
        with open(meta_local, encoding="utf-8") as f:
            meta = json.load(f)
        outcome = meta.get("eval", {}).get("outcome", "")
    except Exception as exc:  # deliberate best-effort boundary
        logging.getLogger(__name__).warning(
            "PhAIL: no usable static.json for %s (%s); counting episode as failure",
            episode_dir, exc)
        return 0
    return 1 if outcome == "Success" else 0


def load_phail_sample(progress=None):
    """
    Download the 20-episode stratified sample from phail-anon/phail-v1.0.
    Returns policy_data dict ready for run_analysis().

    Args:
        progress: optional callable invoked as ``progress(fraction, desc=...)``
            to report status; pass ``None`` to disable reporting.

    Returns:
        Dict mapping each display label from ``PHAIL_POLICIES`` to a dict with
        the parallel lists ``"trials"`` (1 = success, 0 = otherwise),
        ``"speeds"``, ``"efforts"`` and ``"zs"`` (the three values returned by
        ``extract_episode``). Policies without any usable episode are omitted.
        The result is memoised in ``_cache["phail"]``.

    Raises:
        ValueError: if the repo lists no parquet file under sample/inference/.
    """
    if "phail" in _cache:
        return _cache["phail"]

    def report(fraction, desc):
        # Compare against None rather than relying on the truthiness of the
        # progress object, which is not guaranteed to be well defined.
        if progress is not None:
            progress(fraction, desc=desc)

    report(0.05, "Listing PhAIL sample files on HuggingFace Hub…")

    # Collect parquet paths under sample/inference/ (sorted for a stable order)
    sample_parquets = sorted(
        f for f in list_repo_files(_PHAIL_REPO_ID, repo_type="dataset")
        if f.startswith("sample/inference/") and f.endswith(".parquet"))

    if not sample_parquets:
        raise ValueError("No sample parquet files found in phail-anon/phail-v1.0")

    report(0.15, f"Downloading {len(sample_parquets)} episode files…")

    policy_data = {label: {"trials": [], "speeds": [], "efforts": [], "zs": []}
                   for label in PHAIL_POLICIES.values()}

    for i, fpath in enumerate(sample_parquets):
        report(0.15 + 0.7 * (i / len(sample_parquets)),
               f"Processing episode {i+1}/{len(sample_parquets)}…")

        # Identify policy from path sample/inference/<model>/batch_*/episode_*/
        parts = fpath.split("/")
        model_key = parts[2] if len(parts) > 2 else None
        label = PHAIL_POLICIES.get(model_key)
        if label is None:
            continue

        local = hf_hub_download(repo_id=_PHAIL_REPO_ID,
                                filename=fpath, repo_type="dataset")
        ep = pd.read_parquet(local)

        # Extract joint state / command columns (7-DOF Franka)
        state_cols, action_cols = _phail_joint_columns(ep)
        if not state_cols:
            # No named or numeric columns at all: nothing to score.
            continue

        states = ep[state_cols].values.astype(float)
        actions = ep[action_cols].values.astype(float) if action_cols else states

        # Skip episodes with fewer than 4 samples.
        if len(states) < 4:
            continue

        speed, effort, z = extract_episode(states, actions)

        # static.json lives in the same directory as the parquet file.
        # PhAIL annotates eval.outcome: "Success" | "Stalled" | "Safety" | "Ran_out_of_time"
        success = _phail_episode_success("/".join(parts[:-1]))

        bucket = policy_data[label]
        bucket["trials"].append(success)
        bucket["speeds"].append(speed)
        bucket["efforts"].append(effort)
        bucket["zs"].append(z)

    # Drop policies with no data
    policy_data = {k: v for k, v in policy_data.items() if v["trials"]}

    report(0.95, "Finalising…")

    _cache["phail"] = policy_data
    return policy_data
183
+
184
+
185
def run_phail(progress=gr.Progress()):
    """Click handler for the PhAIL tab: load the sample and run the analysis.

    Args:
        progress: Gradio progress tracker; the ``gr.Progress()`` default is
            how Gradio injects a tracker into an event handler.

    Returns:
        Whatever ``run_analysis(policy_data)`` returns; the PhAIL tab wires it
        to its plot and scorecard output components.
    """
    progress(0, desc="Connecting to HuggingFace Hub…")
    policy_data = load_phail_sample(progress)
    # load_phail_sample already reports up to 0.95; stay above that so the
    # progress bar never moves backwards.
    progress(0.97, desc="Running Bayesian + SPARC + STL analysis…")
    results = run_analysis(policy_data)
    progress(1.0, desc="Done!")
    return results
192
+
193
+
194
  def load_aloha():
195
  if "aloha" in _cache:
196
  return _cache["aloha"]
 
602
 
603
  with gr.Tabs():
604
 
605
+ # ── TAB 1: PhAIL — 4 autonomous VLA policies ──────────────────────
606
+ with gr.Tab("🏆 PhAIL: 4 VLA Policies Head-to-Head"):
607
+ gr.Markdown("""
608
+ **Dataset**: [`phail-anon/phail-v1.0`](https://huggingface.co/datasets/phail-anon/phail-v1.0)
609
+ — 20 stratified episodes from **4 real VLA policies** running autonomously on a **Franka Research 3** robot.
610
+ No GPU needed β€” we're scoring pre-recorded rollouts, not running the policies.
611
+
612
+ | Policy | Type | Developer |
613
+ |--------|------|-----------|
614
+ | ACT | Action Chunking Transformer | Academic (Zhao et al.) |
615
+ | GR00T N1.6 | Foundation model | NVIDIA |
616
+ | π0.5 | Diffusion policy VLA | Physical Intelligence |
617
+ | SmolVLA | Compact VLA | HuggingFace |
618
+
619
+ Task: **bin-to-bin pick-and-place** (batteries, scissors, towels, wooden spoons).
620
+ Success labels are human-verified from gripper telemetry.
621
+ """)
622
+
623
+ phail_btn = gr.Button("▶ Load & Analyse PhAIL Sample", variant="primary", size="lg")
624
+
625
+ with gr.Row():
626
+ ph_bayes = gr.Plot(label="Bayesian Posteriors")
627
+ ph_bayes_mat = gr.Plot(label="P(row beats col)")
628
+ with gr.Row():
629
+ ph_sparc = gr.Plot(label="SPARC Smoothness")
630
+ ph_speed = gr.Plot(label="Speed Profiles")
631
+ with gr.Row():
632
+ ph_stl = gr.Plot(label="STL Robustness")
633
+ ph_viols = gr.Plot(label="Violations")
634
+ with gr.Row():
635
+ ph_radar = gr.Plot(label="Composite Radar")
636
+ ph_rank = gr.Plot(label="Final Ranking")
637
+
638
+ ph_scorecard = gr.Markdown(label="Scorecard")
639
+
640
+ phail_btn.click(
641
+ fn=run_phail,
642
+ inputs=[],
643
+ outputs=[ph_bayes, ph_bayes_mat,
644
+ ph_sparc, ph_speed,
645
+ ph_stl, ph_viols,
646
+ ph_radar, ph_rank,
647
+ ph_scorecard],
648
+ )
649
+
650
+ # ── TAB 2: DEMO DATA ──────────────────────────────────────────────
651
  with gr.Tab("🦾 Try with Real ALOHA Data"):
652
  gr.Markdown("""
653
  **Dataset**: [`lerobot/aloha_static_cups_open`](https://huggingface.co/datasets/lerobot/aloha_static_cups_open)
requirements.txt CHANGED
@@ -4,3 +4,5 @@ numpy>=1.24.0
4
  scipy>=1.10.0
5
  datasets>=2.14.0
6
  pandas>=2.0.0
 
 
 
4
  scipy>=1.10.0
5
  datasets>=2.14.0
6
  pandas>=2.0.0
7
+ huggingface_hub>=0.23.0
8
+ pyarrow>=14.0.0