Spaces:
Running
Running
Shubham-Rasal Claude Sonnet 4.6 committed on
Commit ·
6156e81
1
Parent(s): 9459552
Add PhAIL tab: 4 autonomous VLA policies head-to-head on Franka
Browse files
Adds a new first tab that loads the 20-episode stratified sample
from phail-anon/phail-v1.0 — ACT, GR00T N1.6, π0.5, SmolVLA —
running autonomously on a Franka Research 3 bin-to-bin pick-and-place task.
No GPU required; scores pre-recorded rollout parquets via the
existing Bayesian + SPARC + STL pipeline.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- app.py +157 -2
- requirements.txt +2 -0
app.py
CHANGED
|
@@ -12,8 +12,9 @@ from plotly.subplots import make_subplots
|
|
| 12 |
from scipy.fft import rfft, rfftfreq
|
| 13 |
from scipy import stats
|
| 14 |
from datasets import load_dataset
|
|
|
|
| 15 |
import gradio as gr
|
| 16 |
-
import io, json
|
| 17 |
|
| 18 |
# ── constants ────────────────────────────────────────────────────────────────
|
| 19 |
PALETTE = ["#60A5FA", "#FB923C", "#F87171", "#34D399", "#A78BFA"]
|
|
@@ -81,6 +82,115 @@ def ci_lower(s, n, N=100_000):
|
|
| 81 |
# ── load ALOHA demo data ──────────────────────────────────────────────────────
|
| 82 |
_cache = {}
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
def load_aloha():
|
| 85 |
if "aloha" in _cache:
|
| 86 |
return _cache["aloha"]
|
|
@@ -492,7 +602,52 @@ def build_ui():
|
|
| 492 |
|
| 493 |
with gr.Tabs():
|
| 494 |
|
| 495 |
-
# ββ TAB 1:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
with gr.Tab("π¦Ύ Try with Real ALOHA Data"):
|
| 497 |
gr.Markdown("""
|
| 498 |
**Dataset**: [`lerobot/aloha_static_cups_open`](https://huggingface.co/datasets/lerobot/aloha_static_cups_open)
|
|
|
|
| 12 |
from scipy.fft import rfft, rfftfreq
|
| 13 |
from scipy import stats
|
| 14 |
from datasets import load_dataset
|
| 15 |
+
from huggingface_hub import hf_hub_download, list_repo_files
|
| 16 |
import gradio as gr
|
| 17 |
+
import io, json, os, tempfile
|
| 18 |
|
| 19 |
# ── constants ────────────────────────────────────────────────────────────────
|
| 20 |
PALETTE = ["#60A5FA", "#FB923C", "#F87171", "#34D399", "#A78BFA"]
|
|
|
|
| 82 |
# ── load ALOHA demo data ──────────────────────────────────────────────────────
|
| 83 |
_cache = {}
|
| 84 |
|
| 85 |
+
# ── load PhAIL sample (4 autonomous VLA policies on Franka) ──────────────────
|
| 86 |
+
# Maps the <model> directory name found in repo paths of the form
# sample/inference/<model>/... to the display label used as the key of the
# per-policy result dict. Files whose <model> segment is not listed here are
# skipped by the loader.
PHAIL_POLICIES = {
    "act": "ACT",
    "groot": "GR00T N1.6",
    "openpi": "π0.5",  # was mis-encoded as "Ο0.5" (UTF-8 bytes read as ISO-8859-7)
    "smolvla": "SmolVLA",
}
|
| 92 |
+
|
| 93 |
+
_PHAIL_REPO_ID = "phail-anon/phail-v1.0"


def _phail_split_columns(ep):
    """Return ``(state_cols, action_cols)`` for one episode DataFrame.

    Action columns are those whose name contains ``joint_command`` or
    ``q_cmd``. State columns are those containing ``joint_position`` or
    ``q_`` that are NOT action columns: ``"q_"`` is a substring of
    ``"q_cmd"``, so without the exclusion every command column would also be
    counted as a state column.

    When no state column is found by name, all numeric columns are split in
    half (first half = state, second half = action) as a heuristic fallback.
    Both lists are empty when the frame has no numeric columns at all.
    """
    action_cols = [c for c in ep.columns
                   if "joint_command" in c or "q_cmd" in c]
    action_set = set(action_cols)
    state_cols = [c for c in ep.columns
                  if ("joint_position" in c or "q_" in c)
                  and c not in action_set]

    if not state_cols:
        numeric = ep.select_dtypes(include=[np.number]).columns.tolist()
        mid = len(numeric) // 2
        # With a single numeric column `mid` is 0; reuse the full list so
        # neither side ends up empty.
        state_cols = numeric[:mid] or numeric
        action_cols = numeric[mid:] or numeric

    return state_cols, action_cols


def _phail_episode_success(parts):
    """Return 1 if the episode's ``static.json`` reports a successful outcome.

    ``parts`` is the parquet's repo path split on ``/``; ``static.json`` is
    looked up in the same directory as the parquet. The value read is
    ``meta["eval"]["outcome"]`` and only the exact string ``"Success"`` counts
    as success.

    This is deliberately best-effort: any failure to download or parse the
    metadata yields 0, but the failure is logged so that a systematically
    missing file does not silently show up as a 0% success rate.
    """
    import logging

    static_path = "/".join(parts[:-1]) + "/static.json"
    try:
        meta_local = hf_hub_download(repo_id=_PHAIL_REPO_ID,
                                     filename=static_path,
                                     repo_type="dataset")
        with open(meta_local, encoding="utf-8") as f:
            meta = json.load(f)
        outcome = meta.get("eval", {}).get("outcome", "")
    except Exception as exc:  # best-effort boundary: never abort the load
        logging.getLogger(__name__).warning(
            "PhAIL: could not read %s (%s); counting episode as not successful",
            static_path, exc)
        return 0
    return 1 if outcome == "Success" else 0


def load_phail_sample(progress=None):
    """
    Download the sample episodes under ``sample/inference/`` of the
    phail-anon/phail-v1.0 dataset repo and group them by policy.

    Args:
        progress: optional callable invoked as ``progress(fraction, desc=...)``
            to report status; pass ``None`` to disable reporting.

    Returns:
        dict mapping policy display label (a value of ``PHAIL_POLICIES``) to
        ``{"trials": [...], "speeds": [...], "efforts": [...], "zs": [...]}``,
        one entry per kept episode. ``trials`` holds 0/1 success flags; the
        other three lists hold the three values returned by
        ``extract_episode`` for that episode. Policies with no kept episode
        are omitted. The result is memoised in ``_cache["phail"]``.

    Raises:
        ValueError: if the repo lists no parquet file under
            ``sample/inference/``.
    """
    if "phail" in _cache:
        return _cache["phail"]

    def report(fraction, desc):
        # Explicit None check rather than truthiness.
        # NOTE(review): Gradio's Progress object appears to define __len__,
        # which would make a bare `if progress:` unreliable -- confirm.
        if progress is not None:
            progress(fraction, desc=desc)

    report(0.05, "Listing PhAIL sample files on HuggingFace Hub…")

    # Collect parquet paths under sample/inference/
    sample_parquets = [
        f for f in list_repo_files(_PHAIL_REPO_ID, repo_type="dataset")
        if f.startswith("sample/inference/") and f.endswith(".parquet")
    ]

    if not sample_parquets:
        raise ValueError(f"No sample parquet files found in {_PHAIL_REPO_ID}")

    total = len(sample_parquets)
    report(0.15, f"Downloading {total} episode files…")

    policy_data = {label: {"trials": [], "speeds": [], "efforts": [], "zs": []}
                   for label in PHAIL_POLICIES.values()}

    for i, fpath in enumerate(sample_parquets):
        report(0.15 + 0.7 * (i / total), f"Processing episode {i+1}/{total}…")

        # Identify policy from path sample/inference/<model>/batch_*/episode_*/
        parts = fpath.split("/")
        model_key = parts[2] if len(parts) > 2 else None
        label = PHAIL_POLICIES.get(model_key)
        if label is None:
            continue

        local = hf_hub_download(repo_id=_PHAIL_REPO_ID,
                                filename=fpath, repo_type="dataset")
        ep = pd.read_parquet(local)

        state_cols, action_cols = _phail_split_columns(ep)
        if not state_cols:
            # No usable numeric data: a zero-width state matrix would only
            # fail later inside the analysis.
            continue

        states = ep[state_cols].values.astype(float)
        actions = ep[action_cols].values.astype(float) if action_cols else states

        # Episodes with fewer than 4 rows are skipped.
        if len(states) < 4:
            continue

        speed, effort, z = extract_episode(states, actions)

        # PhAIL annotates eval.outcome:
        # "Success" | "Stalled" | "Safety" | "Ran_out_of_time"
        success = _phail_episode_success(parts)

        bucket = policy_data[label]
        bucket["trials"].append(success)
        bucket["speeds"].append(speed)
        bucket["efforts"].append(effort)
        bucket["zs"].append(z)

    # Drop policies with no data
    policy_data = {k: v for k, v in policy_data.items() if v["trials"]}

    report(0.95, "Finalising…")

    _cache["phail"] = policy_data
    return policy_data
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def run_phail(progress=gr.Progress()):
    """Button callback: load the PhAIL sample and run the analysis on it.

    Args:
        progress: progress tracker. The ``gr.Progress()`` default is kept
            as-is; presumably this is how Gradio injects the tracker into an
            event handler -- confirm against the Gradio docs.

    Returns:
        Whatever ``run_analysis(policy_data)`` returns, unchanged.
    """
    progress(0, desc="Connecting to HuggingFace Hub…")
    policy_data = load_phail_sample(progress)
    # load_phail_sample already reports up to 0.95, so report a higher
    # fraction here; the previous 0.9 made the progress bar jump backwards.
    progress(0.97, desc="Running Bayesian + SPARC + STL analysis…")
    results = run_analysis(policy_data)
    progress(1.0, desc="Done!")
    return results
|
| 192 |
+
|
| 193 |
+
|
| 194 |
def load_aloha():
|
| 195 |
if "aloha" in _cache:
|
| 196 |
return _cache["aloha"]
|
|
|
|
| 602 |
|
| 603 |
with gr.Tabs():
|
| 604 |
|
| 605 |
+
# ββ TAB 1: PhAIL β 4 autonomous VLA policies ββββββββββββββββββββββ
|
| 606 |
+
with gr.Tab("π PhAIL: 4 VLA Policies Head-to-Head"):
|
| 607 |
+
gr.Markdown("""
|
| 608 |
+
**Dataset**: [`phail-anon/phail-v1.0`](https://huggingface.co/datasets/phail-anon/phail-v1.0)
|
| 609 |
+
β 20 stratified episodes from **4 real VLA policies** running autonomously on a **Franka Research 3** robot.
|
| 610 |
+
No GPU needed β we're scoring pre-recorded rollouts, not running the policies.
|
| 611 |
+
|
| 612 |
+
| Policy | Type | Developer |
|
| 613 |
+
|--------|------|-----------|
|
| 614 |
+
| ACT | Action Chunking Transformer | Academic (Chi et al.) |
|
| 615 |
+
| GR00T N1.6 | Foundation model | NVIDIA |
|
| 616 |
+
| Ο0.5 | Diffusion policy VLA | Physical Intelligence |
|
| 617 |
+
| SmolVLA | Compact VLA | HuggingFace |
|
| 618 |
+
|
| 619 |
+
Task: **bin-to-bin pick-and-place** (batteries, scissors, towels, wooden spoons).
|
| 620 |
+
Success labels are human-verified from gripper telemetry.
|
| 621 |
+
""")
|
| 622 |
+
|
| 623 |
+
phail_btn = gr.Button("βΆ Load & Analyse PhAIL Sample", variant="primary", size="lg")
|
| 624 |
+
|
| 625 |
+
with gr.Row():
|
| 626 |
+
ph_bayes = gr.Plot(label="Bayesian Posteriors")
|
| 627 |
+
ph_bayes_mat = gr.Plot(label="P(row beats col)")
|
| 628 |
+
with gr.Row():
|
| 629 |
+
ph_sparc = gr.Plot(label="SPARC Smoothness")
|
| 630 |
+
ph_speed = gr.Plot(label="Speed Profiles")
|
| 631 |
+
with gr.Row():
|
| 632 |
+
ph_stl = gr.Plot(label="STL Robustness")
|
| 633 |
+
ph_viols = gr.Plot(label="Violations")
|
| 634 |
+
with gr.Row():
|
| 635 |
+
ph_radar = gr.Plot(label="Composite Radar")
|
| 636 |
+
ph_rank = gr.Plot(label="Final Ranking")
|
| 637 |
+
|
| 638 |
+
ph_scorecard = gr.Markdown(label="Scorecard")
|
| 639 |
+
|
| 640 |
+
phail_btn.click(
|
| 641 |
+
fn=run_phail,
|
| 642 |
+
inputs=[],
|
| 643 |
+
outputs=[ph_bayes, ph_bayes_mat,
|
| 644 |
+
ph_sparc, ph_speed,
|
| 645 |
+
ph_stl, ph_viols,
|
| 646 |
+
ph_radar, ph_rank,
|
| 647 |
+
ph_scorecard],
|
| 648 |
+
)
|
| 649 |
+
|
| 650 |
+
# ββ TAB 2: DEMO DATA ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 651 |
with gr.Tab("π¦Ύ Try with Real ALOHA Data"):
|
| 652 |
gr.Markdown("""
|
| 653 |
**Dataset**: [`lerobot/aloha_static_cups_open`](https://huggingface.co/datasets/lerobot/aloha_static_cups_open)
|
requirements.txt
CHANGED
|
@@ -4,3 +4,5 @@ numpy>=1.24.0
|
|
| 4 |
scipy>=1.10.0
|
| 5 |
datasets>=2.14.0
|
| 6 |
pandas>=2.0.0
|
|
|
|
|
|
|
|
|
| 4 |
scipy>=1.10.0
|
| 5 |
datasets>=2.14.0
|
| 6 |
pandas>=2.0.0
|
| 7 |
+
huggingface_hub>=0.23.0
|
| 8 |
+
pyarrow>=14.0.0
|