Shoraky Claude Opus 4.7 committed on
Commit
21dc9cb
·
1 Parent(s): 9bb13a1

Delete HF storage by explicit file commit; allow remote-only player delete

Browse files

Player/session deletes left the folder behind on the private HF dataset.
delete_folder can fail to resolve a prefix on a Xet-backed dataset, so the
remote folder survived while the local copy was already removed (hence the app
showed it gone after refresh but HF still had it).

- _delete_folder_from_hf now lists the actual files under the path and deletes
them explicitly via create_commit(CommitOperationDelete...), then confirms by
re-listing. Success is judged purely by the end state (nothing remaining).
- delete_player now checks the remote up front: if the local copy is already
gone but the player still exists on HF, it still deletes the remote folder
instead of 404-ing and orphaning it. Only "absent locally AND remotely" 404s.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

Files changed (1) hide show
  1. api.py +64 -39
api.py CHANGED
@@ -19,7 +19,7 @@ from fastapi.concurrency import run_in_threadpool
19
  from fastapi.responses import JSONResponse
20
  from fastapi.staticfiles import StaticFiles
21
  from typing import List
22
- from huggingface_hub import hf_hub_download, snapshot_download, HfApi
23
 
24
  app = FastAPI(title="Sporalize Labs 3D Analysis Engine")
25
 
@@ -159,65 +159,74 @@ def push_session_to_hf(player_id: str, session_id: str, session_dir: str):
159
  )
160
 
161
 
162
def _hf_path_exists(api, path_in_repo: str) -> bool:
    """Return True if any file remains at or under ``path_in_repo`` on the remote.

    Args:
        api: Client object exposing ``list_repo_files`` (an ``HfApi`` instance
            at the visible call sites).
        path_in_repo: Folder (or file) path inside the storage dataset repo.
            Leading/trailing slashes are ignored.

    Returns:
        True when at least one listed file equals the path or lives below it.

    Raises:
        Whatever ``api.list_repo_files`` raises. The error is deliberately not
        caught here: if the repo cannot be listed, the caller cannot confirm
        anything about the path and must decide how to react.
    """
    # Normalize once so the exact-match and prefix checks agree on the same path.
    normalized = path_in_repo.strip("/")
    prefix = normalized + "/"
    files = api.list_repo_files(
        repo_id=STORAGE_DATASET_REPO_ID,
        repo_type=STORAGE_DATASET_REPO_TYPE,
        token=get_hf_token(),
    )
    return any(f == normalized or f.startswith(prefix) for f in files)
175
 
176
 
177
def _delete_folder_from_hf(path_in_repo: str, commit_message: str):
    """Remove a folder from the HF storage dataset and report the outcome.

    The result is decided by what is left on the remote afterwards, not by
    whether ``delete_folder`` raised: if nothing remains under the path the
    delete counts as successful. A failure is reported only when the path is
    still present after the attempt, or when the end state cannot be checked.

    Returns:
        ``(deleted, error_message)`` where ``error_message`` is None on success.
        When HF storage is disabled this is a no-op returning ``(True, None)``.
    """
    if not hf_storage_enabled():
        return True, None

    client = HfApi(token=get_hf_token())

    # Pre-check: if the path is already absent there is nothing to do. A failed
    # pre-check is not fatal; we simply go on to attempt the delete.
    try:
        already_absent = not _hf_path_exists(client, path_in_repo)
    except Exception:
        already_absent = False
    if already_absent:
        return True, None

    # Attempt the delete, remembering (not raising) any error it produces.
    failure = None
    try:
        client.delete_folder(
            repo_id=STORAGE_DATASET_REPO_ID,
            repo_type=STORAGE_DATASET_REPO_TYPE,
            path_in_repo=path_in_repo,
            commit_message=commit_message,
        )
    except Exception as exc:
        failure = exc

    # Judge the outcome by the remote's actual state.
    try:
        still_there = _hf_path_exists(client, path_in_repo)
    except Exception as exc:
        reason = str(failure or exc)
        print(f"[SPORALIZE] HF delete could not be confirmed for '{path_in_repo}': {reason}", flush=True)
        return False, reason

    if not still_there:
        return True, None

    if failure:
        reason = str(failure)
    else:
        reason = "folder still present on remote after delete"
    print(f"[SPORALIZE] HF delete failed for '{path_in_repo}': {reason}", flush=True)
    return False, reason
221
 
222
 
223
  def delete_session_from_hf(player_id: str, session_id: str):
@@ -1209,10 +1218,26 @@ def delete_session(session_id: str):
1209
  @app.delete("/api/players/{player_id}")
1210
  def delete_player(player_id: str):
1211
  player_dir = player_storage_path(player_id)
1212
- if not os.path.isdir(player_dir):
 
 
 
 
 
 
 
 
 
 
 
 
1213
  raise HTTPException(status_code=404, detail="Player storage not found")
1214
 
1215
- shutil.rmtree(player_dir, ignore_errors=True)
 
 
 
 
1216
  hf_deleted, hf_error = delete_player_from_hf(player_id)
1217
  if hf_storage_enabled() and not hf_deleted:
1218
  raise HTTPException(
 
19
  from fastapi.responses import JSONResponse
20
  from fastapi.staticfiles import StaticFiles
21
  from typing import List
22
+ from huggingface_hub import hf_hub_download, snapshot_download, HfApi, CommitOperationDelete
23
 
24
  app = FastAPI(title="Sporalize Labs 3D Analysis Engine")
25
 
 
159
  )
160
 
161
 
162
def _hf_files_under(api, path_in_repo: str):
    """Return the files at or under ``path_in_repo`` on the remote ``main`` revision.

    Args:
        api: Client object exposing ``list_repo_files`` (an ``HfApi`` instance
            at the visible call sites).
        path_in_repo: Folder (or file) path inside the storage dataset repo.
            Leading/trailing slashes are ignored.

    Returns:
        A list of repo file paths that equal the path or live below it. The
        list is empty when nothing matches (including for an empty path, which
        is never treated as "the whole repo").

    Raises:
        Whatever ``api.list_repo_files`` raises; callers decide how to handle
        an unlistable repo.
    """
    # Normalize once so the exact-match and prefix checks agree on the same
    # path; previously only the prefix was stripped, so "players/x/" could
    # never match a file named exactly "players/x".
    normalized = path_in_repo.strip("/")
    prefix = normalized + "/"
    files = api.list_repo_files(
        repo_id=STORAGE_DATASET_REPO_ID,
        repo_type=STORAGE_DATASET_REPO_TYPE,
        revision="main",
        token=get_hf_token(),
    )
    return [f for f in files if f == normalized or f.startswith(prefix)]
 
 
 
172
 
173
 
174
def _delete_folder_from_hf(path_in_repo: str, commit_message: str):
    """Delete everything under ``path_in_repo`` from the HF storage dataset.

    Instead of relying on ``delete_folder`` (which can mis-resolve a prefix on a
    Xet-backed dataset and leave the folder behind), the files under the path
    are listed and deleted explicitly in a single commit, then the path is
    re-listed to confirm.

    Args:
        path_in_repo: Folder path inside the storage dataset repo.
        commit_message: Message for the delete commit.

    Returns:
        ``(deleted, error)``:
        * ``(True, None)`` when HF storage is disabled, when nothing was under
          the path to begin with, or when the delete commit succeeded and no
          file was found afterwards (or the follow-up listing itself failed,
          in which case the unconfirmed state is logged).
        * ``(False, message)`` when the initial listing failed, when the commit
          failed and the path could not be shown to be empty, or when files
          are still present after the commit.
    """
    if not hf_storage_enabled():
        return True, None

    api = HfApi(token=get_hf_token())

    try:
        targets = _hf_files_under(api, path_in_repo)
    except Exception as error:
        message = str(error)
        print(f"[SPORALIZE] HF delete: could not list '{path_in_repo}': {message}", flush=True)
        return False, message

    if not targets:
        # Already gone on the remote — desired end state.
        return True, None

    operations = [CommitOperationDelete(path_in_repo=f) for f in targets]
    try:
        api.create_commit(
            repo_id=STORAGE_DATASET_REPO_ID,
            repo_type=STORAGE_DATASET_REPO_TYPE,
            operations=operations,
            commit_message=commit_message,
            token=get_hf_token(),
        )
    except Exception as error:
        delete_error = str(error)
        print(f"[SPORALIZE] HF delete commit failed for '{path_in_repo}' ({len(targets)} files): {delete_error}", flush=True)
        # Confirm by end state anyway — a benign race may still have removed it.
        try:
            if not _hf_files_under(api, path_in_repo):
                return True, None
        except Exception as confirm_error:
            # Do not hide the fact that the end state is unknown.
            print(
                f"[SPORALIZE] HF delete: could not re-list '{path_in_repo}' after failed commit: {confirm_error}",
                flush=True,
            )
        return False, delete_error

    # Confirm the files are actually gone.
    try:
        remaining = _hf_files_under(api, path_in_repo)
    except Exception as confirm_error:
        # The commit itself succeeded, so keep reporting success, but say
        # explicitly that the end state could not be verified rather than
        # logging a misleading "ok".
        print(
            f"[SPORALIZE] HF delete committed for '{path_in_repo}' ({len(targets)} files) "
            f"but could not be confirmed: {confirm_error}",
            flush=True,
        )
        return True, None

    if remaining:
        message = f"{len(remaining)} file(s) still present under '{path_in_repo}' after delete"
        print(f"[SPORALIZE] HF delete incomplete: {message}", flush=True)
        return False, message

    print(f"[SPORALIZE] HF delete ok: removed {len(targets)} file(s) under '{path_in_repo}'", flush=True)
    return True, None
 
230
 
231
 
232
  def delete_session_from_hf(player_id: str, session_id: str):
 
1218
  @app.delete("/api/players/{player_id}")
1219
  def delete_player(player_id: str):
1220
  player_dir = player_storage_path(player_id)
1221
+ local_exists = os.path.isdir(player_dir)
1222
+
1223
+ # Check the remote up front so we can tell "nothing anywhere" (real 404)
1224
+ # apart from "still on the remote" (must be deleted even if local is gone).
1225
+ remote_path = hf_storage_path(safe_name(player_id))
1226
+ remote_exists = False
1227
+ if hf_storage_enabled():
1228
+ try:
1229
+ remote_exists = bool(_hf_files_under(HfApi(token=get_hf_token()), remote_path))
1230
+ except Exception:
1231
+ remote_exists = False
1232
+
1233
+ if not local_exists and not remote_exists:
1234
  raise HTTPException(status_code=404, detail="Player storage not found")
1235
 
1236
+ if local_exists:
1237
+ shutil.rmtree(player_dir, ignore_errors=True)
1238
+
1239
+ # Always mirror to the remote (even when the local copy was already gone but
1240
+ # a previous remote delete failed, leaving an orphaned HF folder).
1241
  hf_deleted, hf_error = delete_player_from_hf(player_id)
1242
  if hf_storage_enabled() and not hf_deleted:
1243
  raise HTTPException(