Ciroc0 commited on
Commit
798163a
·
1 Parent(s): c678b17

Add startup and runtime diagnostics

Browse files
Files changed (1) hide show
  1. app.py +162 -24
app.py CHANGED
@@ -3,6 +3,10 @@ import requests
3
  import pandas as pd
4
  import numpy as np
5
  import json
 
 
 
 
6
  from datetime import datetime, timedelta
7
 
8
  from huggingface_hub import HfApi, hf_hub_download
@@ -23,6 +27,9 @@ AARHUS_LON = 10.2108
23
  HF_TOKEN = os.environ.get("HF_TOKEN")
24
 
25
  COPENHAGEN_TZ = ZoneInfo("Europe/Copenhagen")
 
 
 
26
 
27
  # Extended feature set from PLAN.md
28
  FORECAST_FEATURES = [
@@ -61,6 +68,65 @@ OBSERVATION_FEATURES = [
61
  ]
62
 
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  def now_cph():
65
  """Current time in Copenhagen timezone."""
66
  return datetime.now(COPENHAGEN_TZ)
@@ -86,6 +152,7 @@ def fetch_forecasts_for_period(start_date, end_date):
86
  Fetch historical forecast runs for Aarhus from Open-Meteo.
87
  Returns DataFrame with all features.
88
  """
 
89
  all_forecasts = []
90
  run_hours = [0, 3, 6, 9, 12, 15, 18, 21]
91
 
@@ -155,16 +222,19 @@ def fetch_forecasts_for_period(start_date, end_date):
155
  time.sleep(0.1)
156
 
157
  if not all_forecasts:
 
158
  return None
159
 
160
  df = pd.DataFrame(all_forecasts)
161
  df = df.drop_duplicates(subset=['target_timestamp'], keep='first')
162
  df = df.sort_values('target_timestamp').reset_index(drop=True)
 
163
  return df
164
 
165
 
166
  def fetch_future_forecasts():
167
  """Fetch future forecasts - 48 hours ahead."""
 
168
  now = now_cph()
169
  today = now.date()
170
 
@@ -229,15 +299,17 @@ def fetch_future_forecasts():
229
  forecasts.append(row)
230
 
231
  if not forecasts:
 
232
  return None
233
 
234
  df = pd.DataFrame(forecasts)
235
  df = df.drop_duplicates(subset=['target_timestamp'], keep='first')
236
  df = df.sort_values('target_timestamp').reset_index(drop=True)
 
237
  return df
238
 
239
  except Exception as e:
240
- print(f" Error fetching future forecasts: {e}")
241
  return None
242
 
243
 
@@ -248,6 +320,7 @@ def fetch_observations_for_period(start_date, end_date):
248
  """
249
  Fetch actual weather observations for Aarhus from Open-Meteo archive.
250
  """
 
251
  url = "https://archive-api.open-meteo.com/v1/archive"
252
 
253
  cph_today = now_cph().date()
@@ -297,9 +370,10 @@ def fetch_observations_for_period(start_date, end_date):
297
  current_hour = now_cph().replace(minute=0, second=0, microsecond=0)
298
  df = df[df['target_timestamp'] <= current_hour]
299
 
 
300
  return df
301
  except Exception as e:
302
- print(f" Error fetching observations: {e}")
303
  return None
304
 
305
 
@@ -374,12 +448,14 @@ def build_training_matrix(forecasts_df, observations_df):
374
  Adds all derived features.
375
  """
376
  if forecasts_df is None or observations_df is None:
 
377
  return None
378
 
379
  # Merge on target_timestamp
380
  merged = pd.merge(forecasts_df, observations_df, on='target_timestamp', how='inner')
381
 
382
  if len(merged) == 0:
 
383
  return None
384
 
385
  # Add temporal features
@@ -400,6 +476,7 @@ def build_training_matrix(forecasts_df, observations_df):
400
  current_hour = now_cph().replace(minute=0, second=0, microsecond=0)
401
  merged = merged[merged['target_timestamp'] <= current_hour]
402
 
 
403
  return merged
404
 
405
 
@@ -408,6 +485,7 @@ def build_training_matrix(forecasts_df, observations_df):
408
  # =============================================================================
409
  def upload_to_dataset(local_path, filename, dataset_name, repo_type="dataset"):
410
  """Upload a file to Hugging Face dataset."""
 
411
  try:
412
  api = HfApi()
413
  api.upload_file(
@@ -417,14 +495,16 @@ def upload_to_dataset(local_path, filename, dataset_name, repo_type="dataset"):
417
  repo_type=repo_type,
418
  token=HF_TOKEN
419
  )
 
420
  return True
421
  except Exception as e:
422
- print(f" Upload error: {e}")
423
  return False
424
 
425
 
426
  def load_from_dataset(filename, dataset_name, repo_type="dataset"):
427
  """Load a file from Hugging Face dataset."""
 
428
  try:
429
  path = hf_hub_download(
430
  repo_id=dataset_name,
@@ -432,9 +512,10 @@ def load_from_dataset(filename, dataset_name, repo_type="dataset"):
432
  repo_type=repo_type,
433
  token=HF_TOKEN
434
  )
 
435
  return path
436
  except Exception as e:
437
- print(f" Download error: {e}")
438
  return None
439
 
440
 
@@ -500,6 +581,7 @@ def load_existing_training_matrix():
500
  # =============================================================================
501
  def backfill_historical_data():
502
  """Backfill historical data from 2025-11-01 to now."""
 
503
  init_dataset_if_needed()
504
 
505
  start_date = datetime(2025, 11, 1).date()
@@ -562,6 +644,7 @@ def backfill_historical_data():
562
  # =============================================================================
563
  def update_daily():
564
  """Daily update - fetch last 7 days and append to dataset."""
 
565
  init_dataset_if_needed()
566
 
567
  end_date = now_cph().date()
@@ -618,6 +701,7 @@ def update_daily():
618
  # =============================================================================
619
  def load_model_bundle(target_name):
620
  """Load model bundle from dataset."""
 
621
  try:
622
  path = hf_hub_download(
623
  repo_id=DATASET_NAME,
@@ -627,7 +711,7 @@ def load_model_bundle(target_name):
627
  )
628
  return joblib.load(path)
629
  except Exception as e:
630
- print(f"❌ Could not load model bundle for {target_name}: {e}")
631
  return None
632
 
633
 
@@ -658,8 +742,9 @@ def predict_with_bundle(bundle, df):
658
 
659
  def generate_predictions():
660
  """Generate predictions for all targets."""
 
661
  current_time = now_cph()
662
- print(f"🔮 Generating predictions: {current_time}")
663
 
664
  future_forecasts = fetch_future_forecasts()
665
  if future_forecasts is None or len(future_forecasts) == 0:
@@ -676,7 +761,7 @@ def generate_predictions():
676
  with open(registry_path, 'r') as f:
677
  registry = json.load(f)
678
  except Exception as e:
679
- print(f" Could not load model registry: {e}")
680
  registry = None
681
 
682
  results = future_forecasts.copy()
@@ -741,6 +826,7 @@ def generate_predictions():
741
  # =============================================================================
742
  def verify_predictions():
743
  """Verify past predictions with actual observations."""
 
744
  try:
745
  # Load predictions
746
  pred_path, pred_filename = load_first_available_dataset_file(
@@ -803,29 +889,48 @@ def verify_predictions():
803
  # =============================================================================
804
  def run_scheduler():
805
  """Background scheduler for automated tasks."""
 
806
  # Every 3 hours: fetch new forecast run
807
- schedule.every(3).hours.do(lambda: fetch_forecasts_for_period(now_cph().date() - timedelta(days=1), now_cph().date()))
 
 
 
 
 
 
 
808
 
809
  # Every hour: fetch new observations
810
- schedule.every().hour.do(lambda: fetch_observations_for_period(now_cph().date() - timedelta(days=1), now_cph().date()))
 
 
 
 
 
 
 
811
 
812
  # Every 3 hours: generate new predictions
813
- schedule.every(3).hours.do(generate_predictions)
814
 
815
  # Every hour: verify past predictions
816
- schedule.every().hour.do(verify_predictions)
817
 
818
  # Daily: rebuild training matrix
819
- schedule.every().day.at("06:00").do(update_daily)
820
 
821
  while True:
822
- schedule.run_pending()
 
 
 
823
  time.sleep(60)
824
 
825
 
826
  # =============================================================================
827
  # GRADIO UI
828
  # =============================================================================
 
829
  with gr.Blocks(title="DMI Aarhus Collector") as demo:
830
  gr.Markdown("""
831
  # 🌤️ DMI Aarhus Weather Collector
@@ -841,16 +946,49 @@ with gr.Blocks(title="DMI Aarhus Collector") as demo:
841
  btn_predict = gr.Button("🔮 Generate Predictions", variant="primary")
842
  btn_verify = gr.Button("✅ Verify Predictions", variant="secondary")
843
 
844
- btn_backfill.click(backfill_historical_data, outputs=status)
845
- btn_daily.click(update_daily, outputs=status)
846
- btn_predict.click(generate_predictions, outputs=status)
847
- btn_verify.click(verify_predictions, outputs=status)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
848
 
849
  if __name__ == "__main__":
850
- scheduler_thread = threading.Thread(target=run_scheduler, daemon=True)
851
- scheduler_thread.start()
852
- demo.launch(
853
- server_name="0.0.0.0",
854
- server_port=7860,
855
- ssr_mode=False
856
- )
 
 
 
 
 
 
 
 
3
  import pandas as pd
4
  import numpy as np
5
  import json
6
+ import platform
7
+ import sys
8
+ import traceback
9
+ import faulthandler
10
  from datetime import datetime, timedelta
11
 
12
  from huggingface_hub import HfApi, hf_hub_download
 
27
  HF_TOKEN = os.environ.get("HF_TOKEN")
28
 
29
  COPENHAGEN_TZ = ZoneInfo("Europe/Copenhagen")
30
+ APP_NAME = "dmi-collector"
31
+
32
+ faulthandler.enable()
33
 
34
  # Extended feature set from PLAN.md
35
  FORECAST_FEATURES = [
 
68
  ]
69
 
70
 
71
+ def log_event(message, **fields):
72
+ """Emit a structured log line that shows up clearly in HF runtime logs."""
73
+ timestamp = datetime.utcnow().isoformat(timespec="seconds") + "Z"
74
+ details = " ".join(f"{key}={fields[key]!r}" for key in sorted(fields))
75
+ if details:
76
+ print(f"[{APP_NAME}] {timestamp} {message} {details}", flush=True)
77
+ else:
78
+ print(f"[{APP_NAME}] {timestamp} {message}", flush=True)
79
+
80
+
81
+ def log_exception(context, exc):
82
+ """Log an exception with a full traceback."""
83
+ log_event(f"{context} failed", error=str(exc), error_type=type(exc).__name__)
84
+ print(traceback.format_exc(), flush=True)
85
+
86
+
87
+ def install_global_logging():
88
+ """Install process-level exception logging."""
89
+ def handle_exception(exc_type, exc_value, exc_traceback):
90
+ if issubclass(exc_type, KeyboardInterrupt):
91
+ sys.__excepthook__(exc_type, exc_value, exc_traceback)
92
+ return
93
+ log_event("uncaught_exception", error=str(exc_value), error_type=exc_type.__name__)
94
+ print("".join(traceback.format_exception(exc_type, exc_value, exc_traceback)), flush=True)
95
+
96
+ sys.excepthook = handle_exception
97
+
98
+
99
+ def log_startup():
100
+ """Log runtime context during process startup."""
101
+ log_event(
102
+ "startup",
103
+ python=sys.version.split()[0],
104
+ platform=platform.platform(),
105
+ cwd=os.getcwd(),
106
+ hf_token_present=bool(HF_TOKEN),
107
+ dataset=DATASET_NAME,
108
+ predictions_dataset=PREDICTIONS_DATASET,
109
+ )
110
+
111
+
112
+ def run_logged(name, fn, *args, **kwargs):
113
+ """Run a function with start/end/error logging."""
114
+ log_event(f"{name} started")
115
+ try:
116
+ result = fn(*args, **kwargs)
117
+ if isinstance(result, pd.DataFrame):
118
+ log_event(f"{name} completed", rows=len(result), columns=list(result.columns))
119
+ else:
120
+ log_event(f"{name} completed", result_type=type(result).__name__)
121
+ return result
122
+ except Exception as exc:
123
+ log_exception(name, exc)
124
+ raise
125
+
126
+
127
+ install_global_logging()
128
+
129
+
130
  def now_cph():
131
  """Current time in Copenhagen timezone."""
132
  return datetime.now(COPENHAGEN_TZ)
 
152
  Fetch historical forecast runs for Aarhus from Open-Meteo.
153
  Returns DataFrame with all features.
154
  """
155
+ log_event("fetch_forecasts_for_period", start_date=str(start_date), end_date=str(end_date))
156
  all_forecasts = []
157
  run_hours = [0, 3, 6, 9, 12, 15, 18, 21]
158
 
 
222
  time.sleep(0.1)
223
 
224
  if not all_forecasts:
225
+ log_event("fetch_forecasts_for_period no_data", start_date=str(start_date), end_date=str(end_date))
226
  return None
227
 
228
  df = pd.DataFrame(all_forecasts)
229
  df = df.drop_duplicates(subset=['target_timestamp'], keep='first')
230
  df = df.sort_values('target_timestamp').reset_index(drop=True)
231
+ log_event("fetch_forecasts_for_period done", rows=len(df))
232
  return df
233
 
234
 
235
  def fetch_future_forecasts():
236
  """Fetch future forecasts - 48 hours ahead."""
237
+ log_event("fetch_future_forecasts started")
238
  now = now_cph()
239
  today = now.date()
240
 
 
299
  forecasts.append(row)
300
 
301
  if not forecasts:
302
+ log_event("fetch_future_forecasts no_data")
303
  return None
304
 
305
  df = pd.DataFrame(forecasts)
306
  df = df.drop_duplicates(subset=['target_timestamp'], keep='first')
307
  df = df.sort_values('target_timestamp').reset_index(drop=True)
308
+ log_event("fetch_future_forecasts done", rows=len(df))
309
  return df
310
 
311
  except Exception as e:
312
+ log_exception("fetch_future_forecasts", e)
313
  return None
314
 
315
 
 
320
  """
321
  Fetch actual weather observations for Aarhus from Open-Meteo archive.
322
  """
323
+ log_event("fetch_observations_for_period", start_date=str(start_date), end_date=str(end_date))
324
  url = "https://archive-api.open-meteo.com/v1/archive"
325
 
326
  cph_today = now_cph().date()
 
370
  current_hour = now_cph().replace(minute=0, second=0, microsecond=0)
371
  df = df[df['target_timestamp'] <= current_hour]
372
 
373
+ log_event("fetch_observations_for_period done", rows=len(df))
374
  return df
375
  except Exception as e:
376
+ log_exception("fetch_observations_for_period", e)
377
  return None
378
 
379
 
 
448
  Adds all derived features.
449
  """
450
  if forecasts_df is None or observations_df is None:
451
+ log_event("build_training_matrix missing_inputs")
452
  return None
453
 
454
  # Merge on target_timestamp
455
  merged = pd.merge(forecasts_df, observations_df, on='target_timestamp', how='inner')
456
 
457
  if len(merged) == 0:
458
+ log_event("build_training_matrix no_matches")
459
  return None
460
 
461
  # Add temporal features
 
476
  current_hour = now_cph().replace(minute=0, second=0, microsecond=0)
477
  merged = merged[merged['target_timestamp'] <= current_hour]
478
 
479
+ log_event("build_training_matrix done", rows=len(merged), columns=len(merged.columns))
480
  return merged
481
 
482
 
 
485
  # =============================================================================
486
  def upload_to_dataset(local_path, filename, dataset_name, repo_type="dataset"):
487
  """Upload a file to Hugging Face dataset."""
488
+ log_event("upload_to_dataset", local_path=local_path, filename=filename, dataset_name=dataset_name)
489
  try:
490
  api = HfApi()
491
  api.upload_file(
 
495
  repo_type=repo_type,
496
  token=HF_TOKEN
497
  )
498
+ log_event("upload_to_dataset done", filename=filename, dataset_name=dataset_name)
499
  return True
500
  except Exception as e:
501
+ log_exception("upload_to_dataset", e)
502
  return False
503
 
504
 
505
  def load_from_dataset(filename, dataset_name, repo_type="dataset"):
506
  """Load a file from Hugging Face dataset."""
507
+ log_event("load_from_dataset", filename=filename, dataset_name=dataset_name)
508
  try:
509
  path = hf_hub_download(
510
  repo_id=dataset_name,
 
512
  repo_type=repo_type,
513
  token=HF_TOKEN
514
  )
515
+ log_event("load_from_dataset done", filename=filename, dataset_name=dataset_name, path=path)
516
  return path
517
  except Exception as e:
518
+ log_exception("load_from_dataset", e)
519
  return None
520
 
521
 
 
581
  # =============================================================================
582
  def backfill_historical_data():
583
  """Backfill historical data from 2025-11-01 to now."""
584
+ log_event("backfill_historical_data entered")
585
  init_dataset_if_needed()
586
 
587
  start_date = datetime(2025, 11, 1).date()
 
644
  # =============================================================================
645
  def update_daily():
646
  """Daily update - fetch last 7 days and append to dataset."""
647
+ log_event("update_daily entered")
648
  init_dataset_if_needed()
649
 
650
  end_date = now_cph().date()
 
701
  # =============================================================================
702
  def load_model_bundle(target_name):
703
  """Load model bundle from dataset."""
704
+ log_event("load_model_bundle", target_name=target_name)
705
  try:
706
  path = hf_hub_download(
707
  repo_id=DATASET_NAME,
 
711
  )
712
  return joblib.load(path)
713
  except Exception as e:
714
+ log_exception(f"load_model_bundle[{target_name}]", e)
715
  return None
716
 
717
 
 
742
 
743
  def generate_predictions():
744
  """Generate predictions for all targets."""
745
+ log_event("generate_predictions entered")
746
  current_time = now_cph()
747
+ log_event("generate_predictions clock", current_time=str(current_time))
748
 
749
  future_forecasts = fetch_future_forecasts()
750
  if future_forecasts is None or len(future_forecasts) == 0:
 
761
  with open(registry_path, 'r') as f:
762
  registry = json.load(f)
763
  except Exception as e:
764
+ log_exception("generate_predictions model_registry", e)
765
  registry = None
766
 
767
  results = future_forecasts.copy()
 
826
  # =============================================================================
827
  def verify_predictions():
828
  """Verify past predictions with actual observations."""
829
+ log_event("verify_predictions entered")
830
  try:
831
  # Load predictions
832
  pred_path, pred_filename = load_first_available_dataset_file(
 
889
  # =============================================================================
890
  def run_scheduler():
891
  """Background scheduler for automated tasks."""
892
+ log_event("scheduler starting")
893
  # Every 3 hours: fetch new forecast run
894
+ schedule.every(3).hours.do(
895
+ lambda: run_logged(
896
+ "scheduled_fetch_forecasts",
897
+ fetch_forecasts_for_period,
898
+ now_cph().date() - timedelta(days=1),
899
+ now_cph().date(),
900
+ )
901
+ )
902
 
903
  # Every hour: fetch new observations
904
+ schedule.every().hour.do(
905
+ lambda: run_logged(
906
+ "scheduled_fetch_observations",
907
+ fetch_observations_for_period,
908
+ now_cph().date() - timedelta(days=1),
909
+ now_cph().date(),
910
+ )
911
+ )
912
 
913
  # Every 3 hours: generate new predictions
914
+ schedule.every(3).hours.do(lambda: run_logged("scheduled_generate_predictions", generate_predictions))
915
 
916
  # Every hour: verify past predictions
917
+ schedule.every().hour.do(lambda: run_logged("scheduled_verify_predictions", verify_predictions))
918
 
919
  # Daily: rebuild training matrix
920
+ schedule.every().day.at("06:00").do(lambda: run_logged("scheduled_update_daily", update_daily))
921
 
922
  while True:
923
+ try:
924
+ schedule.run_pending()
925
+ except Exception as exc:
926
+ log_exception("scheduler.run_pending", exc)
927
  time.sleep(60)
928
 
929
 
930
  # =============================================================================
931
  # GRADIO UI
932
  # =============================================================================
933
+ log_event("building_gradio_ui")
934
  with gr.Blocks(title="DMI Aarhus Collector") as demo:
935
  gr.Markdown("""
936
  # 🌤️ DMI Aarhus Weather Collector
 
946
  btn_predict = gr.Button("🔮 Generate Predictions", variant="primary")
947
  btn_verify = gr.Button("✅ Verify Predictions", variant="secondary")
948
 
949
+ def backfill_handler():
950
+ try:
951
+ return run_logged("gradio_backfill_historical_data", backfill_historical_data)
952
+ except Exception as exc:
953
+ return f"❌ backfill_historical_data failed: {exc}"
954
+
955
+ def daily_handler():
956
+ try:
957
+ return run_logged("gradio_update_daily", update_daily)
958
+ except Exception as exc:
959
+ return f"❌ update_daily failed: {exc}"
960
+
961
+ def predict_handler():
962
+ try:
963
+ return run_logged("gradio_generate_predictions", generate_predictions)
964
+ except Exception as exc:
965
+ return f"❌ generate_predictions failed: {exc}"
966
+
967
+ def verify_handler():
968
+ try:
969
+ return run_logged("gradio_verify_predictions", verify_predictions)
970
+ except Exception as exc:
971
+ return f"❌ verify_predictions failed: {exc}"
972
+
973
+ btn_backfill.click(backfill_handler, outputs=status)
974
+ btn_daily.click(daily_handler, outputs=status)
975
+ btn_predict.click(predict_handler, outputs=status)
976
+ btn_verify.click(verify_handler, outputs=status)
977
+
978
+ log_event("gradio_ui_ready")
979
 
980
  if __name__ == "__main__":
981
+ log_startup()
982
+ try:
983
+ scheduler_thread = threading.Thread(target=run_scheduler, daemon=True, name="collector-scheduler")
984
+ scheduler_thread.start()
985
+ log_event("scheduler_thread_started", thread_name=scheduler_thread.name, is_alive=scheduler_thread.is_alive())
986
+ log_event("launching_gradio", server_name="0.0.0.0", server_port=7860, ssr_mode=False)
987
+ demo.launch(
988
+ server_name="0.0.0.0",
989
+ server_port=7860,
990
+ ssr_mode=False
991
+ )
992
+ except Exception as exc:
993
+ log_exception("__main__", exc)
994
+ raise