Ciroc0 committed on
Commit
f61f9fa
·
verified ·
1 Parent(s): 1ea5846

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +17 -0
  2. app.py +542 -0
  3. gitattributes +35 -0
  4. requirements.txt +13 -0
README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Dmi Collector
3
+ emoji: 📚
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 6.8.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: cc0-1.0
11
+ ---
12
+
13
+ # DMI Data Collector
14
+
15
+ # Automatisk indsamling af DMI HARMONIE forecasts vs. faktisk vejr for Aarhus.
16
+
17
+ # Kører dagligt kl 06:00 UTC.
app.py ADDED
@@ -0,0 +1,542 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import pandas as pd
4
+ import numpy as np
5
+ from datetime import datetime, timedelta
6
+ from datasets import load_dataset
7
+ from huggingface_hub import HfApi, hf_hub_download
8
+ import schedule
9
+ import time
10
+ import threading
11
+ import os
12
+ import joblib
13
+ from zoneinfo import ZoneInfo
14
+
15
# Hugging Face dataset repositories used for training data and live predictions.
DATASET_NAME = "Ciroc0/dmi-aarhus-weather-data"
PREDICTIONS_DATASET = "Ciroc0/dmi-aarhus-predictions"

# Coordinates sent to the Open-Meteo APIs as latitude / longitude.
AARHUS_LAT, AARHUS_LON = 56.1567, 10.2108

# Access token read from the environment; None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Time zone used by now_cph() and attached to forecast reference times.
COPENHAGEN_TZ = ZoneInfo("Europe/Copenhagen")
22
+
23
def now_cph():
    """Return the current time as a timezone-aware datetime in COPENHAGEN_TZ."""
    return datetime.now(tz=COPENHAGEN_TZ)
25
+
26
def fetch_forecasts_for_period(start_date, end_date):
    """Collect Open-Meteo hourly forecasts for every day in a date range.

    For each day from ``start_date`` to ``end_date`` (inclusive) a forecast
    covering that day and the two following days is requested. Rows are then
    built for every nominal run hour (00, 03, ..., 21) of that day that is not
    in the future, keeping target hours whose lead time is in ``(0, 48]``.

    The request parameters do not depend on the run hour, so the request is
    issued once per day and the response is reused for every run hour instead
    of being fetched again up to eight times.

    Args:
        start_date: First day (``datetime.date``) to fetch.
        end_date: Last day (``datetime.date``) to fetch, inclusive.

    Returns:
        DataFrame with one row per target ``timestamp`` (first occurrence
        kept), sorted by ``timestamp``, with columns ``timestamp``,
        ``reference_time``, ``lead_time_hours`` and the four ``dmi_*_pred``
        values; ``None`` when no rows were collected.
    """
    url = "https://api.open-meteo.com/v1/forecast"
    run_hours = [0, 3, 6, 9, 12, 15, 18, 21]

    all_forecasts = []
    current_date = start_date
    cph_now = now_cph()

    while current_date <= end_date:
        midnight = datetime.combine(current_date, datetime.min.time())

        # Nominal run times of this day that have already happened.
        reference_times = []
        for hour in run_hours:
            reference_time = (midnight + timedelta(hours=hour)).replace(tzinfo=COPENHAGEN_TZ)
            if reference_time <= cph_now:
                reference_times.append(reference_time)

        if reference_times:
            params = {
                "latitude": AARHUS_LAT,
                "longitude": AARHUS_LON,
                "start_date": current_date.strftime("%Y-%m-%d"),
                "end_date": (current_date + timedelta(days=2)).strftime("%Y-%m-%d"),
                "models": "dmi_harmonie",
                "hourly": ["temperature_2m", "windspeed_10m", "pressure_msl", "relativehumidity_2m"],
                "timezone": "Europe/Copenhagen"
            }

            try:
                resp = requests.get(url, params=params, timeout=30)
                if resp.status_code != 200:
                    # Retry once without pinning the model.
                    del params['models']
                    resp = requests.get(url, params=params, timeout=30)

                if resp.status_code == 200:
                    data = resp.json()
                    if 'hourly' in data:
                        hourly = data['hourly']
                        times = pd.to_datetime(hourly['time'])
                        # NOTE(review): tz_localize raises on nonexistent local
                        # times and when 'infer' finds no repeated hour; confirm
                        # how the API reports hours around DST changes.
                        times = times.tz_localize('Europe/Copenhagen', ambiguous='infer')

                        for reference_time in reference_times:
                            for i, target_time in enumerate(times):
                                lead_hours = (target_time - reference_time).total_seconds() / 3600

                                if 0 < lead_hours <= 48:
                                    all_forecasts.append({
                                        'timestamp': target_time,
                                        'reference_time': reference_time,
                                        'lead_time_hours': int(lead_hours),
                                        'dmi_temp_pred': hourly['temperature_2m'][i],
                                        'dmi_wind_pred': hourly['windspeed_10m'][i],
                                        'dmi_pressure_pred': hourly['pressure_msl'][i],
                                        'dmi_humidity_pred': hourly['relativehumidity_2m'][i]
                                    })
            except Exception as e:
                # Best effort: log and move on to the next day.
                print(f"Fejl: {e}")

        current_date += timedelta(days=1)
        time.sleep(0.1)

    if not all_forecasts:
        return None

    df = pd.DataFrame(all_forecasts)
    # IMPORTANT: de-duplicate on timestamp (target time), not reference_time;
    # reference_time is identical for all 48 hours of the same forecast.
    df = df.drop_duplicates(subset=['timestamp'], keep='first')
    df = df.sort_values('timestamp').reset_index(drop=True)
    return df
93
+
94
def fetch_actuals_for_period(start_date, end_date):
    """Download hourly values from the Open-Meteo archive API for a date range.

    ``end_date`` is capped at today's date in Copenhagen, and rows later than
    the current (truncated) hour are removed from the result.

    Args:
        start_date: First day (``datetime.date``) to request.
        end_date: Last day (``datetime.date``) to request.

    Returns:
        DataFrame with columns ``timestamp``, ``actual_temp``, ``actual_wind``,
        ``actual_pressure`` and ``actual_humidity``; ``None`` on a non-200
        response, a response without ``hourly`` data, or any exception.
    """
    # Never request days after today (Copenhagen time).
    end_date = min(end_date, now_cph().date())

    query = {
        "latitude": AARHUS_LAT,
        "longitude": AARHUS_LON,
        "start_date": start_date.strftime("%Y-%m-%d"),
        "end_date": end_date.strftime("%Y-%m-%d"),
        "hourly": ["temperature_2m", "windspeed_10m", "pressure_msl", "relativehumidity_2m"],
        "timezone": "Europe/Copenhagen"
    }

    try:
        response = requests.get(
            "https://archive-api.open-meteo.com/v1/archive", params=query, timeout=60
        )
        if response.status_code != 200:
            return None

        payload = response.json()
        if 'hourly' not in payload:
            return None

        local_times = pd.to_datetime(payload['hourly']['time']).tz_localize(
            'Europe/Copenhagen', ambiguous='infer'
        )

        observed = pd.DataFrame({
            'timestamp': local_times,
            'actual_temp': payload['hourly']['temperature_2m'],
            'actual_wind': payload['hourly']['windspeed_10m'],
            'actual_pressure': payload['hourly']['pressure_msl'],
            'actual_humidity': payload['hourly']['relativehumidity_2m']
        })

        # Drop future hours: keep only rows up to the current hour.
        cutoff = now_cph().replace(minute=0, second=0, microsecond=0)
        return observed[observed['timestamp'] <= cutoff]
    except Exception as e:
        print(f"❌ Fejl: {e}")
        return None
137
+
138
def fetch_future_forecasts():
    """Fetch the forecast for the coming hours, up to 48 hours of lead time.

    The reference time is the latest 3-hourly run hour (00, 03, ..., 21) at or
    before the current Copenhagen hour. Only target times later than now with
    a lead time in ``(0, 48]`` hours are kept.

    Returns:
        DataFrame with one row per target ``timestamp``, sorted by
        ``timestamp``; ``None`` on a failed request, a response without
        ``hourly`` data, no matching rows, or any exception.
    """
    now = now_cph()
    today = now.date()

    # Run hours are every third hour, so flooring the hour to a multiple of 3
    # yields the latest run hour that is <= the current hour.
    latest_run = (now.hour // 3) * 3
    reference_time = (
        datetime.combine(today, datetime.min.time()) + timedelta(hours=latest_run)
    ).replace(tzinfo=COPENHAGEN_TZ)

    # Request three days ahead so that 48 hours are covered.
    endpoint = "https://api.open-meteo.com/v1/forecast"
    query = {
        "latitude": AARHUS_LAT,
        "longitude": AARHUS_LON,
        "start_date": today.strftime("%Y-%m-%d"),
        "end_date": (today + timedelta(days=3)).strftime("%Y-%m-%d"),
        "models": "dmi_harmonie",
        "hourly": ["temperature_2m", "windspeed_10m", "pressure_msl", "relativehumidity_2m"],
        "timezone": "Europe/Copenhagen"
    }

    try:
        response = requests.get(endpoint, params=query, timeout=30)
        if response.status_code != 200:
            # Retry once without pinning the model.
            del query['models']
            response = requests.get(endpoint, params=query, timeout=30)

        if response.status_code != 200:
            return None

        payload = response.json()
        if 'hourly' not in payload:
            return None

        target_times = pd.to_datetime(payload['hourly']['time'])
        target_times = target_times.tz_localize('Europe/Copenhagen', ambiguous='infer')

        rows = []
        for idx, target_time in enumerate(target_times):
            # Future target times only.
            if not target_time > now:
                continue

            lead_hours = (target_time - reference_time).total_seconds() / 3600
            # At most 48 hours ahead of the reference time.
            if not 0 < lead_hours <= 48:
                continue

            rows.append({
                'timestamp': target_time,
                'reference_time': reference_time,
                'lead_time_hours': int(lead_hours),
                'dmi_temp_pred': payload['hourly']['temperature_2m'][idx],
                'dmi_wind_pred': payload['hourly']['windspeed_10m'][idx],
                'dmi_pressure_pred': payload['hourly']['pressure_msl'][idx],
                'dmi_humidity_pred': payload['hourly']['relativehumidity_2m'][idx]
            })

        if not rows:
            return None

        # De-duplicate on timestamp (target time), not reference_time.
        result = (
            pd.DataFrame(rows)
            .drop_duplicates(subset=['timestamp'], keep='first')
            .sort_values('timestamp')
            .reset_index(drop=True)
        )

        print(f"✅ Hentede {len(result)} forecasts fra {result['timestamp'].min()} til {result['timestamp'].max()}")
        return result

    except Exception as e:
        print(f"❌ Fejl: {e}")
        return None
211
+
212
def get_features_for_prediction(row):
    """Build the model feature dict for one forecast row.

    The calendar features (hour, month, day of year and their cyclic
    encodings) are taken from ``row['reference_time']``; the four
    ``dmi_*_pred`` values are copied through unchanged.

    Args:
        row: Mapping-like object with ``reference_time`` and the four
            ``dmi_*_pred`` keys.

    Returns:
        dict with the forecast values followed by ``hour_sin``, ``hour_cos``,
        ``month_sin``, ``month_cos``, ``hour`` and ``day_of_year``.
    """
    ref = row['reference_time']
    # Use the wall-clock value: drop tzinfo when the value carries one.
    if getattr(ref, 'tzinfo', None) is not None:
        ref = ref.replace(tzinfo=None)

    hour = ref.hour
    month = ref.month
    day_of_year = ref.timetuple().tm_yday

    features = {
        name: row[name]
        for name in ('dmi_temp_pred', 'dmi_wind_pred', 'dmi_pressure_pred', 'dmi_humidity_pred')
    }
    features['hour_sin'] = np.sin(2 * np.pi * hour / 24)
    features['hour_cos'] = np.cos(2 * np.pi * hour / 24)
    features['month_sin'] = np.sin(2 * np.pi * month / 12)
    features['month_cos'] = np.cos(2 * np.pi * month / 12)
    features['hour'] = hour
    features['day_of_year'] = day_of_year
    return features
235
+
236
def load_model():
    """Download ``xgb_model.pkl`` from the DATASET_NAME repo and load it.

    Returns:
        The object returned by ``joblib.load``, or ``None`` when the download
        or the load raises.
    """
    try:
        local_path = hf_hub_download(
            repo_id=DATASET_NAME,
            filename="xgb_model.pkl",
            repo_type="dataset",
            token=HF_TOKEN,
        )
        # NOTE(review): joblib.load unpickles the file, so the repo content
        # must be trusted.
        model = joblib.load(local_path)
    except Exception as e:
        print(f"❌ Kunne ikke loade model: {e}")
        return None
    return model
248
+
249
def generate_ml_predictions(forecasts_df):
    """Add an ``ml_pred`` column: the DMI temperature plus the model's output.

    Args:
        forecasts_df: DataFrame with ``reference_time`` and the four
            ``dmi_*_pred`` columns.

    Returns:
        A copy of ``forecasts_df`` with ``ml_pred`` added, or ``None`` when
        the model could not be loaded.
    """
    model = load_model()
    if model is None:
        return None

    # Column order handed to the model.
    feature_cols = [
        'dmi_temp_pred', 'dmi_wind_pred', 'dmi_pressure_pred', 'dmi_humidity_pred',
        'hour_sin', 'hour_cos', 'month_sin', 'month_cos',
        'hour', 'day_of_year'
    ]

    X = pd.DataFrame(
        [get_features_for_prediction(row) for _, row in forecasts_df.iterrows()]
    )

    # The model output is added to the raw forecast as a correction.
    corrections = model.predict(X[feature_cols])
    result = forecasts_df.copy()
    result['ml_pred'] = result['dmi_temp_pred'] + corrections
    return result
272
+
273
def backfill_historical_data():
    """Rebuild the training dataset from 2025-11-01 until today and upload it.

    Works month by month: fetches forecasts, fetches actuals for the covered
    target dates (padded by two days on each side), inner-joins them on
    ``timestamp`` and derives the calendar features and ``dmi_error``. The
    combined result is written to ``data.parquet`` and uploaded to
    DATASET_NAME.

    Rows whose ``actual_temp`` is missing are dropped before storing: they
    would carry a NaN ``dmi_error``, and because rows are de-duplicated on
    ``timestamp`` a later run would not replace them.

    Returns:
        A status string: the number of rows stored, "❌ Ingen data" when
        nothing was collected, or an error message when saving/uploading fails.
    """
    start_date = datetime(2025, 11, 1).date()
    end_date = now_cph().date()

    print(f"🔄 Henter fra {start_date} til {end_date}")

    all_data = []
    current_month_start = start_date

    while current_month_start <= end_date:
        # First day of the following month (also the next loop position).
        if current_month_start.month == 12:
            next_month = datetime(current_month_start.year + 1, 1, 1).date()
        else:
            next_month = datetime(current_month_start.year, current_month_start.month + 1, 1).date()

        month_end = min(next_month - timedelta(days=1), end_date)

        print(f"🔄 Henter {current_month_start.strftime('%Y-%m')}...")

        forecasts = fetch_forecasts_for_period(current_month_start, month_end)
        current_month_start = next_month

        if forecasts is None or len(forecasts) == 0:
            continue

        min_target = forecasts['timestamp'].min().date()
        max_target = forecasts['timestamp'].max().date()

        actuals = fetch_actuals_for_period(
            min_target - timedelta(days=2),
            max_target + timedelta(days=2)
        )
        if actuals is None:
            continue

        merged = pd.merge(forecasts, actuals, on='timestamp', how='inner')
        # Presumably the archive reports null for hours it has no data for
        # yet (TODO confirm); such rows cannot yield a dmi_error, so skip them.
        merged = merged.dropna(subset=['actual_temp']).copy()
        if len(merged) == 0:
            continue

        merged['hour'] = merged['reference_time'].dt.hour
        merged['day_of_year'] = merged['reference_time'].dt.dayofyear
        merged['month'] = merged['reference_time'].dt.month
        merged['hour_sin'] = np.sin(2 * np.pi * merged['hour'] / 24)
        merged['hour_cos'] = np.cos(2 * np.pi * merged['hour'] / 24)
        merged['month_sin'] = np.sin(2 * np.pi * merged['month'] / 12)
        merged['month_cos'] = np.cos(2 * np.pi * merged['month'] / 12)
        merged['dmi_error'] = merged['actual_temp'] - merged['dmi_temp_pred']

        all_data.append(merged)
        print(f"✅ {len(merged)} rækker")

    if not all_data:
        return "❌ Ingen data"

    final_df = pd.concat(all_data, ignore_index=True)
    # Drop future hours: keep rows whose timestamp is <= the current hour.
    current_hour = now_cph().replace(minute=0, second=0, microsecond=0)
    final_df = final_df[final_df['timestamp'] <= current_hour]
    # De-duplicate on timestamp (target time).
    final_df = final_df.drop_duplicates(subset=['timestamp'], keep='first')

    try:
        final_df.to_parquet("data.parquet")
        api = HfApi()
        api.upload_file(
            path_or_fileobj="data.parquet",
            path_in_repo="data.parquet",
            repo_id=DATASET_NAME,
            repo_type="dataset",
            token=HF_TOKEN
        )
        return f"✅ {len(final_df)} rækker med timestamp som nøgle"
    except Exception as e:
        return f"❌ Fejl: {str(e)}"
344
+
345
def update_daily():
    """Append the last seven days of forecast-vs-actual rows to the dataset.

    Fetches forecasts and actuals, inner-joins them on ``timestamp``, derives
    the calendar features and ``dmi_error``, merges rows with new timestamps
    into the existing DATASET_NAME data and uploads ``data.parquet``.

    Changes from the previous version:
      * rows with a missing ``actual_temp`` are dropped, so a NaN
        ``dmi_error`` is never stored under a timestamp that later runs
        would skip as "already present";
      * the "no match" check runs after the future-hour filter;
      * a failure while saving/uploading is returned as a status string
        instead of raising into the scheduler thread or the UI handler.

    Returns:
        A status string describing the outcome.
    """
    end_date = now_cph().date()
    start_date = end_date - timedelta(days=7)

    print(f"⏰ København tid: {now_cph()}")

    forecasts = fetch_forecasts_for_period(start_date, end_date)
    if forecasts is None:
        return "❌ Ingen forecasts"

    min_target = forecasts['timestamp'].min().date()
    max_target = forecasts['timestamp'].max().date()
    actuals = fetch_actuals_for_period(min_target - timedelta(days=2), max_target)

    if actuals is None:
        return "❌ Ingen actuals"

    merged = pd.merge(forecasts, actuals, on='timestamp', how='inner')

    # Drop future hours: keep rows whose timestamp is <= the current hour.
    current_hour = now_cph().replace(minute=0, second=0, microsecond=0)
    merged = merged[merged['timestamp'] <= current_hour]
    # Presumably the archive reports null for hours it has no data for yet
    # (TODO confirm); such rows cannot yield a dmi_error, so skip them.
    merged = merged.dropna(subset=['actual_temp']).copy()

    if len(merged) == 0:
        return "❌ Ingen match"

    merged['hour'] = merged['reference_time'].dt.hour
    merged['day_of_year'] = merged['reference_time'].dt.dayofyear
    merged['month'] = merged['reference_time'].dt.month
    merged['hour_sin'] = np.sin(2 * np.pi * merged['hour'] / 24)
    merged['hour_cos'] = np.cos(2 * np.pi * merged['hour'] / 24)
    merged['month_sin'] = np.sin(2 * np.pi * merged['month'] / 12)
    merged['month_cos'] = np.cos(2 * np.pi * merged['month'] / 12)
    merged['dmi_error'] = merged['actual_temp'] - merged['dmi_temp_pred']

    try:
        dataset = load_dataset(DATASET_NAME, split="train")
        existing = dataset.to_pandas()

        if 'timestamp' not in existing.columns:
            return "❌ Eksisterende data mangler timestamp kolonne"

        if existing['timestamp'].dt.tz is None:
            existing['timestamp'] = existing['timestamp'].dt.tz_localize('Europe/Copenhagen', ambiguous='infer')
        else:
            existing['timestamp'] = existing['timestamp'].dt.tz_convert('Europe/Copenhagen')

        # Keep only rows whose timestamp (target time) is not stored yet.
        existing_ts = set(existing['timestamp'])
        new_data = merged[~merged['timestamp'].isin(existing_ts)]

        if len(new_data) == 0:
            return "ℹ️ Ingen nye data"

        combined = pd.concat([existing, new_data], ignore_index=True)
        # Make sure the combined frame has no duplicate timestamps.
        combined = combined.drop_duplicates(subset=['timestamp'], keep='first')

        status_msg = f"✅ {len(new_data)} nye rækker tilføjet"
    except Exception as e:
        # NOTE(review): any failure to load the existing dataset (not only a
        # missing one) ends here, and the upload below then replaces the
        # stored data with just these rows. Consider narrowing this handler.
        print(f"Info: {e}")
        combined = merged
        status_msg = f"✅ {len(merged)} rækker gemt (nyt datasæt)"

    try:
        combined.to_parquet("data.parquet")
        api = HfApi()
        api.upload_file(path_or_fileobj="data.parquet", path_in_repo="data.parquet",
                        repo_id=DATASET_NAME, repo_type="dataset", token=HF_TOKEN)
    except Exception as e:
        return f"❌ Fejl: {str(e)}"

    return status_msg
415
+
416
def update_predictions():
    """Generate predictions for the coming hours and store them.

    Fetches the future forecasts, adds the model output (``ml_pred``), tags the
    rows as unverified and merges them into PREDICTIONS_DATASET, replacing any
    stored row that has the same target ``timestamp``.

    Changes from the previous version:
      * the bare ``except:`` is now ``except Exception`` and the error is
        printed, so a failed load is no longer silent and interpreter-exit
        exceptions are not swallowed;
      * tz-aware stored timestamps are converted to Europe/Copenhagen, the
        same way ``update_daily`` treats its data.

    Returns:
        A status string describing the outcome.
    """
    current_time = now_cph()
    print(f"🔮 Genererer live predictions: {current_time}")

    future_forecasts = fetch_future_forecasts()
    if future_forecasts is None or len(future_forecasts) == 0:
        return "❌ Kunne ikke hente fremtidige forecasts"

    predictions = generate_ml_predictions(future_forecasts)
    if predictions is None:
        return "❌ Kunne ikke loade model"

    predictions['prediction_made_at'] = current_time
    predictions['city'] = 'aarhus'
    predictions['verified'] = False
    predictions['actual_temp'] = None

    try:
        dataset = load_dataset(PREDICTIONS_DATASET, split="train")
        existing = dataset.to_pandas()

        if 'timestamp' in existing.columns:
            if existing['timestamp'].dt.tz is None:
                existing['timestamp'] = existing['timestamp'].dt.tz_localize('Europe/Copenhagen', ambiguous='infer')
            else:
                existing['timestamp'] = existing['timestamp'].dt.tz_convert('Europe/Copenhagen')

            # Each target time keeps a single prediction: remove stored rows
            # that the new predictions replace.
            new_timestamps = set(predictions['timestamp'])
            existing = existing[~existing['timestamp'].isin(new_timestamps)]

            combined = pd.concat([existing, predictions], ignore_index=True)
            # De-duplicate once more as a safeguard.
            combined = combined.drop_duplicates(subset=['timestamp'], keep='first')
        else:
            combined = predictions
    except Exception as e:
        # NOTE(review): on any load failure only the new predictions are
        # uploaded, replacing the stored history. Consider narrowing this.
        print(f"Info: {e}")
        combined = predictions

    try:
        combined.to_parquet("predictions.parquet")
        api = HfApi()
        api.upload_file(
            path_or_fileobj="predictions.parquet",
            path_in_repo="predictions.parquet",
            repo_id=PREDICTIONS_DATASET,
            repo_type="dataset",
            token=HF_TOKEN
        )
        return f"✅ {len(predictions)} nye predictions gemt ({predictions['timestamp'].min()} til {predictions['timestamp'].max()})"
    except Exception as e:
        return f"❌ Fejl: {str(e)}"
467
+
468
def verify_past_predictions():
    """Fill in the observed temperature for stored predictions that are due.

    A prediction is due when it is not yet verified and its ``timestamp`` is
    more than one hour in the past. For each due row with a matching,
    non-missing actual, ``actual_temp`` is set and ``verified`` becomes True;
    the updated table is then uploaded to PREDICTIONS_DATASET.

    Changes from the previous version:
      * a row is only marked verified when the matched ``actual_temp`` is not
        missing, so it stays eligible until a real value is available;
      * the returned count is the number of rows actually verified, not the
        number of candidates;
      * nothing is uploaded when no row changed;
      * tz-aware stored timestamps are converted to Europe/Copenhagen.

    Returns:
        A status string describing the outcome.
    """
    try:
        dataset = load_dataset(PREDICTIONS_DATASET, split="train")
        pred_df = dataset.to_pandas()

        if 'timestamp' not in pred_df.columns:
            return "❌ Ingen timestamp kolonne"

        if pred_df['timestamp'].dt.tz is None:
            pred_df['timestamp'] = pred_df['timestamp'].dt.tz_localize('Europe/Copenhagen', ambiguous='infer')
        else:
            pred_df['timestamp'] = pred_df['timestamp'].dt.tz_convert('Europe/Copenhagen')

        now = now_cph()
        to_verify = pred_df[
            (~pred_df['verified']) &
            (pred_df['timestamp'] < now - timedelta(hours=1))
        ]

        if len(to_verify) == 0:
            return "Ingen at verificere"

        start_date = to_verify['timestamp'].min().date()
        end_date = to_verify['timestamp'].max().date()
        actuals = fetch_actuals_for_period(start_date, end_date)

        if actuals is None:
            return "Kunne ikke hente actuals"

        verified_count = 0
        for idx, row in to_verify.iterrows():
            match = actuals[actuals['timestamp'] == row['timestamp']]
            if len(match) == 0:
                continue

            actual_temp = match.iloc[0]['actual_temp']
            # Presumably the archive reports null for hours it has no data
            # for yet (TODO confirm); leave such rows unverified for now.
            if pd.isna(actual_temp):
                continue

            pred_df.loc[idx, 'actual_temp'] = actual_temp
            pred_df.loc[idx, 'verified'] = True
            verified_count += 1

        if verified_count > 0:
            pred_df.to_parquet("predictions.parquet")
            api = HfApi()
            api.upload_file(
                path_or_fileobj="predictions.parquet",
                path_in_repo="predictions.parquet",
                repo_id=PREDICTIONS_DATASET,
                repo_type="dataset",
                token=HF_TOKEN
            )

        return f"{verified_count} verificeret"

    except Exception as e:
        return f"Verificeringsfejl: {e}"
515
+
516
def run_scheduler():
    """Register the daily 06:00 ``update_daily`` job and poll it forever.

    Pending jobs are checked once a minute. An exception raised by a job is
    printed and the loop keeps running; without this guard the exception
    would propagate out of ``schedule.run_pending()`` and end the thread, so
    no later run would ever happen.
    """
    schedule.every().day.at("06:00").do(update_daily)
    while True:
        try:
            schedule.run_pending()
        except Exception as e:
            print(f"❌ Scheduler-fejl: {e}")
        time.sleep(60)
521
+
522
# Start the scheduler loop in a daemon thread so it runs alongside the UI.
scheduler_thread = threading.Thread(target=run_scheduler, daemon=True)
scheduler_thread.start()

# Gradio UI: one status box and three buttons that trigger the jobs manually.
with gr.Blocks(title="DMI Collector + Live Predictions") as demo:
    gr.Markdown("""
    # 🌤️ DMI Data Collector + Live Predictions
    """)

    status = gr.Textbox(label="Status", lines=10)

    with gr.Row():
        btn_backfill = gr.Button("🚀 Hent historisk data", variant="primary")
        btn_daily = gr.Button("🔄 Opdater træningsdata", variant="secondary")
        btn_predict = gr.Button("🔮 Generér Live Predictions NU", variant="primary")

    # Each handler returns a status string that is shown in the status box.
    for button, handler in (
        (btn_backfill, backfill_historical_data),
        (btn_daily, update_daily),
        (btn_predict, update_predictions),
    ):
        button.click(handler, outputs=status)

demo.launch()
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ huggingface-hub>=0.25.0
2
+ datasets>=3.0.0
3
+ pandas>=2.2.3
4
+ requests>=2.31.0
5
+ schedule>=1.2.0
6
+ pyarrow>=15.0.0
7
+ numpy>=2.0.0
8
+ gradio>=4.0.0
9
+ joblib>=1.3.0
10
+ xgboost>=2.0.0
11
+ plotly>=5.18.0
12
+ scikit-learn>=1.3.0
13
+ tzdata>=2024.1