ShreeshantXD committed on
Commit
6d74982
·
1 Parent(s): e3130b4

feat: add baseline scores JSON, inference script, and update Dockerfile for improved project structure

Browse files
Dockerfile CHANGED
@@ -27,6 +27,7 @@ COPY --from=builder /app/gridmind-server /usr/local/bin/gridmind-server
27
 
28
  # Copy Python layer and Dashboard
29
  COPY python/ ./python/
 
30
  COPY dashboard/ ./dashboard/
31
  COPY data/ ./data/
32
  COPY openenv.yaml ./
 
27
 
28
  # Copy Python layer and Dashboard
29
  COPY python/ ./python/
30
+ COPY inference.py ./inference.py
31
  COPY dashboard/ ./dashboard/
32
  COPY data/ ./data/
33
  COPY openenv.yaml ./
README.md CHANGED
@@ -10,7 +10,7 @@
10
 
11
  1. **Get a free AI API key** from [Hugging Face](https://huggingface.co/join) (takes 2 minutes)
12
  2. **Run the simulator**: `docker run -p 7860:7860 -p 7861:7861 ghcr.io/your-repo/gridmind-rl:latest`
13
- 3. **Watch the AI learn**: `python python/inference.py --episodes 1`
14
 
15
  That's it! The AI will start making energy decisions and you'll see live results.
16
 
@@ -122,7 +122,7 @@ export HF_TOKEN=hf_your_token_here
122
 
123
  ```bash
124
  # Run 3 learning episodes (takes ~5 minutes)
125
- python python/inference.py --episodes 3
126
  ```
127
 
128
  You'll see output like:
@@ -202,7 +202,7 @@ $env:MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3" # Faster but less accura
202
 
203
  **Run longer training**:
204
  ```bash
205
- python python/inference.py --episodes 10 # Takes ~30 minutes
206
  ```
207
 
208
  **Test the environment manually**:
 
10
 
11
  1. **Get a free AI API key** from [Hugging Face](https://huggingface.co/join) (takes 2 minutes)
12
  2. **Run the simulator**: `docker run -p 7860:7860 -p 7861:7861 ghcr.io/your-repo/gridmind-rl:latest`
13
+ 3. **Watch the AI learn**: `python inference.py --episodes 1` (or `--fast-mode` for a quick heuristic run, no API calls)
14
 
15
  That's it! The AI will start making energy decisions and you'll see live results.
16
 
 
122
 
123
  ```bash
124
  # Run 3 learning episodes (takes ~5 minutes)
125
+ python inference.py --episodes 3
126
  ```
127
 
128
  You'll see output like:
 
202
 
203
  **Run longer training**:
204
  ```bash
205
+ python inference.py --episodes 10 --llm-every 4 # Scale LLM calls via --llm-every; use --fast-mode for tests
206
  ```
207
 
208
  **Test the environment manually**:
baseline_scores.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "meta-llama/Llama-3.1-8B-Instruct",
3
+ "api_base": "https://router.huggingface.co/v1",
4
+ "episodes_per_task": 1,
5
+ "seed_base": 1000,
6
+ "fast_mode": true,
7
+ "llm_every": 4,
8
+ "max_steps": null,
9
+ "task_averages": {
10
+ "1": 0.2776,
11
+ "2": 0.2182,
12
+ "3": 0.3115
13
+ },
14
+ "overall_average": 0.2691,
15
+ "all_results": [
16
+ {
17
+ "task_id": 1,
18
+ "seed": 1100,
19
+ "total_reward": -54.91106240679752,
20
+ "total_steps": 96,
21
+ "elapsed_sec": 0.8684265613555908,
22
+ "score": 0.2776,
23
+ "sub_scores": {
24
+ "cost": 0.277555958007489
25
+ },
26
+ "exploit_detected": false
27
+ },
28
+ {
29
+ "task_id": 2,
30
+ "seed": 1200,
31
+ "total_reward": -573.2793620498348,
32
+ "total_steps": 96,
33
+ "elapsed_sec": 0.9907081127166748,
34
+ "score": 0.2182,
35
+ "sub_scores": {
36
+ "cost": 0.2595566056450961,
37
+ "temperature": 0.15625
38
+ },
39
+ "exploit_detected": false
40
+ },
41
+ {
42
+ "task_id": 3,
43
+ "seed": 1300,
44
+ "total_reward": -670.8653705366278,
45
+ "total_steps": 96,
46
+ "elapsed_sec": 0.8988945484161377,
47
+ "score": 0.3115,
48
+ "sub_scores": {
49
+ "batch_deadline": 1,
50
+ "carbon": 0.24377839161166936,
51
+ "cost": 0.25263438913936676,
52
+ "grid_response": 0.21428571428571427,
53
+ "temperature": 0.14583333333333334
54
+ },
55
+ "exploit_detected": false
56
+ }
57
+ ]
58
+ }
dashboard/__pycache__/server.cpython-311.pyc ADDED
Binary file (5.56 kB). View file
 
dashboard/static/dashboard.js CHANGED
@@ -7,7 +7,9 @@
7
 
8
  // ── Config ──────────────────────────────────────────────────────────────────
9
  const POLL_MS = 500;
10
- const HISTORY_LEN = 288; // 288 steps = full episode
 
 
11
  const API_BASE = '/api';
12
  const TASK_NAMES = {
13
  1: 'Task 1 — Cost Minimization (Easy)',
@@ -95,8 +97,8 @@ function makeBarChart(id, labels, datasets) {
95
  }
96
 
97
  // ── Initialise all charts ─────────────────────────────────────────────────────
98
- const emptyLabels = Array.from({ length: 72 }, (_, i) => `${i}h`);
99
- const emptyData = Array(72).fill(null);
100
 
101
  // 1. Price curve
102
  const priceChart = makeLineChart('chart-price',
@@ -306,7 +308,7 @@ function renderGantt(jobs, currentStep) {
306
  wrap.innerHTML = '<div style="color:var(--text-dim);font-size:0.8rem">No batch jobs in this episode.</div>';
307
  return;
308
  }
309
- const totalSlots = 288;
310
  wrap.innerHTML = '';
311
  jobs.forEach(job => {
312
  const row = document.createElement('div');
@@ -416,7 +418,7 @@ async function fetchAndUpdate() {
416
  const hourOfDay = b.hour_of_day || 0;
417
 
418
  // ── Header ──
419
- document.getElementById('ep-step').textContent = `ep:${state.episode} step:${step}/287`;
420
  document.getElementById('task-badge').textContent = TASK_NAMES[state.task_id] || 'Task 1';
421
 
422
  // ── KPIs ──
@@ -444,20 +446,19 @@ async function fetchAndUpdate() {
444
  document.getElementById('kpi-storage').textContent = `${(b.thermal_storage_level * 100).toFixed(1)}`;
445
 
446
  // ── Price curve chart ──
447
- if (state.price_curve_episode && state.price_curve_episode.length === 72) {
448
- const labels = Array.from({ length: 72 }, (_, i) => `${i}:00`);
449
  priceChart.data.labels = labels;
450
  priceChart.data.datasets[0].data = state.price_curve_episode;
451
- // Current position marker
452
- const marker = Array(72).fill(null);
453
  marker[Math.floor(step / 4)] = state.price_curve_episode[Math.floor(step / 4)];
454
  priceChart.data.datasets[1].data = marker;
455
  priceChart.update('none');
456
  }
457
 
458
  // ── Carbon curve ──
459
- if (state.carbon_curve_episode && state.carbon_curve_episode.length === 72) {
460
- carbonChart.data.labels = Array.from({ length: 72 }, (_, i) => `${i}:00`);
461
  carbonChart.data.datasets[0].data = state.carbon_curve_episode;
462
  carbonChart.update('none');
463
  }
 
7
 
8
  // ── Config ──────────────────────────────────────────────────────────────────
9
  const POLL_MS = 500;
10
+ const EPISODE_STEPS = 96; // 24h × 4 steps/h (15-min)
11
+ const HISTORY_LEN = EPISODE_STEPS;
12
+ const CURVE_POINTS = 24; // hourly downsample (EpisodeSteps/4)
13
  const API_BASE = '/api';
14
  const TASK_NAMES = {
15
  1: 'Task 1 — Cost Minimization (Easy)',
 
97
  }
98
 
99
  // ── Initialise all charts ─────────────────────────────────────────────────────
100
+ const emptyLabels = Array.from({ length: CURVE_POINTS }, (_, i) => `${i}h`);
101
+ const emptyData = Array(CURVE_POINTS).fill(null);
102
 
103
  // 1. Price curve
104
  const priceChart = makeLineChart('chart-price',
 
308
  wrap.innerHTML = '<div style="color:var(--text-dim);font-size:0.8rem">No batch jobs in this episode.</div>';
309
  return;
310
  }
311
+ const totalSlots = EPISODE_STEPS;
312
  wrap.innerHTML = '';
313
  jobs.forEach(job => {
314
  const row = document.createElement('div');
 
418
  const hourOfDay = b.hour_of_day || 0;
419
 
420
  // ── Header ──
421
+ document.getElementById('ep-step').textContent = `ep:${state.episode} step:${step}/${EPISODE_STEPS - 1}`;
422
  document.getElementById('task-badge').textContent = TASK_NAMES[state.task_id] || 'Task 1';
423
 
424
  // ── KPIs ──
 
446
  document.getElementById('kpi-storage').textContent = `${(b.thermal_storage_level * 100).toFixed(1)}`;
447
 
448
  // ── Price curve chart ──
449
+ if (state.price_curve_episode && state.price_curve_episode.length === CURVE_POINTS) {
450
+ const labels = Array.from({ length: CURVE_POINTS }, (_, i) => `${i}:00`);
451
  priceChart.data.labels = labels;
452
  priceChart.data.datasets[0].data = state.price_curve_episode;
453
+ const marker = Array(CURVE_POINTS).fill(null);
 
454
  marker[Math.floor(step / 4)] = state.price_curve_episode[Math.floor(step / 4)];
455
  priceChart.data.datasets[1].data = marker;
456
  priceChart.update('none');
457
  }
458
 
459
  // ── Carbon curve ──
460
+ if (state.carbon_curve_episode && state.carbon_curve_episode.length === CURVE_POINTS) {
461
+ carbonChart.data.labels = Array.from({ length: CURVE_POINTS }, (_, i) => `${i}:00`);
462
  carbonChart.data.datasets[0].data = state.carbon_curve_episode;
463
  carbonChart.update('none');
464
  }
dashboard/static/index.html CHANGED
@@ -514,7 +514,7 @@
514
 
515
  <!-- Row 1: Price curve + Temperature + Controls -->
516
  <div class="card col-8">
517
- <div class="card-title"><span class="icon">💰</span> Electricity Price Curve (72h)</div>
518
  <div class="chart-wrap">
519
  <canvas id="chart-price"></canvas>
520
  </div>
@@ -587,7 +587,7 @@
587
  </div>
588
 
589
  <div class="card col-6">
590
- <div class="card-title"><span class="icon">🌍</span> Carbon Intensity Curve (72h)</div>
591
  <div class="chart-wrap">
592
  <canvas id="chart-carbon"></canvas>
593
  </div>
 
514
 
515
  <!-- Row 1: Price curve + Temperature + Controls -->
516
  <div class="card col-8">
517
+ <div class="card-title"><span class="icon">💰</span> Electricity Price Curve (24h)</div>
518
  <div class="chart-wrap">
519
  <canvas id="chart-price"></canvas>
520
  </div>
 
587
  </div>
588
 
589
  <div class="card col-6">
590
+ <div class="card-title"><span class="icon">🌍</span> Carbon Intensity Curve (24h)</div>
591
  <div class="chart-wrap">
592
  <canvas id="chart-carbon"></canvas>
593
  </div>
env/environment.go CHANGED
@@ -11,7 +11,7 @@ import (
11
  )
12
 
13
  const (
14
- EpisodeSteps = 288 // 72 hours × 15-min intervals
15
  StepDurationHrs = 0.25 // each step = 15 minutes = 0.25 h
16
  MaxBuildings = 3
17
  DefaultSetpoint = 21.0 // °C comfortable indoor temp
@@ -206,6 +206,7 @@ func (e *Environment) GetState() StateResponse {
206
  OutdoorTemperature: b.OutdoorTemperature,
207
  SetpointTemperature: b.SetpointTemperature,
208
  BaselineCost: b.BaselineCost,
 
209
  CumulativeCarbon: b.CumulativeCarbon,
210
  Jobs: b.Jobs,
211
  }
@@ -276,6 +277,7 @@ func (e *Environment) newBuildingState(id int) *BuildingState {
276
  OutdoorTemperature: outdoorTemp,
277
  PrevHVACLevel: 0.5,
278
  BaselineCost: 0.0,
 
279
  SetpointTemperature: DefaultSetpoint,
280
  MaxHVACPower: MaxHVACPowerKW,
281
  MaxStorageCapacity: MaxStorageKWh,
@@ -299,8 +301,12 @@ func (e *Environment) generateBatchJobs() []BatchJob {
299
 
300
  jobs := make([]BatchJob, numJobs)
301
  for i := range jobs {
302
- // Deadline spread across episode, ensuring feasibility
303
- deadline := 20 + e.rng.Intn(60)
 
 
 
 
304
  jobs[i] = BatchJob{
305
  ID: i + 1,
306
  DeadlineSlot: deadline,
@@ -438,9 +444,10 @@ func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) S
438
  // Baseline (always-on at 70% HVAC, no storage/shedding)
439
  baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand
440
  baselineEnergy := baselineKW * StepDurationHrs
441
- b.BaselineCost += baselineEnergy * b.CurrentPrice
 
442
 
443
- // ----- Reward computation -----
444
  rc := ComputeReward(ComputeRewardInput{
445
  B: b,
446
  Act: act,
@@ -479,6 +486,9 @@ func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) S
479
  }
480
  }
481
 
 
 
 
482
  // Record history
483
  if idx < len(e.TempHistory) {
484
  e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature)
 
11
  )
12
 
13
  const (
14
+ EpisodeSteps = 96 // 24 hours × 15-min intervals (96 × 0.25h = 24h)
15
  StepDurationHrs = 0.25 // each step = 15 minutes = 0.25 h
16
  MaxBuildings = 3
17
  DefaultSetpoint = 21.0 // °C comfortable indoor temp
 
206
  OutdoorTemperature: b.OutdoorTemperature,
207
  SetpointTemperature: b.SetpointTemperature,
208
  BaselineCost: b.BaselineCost,
209
+ BaselineCarbon: b.BaselineCarbon,
210
  CumulativeCarbon: b.CumulativeCarbon,
211
  Jobs: b.Jobs,
212
  }
 
277
  OutdoorTemperature: outdoorTemp,
278
  PrevHVACLevel: 0.5,
279
  BaselineCost: 0.0,
280
+ BaselineCarbon: 0.0,
281
  SetpointTemperature: DefaultSetpoint,
282
  MaxHVACPower: MaxHVACPowerKW,
283
  MaxStorageCapacity: MaxStorageKWh,
 
301
 
302
  jobs := make([]BatchJob, numJobs)
303
  for i := range jobs {
304
+ // Deadline spread across episode (leave slack at end for duration)
305
+ span := EpisodeSteps - 12
306
+ if span < 8 {
307
+ span = 8
308
+ }
309
+ deadline := 4 + e.rng.Intn(span)
310
  jobs[i] = BatchJob{
311
  ID: i + 1,
312
  DeadlineSlot: deadline,
 
444
  // Baseline (always-on at 70% HVAC, no storage/shedding)
445
  baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand
446
  baselineEnergy := baselineKW * StepDurationHrs
447
+ b.BaselineCost += baselineEnergy * b.CurrentPrice
448
+ b.BaselineCarbon += baselineEnergy * b.CarbonIntensity
449
 
450
+ // ----- Reward computation -----
451
  rc := ComputeReward(ComputeRewardInput{
452
  B: b,
453
  Act: act,
 
486
  }
487
  }
488
 
489
+ // Per-building step index matches global timestep for this transition (0 .. EpisodeSteps-1)
490
+ b.Step = s
491
+
492
  // Record history
493
  if idx < len(e.TempHistory) {
494
  e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature)
env/models.go CHANGED
@@ -27,7 +27,7 @@ type BuildingState struct {
27
 
28
  // Temporal
29
  HourOfDay int `json:"hour_of_day"` // 0–23
30
- Step int `json:"step"` // 0–95 within episode
31
 
32
  // Batch job queue: pending deadlines (raw slots)
33
  BatchQueue []int `json:"batch_queue"` // deadline slots of pending jobs
@@ -41,6 +41,7 @@ type BuildingState struct {
41
  OutdoorTemperature float64 `json:"-"` // °C for weather perturbation
42
  PrevHVACLevel float64 `json:"-"` // for stability penalty
43
  BaselineCost float64 `json:"-"` // always-on policy running cost
 
44
  SetpointTemperature float64 `json:"-"` // target indoor temp (°C)
45
  MaxHVACPower float64 `json:"-"` // kW
46
  MaxStorageCapacity float64 `json:"-"` // kWh
@@ -139,6 +140,7 @@ type BuildingStatePublic struct {
139
  OutdoorTemperature float64 `json:"outdoor_temperature"`
140
  SetpointTemperature float64 `json:"setpoint_temperature"`
141
  BaselineCost float64 `json:"baseline_cost"`
 
142
  CumulativeCarbon float64 `json:"cumulative_carbon"`
143
  Jobs []BatchJob `json:"jobs"`
144
  // History for chart rendering
 
27
 
28
  // Temporal
29
  HourOfDay int `json:"hour_of_day"` // 0–23
30
+ Step int `json:"step"` // 0–95 within a 96-step (24h) episode
31
 
32
  // Batch job queue: pending deadlines (raw slots)
33
  BatchQueue []int `json:"batch_queue"` // deadline slots of pending jobs
 
41
  OutdoorTemperature float64 `json:"-"` // °C for weather perturbation
42
  PrevHVACLevel float64 `json:"-"` // for stability penalty
43
  BaselineCost float64 `json:"-"` // always-on policy running cost
44
+ BaselineCarbon float64 `json:"-"` // baseline policy gCO2 (for grading)
45
  SetpointTemperature float64 `json:"-"` // target indoor temp (°C)
46
  MaxHVACPower float64 `json:"-"` // kW
47
  MaxStorageCapacity float64 `json:"-"` // kWh
 
140
  OutdoorTemperature float64 `json:"outdoor_temperature"`
141
  SetpointTemperature float64 `json:"setpoint_temperature"`
142
  BaselineCost float64 `json:"baseline_cost"`
143
+ BaselineCarbon float64 `json:"baseline_carbon"`
144
  CumulativeCarbon float64 `json:"cumulative_carbon"`
145
  Jobs []BatchJob `json:"jobs"`
146
  // History for chart rendering
env/rewards.go CHANGED
@@ -50,7 +50,8 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
50
  }
51
 
52
  // ── 4. Deadline Penalty ──────────────────────────────────────────────────
53
- if inp.BatchMissed > 0 {
 
54
  rc.DeadlinePenalty = -float64(inp.BatchMissed) * 1.5
55
  }
56
 
@@ -115,29 +116,26 @@ func computeGridResponse(stress, shedFraction float64) float64 {
115
  return -shedFraction * (0.7 - stress) * 0.3
116
  }
117
 
118
- // computeArbitrageBonus rewards charging storage during cheap periods and
119
- // discharging during expensive periods.
120
  func computeArbitrageBonus(chargeRate, currentPrice float64, curve []float64, step int) float64 {
121
- // Compute rolling average of future prices (next 8 steps = 2 hours)
122
- lookAhead := 8
123
- futureSum := 0.0
124
  count := 0
125
- for i := step + 1; i <= step+lookAhead && i < len(curve); i++ {
126
- futureSum += curve[i]
127
  count++
128
  }
129
  if count == 0 {
130
  return 0.0
131
  }
132
- futureAvg := futureSum / float64(count)
133
 
134
- // If current price is lower than future avg → charging is smart → reward
135
- if chargeRate > 0 && currentPrice < futureAvg {
136
- return chargeRate * (futureAvg - currentPrice) * 2.0
137
  }
138
- // If current price is higher than future avg → discharging is smart → reward
139
- if chargeRate < 0 && currentPrice > futureAvg {
140
- return math.Abs(chargeRate) * (currentPrice - futureAvg) * 2.0
141
  }
142
  return 0.0
143
  }
 
50
  }
51
 
52
  // ── 4. Deadline Penalty ──────────────────────────────────────────────────
53
+ // Task 1 is cost-only; batch jobs are not part of the objective.
54
+ if inp.BatchMissed > 0 && inp.TaskID >= 2 {
55
  rc.DeadlinePenalty = -float64(inp.BatchMissed) * 1.5
56
  }
57
 
 
116
  return -shedFraction * (0.7 - stress) * 0.3
117
  }
118
 
119
// computeArbitrageBonus rewards storage use when the current price compares
// favourably with the recent past. It is causal: only prices strictly before
// `step` are read, so no future information leaks into the reward.
//
// Parameters:
//   - chargeRate:   > 0 means charging storage, < 0 means discharging.
//   - currentPrice: price at the current step.
//   - curve:        price series indexed by step.
//   - step:         index of the current step within curve.
//
// Returns a non-negative bonus:
//   - charging while currentPrice is below the trailing average, or
//   - discharging while currentPrice is above the trailing average,
//
// scaled by |chargeRate| * |priceGap| * 2.0. Returns 0 when there is no past
// price to compare against (step <= 0 or an empty curve) or when the action
// does not match the price signal.
func computeArbitrageBonus(chargeRate, currentPrice float64, curve []float64, step int) float64 {
	const lookBack = 8 // trailing window length in steps

	// Clamp the window to the data that actually exists. Without the lower
	// clamp the first lookBack-1 steps would see an empty window even though
	// some history is available; without the upper clamp a step index past the
	// end of the curve would read out of range.
	start := step - lookBack
	if start < 0 {
		start = 0
	}
	end := step
	if end > len(curve) {
		end = len(curve)
	}

	pastSum := 0.0
	count := 0
	for i := start; i < end; i++ {
		pastSum += curve[i]
		count++
	}
	if count == 0 {
		return 0.0
	}
	pastAvg := pastSum / float64(count)

	// Charging is rewarded when energy is cheaper than it recently was.
	if chargeRate > 0 && currentPrice < pastAvg {
		return chargeRate * (pastAvg - currentPrice) * 2.0
	}
	// Discharging is rewarded when energy is pricier than it recently was.
	if chargeRate < 0 && currentPrice > pastAvg {
		return math.Abs(chargeRate) * (currentPrice - pastAvg) * 2.0
	}
	return 0.0
}
env/tasks.go CHANGED
@@ -34,7 +34,7 @@ func AllTasks() []TaskConfig {
34
  Name: "Full Demand-Response with Batch Scheduling",
35
  Description: "Minimize cost, maintain temperature, respond to grid stress events, schedule all batch jobs before their deadlines, and minimize carbon emissions.",
36
  Difficulty: "hard",
37
- Weights: map[string]float64{"cost": 0.35, "temperature": 0.25, "grid_response": 0.25, "batch_deadline": 0.15},
38
  },
39
  }
40
  }
@@ -187,17 +187,31 @@ func gradeTask3(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
187
  batchScore = float64(completedOnTime) / float64(totalJobs)
188
  }
189
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  grade.SubScores["cost"] = costScore
191
  grade.SubScores["temperature"] = tempScore
192
  grade.SubScores["grid_response"] = gridScore
193
  grade.SubScores["batch_deadline"] = batchScore
 
194
 
195
- // Weighted composite score
196
- grade.Score = costScore*0.35 + tempScore*0.25 + gridScore*0.25 + batchScore*0.15
197
 
198
  grade.Details["grid_stress_steps"] = gridStressSteps
199
  grade.Details["grid_response_steps"] = gridResponseSteps
200
  grade.Details["total_jobs"] = totalJobs
201
  grade.Details["completed_on_time"] = completedOnTime
 
 
202
  return grade
203
  }
 
34
  Name: "Full Demand-Response with Batch Scheduling",
35
  Description: "Minimize cost, maintain temperature, respond to grid stress events, schedule all batch jobs before their deadlines, and minimize carbon emissions.",
36
  Difficulty: "hard",
37
+ Weights: map[string]float64{"cost": 0.28, "temperature": 0.20, "grid_response": 0.20, "batch_deadline": 0.12, "carbon": 0.20},
38
  },
39
  }
40
  }
 
187
  batchScore = float64(completedOnTime) / float64(totalJobs)
188
  }
189
 
190
+ // Carbon sub-score vs baseline always-on policy (same spirit as cost)
191
+ agentCarbon := 0.0
192
+ baselineCarbon := 0.0
193
+ for _, b := range inp.Buildings {
194
+ agentCarbon += b.CumulativeCarbon
195
+ baselineCarbon += b.BaselineCarbon
196
+ }
197
+ carbonScore := 0.0
198
+ if baselineCarbon > 0 {
199
+ carbonScore = math.Max(0, 1.0-agentCarbon/baselineCarbon)
200
+ }
201
+
202
  grade.SubScores["cost"] = costScore
203
  grade.SubScores["temperature"] = tempScore
204
  grade.SubScores["grid_response"] = gridScore
205
  grade.SubScores["batch_deadline"] = batchScore
206
+ grade.SubScores["carbon"] = math.Min(1.0, carbonScore)
207
 
208
+ grade.Score = costScore*0.28 + tempScore*0.20 + gridScore*0.20 + batchScore*0.12 + carbonScore*0.20
 
209
 
210
  grade.Details["grid_stress_steps"] = gridStressSteps
211
  grade.Details["grid_response_steps"] = gridResponseSteps
212
  grade.Details["total_jobs"] = totalJobs
213
  grade.Details["completed_on_time"] = completedOnTime
214
+ grade.Details["agent_carbon"] = agentCarbon
215
+ grade.Details["baseline_carbon"] = baselineCarbon
216
  return grade
217
  }
inference.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Hackathon entrypoint: run from repo root with:
    python inference.py
Delegates to python/inference.py (single source of truth).
"""
import runpy
from pathlib import Path

if __name__ == "__main__":
    # Resolve the real implementation relative to this file so the wrapper
    # works regardless of the current working directory.
    repo_root = Path(__file__).resolve().parent
    target_script = repo_root / "python" / "inference.py"
    # Execute it as if it had been launched directly (its __main__ guard fires).
    runpy.run_path(str(target_script), run_name="__main__")
main.go CHANGED
@@ -135,6 +135,7 @@ func newServer() *Server {
135
  func (s *Server) routes() *http.ServeMux {
136
  mux := http.NewServeMux()
137
  mux.HandleFunc("/health", s.handleHealth)
 
138
  mux.HandleFunc("/reset", s.handleReset)
139
  mux.HandleFunc("/step", s.handleStep)
140
  mux.HandleFunc("/state", s.handleState)
@@ -153,6 +154,12 @@ func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
153
  json.NewEncoder(w).Encode(map[string]string{"status": "ok", "version": "1.0.0"})
154
  }
155
 
 
 
 
 
 
 
156
  // ── /reset ───────────────────────────────────────────────────────────────────
157
 
158
  func (s *Server) handleReset(w http.ResponseWriter, r *http.Request) {
@@ -280,9 +287,14 @@ func (s *Server) handleGrade(w http.ResponseWriter, r *http.Request) {
280
  // Build building states from public state
281
  buildings := make([]*env.BuildingState, len(state.Buildings))
282
  for i, pub := range state.Buildings {
 
 
283
  buildings[i] = &env.BuildingState{
284
- CumulativeCost: pub.CumulativeCost,
285
- BaselineCost: pub.BaselineCost,
 
 
 
286
  }
287
  }
288
 
@@ -342,7 +354,7 @@ func main() {
342
  srv.envMgr.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
343
 
344
  log.Printf("GridMind-RL environment server starting on :%s", port)
345
- log.Printf("Endpoints: GET /health /state /replay /grade /tasks /metrics | POST /reset /step")
346
 
347
  mux := withCORS(withLogging(srv.routes()))
348
  if err := http.ListenAndServe(":"+port, mux); err != nil {
 
135
  func (s *Server) routes() *http.ServeMux {
136
  mux := http.NewServeMux()
137
  mux.HandleFunc("/health", s.handleHealth)
138
+ mux.HandleFunc("/ping", s.handlePing)
139
  mux.HandleFunc("/reset", s.handleReset)
140
  mux.HandleFunc("/step", s.handleStep)
141
  mux.HandleFunc("/state", s.handleState)
 
154
  json.NewEncoder(w).Encode(map[string]string{"status": "ok", "version": "1.0.0"})
155
  }
156
 
157
+ func (s *Server) handlePing(w http.ResponseWriter, r *http.Request) {
158
+ w.Header().Set("Content-Type", "application/json")
159
+ w.WriteHeader(http.StatusOK)
160
+ json.NewEncoder(w).Encode(map[string]string{"status": "ok"})
161
+ }
162
+
163
  // ── /reset ───────────────────────────────────────────────────────────────────
164
 
165
  func (s *Server) handleReset(w http.ResponseWriter, r *http.Request) {
 
287
  // Build building states from public state
288
  buildings := make([]*env.BuildingState, len(state.Buildings))
289
  for i, pub := range state.Buildings {
290
+ jobsCopy := make([]env.BatchJob, len(pub.Jobs))
291
+ copy(jobsCopy, pub.Jobs)
292
  buildings[i] = &env.BuildingState{
293
+ CumulativeCost: pub.CumulativeCost,
294
+ BaselineCost: pub.BaselineCost,
295
+ CumulativeCarbon: pub.CumulativeCarbon,
296
+ BaselineCarbon: pub.BaselineCarbon,
297
+ Jobs: jobsCopy,
298
  }
299
  }
300
 
 
354
  srv.envMgr.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
355
 
356
  log.Printf("GridMind-RL environment server starting on :%s", port)
357
+ log.Printf("Endpoints: GET /health /ping /state /replay /grade /tasks /metrics | POST /reset /step")
358
 
359
  mux := withCORS(withLogging(srv.routes()))
360
  if err := http.ListenAndServe(":"+port, mux); err != nil {
openenv.yaml CHANGED
@@ -62,7 +62,7 @@ schemas:
62
  type: integer
63
  minimum: 0
64
  maximum: 95
65
- description: Current episode step (15-min intervals)
66
  building_id:
67
  type: integer
68
  description: Building identifier for multi-building federation
@@ -103,7 +103,7 @@ schemas:
103
 
104
  reward:
105
  type: number
106
- description: Dense multi-component reward combining cost savings, temperature constraints, grid response, and deadlines.
107
 
108
  tasks:
109
  - id: 1
@@ -123,6 +123,9 @@ endpoints:
123
  health:
124
  path: /health
125
  method: GET
 
 
 
126
  reset:
127
  path: /reset
128
  method: POST
 
62
  type: integer
63
  minimum: 0
64
  maximum: 95
65
+ description: Current episode step index (0–95); 96 steps = 24 hours at 15-min resolution
66
  building_id:
67
  type: integer
68
  description: Building identifier for multi-building federation
 
103
 
104
  reward:
105
  type: number
106
+ description: Dense multi-component reward (cost, optional temperature/grid/carbon/deadlines) task-gated to match objectives.
107
 
108
  tasks:
109
  - id: 1
 
123
  health:
124
  path: /health
125
  method: GET
126
+ ping:
127
+ path: /ping
128
+ method: GET
129
  reset:
130
  path: /reset
131
  method: POST
python/inference.py CHANGED
@@ -8,14 +8,15 @@ Usage:
8
  export API_BASE_URL=https://router.huggingface.co/v1
9
  export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
10
  export HF_TOKEN=hf_xxxx
11
- python python/inference.py [--episodes 3] [--env-url http://localhost:7860]
 
12
  """
13
 
 
 
14
  import argparse
15
  import json
16
  import os
17
- import random
18
- import re
19
  import sys
20
  import time
21
  from typing import Any
@@ -29,9 +30,12 @@ ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
29
  API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
30
  MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
31
  HF_TOKEN = os.getenv("HF_TOKEN", "")
32
- DEFAULT_EPISODES = 3
33
- DEFAULT_SEED_BASE = 1000 # episodes use seed BASE+episode_idx for reproducibility
34
  MAX_RETRIES = 3
 
 
 
35
 
36
  SYSPROMPT = """You are GridMind, an expert industrial energy management controller.
37
  You control a building's HVAC, thermal storage, batch job scheduling, and load shedding.
@@ -39,9 +43,9 @@ Your goal is to minimize electricity costs while maintaining comfort and meeting
39
  Always respond with a single valid JSON object matching the action schema. No explanation needed."""
40
 
41
  TASK_DESCRIPTIONS = {
42
- 1: "Task 1 (Easy - Cost Minimization): Minimize total energy cost over 24 hours. No temperature constraints. Use cheap off-peak periods and thermal storage arbitrage.",
43
  2: "Task 2 (Medium - Temperature Management): Minimize cost AND keep indoor temperature within 19-23°C at all times. Balance comfort vs cost.",
44
- 3: "Task 3 (Hard - Full Demand Response): Minimize cost, maintain temperature, respond to grid stress events by shedding load when grid_stress_signal > 0.7, AND schedule all batch jobs before their deadlines.",
45
  }
46
 
47
  ACTION_SCHEMA_STR = """{
@@ -53,8 +57,29 @@ ACTION_SCHEMA_STR = """{
53
  }"""
54
 
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  # ── Environment client ───────────────────────────────────────────────────────
57
 
 
58
  class GridMindEnvClient:
59
  """Simple HTTP client for the GridMind-RL Go environment server."""
60
 
@@ -93,6 +118,7 @@ class GridMindEnvClient:
93
 
94
  # ── LLM agent ───────────────────────────────────────────────────────────────
95
 
 
96
  class LLMAgent:
97
  """OpenAI-compatible LLM agent that chooses actions given observations."""
98
 
@@ -119,7 +145,7 @@ Current observation:
119
  - Hour of day: {obs.get('hour_of_day', 12)} (0=midnight, peak prices 8-12 and 17-21)
120
  - Pending batch job deadlines: {obs.get('batch_queue', [])}
121
  - Cumulative cost so far: ${obs.get('cumulative_cost', 0):.4f}
122
- - Episode step: {obs.get('step', 0)}/95
123
 
124
  Strategy hints:
125
  - Charge thermal storage when price < $0.08/kWh, discharge when price > $0.15/kWh
@@ -139,37 +165,20 @@ Respond with ONLY a JSON action:
139
  {"role": "user", "content": prompt},
140
  ],
141
  max_tokens=128,
142
- temperature=0.1,
143
  )
144
  content = completion.choices[0].message.content.strip()
145
- return self._parse_action(content)
 
 
 
 
146
  except Exception as e:
147
  print(f" [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {e}")
148
  time.sleep(1)
149
 
150
- # Fallback: rule-based heuristic
151
  return self._heuristic_action(obs)
152
 
153
- def _parse_action(self, content: str) -> dict:
154
- """Extract and validate JSON action from LLM response."""
155
- # Try direct JSON parse
156
- try:
157
- action = json.loads(content)
158
- return self._clamp_action(action)
159
- except json.JSONDecodeError:
160
- pass
161
- # Try to extract JSON block from text
162
- match = re.search(r"\{[^}]+\}", content, re.DOTALL)
163
- if match:
164
- try:
165
- action = json.loads(match.group())
166
- return self._clamp_action(action)
167
- except json.JSONDecodeError:
168
- pass
169
- # Fallback
170
- print(f" [WARN] could not parse LLM response: {content[:100]}")
171
- return self._default_action()
172
-
173
  def _clamp_action(self, action: dict) -> dict:
174
  return {
175
  "hvac_power_level": max(0.0, min(1.0, float(action.get("hvac_power_level", 0.5)))),
@@ -180,38 +189,33 @@ Respond with ONLY a JSON action:
180
  }
181
 
182
  def _heuristic_action(self, obs: dict) -> dict:
183
- """Simple rule-based heuristic when LLM is unavailable."""
184
  price = obs.get("current_price", 0.10)
185
  stress = obs.get("grid_stress_signal", 0.0)
186
  temp = obs.get("indoor_temperature", 21.0)
187
  storage = obs.get("thermal_storage_level", 0.5)
188
  queue = obs.get("batch_queue", [])
189
 
190
- # HVAC: reduce during peak
191
  hvac = 0.7 if price < 0.08 else (0.3 if price > 0.15 else 0.5)
192
- # Adjust for temperature
193
  if temp > 23.0:
194
  hvac = max(hvac, 0.8)
195
  elif temp < 19.0:
196
  hvac = min(hvac, 0.2)
197
 
198
- # Storage arbitrage
199
  charge = 0.0
200
  if price < 0.07 and storage < 0.8:
201
  charge = 0.5
202
  elif price > 0.15 and storage > 0.3:
203
  charge = -0.5
204
 
205
- # Load shedding
206
  shed = 0.0
207
  if stress > 0.7:
208
  shed = 0.4
209
  elif stress > 0.5:
210
  shed = 0.2
211
 
212
- # Batch jobs: schedule soon if deadline approaching
213
  slot = 2
214
- if queue and min(queue) < 10:
215
  slot = 0
216
 
217
  return {
@@ -223,48 +227,84 @@ Respond with ONLY a JSON action:
223
  }
224
 
225
  def _default_action(self) -> dict:
226
- return {"hvac_power_level": 0.5, "thermal_charge_rate": 0.0,
227
- "batch_job_slot": 0, "load_shed_fraction": 0.0, "building_id": 0}
 
 
 
 
 
228
 
229
 
230
  # ── Episode runner ───────────────────────────────────────────────────────────
231
 
232
- def run_episode(env_client: GridMindEnvClient, agent: LLMAgent,
233
- task_id: int, seed: int, verbose: bool = False) -> dict[str, Any]:
234
- """Run a single episode and return grade + metadata."""
 
 
 
 
 
 
 
 
 
 
235
  reset_resp = env_client.reset(task_id=task_id, seed=seed)
236
  obs = reset_resp["observations"][0]
237
 
 
 
238
  total_reward = 0.0
239
  total_steps = 0
240
  start_time = time.time()
 
 
 
 
 
241
 
242
- step_resp = {}
243
- _step = 0
244
  while not step_resp.get("done", False):
245
- action = agent.choose_action(obs, task_id)
246
- step_resp = env_client.step(action)
247
 
 
 
 
 
 
 
 
 
 
248
  if step_resp is None or "observation" not in step_resp:
249
- print(f" [WARN] step {_step}: server returned invalid response, skipping step")
250
- _step += 1
251
  break
252
 
 
 
 
253
  obs = step_resp["observation"]
254
- total_reward += step_resp["reward"]
255
  total_steps += 1
256
-
257
- if verbose and _step % 16 == 0:
258
- print(f" step={_step:02d} price=${obs['current_price']:.3f} "
259
- f"temp={obs['indoor_temperature']:.1f}°C "
260
- f"stress={obs['grid_stress_signal']:.2f} "
261
- f"cost=${obs['cumulative_cost']:.2f} "
262
- f"reward={step_resp['reward']:.3f}")
263
- _step += 1
 
 
 
264
 
265
  elapsed = time.time() - start_time
266
  grade = env_client.grade()
267
 
 
 
268
  return {
269
  "task_id": task_id,
270
  "seed": seed,
@@ -279,12 +319,32 @@ def run_episode(env_client: GridMindEnvClient, agent: LLMAgent,
279
 
280
  # ── Main ─────────────────────────────────────────────────────────────────────
281
 
282
- def main():
 
283
  parser = argparse.ArgumentParser(description="GridMind-RL baseline inference")
284
  parser.add_argument("--episodes", type=int, default=DEFAULT_EPISODES)
285
  parser.add_argument("--env-url", type=str, default=ENV_URL)
286
  parser.add_argument("--verbose", action="store_true")
287
  parser.add_argument("--output", type=str, default="baseline_scores.json")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  args = parser.parse_args()
289
 
290
  print("=" * 60)
@@ -293,48 +353,59 @@ def main():
293
  print(f" API: {API_BASE_URL}")
294
  print(f" Env: {args.env_url}")
295
  print(f" Episodes per task: {args.episodes}")
 
296
  print("=" * 60)
297
 
298
  env_client = GridMindEnvClient(base_url=args.env_url)
299
 
300
- # Wait for env server to be healthy
301
  print("\nWaiting for environment server...")
302
  for attempt in range(30):
303
  if env_client.health():
304
- print(" Environment server is healthy")
305
  break
306
  time.sleep(2)
307
  if attempt == 29:
308
- print(" Environment server not reachable. Exiting.")
309
  sys.exit(1)
310
 
311
  agent = LLMAgent()
312
- all_results = []
313
 
314
  for task_id in [1, 2, 3]:
315
- print(f"\n── Task {task_id}: {TASK_DESCRIPTIONS[task_id][:60]}...")
316
- task_scores = []
317
  for ep in range(args.episodes):
318
  seed = DEFAULT_SEED_BASE + task_id * 100 + ep
319
  print(f" Episode {ep+1}/{args.episodes} (seed={seed})")
320
- result = run_episode(env_client, agent, task_id=task_id, seed=seed, verbose=args.verbose)
321
- task_scores.append(result["score"])
 
 
 
 
 
 
 
 
 
322
  all_results.append(result)
323
- print(f" → score={result['score']:.4f} | reward={result['total_reward']:.3f} | {result['elapsed_sec']:.1f}s")
 
 
 
324
 
325
  avg_score = sum(task_scores) / len(task_scores)
326
  print(f" Task {task_id} average score: {avg_score:.4f}")
327
 
328
- # Score summary table
329
  print("\n" + "=" * 60)
330
  print("BASELINE SCORES SUMMARY")
331
  print("=" * 60)
332
  print(f"{'Task':<10} {'Model':<30} {'Score':<10} {'Episodes':<10}")
333
  print("-" * 60)
334
 
335
- task_avgs = {}
336
  for task_id in [1, 2, 3]:
337
- scores = [r["score"] for r in all_results if r["task_id"] == task_id]
338
  avg = sum(scores) / len(scores) if scores else 0.0
339
  task_avgs[task_id] = avg
340
  print(f"Task {task_id:<6} {MODEL_NAME:<30} {avg:<10.4f} {len(scores)}")
@@ -343,19 +414,21 @@ def main():
343
  overall = sum(task_avgs.values()) / len(task_avgs)
344
  print(f"{'Overall':<10} {'':<30} {overall:<10.4f}")
345
 
346
- # Save results
347
  output = {
348
  "model": MODEL_NAME,
349
  "api_base": API_BASE_URL,
350
  "episodes_per_task": args.episodes,
351
  "seed_base": DEFAULT_SEED_BASE,
 
 
 
352
  "task_averages": {str(k): v for k, v in task_avgs.items()},
353
  "overall_average": overall,
354
  "all_results": all_results,
355
  }
356
- with open(args.output, "w") as f:
357
  json.dump(output, f, indent=2)
358
- print(f"\n Results saved to {args.output}")
359
 
360
 
361
  if __name__ == "__main__":
 
8
  export API_BASE_URL=https://router.huggingface.co/v1
9
  export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
10
  export HF_TOKEN=hf_xxxx
11
+ python inference.py
12
+ # or: python python/inference.py [--episodes 1] [--llm-every 4] [--fast-mode]
13
  """
14
 
15
+ from __future__ import annotations
16
+
17
  import argparse
18
  import json
19
  import os
 
 
20
  import sys
21
  import time
22
  from typing import Any
 
30
  API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
31
  MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
32
  HF_TOKEN = os.getenv("HF_TOKEN", "")
33
+ DEFAULT_EPISODES = 1
34
+ DEFAULT_SEED_BASE = 1000
35
  MAX_RETRIES = 3
36
+ # 96 steps × 15 min = 24 h (must match env.EpisodeSteps)
37
+ EPISODE_STEPS = 96
38
+ LAST_STEP_INDEX = EPISODE_STEPS - 1
39
 
40
  SYSPROMPT = """You are GridMind, an expert industrial energy management controller.
41
  You control a building's HVAC, thermal storage, batch job scheduling, and load shedding.
 
43
  Always respond with a single valid JSON object matching the action schema. No explanation needed."""
44
 
45
  TASK_DESCRIPTIONS = {
46
+ 1: "Task 1 (Easy - Cost Minimization): Minimize total energy cost over 24 hours. No temperature or batch constraints. Use cheap off-peak periods and thermal storage.",
47
  2: "Task 2 (Medium - Temperature Management): Minimize cost AND keep indoor temperature within 19-23°C at all times. Balance comfort vs cost.",
48
+ 3: "Task 3 (Hard - Full Demand Response): Minimize cost, maintain temperature, respond to grid stress (shed when grid_stress_signal > 0.7), schedule batch jobs, minimize carbon.",
49
  }
50
 
51
  ACTION_SCHEMA_STR = """{
 
57
  }"""
58
 
59
 
60
def extract_json_object(text: str) -> dict[str, Any] | None:
    """Return the first JSON object embedded in ``text``, or ``None``.

    LLM replies often wrap the action JSON in prose or code fences, so the
    object has to be located inside free-form text. Parsing is delegated to
    ``json.JSONDecoder.raw_decode`` instead of counting braces by hand:
    the decoder understands string literals and escapes, so a ``{`` or ``}``
    inside a string value does not end the object early, and nested objects
    are handled natively.

    Every ``{`` in the text is tried in order until one starts a valid
    object, so brace-delimited non-JSON text ahead of the real payload
    (e.g. ``{placeholder}``) does not cause a miss. A side effect is that a
    valid object nested inside a malformed outer one can be returned.

    Args:
        text: Raw text that may contain a JSON object anywhere inside it.

    Returns:
        The decoded object as a dict, or ``None`` when no ``{`` in ``text``
        begins a valid JSON object.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start >= 0:
        try:
            # raw_decode parses one JSON value beginning at `start` and
            # ignores whatever trails it.
            candidate, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        # This brace did not open a valid object; try the next one.
        start = text.find("{", start + 1)
    return None
78
+
79
+
80
  # ── Environment client ───────────────────────────────────────────────────────
81
 
82
+
83
  class GridMindEnvClient:
84
  """Simple HTTP client for the GridMind-RL Go environment server."""
85
 
 
118
 
119
  # ── LLM agent ───────────────────────────────────────────────────────────────
120
 
121
+
122
  class LLMAgent:
123
  """OpenAI-compatible LLM agent that chooses actions given observations."""
124
 
 
145
  - Hour of day: {obs.get('hour_of_day', 12)} (0=midnight, peak prices 8-12 and 17-21)
146
  - Pending batch job deadlines: {obs.get('batch_queue', [])}
147
  - Cumulative cost so far: ${obs.get('cumulative_cost', 0):.4f}
148
+ - Episode step: {obs.get('step', 0)}/{LAST_STEP_INDEX}
149
 
150
  Strategy hints:
151
  - Charge thermal storage when price < $0.08/kWh, discharge when price > $0.15/kWh
 
165
  {"role": "user", "content": prompt},
166
  ],
167
  max_tokens=128,
168
+ temperature=0.0,
169
  )
170
  content = completion.choices[0].message.content.strip()
171
+ parsed = extract_json_object(content)
172
+ if parsed is not None:
173
+ return self._clamp_action(parsed)
174
+ action = json.loads(content)
175
+ return self._clamp_action(action)
176
  except Exception as e:
177
  print(f" [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {e}")
178
  time.sleep(1)
179
 
 
180
  return self._heuristic_action(obs)
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  def _clamp_action(self, action: dict) -> dict:
183
  return {
184
  "hvac_power_level": max(0.0, min(1.0, float(action.get("hvac_power_level", 0.5)))),
 
189
  }
190
 
191
  def _heuristic_action(self, obs: dict) -> dict:
192
+ """Rule-based policy (deterministic given obs)."""
193
  price = obs.get("current_price", 0.10)
194
  stress = obs.get("grid_stress_signal", 0.0)
195
  temp = obs.get("indoor_temperature", 21.0)
196
  storage = obs.get("thermal_storage_level", 0.5)
197
  queue = obs.get("batch_queue", [])
198
 
 
199
  hvac = 0.7 if price < 0.08 else (0.3 if price > 0.15 else 0.5)
 
200
  if temp > 23.0:
201
  hvac = max(hvac, 0.8)
202
  elif temp < 19.0:
203
  hvac = min(hvac, 0.2)
204
 
 
205
  charge = 0.0
206
  if price < 0.07 and storage < 0.8:
207
  charge = 0.5
208
  elif price > 0.15 and storage > 0.3:
209
  charge = -0.5
210
 
 
211
  shed = 0.0
212
  if stress > 0.7:
213
  shed = 0.4
214
  elif stress > 0.5:
215
  shed = 0.2
216
 
 
217
  slot = 2
218
+ if queue and min(queue) < 8:
219
  slot = 0
220
 
221
  return {
 
227
  }
228
 
229
  def _default_action(self) -> dict:
230
+ return {
231
+ "hvac_power_level": 0.5,
232
+ "thermal_charge_rate": 0.0,
233
+ "batch_job_slot": 0,
234
+ "load_shed_fraction": 0.0,
235
+ "building_id": 0,
236
+ }
237
 
238
 
239
  # ── Episode runner ───────────────────────────────────────────────────────────
240
 
241
+
242
+ def run_episode(
243
+ env_client: GridMindEnvClient,
244
+ agent: LLMAgent,
245
+ task_id: int,
246
+ seed: int,
247
+ *,
248
+ fast_mode: bool,
249
+ llm_every: int,
250
+ max_steps: int | None,
251
+ verbose: bool = False,
252
+ ) -> dict[str, Any]:
253
+ """Run a single episode and return grade + metadata. Prints [START], [STEPn], [END]."""
254
  reset_resp = env_client.reset(task_id=task_id, seed=seed)
255
  obs = reset_resp["observations"][0]
256
 
257
+ print("[START]", flush=True)
258
+
259
  total_reward = 0.0
260
  total_steps = 0
261
  start_time = time.time()
262
+ step_resp: dict[str, Any] = {}
263
+ step_limit = EPISODE_STEPS if max_steps is None else min(max_steps, EPISODE_STEPS)
264
+
265
+ llm_reuse_remaining = 0
266
+ cached_action = agent._default_action()
267
 
 
 
268
  while not step_resp.get("done", False):
269
+ if total_steps >= step_limit:
270
+ break
271
 
272
+ if fast_mode:
273
+ action = agent._heuristic_action(obs)
274
+ else:
275
+ if llm_reuse_remaining <= 0:
276
+ cached_action = agent.choose_action(obs, task_id)
277
+ llm_reuse_remaining = max(1, llm_every)
278
+ action = cached_action
279
+
280
+ step_resp = env_client.step(action)
281
  if step_resp is None or "observation" not in step_resp:
282
+ print(f" [WARN] step {total_steps}: invalid step response", flush=True)
 
283
  break
284
 
285
+ if not fast_mode:
286
+ llm_reuse_remaining -= 1
287
+
288
  obs = step_resp["observation"]
289
+ total_reward += float(step_resp["reward"])
290
  total_steps += 1
291
+ print(f"[STEP{total_steps}]", flush=True)
292
+
293
+ if verbose and total_steps % 16 == 0:
294
+ print(
295
+ f" step={total_steps:02d} price=${obs['current_price']:.3f} "
296
+ f"temp={obs['indoor_temperature']:.1f}°C "
297
+ f"stress={obs['grid_stress_signal']:.2f} "
298
+ f"cost=${obs['cumulative_cost']:.2f} "
299
+ f"reward={step_resp['reward']:.3f}",
300
+ flush=True,
301
+ )
302
 
303
  elapsed = time.time() - start_time
304
  grade = env_client.grade()
305
 
306
+ print("[END]", flush=True)
307
+
308
  return {
309
  "task_id": task_id,
310
  "seed": seed,
 
319
 
320
  # ── Main ─────────────────────────────────────────────────────────────────────
321
 
322
+
323
+ def main() -> None:
324
  parser = argparse.ArgumentParser(description="GridMind-RL baseline inference")
325
  parser.add_argument("--episodes", type=int, default=DEFAULT_EPISODES)
326
  parser.add_argument("--env-url", type=str, default=ENV_URL)
327
  parser.add_argument("--verbose", action="store_true")
328
  parser.add_argument("--output", type=str, default="baseline_scores.json")
329
+ parser.add_argument(
330
+ "--fast-mode",
331
+ action="store_true",
332
+ help="Heuristic policy only (no LLM calls; fastest, fully reproducible).",
333
+ )
334
+ parser.add_argument(
335
+ "--llm-every",
336
+ type=int,
337
+ default=4,
338
+ metavar="N",
339
+ help="Reuse the same LLM action for N consecutive steps (default: 4).",
340
+ )
341
+ parser.add_argument(
342
+ "--max-steps",
343
+ type=int,
344
+ default=None,
345
+ metavar="N",
346
+ help="Stop after N steps (default: full episode). Grade uses partial episode.",
347
+ )
348
  args = parser.parse_args()
349
 
350
  print("=" * 60)
 
353
  print(f" API: {API_BASE_URL}")
354
  print(f" Env: {args.env_url}")
355
  print(f" Episodes per task: {args.episodes}")
356
+ print(f" Fast mode: {args.fast_mode} | LLM every: {args.llm_every} steps")
357
  print("=" * 60)
358
 
359
  env_client = GridMindEnvClient(base_url=args.env_url)
360
 
 
361
  print("\nWaiting for environment server...")
362
  for attempt in range(30):
363
  if env_client.health():
364
+ print(" [OK] Environment server is healthy")
365
  break
366
  time.sleep(2)
367
  if attempt == 29:
368
+ print(" [FAIL] Environment server not reachable. Exiting.")
369
  sys.exit(1)
370
 
371
  agent = LLMAgent()
372
+ all_results: list[dict[str, Any]] = []
373
 
374
  for task_id in [1, 2, 3]:
375
+ print(f"\n-- Task {task_id}: {TASK_DESCRIPTIONS[task_id][:60]}...")
376
+ task_scores: list[float] = []
377
  for ep in range(args.episodes):
378
  seed = DEFAULT_SEED_BASE + task_id * 100 + ep
379
  print(f" Episode {ep+1}/{args.episodes} (seed={seed})")
380
+ result = run_episode(
381
+ env_client,
382
+ agent,
383
+ task_id=task_id,
384
+ seed=seed,
385
+ fast_mode=args.fast_mode,
386
+ llm_every=args.llm_every,
387
+ max_steps=args.max_steps,
388
+ verbose=args.verbose,
389
+ )
390
+ task_scores.append(float(result["score"]))
391
  all_results.append(result)
392
+ print(
393
+ f" → score={result['score']:.4f} | reward={result['total_reward']:.3f} | "
394
+ f"{result['elapsed_sec']:.1f}s | steps={result['total_steps']}"
395
+ )
396
 
397
  avg_score = sum(task_scores) / len(task_scores)
398
  print(f" Task {task_id} average score: {avg_score:.4f}")
399
 
 
400
  print("\n" + "=" * 60)
401
  print("BASELINE SCORES SUMMARY")
402
  print("=" * 60)
403
  print(f"{'Task':<10} {'Model':<30} {'Score':<10} {'Episodes':<10}")
404
  print("-" * 60)
405
 
406
+ task_avgs: dict[int, float] = {}
407
  for task_id in [1, 2, 3]:
408
+ scores = [float(r["score"]) for r in all_results if r["task_id"] == task_id]
409
  avg = sum(scores) / len(scores) if scores else 0.0
410
  task_avgs[task_id] = avg
411
  print(f"Task {task_id:<6} {MODEL_NAME:<30} {avg:<10.4f} {len(scores)}")
 
414
  overall = sum(task_avgs.values()) / len(task_avgs)
415
  print(f"{'Overall':<10} {'':<30} {overall:<10.4f}")
416
 
 
417
  output = {
418
  "model": MODEL_NAME,
419
  "api_base": API_BASE_URL,
420
  "episodes_per_task": args.episodes,
421
  "seed_base": DEFAULT_SEED_BASE,
422
+ "fast_mode": args.fast_mode,
423
+ "llm_every": args.llm_every,
424
+ "max_steps": args.max_steps,
425
  "task_averages": {str(k): v for k, v in task_avgs.items()},
426
  "overall_average": overall,
427
  "all_results": all_results,
428
  }
429
+ with open(args.output, "w", encoding="utf-8") as f:
430
  json.dump(output, f, indent=2)
431
+ print(f"\n[OK] Results saved to {args.output}")
432
 
433
 
434
  if __name__ == "__main__":
python/models.py CHANGED
@@ -29,7 +29,7 @@ class ObservationModel(BaseModel):
29
  hour_of_day: int = Field(..., ge=0, le=23, description="Current hour of day (0–23)")
30
  batch_queue: List[int] = Field(default_factory=list, description="Deadline slots of pending batch jobs")
31
  cumulative_cost: float = Field(..., ge=0.0, description="Running energy cost this episode ($)")
32
- step: int = Field(..., ge=0, description="Current timestep (0–287)")
33
  building_id: int = Field(default=0, description="Building index in federation")
34
 
35
 
 
29
  hour_of_day: int = Field(..., ge=0, le=23, description="Current hour of day (0–23)")
30
  batch_queue: List[int] = Field(default_factory=list, description="Deadline slots of pending batch jobs")
31
  cumulative_cost: float = Field(..., ge=0.0, description="Running energy cost this episode ($)")
32
+ step: int = Field(..., ge=0, le=95, description="Current timestep (0–95); 96 steps = 24h")
33
  building_id: int = Field(default=0, description="Building index in federation")
34
 
35
 
python/validate.py CHANGED
@@ -16,16 +16,16 @@ import requests
16
 
17
  ENV_URL = "http://localhost:7860"
18
 
19
- PASS = ""
20
- FAIL = ""
21
- WARN = ""
22
 
23
 
24
  def check(label: str, condition: bool, detail: str = "") -> bool:
25
  icon = PASS if condition else FAIL
26
  line = f" {icon} {label}"
27
  if detail:
28
- line += f" {detail}"
29
  print(line)
30
  return condition
31
 
@@ -42,20 +42,22 @@ def validate(env_url: str) -> bool:
42
  base = env_url.rstrip("/")
43
  results = []
44
 
45
- print("\n══════════════════════════════════════════")
46
  print(" GridMind-RL OpenEnv Validation Report")
47
- print("══════════════════════════════════════════\n")
48
 
49
- # ── 1. Health endpoint ──────────────────────────────────────────────────
50
- print("1. Health Endpoint")
51
  try:
52
  r = get(f"{base}/health")
53
  results.append(check("GET /health returns 200", r.status_code == 200, f"got {r.status_code}"))
54
  data = r.json()
55
  results.append(check("Response has 'status' field", "status" in data))
 
 
56
  except Exception as e:
57
  results.append(check("GET /health reachable", False, str(e)))
58
- print(f"\n Cannot reach server at {base}. Is it running?\n")
59
  return False
60
 
61
  # ── 2. Reset endpoint ───────────────────────────────────────────────────
@@ -122,7 +124,8 @@ def validate(env_url: str) -> bool:
122
 
123
  rc = info.get("reward_components", {})
124
  rc_fields = ["cost_savings", "temp_constraint", "grid_response",
125
- "deadline_penalty", "efficiency_bonus", "stability_penalty", "total"]
 
126
  for f in rc_fields:
127
  results.append(check(f"reward_components has '{f}'", f in rc))
128
 
@@ -139,14 +142,15 @@ def validate(env_url: str) -> bool:
139
  r = get(f"{base}/state")
140
  results.append(check("GET /state returns 200", r.status_code == 200))
141
  state = r.json()
142
- state_fields = ["buildings", "price_curve_24h", "carbon_curve_24h",
143
  "episode", "step", "task_id", "done", "seed"]
144
  for f in state_fields:
145
  results.append(check(f"state has '{f}'", f in state))
146
- results.append(check("price_curve_24h has 24 entries",
147
- len(state.get("price_curve_24h", [])) == 24))
148
- results.append(check("carbon_curve_24h has 24 entries",
149
- len(state.get("carbon_curve_24h", [])) == 24))
 
150
  except Exception as e:
151
  results.append(check("GET /state succeeds", False, str(e)))
152
 
@@ -240,13 +244,13 @@ def validate(env_url: str) -> bool:
240
  total = len(results)
241
  pct = 100 * passed // total if total > 0 else 0
242
 
243
- print(f"\n══════════════════════════════════════════")
244
  print(f" Result: {passed}/{total} checks passed ({pct}%)")
245
  if passed == total:
246
- print(" 🎉 ALL CHECKS PASSED Ready for submission!")
247
  else:
248
- print(f" {total - passed} checks failed. Fix errors above.")
249
- print("══════════════════════════════════════════\n")
250
 
251
  return passed == total
252
 
 
16
 
17
  ENV_URL = "http://localhost:7860"
18
 
19
+ PASS = "[OK]"
20
+ FAIL = "[FAIL]"
21
+ WARN = "[WARN]"
22
 
23
 
24
  def check(label: str, condition: bool, detail: str = "") -> bool:
25
  icon = PASS if condition else FAIL
26
  line = f" {icon} {label}"
27
  if detail:
28
+ line += f" - {detail}"
29
  print(line)
30
  return condition
31
 
 
42
  base = env_url.rstrip("/")
43
  results = []
44
 
45
+ print("\n" + "=" * 50)
46
  print(" GridMind-RL OpenEnv Validation Report")
47
+ print("=" * 50 + "\n")
48
 
49
+ # ── 1. Health & ping ─────────────────────────────────────────────────────
50
+ print("1. Health & Ping")
51
  try:
52
  r = get(f"{base}/health")
53
  results.append(check("GET /health returns 200", r.status_code == 200, f"got {r.status_code}"))
54
  data = r.json()
55
  results.append(check("Response has 'status' field", "status" in data))
56
+ rp = get(f"{base}/ping")
57
+ results.append(check("GET /ping returns 200", rp.status_code == 200, f"got {rp.status_code}"))
58
  except Exception as e:
59
  results.append(check("GET /health reachable", False, str(e)))
60
+ print(f"\n [FAIL] Cannot reach server at {base}. Is it running?\n")
61
  return False
62
 
63
  # ── 2. Reset endpoint ───────────────────────────────────────────────────
 
124
 
125
  rc = info.get("reward_components", {})
126
  rc_fields = ["cost_savings", "temp_constraint", "grid_response",
127
+ "deadline_penalty", "efficiency_bonus", "stability_penalty",
128
+ "carbon_reward", "total"]
129
  for f in rc_fields:
130
  results.append(check(f"reward_components has '{f}'", f in rc))
131
 
 
142
  r = get(f"{base}/state")
143
  results.append(check("GET /state returns 200", r.status_code == 200))
144
  state = r.json()
145
+ state_fields = ["buildings", "price_curve_episode", "carbon_curve_episode",
146
  "episode", "step", "task_id", "done", "seed"]
147
  for f in state_fields:
148
  results.append(check(f"state has '{f}'", f in state))
149
+ curve_n = 24 # EpisodeSteps/4 (96/4) downsamples to hourly points
150
+ results.append(check("price_curve_episode has 24 entries",
151
+ len(state.get("price_curve_episode", [])) == curve_n))
152
+ results.append(check("carbon_curve_episode has 24 entries",
153
+ len(state.get("carbon_curve_episode", [])) == curve_n))
154
  except Exception as e:
155
  results.append(check("GET /state succeeds", False, str(e)))
156
 
 
244
  total = len(results)
245
  pct = 100 * passed // total if total > 0 else 0
246
 
247
+ print(f"\n" + "=" * 50)
248
  print(f" Result: {passed}/{total} checks passed ({pct}%)")
249
  if passed == total:
250
+ print(" ALL CHECKS PASSED - Ready for submission!")
251
  else:
252
+ print(f" {total - passed} checks failed. Fix errors above.")
253
+ print("=" * 50 + "\n")
254
 
255
  return passed == total
256
 
tests/environment_test.go CHANGED
@@ -55,21 +55,24 @@ func TestStepAdvancesState(t *testing.T) {
55
  if state.Step != 1 {
56
  t.Errorf("expected step=1 after one step, got %d", state.Step)
57
  }
 
 
 
58
  }
59
 
60
- // TestEpisodeLengthIs288 verifies the episode terminates at step 288.
61
- func TestEpisodeLengthIs288(t *testing.T) {
62
  e := env.NewEnvironment()
63
  var seed int64 = 99
64
  e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
65
 
66
  action := []env.ActionModel{{HVACPowerLevel: 0.5}}
67
  var lastDone bool
68
- for i := 0; i < 288; i++ {
69
  _, lastDone = e.Step(action)
70
  }
71
  if !lastDone {
72
- t.Errorf("episode should be done after 288 steps")
73
  }
74
  }
75
 
@@ -162,7 +165,7 @@ func TestGraderTask1ScoreRange(t *testing.T) {
162
  e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})
163
 
164
  action := []env.ActionModel{{HVACPowerLevel: 0.3}}
165
- for i := 0; i < 288; i++ {
166
  e.Step(action)
167
  }
168
 
@@ -171,9 +174,14 @@ func TestGraderTask1ScoreRange(t *testing.T) {
171
 
172
  buildings := make([]*env.BuildingState, len(state.Buildings))
173
  for i, pub := range state.Buildings {
 
 
174
  buildings[i] = &env.BuildingState{
175
- CumulativeCost: pub.CumulativeCost,
176
- BaselineCost: pub.BaselineCost,
 
 
 
177
  }
178
  }
179
 
 
55
  if state.Step != 1 {
56
  t.Errorf("expected step=1 after one step, got %d", state.Step)
57
  }
58
+ if resps[0].Observation.Step != 0 {
59
+ t.Errorf("expected observation.step=0 after first transition, got %d", resps[0].Observation.Step)
60
+ }
61
  }
62
 
63
+ // TestEpisodeLengthIs96 verifies the episode terminates after 96 steps (24h).
64
+ func TestEpisodeLengthIs96(t *testing.T) {
65
  e := env.NewEnvironment()
66
  var seed int64 = 99
67
  e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
68
 
69
  action := []env.ActionModel{{HVACPowerLevel: 0.5}}
70
  var lastDone bool
71
+ for i := 0; i < env.EpisodeSteps; i++ {
72
  _, lastDone = e.Step(action)
73
  }
74
  if !lastDone {
75
+ t.Errorf("episode should be done after %d steps", env.EpisodeSteps)
76
  }
77
  }
78
 
 
165
  e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})
166
 
167
  action := []env.ActionModel{{HVACPowerLevel: 0.3}}
168
+ for i := 0; i < env.EpisodeSteps; i++ {
169
  e.Step(action)
170
  }
171
 
 
174
 
175
  buildings := make([]*env.BuildingState, len(state.Buildings))
176
  for i, pub := range state.Buildings {
177
+ jobsCopy := make([]env.BatchJob, len(pub.Jobs))
178
+ copy(jobsCopy, pub.Jobs)
179
  buildings[i] = &env.BuildingState{
180
+ CumulativeCost: pub.CumulativeCost,
181
+ BaselineCost: pub.BaselineCost,
182
+ CumulativeCarbon: pub.CumulativeCarbon,
183
+ BaselineCarbon: pub.BaselineCarbon,
184
+ Jobs: jobsCopy,
185
  }
186
  }
187
 
tests/test_graders.py CHANGED
@@ -135,7 +135,7 @@ class TestTask3:
135
 
136
  def test_has_all_sub_scores(self):
137
  g = run_full_episode(task_id=3, seed=31)
138
- for key in ["cost", "temperature", "grid_response", "batch_deadline"]:
139
  assert key in g["sub_scores"], f"Missing sub-score: {key}"
140
 
141
  def test_all_sub_scores_in_range(self):
@@ -146,7 +146,13 @@ class TestTask3:
146
  def test_weights_sum_correct(self):
147
  g = run_full_episode(task_id=3, seed=33)
148
  ss = g["sub_scores"]
149
- expected = ss["cost"]*0.35 + ss["temperature"]*0.25 + ss["grid_response"]*0.25 + ss["batch_deadline"]*0.15
 
 
 
 
 
 
150
  assert abs(g["score"] - expected) < 0.01 or g["exploit_detected"]
151
 
152
  def test_grid_response_sub_score(self):
 
135
 
136
  def test_has_all_sub_scores(self):
137
  g = run_full_episode(task_id=3, seed=31)
138
+ for key in ["cost", "temperature", "grid_response", "batch_deadline", "carbon"]:
139
  assert key in g["sub_scores"], f"Missing sub-score: {key}"
140
 
141
  def test_all_sub_scores_in_range(self):
 
146
  def test_weights_sum_correct(self):
147
  g = run_full_episode(task_id=3, seed=33)
148
  ss = g["sub_scores"]
149
+ expected = (
150
+ ss["cost"] * 0.28
151
+ + ss["temperature"] * 0.20
152
+ + ss["grid_response"] * 0.20
153
+ + ss["batch_deadline"] * 0.12
154
+ + ss["carbon"] * 0.20
155
+ )
156
  assert abs(g["score"] - expected) < 0.01 or g["exploit_detected"]
157
 
158
  def test_grid_response_sub_score(self):