Spaces:
Running
Running
Commit ·
6d74982
1
Parent(s): e3130b4
feat: add baseline scores JSON, inference script, and update Dockerfile for improved project structure
Browse files- Dockerfile +1 -0
- README.md +3 -3
- baseline_scores.json +58 -0
- dashboard/__pycache__/server.cpython-311.pyc +0 -0
- dashboard/static/dashboard.js +12 -11
- dashboard/static/index.html +2 -2
- env/environment.go +15 -5
- env/models.go +3 -1
- env/rewards.go +13 -15
- env/tasks.go +17 -3
- inference.py +11 -0
- main.go +15 -3
- openenv.yaml +5 -2
- python/inference.py +147 -74
- python/models.py +1 -1
- python/validate.py +23 -19
- tests/environment_test.go +15 -7
- tests/test_graders.py +8 -2
Dockerfile
CHANGED
|
@@ -27,6 +27,7 @@ COPY --from=builder /app/gridmind-server /usr/local/bin/gridmind-server
|
|
| 27 |
|
| 28 |
# Copy Python layer and Dashboard
|
| 29 |
COPY python/ ./python/
|
|
|
|
| 30 |
COPY dashboard/ ./dashboard/
|
| 31 |
COPY data/ ./data/
|
| 32 |
COPY openenv.yaml ./
|
|
|
|
| 27 |
|
| 28 |
# Copy Python layer and Dashboard
|
| 29 |
COPY python/ ./python/
|
| 30 |
+
COPY inference.py ./inference.py
|
| 31 |
COPY dashboard/ ./dashboard/
|
| 32 |
COPY data/ ./data/
|
| 33 |
COPY openenv.yaml ./
|
README.md
CHANGED
|
@@ -10,7 +10,7 @@
|
|
| 10 |
|
| 11 |
1. **Get a free AI API key** from [Hugging Face](https://huggingface.co/join) (takes 2 minutes)
|
| 12 |
2. **Run the simulator**: `docker run -p 7860:7860 -p 7861:7861 ghcr.io/your-repo/gridmind-rl:latest`
|
| 13 |
-
3. **Watch the AI learn**: `python
|
| 14 |
|
| 15 |
That's it! The AI will start making energy decisions and you'll see live results.
|
| 16 |
|
|
@@ -122,7 +122,7 @@ export HF_TOKEN=hf_your_token_here
|
|
| 122 |
|
| 123 |
```bash
|
| 124 |
# Run 3 learning episodes (takes ~5 minutes)
|
| 125 |
-
python
|
| 126 |
```
|
| 127 |
|
| 128 |
You'll see output like:
|
|
@@ -202,7 +202,7 @@ $env:MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3" # Faster but less accura
|
|
| 202 |
|
| 203 |
**Run longer training**:
|
| 204 |
```bash
|
| 205 |
-
python
|
| 206 |
```
|
| 207 |
|
| 208 |
**Test the environment manually**:
|
|
|
|
| 10 |
|
| 11 |
1. **Get a free AI API key** from [Hugging Face](https://huggingface.co/join) (takes 2 minutes)
|
| 12 |
2. **Run the simulator**: `docker run -p 7860:7860 -p 7861:7861 ghcr.io/your-repo/gridmind-rl:latest`
|
| 13 |
+
3. **Watch the AI learn**: `python inference.py --episodes 1` (or `--fast-mode` for a quick heuristic run, no API calls)
|
| 14 |
|
| 15 |
That's it! The AI will start making energy decisions and you'll see live results.
|
| 16 |
|
|
|
|
| 122 |
|
| 123 |
```bash
|
| 124 |
# Run 3 learning episodes (takes ~5 minutes)
|
| 125 |
+
python inference.py --episodes 3
|
| 126 |
```
|
| 127 |
|
| 128 |
You'll see output like:
|
|
|
|
| 202 |
|
| 203 |
**Run longer training**:
|
| 204 |
```bash
|
| 205 |
+
python inference.py --episodes 10 --llm-every 4 # Scale LLM calls via --llm-every; use --fast-mode for tests
|
| 206 |
```
|
| 207 |
|
| 208 |
**Test the environment manually**:
|
baseline_scores.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
| 3 |
+
"api_base": "https://router.huggingface.co/v1",
|
| 4 |
+
"episodes_per_task": 1,
|
| 5 |
+
"seed_base": 1000,
|
| 6 |
+
"fast_mode": true,
|
| 7 |
+
"llm_every": 4,
|
| 8 |
+
"max_steps": null,
|
| 9 |
+
"task_averages": {
|
| 10 |
+
"1": 0.2776,
|
| 11 |
+
"2": 0.2182,
|
| 12 |
+
"3": 0.3115
|
| 13 |
+
},
|
| 14 |
+
"overall_average": 0.2691,
|
| 15 |
+
"all_results": [
|
| 16 |
+
{
|
| 17 |
+
"task_id": 1,
|
| 18 |
+
"seed": 1100,
|
| 19 |
+
"total_reward": -54.91106240679752,
|
| 20 |
+
"total_steps": 96,
|
| 21 |
+
"elapsed_sec": 0.8684265613555908,
|
| 22 |
+
"score": 0.2776,
|
| 23 |
+
"sub_scores": {
|
| 24 |
+
"cost": 0.277555958007489
|
| 25 |
+
},
|
| 26 |
+
"exploit_detected": false
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"task_id": 2,
|
| 30 |
+
"seed": 1200,
|
| 31 |
+
"total_reward": -573.2793620498348,
|
| 32 |
+
"total_steps": 96,
|
| 33 |
+
"elapsed_sec": 0.9907081127166748,
|
| 34 |
+
"score": 0.2182,
|
| 35 |
+
"sub_scores": {
|
| 36 |
+
"cost": 0.2595566056450961,
|
| 37 |
+
"temperature": 0.15625
|
| 38 |
+
},
|
| 39 |
+
"exploit_detected": false
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"task_id": 3,
|
| 43 |
+
"seed": 1300,
|
| 44 |
+
"total_reward": -670.8653705366278,
|
| 45 |
+
"total_steps": 96,
|
| 46 |
+
"elapsed_sec": 0.8988945484161377,
|
| 47 |
+
"score": 0.3115,
|
| 48 |
+
"sub_scores": {
|
| 49 |
+
"batch_deadline": 1,
|
| 50 |
+
"carbon": 0.24377839161166936,
|
| 51 |
+
"cost": 0.25263438913936676,
|
| 52 |
+
"grid_response": 0.21428571428571427,
|
| 53 |
+
"temperature": 0.14583333333333334
|
| 54 |
+
},
|
| 55 |
+
"exploit_detected": false
|
| 56 |
+
}
|
| 57 |
+
]
|
| 58 |
+
}
|
dashboard/__pycache__/server.cpython-311.pyc
ADDED
|
Binary file (5.56 kB). View file
|
|
|
dashboard/static/dashboard.js
CHANGED
|
@@ -7,7 +7,9 @@
|
|
| 7 |
|
| 8 |
// ── Config ──────────────────────────────────────────────────────────────────
|
| 9 |
const POLL_MS = 500;
|
| 10 |
-
const
|
|
|
|
|
|
|
| 11 |
const API_BASE = '/api';
|
| 12 |
const TASK_NAMES = {
|
| 13 |
1: 'Task 1 — Cost Minimization (Easy)',
|
|
@@ -95,8 +97,8 @@ function makeBarChart(id, labels, datasets) {
|
|
| 95 |
}
|
| 96 |
|
| 97 |
// ── Initialise all charts ─────────────────────────────────────────────────────
|
| 98 |
-
const emptyLabels = Array.from({ length:
|
| 99 |
-
const emptyData = Array(
|
| 100 |
|
| 101 |
// 1. Price curve
|
| 102 |
const priceChart = makeLineChart('chart-price',
|
|
@@ -306,7 +308,7 @@ function renderGantt(jobs, currentStep) {
|
|
| 306 |
wrap.innerHTML = '<div style="color:var(--text-dim);font-size:0.8rem">No batch jobs in this episode.</div>';
|
| 307 |
return;
|
| 308 |
}
|
| 309 |
-
const totalSlots =
|
| 310 |
wrap.innerHTML = '';
|
| 311 |
jobs.forEach(job => {
|
| 312 |
const row = document.createElement('div');
|
|
@@ -416,7 +418,7 @@ async function fetchAndUpdate() {
|
|
| 416 |
const hourOfDay = b.hour_of_day || 0;
|
| 417 |
|
| 418 |
// ── Header ──
|
| 419 |
-
document.getElementById('ep-step').textContent = `ep:${state.episode} step:${step}/
|
| 420 |
document.getElementById('task-badge').textContent = TASK_NAMES[state.task_id] || 'Task 1';
|
| 421 |
|
| 422 |
// ── KPIs ──
|
|
@@ -444,20 +446,19 @@ async function fetchAndUpdate() {
|
|
| 444 |
document.getElementById('kpi-storage').textContent = `${(b.thermal_storage_level * 100).toFixed(1)}`;
|
| 445 |
|
| 446 |
// ── Price curve chart ──
|
| 447 |
-
if (state.price_curve_episode && state.price_curve_episode.length ===
|
| 448 |
-
const labels = Array.from({ length:
|
| 449 |
priceChart.data.labels = labels;
|
| 450 |
priceChart.data.datasets[0].data = state.price_curve_episode;
|
| 451 |
-
|
| 452 |
-
const marker = Array(72).fill(null);
|
| 453 |
marker[Math.floor(step / 4)] = state.price_curve_episode[Math.floor(step / 4)];
|
| 454 |
priceChart.data.datasets[1].data = marker;
|
| 455 |
priceChart.update('none');
|
| 456 |
}
|
| 457 |
|
| 458 |
// ── Carbon curve ──
|
| 459 |
-
if (state.carbon_curve_episode && state.carbon_curve_episode.length ===
|
| 460 |
-
carbonChart.data.labels = Array.from({ length:
|
| 461 |
carbonChart.data.datasets[0].data = state.carbon_curve_episode;
|
| 462 |
carbonChart.update('none');
|
| 463 |
}
|
|
|
|
| 7 |
|
| 8 |
// ── Config ──────────────────────────────────────────────────────────────────
|
| 9 |
const POLL_MS = 500;
|
| 10 |
+
const EPISODE_STEPS = 96; // 24h × 4 steps/h (15-min)
|
| 11 |
+
const HISTORY_LEN = EPISODE_STEPS;
|
| 12 |
+
const CURVE_POINTS = 24; // hourly downsample (EpisodeSteps/4)
|
| 13 |
const API_BASE = '/api';
|
| 14 |
const TASK_NAMES = {
|
| 15 |
1: 'Task 1 — Cost Minimization (Easy)',
|
|
|
|
| 97 |
}
|
| 98 |
|
| 99 |
// ── Initialise all charts ─────────────────────────────────────────────────────
|
| 100 |
+
const emptyLabels = Array.from({ length: CURVE_POINTS }, (_, i) => `${i}h`);
|
| 101 |
+
const emptyData = Array(CURVE_POINTS).fill(null);
|
| 102 |
|
| 103 |
// 1. Price curve
|
| 104 |
const priceChart = makeLineChart('chart-price',
|
|
|
|
| 308 |
wrap.innerHTML = '<div style="color:var(--text-dim);font-size:0.8rem">No batch jobs in this episode.</div>';
|
| 309 |
return;
|
| 310 |
}
|
| 311 |
+
const totalSlots = EPISODE_STEPS;
|
| 312 |
wrap.innerHTML = '';
|
| 313 |
jobs.forEach(job => {
|
| 314 |
const row = document.createElement('div');
|
|
|
|
| 418 |
const hourOfDay = b.hour_of_day || 0;
|
| 419 |
|
| 420 |
// ── Header ──
|
| 421 |
+
document.getElementById('ep-step').textContent = `ep:${state.episode} step:${step}/${EPISODE_STEPS - 1}`;
|
| 422 |
document.getElementById('task-badge').textContent = TASK_NAMES[state.task_id] || 'Task 1';
|
| 423 |
|
| 424 |
// ── KPIs ──
|
|
|
|
| 446 |
document.getElementById('kpi-storage').textContent = `${(b.thermal_storage_level * 100).toFixed(1)}`;
|
| 447 |
|
| 448 |
// ── Price curve chart ──
|
| 449 |
+
if (state.price_curve_episode && state.price_curve_episode.length === CURVE_POINTS) {
|
| 450 |
+
const labels = Array.from({ length: CURVE_POINTS }, (_, i) => `${i}:00`);
|
| 451 |
priceChart.data.labels = labels;
|
| 452 |
priceChart.data.datasets[0].data = state.price_curve_episode;
|
| 453 |
+
const marker = Array(CURVE_POINTS).fill(null);
|
|
|
|
| 454 |
marker[Math.floor(step / 4)] = state.price_curve_episode[Math.floor(step / 4)];
|
| 455 |
priceChart.data.datasets[1].data = marker;
|
| 456 |
priceChart.update('none');
|
| 457 |
}
|
| 458 |
|
| 459 |
// ── Carbon curve ──
|
| 460 |
+
if (state.carbon_curve_episode && state.carbon_curve_episode.length === CURVE_POINTS) {
|
| 461 |
+
carbonChart.data.labels = Array.from({ length: CURVE_POINTS }, (_, i) => `${i}:00`);
|
| 462 |
carbonChart.data.datasets[0].data = state.carbon_curve_episode;
|
| 463 |
carbonChart.update('none');
|
| 464 |
}
|
dashboard/static/index.html
CHANGED
|
@@ -514,7 +514,7 @@
|
|
| 514 |
|
| 515 |
<!-- Row 1: Price curve + Temperature + Controls -->
|
| 516 |
<div class="card col-8">
|
| 517 |
-
<div class="card-title"><span class="icon">💰</span> Electricity Price Curve (
|
| 518 |
<div class="chart-wrap">
|
| 519 |
<canvas id="chart-price"></canvas>
|
| 520 |
</div>
|
|
@@ -587,7 +587,7 @@
|
|
| 587 |
</div>
|
| 588 |
|
| 589 |
<div class="card col-6">
|
| 590 |
-
<div class="card-title"><span class="icon">🌍</span> Carbon Intensity Curve (
|
| 591 |
<div class="chart-wrap">
|
| 592 |
<canvas id="chart-carbon"></canvas>
|
| 593 |
</div>
|
|
|
|
| 514 |
|
| 515 |
<!-- Row 1: Price curve + Temperature + Controls -->
|
| 516 |
<div class="card col-8">
|
| 517 |
+
<div class="card-title"><span class="icon">💰</span> Electricity Price Curve (24h)</div>
|
| 518 |
<div class="chart-wrap">
|
| 519 |
<canvas id="chart-price"></canvas>
|
| 520 |
</div>
|
|
|
|
| 587 |
</div>
|
| 588 |
|
| 589 |
<div class="card col-6">
|
| 590 |
+
<div class="card-title"><span class="icon">🌍</span> Carbon Intensity Curve (24h)</div>
|
| 591 |
<div class="chart-wrap">
|
| 592 |
<canvas id="chart-carbon"></canvas>
|
| 593 |
</div>
|
env/environment.go
CHANGED
|
@@ -11,7 +11,7 @@ import (
|
|
| 11 |
)
|
| 12 |
|
| 13 |
const (
|
| 14 |
-
EpisodeSteps =
|
| 15 |
StepDurationHrs = 0.25 // each step = 15 minutes = 0.25 h
|
| 16 |
MaxBuildings = 3
|
| 17 |
DefaultSetpoint = 21.0 // °C comfortable indoor temp
|
|
@@ -206,6 +206,7 @@ func (e *Environment) GetState() StateResponse {
|
|
| 206 |
OutdoorTemperature: b.OutdoorTemperature,
|
| 207 |
SetpointTemperature: b.SetpointTemperature,
|
| 208 |
BaselineCost: b.BaselineCost,
|
|
|
|
| 209 |
CumulativeCarbon: b.CumulativeCarbon,
|
| 210 |
Jobs: b.Jobs,
|
| 211 |
}
|
|
@@ -276,6 +277,7 @@ func (e *Environment) newBuildingState(id int) *BuildingState {
|
|
| 276 |
OutdoorTemperature: outdoorTemp,
|
| 277 |
PrevHVACLevel: 0.5,
|
| 278 |
BaselineCost: 0.0,
|
|
|
|
| 279 |
SetpointTemperature: DefaultSetpoint,
|
| 280 |
MaxHVACPower: MaxHVACPowerKW,
|
| 281 |
MaxStorageCapacity: MaxStorageKWh,
|
|
@@ -299,8 +301,12 @@ func (e *Environment) generateBatchJobs() []BatchJob {
|
|
| 299 |
|
| 300 |
jobs := make([]BatchJob, numJobs)
|
| 301 |
for i := range jobs {
|
| 302 |
-
// Deadline spread across episode
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
jobs[i] = BatchJob{
|
| 305 |
ID: i + 1,
|
| 306 |
DeadlineSlot: deadline,
|
|
@@ -438,9 +444,10 @@ func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) S
|
|
| 438 |
// Baseline (always-on at 70% HVAC, no storage/shedding)
|
| 439 |
baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand
|
| 440 |
baselineEnergy := baselineKW * StepDurationHrs
|
| 441 |
-
|
|
|
|
| 442 |
|
| 443 |
-
|
| 444 |
rc := ComputeReward(ComputeRewardInput{
|
| 445 |
B: b,
|
| 446 |
Act: act,
|
|
@@ -479,6 +486,9 @@ func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) S
|
|
| 479 |
}
|
| 480 |
}
|
| 481 |
|
|
|
|
|
|
|
|
|
|
| 482 |
// Record history
|
| 483 |
if idx < len(e.TempHistory) {
|
| 484 |
e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature)
|
|
|
|
| 11 |
)
|
| 12 |
|
| 13 |
const (
|
| 14 |
+
EpisodeSteps = 96 // 24 hours × 15-min intervals (96 × 0.25h = 24h)
|
| 15 |
StepDurationHrs = 0.25 // each step = 15 minutes = 0.25 h
|
| 16 |
MaxBuildings = 3
|
| 17 |
DefaultSetpoint = 21.0 // °C comfortable indoor temp
|
|
|
|
| 206 |
OutdoorTemperature: b.OutdoorTemperature,
|
| 207 |
SetpointTemperature: b.SetpointTemperature,
|
| 208 |
BaselineCost: b.BaselineCost,
|
| 209 |
+
BaselineCarbon: b.BaselineCarbon,
|
| 210 |
CumulativeCarbon: b.CumulativeCarbon,
|
| 211 |
Jobs: b.Jobs,
|
| 212 |
}
|
|
|
|
| 277 |
OutdoorTemperature: outdoorTemp,
|
| 278 |
PrevHVACLevel: 0.5,
|
| 279 |
BaselineCost: 0.0,
|
| 280 |
+
BaselineCarbon: 0.0,
|
| 281 |
SetpointTemperature: DefaultSetpoint,
|
| 282 |
MaxHVACPower: MaxHVACPowerKW,
|
| 283 |
MaxStorageCapacity: MaxStorageKWh,
|
|
|
|
| 301 |
|
| 302 |
jobs := make([]BatchJob, numJobs)
|
| 303 |
for i := range jobs {
|
| 304 |
+
// Deadline spread across episode (leave slack at end for duration)
|
| 305 |
+
span := EpisodeSteps - 12
|
| 306 |
+
if span < 8 {
|
| 307 |
+
span = 8
|
| 308 |
+
}
|
| 309 |
+
deadline := 4 + e.rng.Intn(span)
|
| 310 |
jobs[i] = BatchJob{
|
| 311 |
ID: i + 1,
|
| 312 |
DeadlineSlot: deadline,
|
|
|
|
| 444 |
// Baseline (always-on at 70% HVAC, no storage/shedding)
|
| 445 |
baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand
|
| 446 |
baselineEnergy := baselineKW * StepDurationHrs
|
| 447 |
+
b.BaselineCost += baselineEnergy * b.CurrentPrice
|
| 448 |
+
b.BaselineCarbon += baselineEnergy * b.CarbonIntensity
|
| 449 |
|
| 450 |
+
// ----- Reward computation -----
|
| 451 |
rc := ComputeReward(ComputeRewardInput{
|
| 452 |
B: b,
|
| 453 |
Act: act,
|
|
|
|
| 486 |
}
|
| 487 |
}
|
| 488 |
|
| 489 |
+
// Per-building step index matches global timestep for this transition (0 .. EpisodeSteps-1)
|
| 490 |
+
b.Step = s
|
| 491 |
+
|
| 492 |
// Record history
|
| 493 |
if idx < len(e.TempHistory) {
|
| 494 |
e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature)
|
env/models.go
CHANGED
|
@@ -27,7 +27,7 @@ type BuildingState struct {
|
|
| 27 |
|
| 28 |
// Temporal
|
| 29 |
HourOfDay int `json:"hour_of_day"` // 0–23
|
| 30 |
-
Step int `json:"step"` // 0–95 within episode
|
| 31 |
|
| 32 |
// Batch job queue: pending deadlines (raw slots)
|
| 33 |
BatchQueue []int `json:"batch_queue"` // deadline slots of pending jobs
|
|
@@ -41,6 +41,7 @@ type BuildingState struct {
|
|
| 41 |
OutdoorTemperature float64 `json:"-"` // °C for weather perturbation
|
| 42 |
PrevHVACLevel float64 `json:"-"` // for stability penalty
|
| 43 |
BaselineCost float64 `json:"-"` // always-on policy running cost
|
|
|
|
| 44 |
SetpointTemperature float64 `json:"-"` // target indoor temp (°C)
|
| 45 |
MaxHVACPower float64 `json:"-"` // kW
|
| 46 |
MaxStorageCapacity float64 `json:"-"` // kWh
|
|
@@ -139,6 +140,7 @@ type BuildingStatePublic struct {
|
|
| 139 |
OutdoorTemperature float64 `json:"outdoor_temperature"`
|
| 140 |
SetpointTemperature float64 `json:"setpoint_temperature"`
|
| 141 |
BaselineCost float64 `json:"baseline_cost"`
|
|
|
|
| 142 |
CumulativeCarbon float64 `json:"cumulative_carbon"`
|
| 143 |
Jobs []BatchJob `json:"jobs"`
|
| 144 |
// History for chart rendering
|
|
|
|
| 27 |
|
| 28 |
// Temporal
|
| 29 |
HourOfDay int `json:"hour_of_day"` // 0–23
|
| 30 |
+
Step int `json:"step"` // 0–95 within a 96-step (24h) episode
|
| 31 |
|
| 32 |
// Batch job queue: pending deadlines (raw slots)
|
| 33 |
BatchQueue []int `json:"batch_queue"` // deadline slots of pending jobs
|
|
|
|
| 41 |
OutdoorTemperature float64 `json:"-"` // °C for weather perturbation
|
| 42 |
PrevHVACLevel float64 `json:"-"` // for stability penalty
|
| 43 |
BaselineCost float64 `json:"-"` // always-on policy running cost
|
| 44 |
+
BaselineCarbon float64 `json:"-"` // baseline policy gCO2 (for grading)
|
| 45 |
SetpointTemperature float64 `json:"-"` // target indoor temp (°C)
|
| 46 |
MaxHVACPower float64 `json:"-"` // kW
|
| 47 |
MaxStorageCapacity float64 `json:"-"` // kWh
|
|
|
|
| 140 |
OutdoorTemperature float64 `json:"outdoor_temperature"`
|
| 141 |
SetpointTemperature float64 `json:"setpoint_temperature"`
|
| 142 |
BaselineCost float64 `json:"baseline_cost"`
|
| 143 |
+
BaselineCarbon float64 `json:"baseline_carbon"`
|
| 144 |
CumulativeCarbon float64 `json:"cumulative_carbon"`
|
| 145 |
Jobs []BatchJob `json:"jobs"`
|
| 146 |
// History for chart rendering
|
env/rewards.go
CHANGED
|
@@ -50,7 +50,8 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
|
|
| 50 |
}
|
| 51 |
|
| 52 |
// ── 4. Deadline Penalty ──────────────────────────────────────────────────
|
| 53 |
-
|
|
|
|
| 54 |
rc.DeadlinePenalty = -float64(inp.BatchMissed) * 1.5
|
| 55 |
}
|
| 56 |
|
|
@@ -115,29 +116,26 @@ func computeGridResponse(stress, shedFraction float64) float64 {
|
|
| 115 |
return -shedFraction * (0.7 - stress) * 0.3
|
| 116 |
}
|
| 117 |
|
| 118 |
-
// computeArbitrageBonus rewards
|
| 119 |
-
//
|
| 120 |
func computeArbitrageBonus(chargeRate, currentPrice float64, curve []float64, step int) float64 {
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
futureSum := 0.0
|
| 124 |
count := 0
|
| 125 |
-
for i := step
|
| 126 |
-
|
| 127 |
count++
|
| 128 |
}
|
| 129 |
if count == 0 {
|
| 130 |
return 0.0
|
| 131 |
}
|
| 132 |
-
|
| 133 |
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
return chargeRate * (futureAvg - currentPrice) * 2.0
|
| 137 |
}
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
return math.Abs(chargeRate) * (currentPrice - futureAvg) * 2.0
|
| 141 |
}
|
| 142 |
return 0.0
|
| 143 |
}
|
|
|
|
| 50 |
}
|
| 51 |
|
| 52 |
// ── 4. Deadline Penalty ──────────────────────────────────────────────────
|
| 53 |
+
// Task 1 is cost-only; batch jobs are not part of the objective.
|
| 54 |
+
if inp.BatchMissed > 0 && inp.TaskID >= 2 {
|
| 55 |
rc.DeadlinePenalty = -float64(inp.BatchMissed) * 1.5
|
| 56 |
}
|
| 57 |
|
|
|
|
| 116 |
return -shedFraction * (0.7 - stress) * 0.3
|
| 117 |
}
|
| 118 |
|
| 119 |
+
// computeArbitrageBonus rewards storage use when current price is low vs recent history
|
| 120 |
+
// (causal: uses only past prices, no future curve leakage).
|
| 121 |
func computeArbitrageBonus(chargeRate, currentPrice float64, curve []float64, step int) float64 {
|
| 122 |
+
lookBack := 8
|
| 123 |
+
pastSum := 0.0
|
|
|
|
| 124 |
count := 0
|
| 125 |
+
for i := step - lookBack; i < step && i >= 0; i++ {
|
| 126 |
+
pastSum += curve[i]
|
| 127 |
count++
|
| 128 |
}
|
| 129 |
if count == 0 {
|
| 130 |
return 0.0
|
| 131 |
}
|
| 132 |
+
pastAvg := pastSum / float64(count)
|
| 133 |
|
| 134 |
+
if chargeRate > 0 && currentPrice < pastAvg {
|
| 135 |
+
return chargeRate * (pastAvg - currentPrice) * 2.0
|
|
|
|
| 136 |
}
|
| 137 |
+
if chargeRate < 0 && currentPrice > pastAvg {
|
| 138 |
+
return math.Abs(chargeRate) * (currentPrice - pastAvg) * 2.0
|
|
|
|
| 139 |
}
|
| 140 |
return 0.0
|
| 141 |
}
|
env/tasks.go
CHANGED
|
@@ -34,7 +34,7 @@ func AllTasks() []TaskConfig {
|
|
| 34 |
Name: "Full Demand-Response with Batch Scheduling",
|
| 35 |
Description: "Minimize cost, maintain temperature, respond to grid stress events, schedule all batch jobs before their deadlines, and minimize carbon emissions.",
|
| 36 |
Difficulty: "hard",
|
| 37 |
-
Weights: map[string]float64{"cost": 0.
|
| 38 |
},
|
| 39 |
}
|
| 40 |
}
|
|
@@ -187,17 +187,31 @@ func gradeTask3(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
|
|
| 187 |
batchScore = float64(completedOnTime) / float64(totalJobs)
|
| 188 |
}
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
grade.SubScores["cost"] = costScore
|
| 191 |
grade.SubScores["temperature"] = tempScore
|
| 192 |
grade.SubScores["grid_response"] = gridScore
|
| 193 |
grade.SubScores["batch_deadline"] = batchScore
|
|
|
|
| 194 |
|
| 195 |
-
|
| 196 |
-
grade.Score = costScore*0.35 + tempScore*0.25 + gridScore*0.25 + batchScore*0.15
|
| 197 |
|
| 198 |
grade.Details["grid_stress_steps"] = gridStressSteps
|
| 199 |
grade.Details["grid_response_steps"] = gridResponseSteps
|
| 200 |
grade.Details["total_jobs"] = totalJobs
|
| 201 |
grade.Details["completed_on_time"] = completedOnTime
|
|
|
|
|
|
|
| 202 |
return grade
|
| 203 |
}
|
|
|
|
| 34 |
Name: "Full Demand-Response with Batch Scheduling",
|
| 35 |
Description: "Minimize cost, maintain temperature, respond to grid stress events, schedule all batch jobs before their deadlines, and minimize carbon emissions.",
|
| 36 |
Difficulty: "hard",
|
| 37 |
+
Weights: map[string]float64{"cost": 0.28, "temperature": 0.20, "grid_response": 0.20, "batch_deadline": 0.12, "carbon": 0.20},
|
| 38 |
},
|
| 39 |
}
|
| 40 |
}
|
|
|
|
| 187 |
batchScore = float64(completedOnTime) / float64(totalJobs)
|
| 188 |
}
|
| 189 |
|
| 190 |
+
// Carbon sub-score vs baseline always-on policy (same spirit as cost)
|
| 191 |
+
agentCarbon := 0.0
|
| 192 |
+
baselineCarbon := 0.0
|
| 193 |
+
for _, b := range inp.Buildings {
|
| 194 |
+
agentCarbon += b.CumulativeCarbon
|
| 195 |
+
baselineCarbon += b.BaselineCarbon
|
| 196 |
+
}
|
| 197 |
+
carbonScore := 0.0
|
| 198 |
+
if baselineCarbon > 0 {
|
| 199 |
+
carbonScore = math.Max(0, 1.0-agentCarbon/baselineCarbon)
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
grade.SubScores["cost"] = costScore
|
| 203 |
grade.SubScores["temperature"] = tempScore
|
| 204 |
grade.SubScores["grid_response"] = gridScore
|
| 205 |
grade.SubScores["batch_deadline"] = batchScore
|
| 206 |
+
grade.SubScores["carbon"] = math.Min(1.0, carbonScore)
|
| 207 |
|
| 208 |
+
grade.Score = costScore*0.28 + tempScore*0.20 + gridScore*0.20 + batchScore*0.12 + carbonScore*0.20
|
|
|
|
| 209 |
|
| 210 |
grade.Details["grid_stress_steps"] = gridStressSteps
|
| 211 |
grade.Details["grid_response_steps"] = gridResponseSteps
|
| 212 |
grade.Details["total_jobs"] = totalJobs
|
| 213 |
grade.Details["completed_on_time"] = completedOnTime
|
| 214 |
+
grade.Details["agent_carbon"] = agentCarbon
|
| 215 |
+
grade.Details["baseline_carbon"] = baselineCarbon
|
| 216 |
return grade
|
| 217 |
}
|
inference.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Hackathon entrypoint: run from repo root with:
|
| 3 |
+
python inference.py
|
| 4 |
+
Delegates to python/inference.py (single source of truth).
|
| 5 |
+
"""
|
| 6 |
+
import runpy
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
if __name__ == "__main__":
    # Locate the real implementation relative to this file, so the wrapper
    # does not depend on the current working directory.
    repo_root = Path(__file__).resolve().parent
    target_script = repo_root / "python" / "inference.py"
    # Execute it as if it had been launched directly as a script.
    runpy.run_path(str(target_script), run_name="__main__")
|
main.go
CHANGED
|
@@ -135,6 +135,7 @@ func newServer() *Server {
|
|
| 135 |
func (s *Server) routes() *http.ServeMux {
|
| 136 |
mux := http.NewServeMux()
|
| 137 |
mux.HandleFunc("/health", s.handleHealth)
|
|
|
|
| 138 |
mux.HandleFunc("/reset", s.handleReset)
|
| 139 |
mux.HandleFunc("/step", s.handleStep)
|
| 140 |
mux.HandleFunc("/state", s.handleState)
|
|
@@ -153,6 +154,12 @@ func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
|
|
| 153 |
json.NewEncoder(w).Encode(map[string]string{"status": "ok", "version": "1.0.0"})
|
| 154 |
}
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
// ── /reset ───────────────────────────────────────────────────────────────────
|
| 157 |
|
| 158 |
func (s *Server) handleReset(w http.ResponseWriter, r *http.Request) {
|
|
@@ -280,9 +287,14 @@ func (s *Server) handleGrade(w http.ResponseWriter, r *http.Request) {
|
|
| 280 |
// Build building states from public state
|
| 281 |
buildings := make([]*env.BuildingState, len(state.Buildings))
|
| 282 |
for i, pub := range state.Buildings {
|
|
|
|
|
|
|
| 283 |
buildings[i] = &env.BuildingState{
|
| 284 |
-
CumulativeCost:
|
| 285 |
-
BaselineCost:
|
|
|
|
|
|
|
|
|
|
| 286 |
}
|
| 287 |
}
|
| 288 |
|
|
@@ -342,7 +354,7 @@ func main() {
|
|
| 342 |
srv.envMgr.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
|
| 343 |
|
| 344 |
log.Printf("GridMind-RL environment server starting on :%s", port)
|
| 345 |
-
log.Printf("Endpoints: GET /health /state /replay /grade /tasks /metrics | POST /reset /step")
|
| 346 |
|
| 347 |
mux := withCORS(withLogging(srv.routes()))
|
| 348 |
if err := http.ListenAndServe(":"+port, mux); err != nil {
|
|
|
|
| 135 |
func (s *Server) routes() *http.ServeMux {
|
| 136 |
mux := http.NewServeMux()
|
| 137 |
mux.HandleFunc("/health", s.handleHealth)
|
| 138 |
+
mux.HandleFunc("/ping", s.handlePing)
|
| 139 |
mux.HandleFunc("/reset", s.handleReset)
|
| 140 |
mux.HandleFunc("/step", s.handleStep)
|
| 141 |
mux.HandleFunc("/state", s.handleState)
|
|
|
|
| 154 |
json.NewEncoder(w).Encode(map[string]string{"status": "ok", "version": "1.0.0"})
|
| 155 |
}
|
| 156 |
|
| 157 |
+
func (s *Server) handlePing(w http.ResponseWriter, r *http.Request) {
|
| 158 |
+
w.Header().Set("Content-Type", "application/json")
|
| 159 |
+
w.WriteHeader(http.StatusOK)
|
| 160 |
+
json.NewEncoder(w).Encode(map[string]string{"status": "ok"})
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
// ── /reset ───────────────────────────────────────────────────────────────────
|
| 164 |
|
| 165 |
func (s *Server) handleReset(w http.ResponseWriter, r *http.Request) {
|
|
|
|
| 287 |
// Build building states from public state
|
| 288 |
buildings := make([]*env.BuildingState, len(state.Buildings))
|
| 289 |
for i, pub := range state.Buildings {
|
| 290 |
+
jobsCopy := make([]env.BatchJob, len(pub.Jobs))
|
| 291 |
+
copy(jobsCopy, pub.Jobs)
|
| 292 |
buildings[i] = &env.BuildingState{
|
| 293 |
+
CumulativeCost: pub.CumulativeCost,
|
| 294 |
+
BaselineCost: pub.BaselineCost,
|
| 295 |
+
CumulativeCarbon: pub.CumulativeCarbon,
|
| 296 |
+
BaselineCarbon: pub.BaselineCarbon,
|
| 297 |
+
Jobs: jobsCopy,
|
| 298 |
}
|
| 299 |
}
|
| 300 |
|
|
|
|
| 354 |
srv.envMgr.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
|
| 355 |
|
| 356 |
log.Printf("GridMind-RL environment server starting on :%s", port)
|
| 357 |
+
log.Printf("Endpoints: GET /health /ping /state /replay /grade /tasks /metrics | POST /reset /step")
|
| 358 |
|
| 359 |
mux := withCORS(withLogging(srv.routes()))
|
| 360 |
if err := http.ListenAndServe(":"+port, mux); err != nil {
|
openenv.yaml
CHANGED
|
@@ -62,7 +62,7 @@ schemas:
|
|
| 62 |
type: integer
|
| 63 |
minimum: 0
|
| 64 |
maximum: 95
|
| 65 |
-
description: Current episode step (15-min
|
| 66 |
building_id:
|
| 67 |
type: integer
|
| 68 |
description: Building identifier for multi-building federation
|
|
@@ -103,7 +103,7 @@ schemas:
|
|
| 103 |
|
| 104 |
reward:
|
| 105 |
type: number
|
| 106 |
-
description: Dense multi-component reward
|
| 107 |
|
| 108 |
tasks:
|
| 109 |
- id: 1
|
|
@@ -123,6 +123,9 @@ endpoints:
|
|
| 123 |
health:
|
| 124 |
path: /health
|
| 125 |
method: GET
|
|
|
|
|
|
|
|
|
|
| 126 |
reset:
|
| 127 |
path: /reset
|
| 128 |
method: POST
|
|
|
|
| 62 |
type: integer
|
| 63 |
minimum: 0
|
| 64 |
maximum: 95
|
| 65 |
+
description: Current episode step index (0–95); 96 steps = 24 hours at 15-min resolution
|
| 66 |
building_id:
|
| 67 |
type: integer
|
| 68 |
description: Building identifier for multi-building federation
|
|
|
|
| 103 |
|
| 104 |
reward:
|
| 105 |
type: number
|
| 106 |
+
description: Dense multi-component reward (cost, optional temperature/grid/carbon/deadlines) task-gated to match objectives.
|
| 107 |
|
| 108 |
tasks:
|
| 109 |
- id: 1
|
|
|
|
| 123 |
health:
|
| 124 |
path: /health
|
| 125 |
method: GET
|
| 126 |
+
ping:
|
| 127 |
+
path: /ping
|
| 128 |
+
method: GET
|
| 129 |
reset:
|
| 130 |
path: /reset
|
| 131 |
method: POST
|
python/inference.py
CHANGED
|
@@ -8,14 +8,15 @@ Usage:
|
|
| 8 |
export API_BASE_URL=https://router.huggingface.co/v1
|
| 9 |
export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
|
| 10 |
export HF_TOKEN=hf_xxxx
|
| 11 |
-
python
|
|
|
|
| 12 |
"""
|
| 13 |
|
|
|
|
|
|
|
| 14 |
import argparse
|
| 15 |
import json
|
| 16 |
import os
|
| 17 |
-
import random
|
| 18 |
-
import re
|
| 19 |
import sys
|
| 20 |
import time
|
| 21 |
from typing import Any
|
|
@@ -29,9 +30,12 @@ ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
|
|
| 29 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 30 |
MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
|
| 31 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 32 |
-
DEFAULT_EPISODES =
|
| 33 |
-
DEFAULT_SEED_BASE = 1000
|
| 34 |
MAX_RETRIES = 3
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
SYSPROMPT = """You are GridMind, an expert industrial energy management controller.
|
| 37 |
You control a building's HVAC, thermal storage, batch job scheduling, and load shedding.
|
|
@@ -39,9 +43,9 @@ Your goal is to minimize electricity costs while maintaining comfort and meeting
|
|
| 39 |
Always respond with a single valid JSON object matching the action schema. No explanation needed."""
|
| 40 |
|
| 41 |
TASK_DESCRIPTIONS = {
|
| 42 |
-
1: "Task 1 (Easy - Cost Minimization): Minimize total energy cost over 24 hours. No temperature constraints. Use cheap off-peak periods and thermal storage
|
| 43 |
2: "Task 2 (Medium - Temperature Management): Minimize cost AND keep indoor temperature within 19-23°C at all times. Balance comfort vs cost.",
|
| 44 |
-
3: "Task 3 (Hard - Full Demand Response): Minimize cost, maintain temperature, respond to grid stress
|
| 45 |
}
|
| 46 |
|
| 47 |
ACTION_SCHEMA_STR = """{
|
|
@@ -53,8 +57,29 @@ ACTION_SCHEMA_STR = """{
|
|
| 53 |
}"""
|
| 54 |
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
# ── Environment client ───────────────────────────────────────────────────────
|
| 57 |
|
|
|
|
| 58 |
class GridMindEnvClient:
|
| 59 |
"""Simple HTTP client for the GridMind-RL Go environment server."""
|
| 60 |
|
|
@@ -93,6 +118,7 @@ class GridMindEnvClient:
|
|
| 93 |
|
| 94 |
# ── LLM agent ───────────────────────────────────────────────────────────────
|
| 95 |
|
|
|
|
| 96 |
class LLMAgent:
|
| 97 |
"""OpenAI-compatible LLM agent that chooses actions given observations."""
|
| 98 |
|
|
@@ -119,7 +145,7 @@ Current observation:
|
|
| 119 |
- Hour of day: {obs.get('hour_of_day', 12)} (0=midnight, peak prices 8-12 and 17-21)
|
| 120 |
- Pending batch job deadlines: {obs.get('batch_queue', [])}
|
| 121 |
- Cumulative cost so far: ${obs.get('cumulative_cost', 0):.4f}
|
| 122 |
-
- Episode step: {obs.get('step', 0)}/
|
| 123 |
|
| 124 |
Strategy hints:
|
| 125 |
- Charge thermal storage when price < $0.08/kWh, discharge when price > $0.15/kWh
|
|
@@ -139,37 +165,20 @@ Respond with ONLY a JSON action:
|
|
| 139 |
{"role": "user", "content": prompt},
|
| 140 |
],
|
| 141 |
max_tokens=128,
|
| 142 |
-
temperature=0.
|
| 143 |
)
|
| 144 |
content = completion.choices[0].message.content.strip()
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
except Exception as e:
|
| 147 |
print(f" [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {e}")
|
| 148 |
time.sleep(1)
|
| 149 |
|
| 150 |
-
# Fallback: rule-based heuristic
|
| 151 |
return self._heuristic_action(obs)
|
| 152 |
|
| 153 |
-
def _parse_action(self, content: str) -> dict:
|
| 154 |
-
"""Extract and validate JSON action from LLM response."""
|
| 155 |
-
# Try direct JSON parse
|
| 156 |
-
try:
|
| 157 |
-
action = json.loads(content)
|
| 158 |
-
return self._clamp_action(action)
|
| 159 |
-
except json.JSONDecodeError:
|
| 160 |
-
pass
|
| 161 |
-
# Try to extract JSON block from text
|
| 162 |
-
match = re.search(r"\{[^}]+\}", content, re.DOTALL)
|
| 163 |
-
if match:
|
| 164 |
-
try:
|
| 165 |
-
action = json.loads(match.group())
|
| 166 |
-
return self._clamp_action(action)
|
| 167 |
-
except json.JSONDecodeError:
|
| 168 |
-
pass
|
| 169 |
-
# Fallback
|
| 170 |
-
print(f" [WARN] could not parse LLM response: {content[:100]}")
|
| 171 |
-
return self._default_action()
|
| 172 |
-
|
| 173 |
def _clamp_action(self, action: dict) -> dict:
|
| 174 |
return {
|
| 175 |
"hvac_power_level": max(0.0, min(1.0, float(action.get("hvac_power_level", 0.5)))),
|
|
@@ -180,38 +189,33 @@ Respond with ONLY a JSON action:
|
|
| 180 |
}
|
| 181 |
|
| 182 |
def _heuristic_action(self, obs: dict) -> dict:
|
| 183 |
-
"""
|
| 184 |
price = obs.get("current_price", 0.10)
|
| 185 |
stress = obs.get("grid_stress_signal", 0.0)
|
| 186 |
temp = obs.get("indoor_temperature", 21.0)
|
| 187 |
storage = obs.get("thermal_storage_level", 0.5)
|
| 188 |
queue = obs.get("batch_queue", [])
|
| 189 |
|
| 190 |
-
# HVAC: reduce during peak
|
| 191 |
hvac = 0.7 if price < 0.08 else (0.3 if price > 0.15 else 0.5)
|
| 192 |
-
# Adjust for temperature
|
| 193 |
if temp > 23.0:
|
| 194 |
hvac = max(hvac, 0.8)
|
| 195 |
elif temp < 19.0:
|
| 196 |
hvac = min(hvac, 0.2)
|
| 197 |
|
| 198 |
-
# Storage arbitrage
|
| 199 |
charge = 0.0
|
| 200 |
if price < 0.07 and storage < 0.8:
|
| 201 |
charge = 0.5
|
| 202 |
elif price > 0.15 and storage > 0.3:
|
| 203 |
charge = -0.5
|
| 204 |
|
| 205 |
-
# Load shedding
|
| 206 |
shed = 0.0
|
| 207 |
if stress > 0.7:
|
| 208 |
shed = 0.4
|
| 209 |
elif stress > 0.5:
|
| 210 |
shed = 0.2
|
| 211 |
|
| 212 |
-
# Batch jobs: schedule soon if deadline approaching
|
| 213 |
slot = 2
|
| 214 |
-
if queue and min(queue) <
|
| 215 |
slot = 0
|
| 216 |
|
| 217 |
return {
|
|
@@ -223,48 +227,84 @@ Respond with ONLY a JSON action:
|
|
| 223 |
}
|
| 224 |
|
| 225 |
def _default_action(self) -> dict:
|
| 226 |
-
return {
|
| 227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
|
| 230 |
# ── Episode runner ───────────────────────────────────────────────────────────
|
| 231 |
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
reset_resp = env_client.reset(task_id=task_id, seed=seed)
|
| 236 |
obs = reset_resp["observations"][0]
|
| 237 |
|
|
|
|
|
|
|
| 238 |
total_reward = 0.0
|
| 239 |
total_steps = 0
|
| 240 |
start_time = time.time()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
-
step_resp = {}
|
| 243 |
-
_step = 0
|
| 244 |
while not step_resp.get("done", False):
|
| 245 |
-
|
| 246 |
-
|
| 247 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
if step_resp is None or "observation" not in step_resp:
|
| 249 |
-
print(f" [WARN] step {
|
| 250 |
-
_step += 1
|
| 251 |
break
|
| 252 |
|
|
|
|
|
|
|
|
|
|
| 253 |
obs = step_resp["observation"]
|
| 254 |
-
total_reward += step_resp["reward"]
|
| 255 |
total_steps += 1
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
|
|
|
|
|
|
|
|
|
| 264 |
|
| 265 |
elapsed = time.time() - start_time
|
| 266 |
grade = env_client.grade()
|
| 267 |
|
|
|
|
|
|
|
| 268 |
return {
|
| 269 |
"task_id": task_id,
|
| 270 |
"seed": seed,
|
|
@@ -279,12 +319,32 @@ def run_episode(env_client: GridMindEnvClient, agent: LLMAgent,
|
|
| 279 |
|
| 280 |
# ── Main ─────────────────────────────────────────────────────────────────────
|
| 281 |
|
| 282 |
-
|
|
|
|
| 283 |
parser = argparse.ArgumentParser(description="GridMind-RL baseline inference")
|
| 284 |
parser.add_argument("--episodes", type=int, default=DEFAULT_EPISODES)
|
| 285 |
parser.add_argument("--env-url", type=str, default=ENV_URL)
|
| 286 |
parser.add_argument("--verbose", action="store_true")
|
| 287 |
parser.add_argument("--output", type=str, default="baseline_scores.json")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
args = parser.parse_args()
|
| 289 |
|
| 290 |
print("=" * 60)
|
|
@@ -293,48 +353,59 @@ def main():
|
|
| 293 |
print(f" API: {API_BASE_URL}")
|
| 294 |
print(f" Env: {args.env_url}")
|
| 295 |
print(f" Episodes per task: {args.episodes}")
|
|
|
|
| 296 |
print("=" * 60)
|
| 297 |
|
| 298 |
env_client = GridMindEnvClient(base_url=args.env_url)
|
| 299 |
|
| 300 |
-
# Wait for env server to be healthy
|
| 301 |
print("\nWaiting for environment server...")
|
| 302 |
for attempt in range(30):
|
| 303 |
if env_client.health():
|
| 304 |
-
print("
|
| 305 |
break
|
| 306 |
time.sleep(2)
|
| 307 |
if attempt == 29:
|
| 308 |
-
print("
|
| 309 |
sys.exit(1)
|
| 310 |
|
| 311 |
agent = LLMAgent()
|
| 312 |
-
all_results = []
|
| 313 |
|
| 314 |
for task_id in [1, 2, 3]:
|
| 315 |
-
print(f"\n
|
| 316 |
-
task_scores = []
|
| 317 |
for ep in range(args.episodes):
|
| 318 |
seed = DEFAULT_SEED_BASE + task_id * 100 + ep
|
| 319 |
print(f" Episode {ep+1}/{args.episodes} (seed={seed})")
|
| 320 |
-
result = run_episode(
|
| 321 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
all_results.append(result)
|
| 323 |
-
print(
|
|
|
|
|
|
|
|
|
|
| 324 |
|
| 325 |
avg_score = sum(task_scores) / len(task_scores)
|
| 326 |
print(f" Task {task_id} average score: {avg_score:.4f}")
|
| 327 |
|
| 328 |
-
# Score summary table
|
| 329 |
print("\n" + "=" * 60)
|
| 330 |
print("BASELINE SCORES SUMMARY")
|
| 331 |
print("=" * 60)
|
| 332 |
print(f"{'Task':<10} {'Model':<30} {'Score':<10} {'Episodes':<10}")
|
| 333 |
print("-" * 60)
|
| 334 |
|
| 335 |
-
task_avgs = {}
|
| 336 |
for task_id in [1, 2, 3]:
|
| 337 |
-
scores = [r["score"] for r in all_results if r["task_id"] == task_id]
|
| 338 |
avg = sum(scores) / len(scores) if scores else 0.0
|
| 339 |
task_avgs[task_id] = avg
|
| 340 |
print(f"Task {task_id:<6} {MODEL_NAME:<30} {avg:<10.4f} {len(scores)}")
|
|
@@ -343,19 +414,21 @@ def main():
|
|
| 343 |
overall = sum(task_avgs.values()) / len(task_avgs)
|
| 344 |
print(f"{'Overall':<10} {'':<30} {overall:<10.4f}")
|
| 345 |
|
| 346 |
-
# Save results
|
| 347 |
output = {
|
| 348 |
"model": MODEL_NAME,
|
| 349 |
"api_base": API_BASE_URL,
|
| 350 |
"episodes_per_task": args.episodes,
|
| 351 |
"seed_base": DEFAULT_SEED_BASE,
|
|
|
|
|
|
|
|
|
|
| 352 |
"task_averages": {str(k): v for k, v in task_avgs.items()},
|
| 353 |
"overall_average": overall,
|
| 354 |
"all_results": all_results,
|
| 355 |
}
|
| 356 |
-
with open(args.output, "w") as f:
|
| 357 |
json.dump(output, f, indent=2)
|
| 358 |
-
print(f"\n
|
| 359 |
|
| 360 |
|
| 361 |
if __name__ == "__main__":
|
|
|
|
| 8 |
export API_BASE_URL=https://router.huggingface.co/v1
|
| 9 |
export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
|
| 10 |
export HF_TOKEN=hf_xxxx
|
| 11 |
+
python inference.py
|
| 12 |
+
# or: python python/inference.py [--episodes 1] [--llm-every 4] [--fast-mode]
|
| 13 |
"""
|
| 14 |
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
|
| 17 |
import argparse
|
| 18 |
import json
|
| 19 |
import os
|
|
|
|
|
|
|
| 20 |
import sys
|
| 21 |
import time
|
| 22 |
from typing import Any
|
|
|
|
| 30 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 31 |
MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
|
| 32 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 33 |
+
DEFAULT_EPISODES = 1
|
| 34 |
+
DEFAULT_SEED_BASE = 1000
|
| 35 |
MAX_RETRIES = 3
|
| 36 |
+
# 96 steps × 15 min = 24 h (must match env.EpisodeSteps)
|
| 37 |
+
EPISODE_STEPS = 96
|
| 38 |
+
LAST_STEP_INDEX = EPISODE_STEPS - 1
|
| 39 |
|
| 40 |
SYSPROMPT = """You are GridMind, an expert industrial energy management controller.
|
| 41 |
You control a building's HVAC, thermal storage, batch job scheduling, and load shedding.
|
|
|
|
| 43 |
Always respond with a single valid JSON object matching the action schema. No explanation needed."""
|
| 44 |
|
| 45 |
TASK_DESCRIPTIONS = {
|
| 46 |
+
1: "Task 1 (Easy - Cost Minimization): Minimize total energy cost over 24 hours. No temperature or batch constraints. Use cheap off-peak periods and thermal storage.",
|
| 47 |
2: "Task 2 (Medium - Temperature Management): Minimize cost AND keep indoor temperature within 19-23°C at all times. Balance comfort vs cost.",
|
| 48 |
+
3: "Task 3 (Hard - Full Demand Response): Minimize cost, maintain temperature, respond to grid stress (shed when grid_stress_signal > 0.7), schedule batch jobs, minimize carbon.",
|
| 49 |
}
|
| 50 |
|
| 51 |
ACTION_SCHEMA_STR = """{
|
|
|
|
| 57 |
}"""
|
| 58 |
|
| 59 |
|
| 60 |
+
def extract_json_object(text: str) -> dict[str, Any] | None:
    """Return the first decodable JSON object embedded in ``text``.

    The text is scanned for ``{`` characters; at each one the JSON decoder is
    asked to parse a value starting there and decides itself where the object
    ends. This copes with nested objects and with braces that appear inside
    string values, which naive brace counting would misread.

    Args:
        text: Arbitrary text (e.g. an LLM reply) that may contain a JSON object.

    Returns:
        The parsed object as a dict, or ``None`` when no ``{`` starts a valid
        JSON object.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start >= 0:
        try:
            # raw_decode parses one JSON value beginning at `start` and ignores
            # whatever trails it; a value that begins with "{" is an object.
            parsed, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not a valid object at this brace; try the next candidate.
            start = text.find("{", start + 1)
            continue
        return parsed
    return None
|
| 78 |
+
|
| 79 |
+
|
| 80 |
# ── Environment client ───────────────────────────────────────────────────────
|
| 81 |
|
| 82 |
+
|
| 83 |
class GridMindEnvClient:
|
| 84 |
"""Simple HTTP client for the GridMind-RL Go environment server."""
|
| 85 |
|
|
|
|
| 118 |
|
| 119 |
# ── LLM agent ───────────────────────────────────────────────────────────────
|
| 120 |
|
| 121 |
+
|
| 122 |
class LLMAgent:
|
| 123 |
"""OpenAI-compatible LLM agent that chooses actions given observations."""
|
| 124 |
|
|
|
|
| 145 |
- Hour of day: {obs.get('hour_of_day', 12)} (0=midnight, peak prices 8-12 and 17-21)
|
| 146 |
- Pending batch job deadlines: {obs.get('batch_queue', [])}
|
| 147 |
- Cumulative cost so far: ${obs.get('cumulative_cost', 0):.4f}
|
| 148 |
+
- Episode step: {obs.get('step', 0)}/{LAST_STEP_INDEX}
|
| 149 |
|
| 150 |
Strategy hints:
|
| 151 |
- Charge thermal storage when price < $0.08/kWh, discharge when price > $0.15/kWh
|
|
|
|
| 165 |
{"role": "user", "content": prompt},
|
| 166 |
],
|
| 167 |
max_tokens=128,
|
| 168 |
+
temperature=0.0,
|
| 169 |
)
|
| 170 |
content = completion.choices[0].message.content.strip()
|
| 171 |
+
parsed = extract_json_object(content)
|
| 172 |
+
if parsed is not None:
|
| 173 |
+
return self._clamp_action(parsed)
|
| 174 |
+
action = json.loads(content)
|
| 175 |
+
return self._clamp_action(action)
|
| 176 |
except Exception as e:
|
| 177 |
print(f" [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {e}")
|
| 178 |
time.sleep(1)
|
| 179 |
|
|
|
|
| 180 |
return self._heuristic_action(obs)
|
| 181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
def _clamp_action(self, action: dict) -> dict:
|
| 183 |
return {
|
| 184 |
"hvac_power_level": max(0.0, min(1.0, float(action.get("hvac_power_level", 0.5)))),
|
|
|
|
| 189 |
}
|
| 190 |
|
| 191 |
def _heuristic_action(self, obs: dict) -> dict:
|
| 192 |
+
"""Rule-based policy (deterministic given obs)."""
|
| 193 |
price = obs.get("current_price", 0.10)
|
| 194 |
stress = obs.get("grid_stress_signal", 0.0)
|
| 195 |
temp = obs.get("indoor_temperature", 21.0)
|
| 196 |
storage = obs.get("thermal_storage_level", 0.5)
|
| 197 |
queue = obs.get("batch_queue", [])
|
| 198 |
|
|
|
|
| 199 |
hvac = 0.7 if price < 0.08 else (0.3 if price > 0.15 else 0.5)
|
|
|
|
| 200 |
if temp > 23.0:
|
| 201 |
hvac = max(hvac, 0.8)
|
| 202 |
elif temp < 19.0:
|
| 203 |
hvac = min(hvac, 0.2)
|
| 204 |
|
|
|
|
| 205 |
charge = 0.0
|
| 206 |
if price < 0.07 and storage < 0.8:
|
| 207 |
charge = 0.5
|
| 208 |
elif price > 0.15 and storage > 0.3:
|
| 209 |
charge = -0.5
|
| 210 |
|
|
|
|
| 211 |
shed = 0.0
|
| 212 |
if stress > 0.7:
|
| 213 |
shed = 0.4
|
| 214 |
elif stress > 0.5:
|
| 215 |
shed = 0.2
|
| 216 |
|
|
|
|
| 217 |
slot = 2
|
| 218 |
+
if queue and min(queue) < 8:
|
| 219 |
slot = 0
|
| 220 |
|
| 221 |
return {
|
|
|
|
| 227 |
}
|
| 228 |
|
| 229 |
def _default_action(self) -> dict:
|
| 230 |
+
return {
|
| 231 |
+
"hvac_power_level": 0.5,
|
| 232 |
+
"thermal_charge_rate": 0.0,
|
| 233 |
+
"batch_job_slot": 0,
|
| 234 |
+
"load_shed_fraction": 0.0,
|
| 235 |
+
"building_id": 0,
|
| 236 |
+
}
|
| 237 |
|
| 238 |
|
| 239 |
# ── Episode runner ───────────────────────────────────────────────────────────
|
| 240 |
|
| 241 |
+
|
| 242 |
+
def run_episode(
|
| 243 |
+
env_client: GridMindEnvClient,
|
| 244 |
+
agent: LLMAgent,
|
| 245 |
+
task_id: int,
|
| 246 |
+
seed: int,
|
| 247 |
+
*,
|
| 248 |
+
fast_mode: bool,
|
| 249 |
+
llm_every: int,
|
| 250 |
+
max_steps: int | None,
|
| 251 |
+
verbose: bool = False,
|
| 252 |
+
) -> dict[str, Any]:
|
| 253 |
+
"""Run a single episode and return grade + metadata. Prints [START], [STEPn], [END]."""
|
| 254 |
reset_resp = env_client.reset(task_id=task_id, seed=seed)
|
| 255 |
obs = reset_resp["observations"][0]
|
| 256 |
|
| 257 |
+
print("[START]", flush=True)
|
| 258 |
+
|
| 259 |
total_reward = 0.0
|
| 260 |
total_steps = 0
|
| 261 |
start_time = time.time()
|
| 262 |
+
step_resp: dict[str, Any] = {}
|
| 263 |
+
step_limit = EPISODE_STEPS if max_steps is None else min(max_steps, EPISODE_STEPS)
|
| 264 |
+
|
| 265 |
+
llm_reuse_remaining = 0
|
| 266 |
+
cached_action = agent._default_action()
|
| 267 |
|
|
|
|
|
|
|
| 268 |
while not step_resp.get("done", False):
|
| 269 |
+
if total_steps >= step_limit:
|
| 270 |
+
break
|
| 271 |
|
| 272 |
+
if fast_mode:
|
| 273 |
+
action = agent._heuristic_action(obs)
|
| 274 |
+
else:
|
| 275 |
+
if llm_reuse_remaining <= 0:
|
| 276 |
+
cached_action = agent.choose_action(obs, task_id)
|
| 277 |
+
llm_reuse_remaining = max(1, llm_every)
|
| 278 |
+
action = cached_action
|
| 279 |
+
|
| 280 |
+
step_resp = env_client.step(action)
|
| 281 |
if step_resp is None or "observation" not in step_resp:
|
| 282 |
+
print(f" [WARN] step {total_steps}: invalid step response", flush=True)
|
|
|
|
| 283 |
break
|
| 284 |
|
| 285 |
+
if not fast_mode:
|
| 286 |
+
llm_reuse_remaining -= 1
|
| 287 |
+
|
| 288 |
obs = step_resp["observation"]
|
| 289 |
+
total_reward += float(step_resp["reward"])
|
| 290 |
total_steps += 1
|
| 291 |
+
print(f"[STEP{total_steps}]", flush=True)
|
| 292 |
+
|
| 293 |
+
if verbose and total_steps % 16 == 0:
|
| 294 |
+
print(
|
| 295 |
+
f" step={total_steps:02d} price=${obs['current_price']:.3f} "
|
| 296 |
+
f"temp={obs['indoor_temperature']:.1f}°C "
|
| 297 |
+
f"stress={obs['grid_stress_signal']:.2f} "
|
| 298 |
+
f"cost=${obs['cumulative_cost']:.2f} "
|
| 299 |
+
f"reward={step_resp['reward']:.3f}",
|
| 300 |
+
flush=True,
|
| 301 |
+
)
|
| 302 |
|
| 303 |
elapsed = time.time() - start_time
|
| 304 |
grade = env_client.grade()
|
| 305 |
|
| 306 |
+
print("[END]", flush=True)
|
| 307 |
+
|
| 308 |
return {
|
| 309 |
"task_id": task_id,
|
| 310 |
"seed": seed,
|
|
|
|
| 319 |
|
| 320 |
# ── Main ─────────────────────────────────────────────────────────────────────
|
| 321 |
|
| 322 |
+
|
| 323 |
+
def main() -> None:
|
| 324 |
parser = argparse.ArgumentParser(description="GridMind-RL baseline inference")
|
| 325 |
parser.add_argument("--episodes", type=int, default=DEFAULT_EPISODES)
|
| 326 |
parser.add_argument("--env-url", type=str, default=ENV_URL)
|
| 327 |
parser.add_argument("--verbose", action="store_true")
|
| 328 |
parser.add_argument("--output", type=str, default="baseline_scores.json")
|
| 329 |
+
parser.add_argument(
|
| 330 |
+
"--fast-mode",
|
| 331 |
+
action="store_true",
|
| 332 |
+
help="Heuristic policy only (no LLM calls; fastest, fully reproducible).",
|
| 333 |
+
)
|
| 334 |
+
parser.add_argument(
|
| 335 |
+
"--llm-every",
|
| 336 |
+
type=int,
|
| 337 |
+
default=4,
|
| 338 |
+
metavar="N",
|
| 339 |
+
help="Reuse the same LLM action for N consecutive steps (default: 4).",
|
| 340 |
+
)
|
| 341 |
+
parser.add_argument(
|
| 342 |
+
"--max-steps",
|
| 343 |
+
type=int,
|
| 344 |
+
default=None,
|
| 345 |
+
metavar="N",
|
| 346 |
+
help="Stop after N steps (default: full episode). Grade uses partial episode.",
|
| 347 |
+
)
|
| 348 |
args = parser.parse_args()
|
| 349 |
|
| 350 |
print("=" * 60)
|
|
|
|
| 353 |
print(f" API: {API_BASE_URL}")
|
| 354 |
print(f" Env: {args.env_url}")
|
| 355 |
print(f" Episodes per task: {args.episodes}")
|
| 356 |
+
print(f" Fast mode: {args.fast_mode} | LLM every: {args.llm_every} steps")
|
| 357 |
print("=" * 60)
|
| 358 |
|
| 359 |
env_client = GridMindEnvClient(base_url=args.env_url)
|
| 360 |
|
|
|
|
| 361 |
print("\nWaiting for environment server...")
|
| 362 |
for attempt in range(30):
|
| 363 |
if env_client.health():
|
| 364 |
+
print(" [OK] Environment server is healthy")
|
| 365 |
break
|
| 366 |
time.sleep(2)
|
| 367 |
if attempt == 29:
|
| 368 |
+
print(" [FAIL] Environment server not reachable. Exiting.")
|
| 369 |
sys.exit(1)
|
| 370 |
|
| 371 |
agent = LLMAgent()
|
| 372 |
+
all_results: list[dict[str, Any]] = []
|
| 373 |
|
| 374 |
for task_id in [1, 2, 3]:
|
| 375 |
+
print(f"\n-- Task {task_id}: {TASK_DESCRIPTIONS[task_id][:60]}...")
|
| 376 |
+
task_scores: list[float] = []
|
| 377 |
for ep in range(args.episodes):
|
| 378 |
seed = DEFAULT_SEED_BASE + task_id * 100 + ep
|
| 379 |
print(f" Episode {ep+1}/{args.episodes} (seed={seed})")
|
| 380 |
+
result = run_episode(
|
| 381 |
+
env_client,
|
| 382 |
+
agent,
|
| 383 |
+
task_id=task_id,
|
| 384 |
+
seed=seed,
|
| 385 |
+
fast_mode=args.fast_mode,
|
| 386 |
+
llm_every=args.llm_every,
|
| 387 |
+
max_steps=args.max_steps,
|
| 388 |
+
verbose=args.verbose,
|
| 389 |
+
)
|
| 390 |
+
task_scores.append(float(result["score"]))
|
| 391 |
all_results.append(result)
|
| 392 |
+
print(
|
| 393 |
+
f" → score={result['score']:.4f} | reward={result['total_reward']:.3f} | "
|
| 394 |
+
f"{result['elapsed_sec']:.1f}s | steps={result['total_steps']}"
|
| 395 |
+
)
|
| 396 |
|
| 397 |
avg_score = sum(task_scores) / len(task_scores)
|
| 398 |
print(f" Task {task_id} average score: {avg_score:.4f}")
|
| 399 |
|
|
|
|
| 400 |
print("\n" + "=" * 60)
|
| 401 |
print("BASELINE SCORES SUMMARY")
|
| 402 |
print("=" * 60)
|
| 403 |
print(f"{'Task':<10} {'Model':<30} {'Score':<10} {'Episodes':<10}")
|
| 404 |
print("-" * 60)
|
| 405 |
|
| 406 |
+
task_avgs: dict[int, float] = {}
|
| 407 |
for task_id in [1, 2, 3]:
|
| 408 |
+
scores = [float(r["score"]) for r in all_results if r["task_id"] == task_id]
|
| 409 |
avg = sum(scores) / len(scores) if scores else 0.0
|
| 410 |
task_avgs[task_id] = avg
|
| 411 |
print(f"Task {task_id:<6} {MODEL_NAME:<30} {avg:<10.4f} {len(scores)}")
|
|
|
|
| 414 |
overall = sum(task_avgs.values()) / len(task_avgs)
|
| 415 |
print(f"{'Overall':<10} {'':<30} {overall:<10.4f}")
|
| 416 |
|
|
|
|
| 417 |
output = {
|
| 418 |
"model": MODEL_NAME,
|
| 419 |
"api_base": API_BASE_URL,
|
| 420 |
"episodes_per_task": args.episodes,
|
| 421 |
"seed_base": DEFAULT_SEED_BASE,
|
| 422 |
+
"fast_mode": args.fast_mode,
|
| 423 |
+
"llm_every": args.llm_every,
|
| 424 |
+
"max_steps": args.max_steps,
|
| 425 |
"task_averages": {str(k): v for k, v in task_avgs.items()},
|
| 426 |
"overall_average": overall,
|
| 427 |
"all_results": all_results,
|
| 428 |
}
|
| 429 |
+
with open(args.output, "w", encoding="utf-8") as f:
|
| 430 |
json.dump(output, f, indent=2)
|
| 431 |
+
print(f"\n[OK] Results saved to {args.output}")
|
| 432 |
|
| 433 |
|
| 434 |
if __name__ == "__main__":
|
python/models.py
CHANGED
|
@@ -29,7 +29,7 @@ class ObservationModel(BaseModel):
|
|
| 29 |
hour_of_day: int = Field(..., ge=0, le=23, description="Current hour of day (0–23)")
|
| 30 |
batch_queue: List[int] = Field(default_factory=list, description="Deadline slots of pending batch jobs")
|
| 31 |
cumulative_cost: float = Field(..., ge=0.0, description="Running energy cost this episode ($)")
|
| 32 |
-
step: int = Field(..., ge=0, description="Current timestep (0–
|
| 33 |
building_id: int = Field(default=0, description="Building index in federation")
|
| 34 |
|
| 35 |
|
|
|
|
| 29 |
hour_of_day: int = Field(..., ge=0, le=23, description="Current hour of day (0–23)")
|
| 30 |
batch_queue: List[int] = Field(default_factory=list, description="Deadline slots of pending batch jobs")
|
| 31 |
cumulative_cost: float = Field(..., ge=0.0, description="Running energy cost this episode ($)")
|
| 32 |
+
step: int = Field(..., ge=0, le=95, description="Current timestep (0–95); 96 steps = 24h")
|
| 33 |
building_id: int = Field(default=0, description="Building index in federation")
|
| 34 |
|
| 35 |
|
python/validate.py
CHANGED
|
@@ -16,16 +16,16 @@ import requests
|
|
| 16 |
|
| 17 |
ENV_URL = "http://localhost:7860"
|
| 18 |
|
| 19 |
-
PASS = "
|
| 20 |
-
FAIL = "
|
| 21 |
-
WARN = "
|
| 22 |
|
| 23 |
|
| 24 |
def check(label: str, condition: bool, detail: str = "") -> bool:
|
| 25 |
icon = PASS if condition else FAIL
|
| 26 |
line = f" {icon} {label}"
|
| 27 |
if detail:
|
| 28 |
-
line += f"
|
| 29 |
print(line)
|
| 30 |
return condition
|
| 31 |
|
|
@@ -42,20 +42,22 @@ def validate(env_url: str) -> bool:
|
|
| 42 |
base = env_url.rstrip("/")
|
| 43 |
results = []
|
| 44 |
|
| 45 |
-
print("\n
|
| 46 |
print(" GridMind-RL OpenEnv Validation Report")
|
| 47 |
-
print("
|
| 48 |
|
| 49 |
-
# ── 1. Health
|
| 50 |
-
print("1. Health
|
| 51 |
try:
|
| 52 |
r = get(f"{base}/health")
|
| 53 |
results.append(check("GET /health returns 200", r.status_code == 200, f"got {r.status_code}"))
|
| 54 |
data = r.json()
|
| 55 |
results.append(check("Response has 'status' field", "status" in data))
|
|
|
|
|
|
|
| 56 |
except Exception as e:
|
| 57 |
results.append(check("GET /health reachable", False, str(e)))
|
| 58 |
-
print(f"\n
|
| 59 |
return False
|
| 60 |
|
| 61 |
# ── 2. Reset endpoint ───────────────────────────────────────────────────
|
|
@@ -122,7 +124,8 @@ def validate(env_url: str) -> bool:
|
|
| 122 |
|
| 123 |
rc = info.get("reward_components", {})
|
| 124 |
rc_fields = ["cost_savings", "temp_constraint", "grid_response",
|
| 125 |
-
"deadline_penalty", "efficiency_bonus", "stability_penalty",
|
|
|
|
| 126 |
for f in rc_fields:
|
| 127 |
results.append(check(f"reward_components has '{f}'", f in rc))
|
| 128 |
|
|
@@ -139,14 +142,15 @@ def validate(env_url: str) -> bool:
|
|
| 139 |
r = get(f"{base}/state")
|
| 140 |
results.append(check("GET /state returns 200", r.status_code == 200))
|
| 141 |
state = r.json()
|
| 142 |
-
state_fields = ["buildings", "
|
| 143 |
"episode", "step", "task_id", "done", "seed"]
|
| 144 |
for f in state_fields:
|
| 145 |
results.append(check(f"state has '{f}'", f in state))
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
|
|
|
| 150 |
except Exception as e:
|
| 151 |
results.append(check("GET /state succeeds", False, str(e)))
|
| 152 |
|
|
@@ -240,13 +244,13 @@ def validate(env_url: str) -> bool:
|
|
| 240 |
total = len(results)
|
| 241 |
pct = 100 * passed // total if total > 0 else 0
|
| 242 |
|
| 243 |
-
print(f"\n
|
| 244 |
print(f" Result: {passed}/{total} checks passed ({pct}%)")
|
| 245 |
if passed == total:
|
| 246 |
-
print("
|
| 247 |
else:
|
| 248 |
-
print(f"
|
| 249 |
-
print("
|
| 250 |
|
| 251 |
return passed == total
|
| 252 |
|
|
|
|
| 16 |
|
| 17 |
ENV_URL = "http://localhost:7860"
|
| 18 |
|
| 19 |
+
PASS = "[OK]"
|
| 20 |
+
FAIL = "[FAIL]"
|
| 21 |
+
WARN = "[WARN]"
|
| 22 |
|
| 23 |
|
| 24 |
def check(label: str, condition: bool, detail: str = "") -> bool:
    """Print one report line for a validation check and return its outcome.

    The line starts with the PASS marker when ``condition`` is truthy and the
    FAIL marker otherwise; a non-empty ``detail`` is appended after " - ".
    """
    status = FAIL if not condition else PASS
    suffix = f" - {detail}" if detail else ""
    print(f" {status} {label}" + suffix)
    return condition
|
| 31 |
|
|
|
|
| 42 |
base = env_url.rstrip("/")
|
| 43 |
results = []
|
| 44 |
|
| 45 |
+
print("\n" + "=" * 50)
|
| 46 |
print(" GridMind-RL OpenEnv Validation Report")
|
| 47 |
+
print("=" * 50 + "\n")
|
| 48 |
|
| 49 |
+
# ── 1. Health & ping ─────────────────────────────────────────────────────
|
| 50 |
+
print("1. Health & Ping")
|
| 51 |
try:
|
| 52 |
r = get(f"{base}/health")
|
| 53 |
results.append(check("GET /health returns 200", r.status_code == 200, f"got {r.status_code}"))
|
| 54 |
data = r.json()
|
| 55 |
results.append(check("Response has 'status' field", "status" in data))
|
| 56 |
+
rp = get(f"{base}/ping")
|
| 57 |
+
results.append(check("GET /ping returns 200", rp.status_code == 200, f"got {rp.status_code}"))
|
| 58 |
except Exception as e:
|
| 59 |
results.append(check("GET /health reachable", False, str(e)))
|
| 60 |
+
print(f"\n [FAIL] Cannot reach server at {base}. Is it running?\n")
|
| 61 |
return False
|
| 62 |
|
| 63 |
# ── 2. Reset endpoint ───────────────────────────────────────────────────
|
|
|
|
| 124 |
|
| 125 |
rc = info.get("reward_components", {})
|
| 126 |
rc_fields = ["cost_savings", "temp_constraint", "grid_response",
|
| 127 |
+
"deadline_penalty", "efficiency_bonus", "stability_penalty",
|
| 128 |
+
"carbon_reward", "total"]
|
| 129 |
for f in rc_fields:
|
| 130 |
results.append(check(f"reward_components has '{f}'", f in rc))
|
| 131 |
|
|
|
|
| 142 |
r = get(f"{base}/state")
|
| 143 |
results.append(check("GET /state returns 200", r.status_code == 200))
|
| 144 |
state = r.json()
|
| 145 |
+
state_fields = ["buildings", "price_curve_episode", "carbon_curve_episode",
|
| 146 |
"episode", "step", "task_id", "done", "seed"]
|
| 147 |
for f in state_fields:
|
| 148 |
results.append(check(f"state has '{f}'", f in state))
|
| 149 |
+
curve_n = 24 # EpisodeSteps/4 (96/4) downsamples to hourly points
|
| 150 |
+
results.append(check("price_curve_episode has 24 entries",
|
| 151 |
+
len(state.get("price_curve_episode", [])) == curve_n))
|
| 152 |
+
results.append(check("carbon_curve_episode has 24 entries",
|
| 153 |
+
len(state.get("carbon_curve_episode", [])) == curve_n))
|
| 154 |
except Exception as e:
|
| 155 |
results.append(check("GET /state succeeds", False, str(e)))
|
| 156 |
|
|
|
|
| 244 |
total = len(results)
|
| 245 |
pct = 100 * passed // total if total > 0 else 0
|
| 246 |
|
| 247 |
+
print(f"\n" + "=" * 50)
|
| 248 |
print(f" Result: {passed}/{total} checks passed ({pct}%)")
|
| 249 |
if passed == total:
|
| 250 |
+
print(" ALL CHECKS PASSED - Ready for submission!")
|
| 251 |
else:
|
| 252 |
+
print(f" {total - passed} checks failed. Fix errors above.")
|
| 253 |
+
print("=" * 50 + "\n")
|
| 254 |
|
| 255 |
return passed == total
|
| 256 |
|
tests/environment_test.go
CHANGED
|
@@ -55,21 +55,24 @@ func TestStepAdvancesState(t *testing.T) {
|
|
| 55 |
if state.Step != 1 {
|
| 56 |
t.Errorf("expected step=1 after one step, got %d", state.Step)
|
| 57 |
}
|
|
|
|
|
|
|
|
|
|
| 58 |
}
|
| 59 |
|
| 60 |
-
//
|
| 61 |
-
func
|
| 62 |
e := env.NewEnvironment()
|
| 63 |
var seed int64 = 99
|
| 64 |
e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
|
| 65 |
|
| 66 |
action := []env.ActionModel{{HVACPowerLevel: 0.5}}
|
| 67 |
var lastDone bool
|
| 68 |
-
for i := 0; i <
|
| 69 |
_, lastDone = e.Step(action)
|
| 70 |
}
|
| 71 |
if !lastDone {
|
| 72 |
-
t.Errorf("episode should be done after
|
| 73 |
}
|
| 74 |
}
|
| 75 |
|
|
@@ -162,7 +165,7 @@ func TestGraderTask1ScoreRange(t *testing.T) {
|
|
| 162 |
e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})
|
| 163 |
|
| 164 |
action := []env.ActionModel{{HVACPowerLevel: 0.3}}
|
| 165 |
-
for i := 0; i <
|
| 166 |
e.Step(action)
|
| 167 |
}
|
| 168 |
|
|
@@ -171,9 +174,14 @@ func TestGraderTask1ScoreRange(t *testing.T) {
|
|
| 171 |
|
| 172 |
buildings := make([]*env.BuildingState, len(state.Buildings))
|
| 173 |
for i, pub := range state.Buildings {
|
|
|
|
|
|
|
| 174 |
buildings[i] = &env.BuildingState{
|
| 175 |
-
CumulativeCost:
|
| 176 |
-
BaselineCost:
|
|
|
|
|
|
|
|
|
|
| 177 |
}
|
| 178 |
}
|
| 179 |
|
|
|
|
| 55 |
if state.Step != 1 {
|
| 56 |
t.Errorf("expected step=1 after one step, got %d", state.Step)
|
| 57 |
}
|
| 58 |
+
if resps[0].Observation.Step != 0 {
|
| 59 |
+
t.Errorf("expected observation.step=0 after first transition, got %d", resps[0].Observation.Step)
|
| 60 |
+
}
|
| 61 |
}
|
| 62 |
|
| 63 |
+
// TestEpisodeLengthIs96 verifies the episode terminates after 96 steps (24h).
|
| 64 |
+
func TestEpisodeLengthIs96(t *testing.T) {
|
| 65 |
e := env.NewEnvironment()
|
| 66 |
var seed int64 = 99
|
| 67 |
e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
|
| 68 |
|
| 69 |
action := []env.ActionModel{{HVACPowerLevel: 0.5}}
|
| 70 |
var lastDone bool
|
| 71 |
+
for i := 0; i < env.EpisodeSteps; i++ {
|
| 72 |
_, lastDone = e.Step(action)
|
| 73 |
}
|
| 74 |
if !lastDone {
|
| 75 |
+
t.Errorf("episode should be done after %d steps", env.EpisodeSteps)
|
| 76 |
}
|
| 77 |
}
|
| 78 |
|
|
|
|
| 165 |
e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})
|
| 166 |
|
| 167 |
action := []env.ActionModel{{HVACPowerLevel: 0.3}}
|
| 168 |
+
for i := 0; i < env.EpisodeSteps; i++ {
|
| 169 |
e.Step(action)
|
| 170 |
}
|
| 171 |
|
|
|
|
| 174 |
|
| 175 |
buildings := make([]*env.BuildingState, len(state.Buildings))
|
| 176 |
for i, pub := range state.Buildings {
|
| 177 |
+
jobsCopy := make([]env.BatchJob, len(pub.Jobs))
|
| 178 |
+
copy(jobsCopy, pub.Jobs)
|
| 179 |
buildings[i] = &env.BuildingState{
|
| 180 |
+
CumulativeCost: pub.CumulativeCost,
|
| 181 |
+
BaselineCost: pub.BaselineCost,
|
| 182 |
+
CumulativeCarbon: pub.CumulativeCarbon,
|
| 183 |
+
BaselineCarbon: pub.BaselineCarbon,
|
| 184 |
+
Jobs: jobsCopy,
|
| 185 |
}
|
| 186 |
}
|
| 187 |
|
tests/test_graders.py
CHANGED
|
@@ -135,7 +135,7 @@ class TestTask3:
|
|
| 135 |
|
| 136 |
def test_has_all_sub_scores(self):
|
| 137 |
g = run_full_episode(task_id=3, seed=31)
|
| 138 |
-
for key in ["cost", "temperature", "grid_response", "batch_deadline"]:
|
| 139 |
assert key in g["sub_scores"], f"Missing sub-score: {key}"
|
| 140 |
|
| 141 |
def test_all_sub_scores_in_range(self):
|
|
@@ -146,7 +146,13 @@ class TestTask3:
|
|
| 146 |
def test_weights_sum_correct(self):
|
| 147 |
g = run_full_episode(task_id=3, seed=33)
|
| 148 |
ss = g["sub_scores"]
|
| 149 |
-
expected =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
assert abs(g["score"] - expected) < 0.01 or g["exploit_detected"]
|
| 151 |
|
| 152 |
def test_grid_response_sub_score(self):
|
|
|
|
| 135 |
|
| 136 |
def test_has_all_sub_scores(self):
    """A task 3 grade must contain every expected sub-score key."""
    grade = run_full_episode(task_id=3, seed=31)
    required = ("cost", "temperature", "grid_response", "batch_deadline", "carbon")
    for key in required:
        assert key in grade["sub_scores"], f"Missing sub-score: {key}"
|
| 140 |
|
| 141 |
def test_all_sub_scores_in_range(self):
|
|
|
|
| 146 |
def test_weights_sum_correct(self):
    """The task 3 score must equal the weighted sum of its sub-scores.

    The check is waived when the grade reports ``exploit_detected``.
    """
    grade = run_full_episode(task_id=3, seed=33)
    parts = grade["sub_scores"]
    # Weights applied to each sub-score, in the order they are summed.
    weights = (
        ("cost", 0.28),
        ("temperature", 0.20),
        ("grid_response", 0.20),
        ("batch_deadline", 0.12),
        ("carbon", 0.20),
    )
    expected = sum(parts[name] * weight for name, weight in weights)
    assert abs(grade["score"] - expected) < 0.01 or grade["exploit_detected"]
|
| 157 |
|
| 158 |
def test_grid_response_sub_score(self):
|