Spaces:

Prajwal782007
/

Gridmind

Running

App Files Files Community

ShreeshantXD committed on Apr 2

Commit

6d74982

1 Parent(s): e3130b4

feat: add baseline scores JSON, inference script, and update Dockerfile for improved project structure

Browse files

Files changed (18) hide show

Dockerfile +1 -0
README.md +3 -3
baseline_scores.json +58 -0
dashboard/__pycache__/server.cpython-311.pyc +0 -0
dashboard/static/dashboard.js +12 -11
dashboard/static/index.html +2 -2
env/environment.go +15 -5
env/models.go +3 -1
env/rewards.go +13 -15
env/tasks.go +17 -3
inference.py +11 -0
main.go +15 -3
openenv.yaml +5 -2
python/inference.py +147 -74
python/models.py +1 -1
python/validate.py +23 -19
tests/environment_test.go +15 -7
tests/test_graders.py +8 -2

Dockerfile CHANGED Viewed

@@ -27,6 +27,7 @@ COPY --from=builder /app/gridmind-server /usr/local/bin/gridmind-server
 # Copy Python layer and Dashboard
 COPY python/ ./python/
 COPY dashboard/ ./dashboard/
 COPY data/ ./data/
 COPY openenv.yaml ./

 # Copy Python layer and Dashboard
 COPY python/ ./python/
+COPY inference.py ./inference.py
 COPY dashboard/ ./dashboard/
 COPY data/ ./data/
 COPY openenv.yaml ./

README.md CHANGED Viewed

@@ -10,7 +10,7 @@
 1. **Get a free AI API key** from [Hugging Face](https://huggingface.co/join) (takes 2 minutes)
 2. **Run the simulator**: `docker run -p 7860:7860 -p 7861:7861 ghcr.io/your-repo/gridmind-rl:latest`
-3. **Watch the AI learn**: `python python/inference.py --episodes 1`
 That's it! The AI will start making energy decisions and you'll see live results.
@@ -122,7 +122,7 @@ export HF_TOKEN=hf_your_token_here
 ```bash
 # Run 3 learning episodes (takes ~5 minutes)
-python python/inference.py --episodes 3
 ```
 You'll see output like:
@@ -202,7 +202,7 @@ $env:MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"  # Faster but less accura
 **Run longer training**:
 ```bash
-python python/inference.py --episodes 10  # Takes ~30 minutes
 ```
 **Test the environment manually**:

 1. **Get a free AI API key** from [Hugging Face](https://huggingface.co/join) (takes 2 minutes)
 2. **Run the simulator**: `docker run -p 7860:7860 -p 7861:7861 ghcr.io/your-repo/gridmind-rl:latest`
+3. **Watch the AI learn**: `python inference.py --episodes 1` (or `--fast-mode` for a quick heuristic run, no API calls)
 That's it! The AI will start making energy decisions and you'll see live results.
 ```bash
 # Run 3 learning episodes (takes ~5 minutes)
+python inference.py --episodes 3
 ```
 You'll see output like:
 **Run longer training**:
 ```bash
+python inference.py --episodes 10 --llm-every 4  # Scale LLM calls via --llm-every; use --fast-mode for tests
 ```
 **Test the environment manually**:

baseline_scores.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "model": "meta-llama/Llama-3.1-8B-Instruct",
+  "api_base": "https://router.huggingface.co/v1",
+  "episodes_per_task": 1,
+  "seed_base": 1000,
+  "fast_mode": true,
+  "llm_every": 4,
+  "max_steps": null,
+  "task_averages": {
+    "1": 0.2776,
+    "2": 0.2182,
+    "3": 0.3115
+  },
+  "overall_average": 0.2691,
+  "all_results": [
+    {
+      "task_id": 1,
+      "seed": 1100,
+      "total_reward": -54.91106240679752,
+      "total_steps": 96,
+      "elapsed_sec": 0.8684265613555908,
+      "score": 0.2776,
+      "sub_scores": {
+        "cost": 0.277555958007489
+      },
+      "exploit_detected": false
+    },
+    {
+      "task_id": 2,
+      "seed": 1200,
+      "total_reward": -573.2793620498348,
+      "total_steps": 96,
+      "elapsed_sec": 0.9907081127166748,
+      "score": 0.2182,
+      "sub_scores": {
+        "cost": 0.2595566056450961,
+        "temperature": 0.15625
+      },
+      "exploit_detected": false
+    },
+    {
+      "task_id": 3,
+      "seed": 1300,
+      "total_reward": -670.8653705366278,
+      "total_steps": 96,
+      "elapsed_sec": 0.8988945484161377,
+      "score": 0.3115,
+      "sub_scores": {
+        "batch_deadline": 1,
+        "carbon": 0.24377839161166936,
+        "cost": 0.25263438913936676,
+        "grid_response": 0.21428571428571427,
+        "temperature": 0.14583333333333334
+      },
+      "exploit_detected": false
+    }
+  ]
+}

dashboard/__pycache__/server.cpython-311.pyc ADDED Viewed

Binary file (5.56 kB). View file

dashboard/static/dashboard.js CHANGED Viewed

@@ -7,7 +7,9 @@
 // ── Config ──────────────────────────────────────────────────────────────────
 const POLL_MS        = 500;
-const HISTORY_LEN    = 288;   // 288 steps = full episode
 const API_BASE       = '/api';
 const TASK_NAMES = {
   1: 'Task 1 — Cost Minimization (Easy)',
@@ -95,8 +97,8 @@ function makeBarChart(id, labels, datasets) {
 }
 // ── Initialise all charts ─────────────────────────────────────────────────────
-const emptyLabels = Array.from({ length: 72 }, (_, i) => `${i}h`);
-const emptyData   = Array(72).fill(null);
 // 1. Price curve
 const priceChart = makeLineChart('chart-price',
@@ -306,7 +308,7 @@ function renderGantt(jobs, currentStep) {
     wrap.innerHTML = '<div style="color:var(--text-dim);font-size:0.8rem">No batch jobs in this episode.</div>';
     return;
   }
-  const totalSlots = 288;
   wrap.innerHTML = '';
   jobs.forEach(job => {
     const row = document.createElement('div');
@@ -416,7 +418,7 @@ async function fetchAndUpdate() {
     const hourOfDay = b.hour_of_day || 0;
     // ── Header ──
-    document.getElementById('ep-step').textContent = `ep:${state.episode} step:${step}/287`;
     document.getElementById('task-badge').textContent = TASK_NAMES[state.task_id] || 'Task 1';
     // ── KPIs ──
@@ -444,20 +446,19 @@ async function fetchAndUpdate() {
     document.getElementById('kpi-storage').textContent = `${(b.thermal_storage_level * 100).toFixed(1)}`;
     // ── Price curve chart ──
-    if (state.price_curve_episode && state.price_curve_episode.length === 72) {
-      const labels = Array.from({ length: 72 }, (_, i) => `${i}:00`);
       priceChart.data.labels = labels;
       priceChart.data.datasets[0].data = state.price_curve_episode;
-      // Current position marker
-      const marker = Array(72).fill(null);
       marker[Math.floor(step / 4)] = state.price_curve_episode[Math.floor(step / 4)];
       priceChart.data.datasets[1].data = marker;
       priceChart.update('none');
     }
     // ── Carbon curve ──
-    if (state.carbon_curve_episode && state.carbon_curve_episode.length === 72) {
-      carbonChart.data.labels = Array.from({ length: 72 }, (_, i) => `${i}:00`);
       carbonChart.data.datasets[0].data = state.carbon_curve_episode;
       carbonChart.update('none');
     }

 // ── Config ──────────────────────────────────────────────────────────────────
 const POLL_MS        = 500;
+const EPISODE_STEPS  = 96;    // 24h × 4 steps/h (15-min)
+const HISTORY_LEN    = EPISODE_STEPS;
+const CURVE_POINTS   = 24;    // hourly downsample (EpisodeSteps/4)
 const API_BASE       = '/api';
 const TASK_NAMES = {
   1: 'Task 1 — Cost Minimization (Easy)',
 }
 // ── Initialise all charts ─────────────────────────────────────────────────────
+const emptyLabels = Array.from({ length: CURVE_POINTS }, (_, i) => `${i}h`);
+const emptyData   = Array(CURVE_POINTS).fill(null);
 // 1. Price curve
 const priceChart = makeLineChart('chart-price',
     wrap.innerHTML = '<div style="color:var(--text-dim);font-size:0.8rem">No batch jobs in this episode.</div>';
     return;
   }
+  const totalSlots = EPISODE_STEPS;
   wrap.innerHTML = '';
   jobs.forEach(job => {
     const row = document.createElement('div');
     const hourOfDay = b.hour_of_day || 0;
     // ── Header ──
+    document.getElementById('ep-step').textContent = `ep:${state.episode} step:${step}/${EPISODE_STEPS - 1}`;
     document.getElementById('task-badge').textContent = TASK_NAMES[state.task_id] || 'Task 1';
     // ── KPIs ──
     document.getElementById('kpi-storage').textContent = `${(b.thermal_storage_level * 100).toFixed(1)}`;
     // ── Price curve chart ──
+    if (state.price_curve_episode && state.price_curve_episode.length === CURVE_POINTS) {
+      const labels = Array.from({ length: CURVE_POINTS }, (_, i) => `${i}:00`);
       priceChart.data.labels = labels;
       priceChart.data.datasets[0].data = state.price_curve_episode;
+      const marker = Array(CURVE_POINTS).fill(null);
       marker[Math.floor(step / 4)] = state.price_curve_episode[Math.floor(step / 4)];
       priceChart.data.datasets[1].data = marker;
       priceChart.update('none');
     }
     // ── Carbon curve ──
+    if (state.carbon_curve_episode && state.carbon_curve_episode.length === CURVE_POINTS) {
+      carbonChart.data.labels = Array.from({ length: CURVE_POINTS }, (_, i) => `${i}:00`);
       carbonChart.data.datasets[0].data = state.carbon_curve_episode;
       carbonChart.update('none');
     }

dashboard/static/index.html CHANGED Viewed

@@ -514,7 +514,7 @@
   <!-- Row 1: Price curve + Temperature + Controls -->
   <div class="card col-8">
-    <div class="card-title"><span class="icon">💰</span> Electricity Price Curve (72h)</div>
     <div class="chart-wrap">
       <canvas id="chart-price"></canvas>
     </div>
@@ -587,7 +587,7 @@
   </div>
   <div class="card col-6">
-    <div class="card-title"><span class="icon">🌍</span> Carbon Intensity Curve (72h)</div>
     <div class="chart-wrap">
       <canvas id="chart-carbon"></canvas>
     </div>

   <!-- Row 1: Price curve + Temperature + Controls -->
   <div class="card col-8">
+    <div class="card-title"><span class="icon">💰</span> Electricity Price Curve (24h)</div>
     <div class="chart-wrap">
       <canvas id="chart-price"></canvas>
     </div>
   </div>
   <div class="card col-6">
+    <div class="card-title"><span class="icon">🌍</span> Carbon Intensity Curve (24h)</div>
     <div class="chart-wrap">
       <canvas id="chart-carbon"></canvas>
     </div>

env/environment.go CHANGED Viewed

@@ -11,7 +11,7 @@ import (
 )
 const (
-	EpisodeSteps     = 288    // 72 hours × 15-min intervals
 	StepDurationHrs  = 0.25  // each step = 15 minutes = 0.25 h
 	MaxBuildings     = 3
 	DefaultSetpoint  = 21.0  // °C comfortable indoor temp
@@ -206,6 +206,7 @@ func (e *Environment) GetState() StateResponse {
 			OutdoorTemperature:  b.OutdoorTemperature,
 			SetpointTemperature: b.SetpointTemperature,
 			BaselineCost:        b.BaselineCost,
 			CumulativeCarbon:    b.CumulativeCarbon,
 			Jobs:                b.Jobs,
 		}
@@ -276,6 +277,7 @@ func (e *Environment) newBuildingState(id int) *BuildingState {
 		OutdoorTemperature:  outdoorTemp,
 		PrevHVACLevel:       0.5,
 		BaselineCost:        0.0,
 		SetpointTemperature: DefaultSetpoint,
 		MaxHVACPower:        MaxHVACPowerKW,
 		MaxStorageCapacity:  MaxStorageKWh,
@@ -299,8 +301,12 @@ func (e *Environment) generateBatchJobs() []BatchJob {
 	jobs := make([]BatchJob, numJobs)
 	for i := range jobs {
-		// Deadline spread across episode, ensuring feasibility
-		deadline := 20 + e.rng.Intn(60)
 		jobs[i] = BatchJob{
 			ID:           i + 1,
 			DeadlineSlot: deadline,
@@ -438,9 +444,10 @@ func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) S
 	// Baseline (always-on at 70% HVAC, no storage/shedding)
 	baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand
 	baselineEnergy := baselineKW * StepDurationHrs
-	b.BaselineCost += baselineEnergy * b.CurrentPrice
-	// ----- Reward computation -----
 	rc := ComputeReward(ComputeRewardInput{
 		B:               b,
 		Act:             act,
@@ -479,6 +486,9 @@ func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) S
 		}
 	}
 	// Record history
 	if idx < len(e.TempHistory) {
 		e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature)

 )
 const (
+	EpisodeSteps     = 96     // 24 hours × 15-min intervals (96 × 0.25h = 24h)
 	StepDurationHrs  = 0.25  // each step = 15 minutes = 0.25 h
 	MaxBuildings     = 3
 	DefaultSetpoint  = 21.0  // °C comfortable indoor temp
 			OutdoorTemperature:  b.OutdoorTemperature,
 			SetpointTemperature: b.SetpointTemperature,
 			BaselineCost:        b.BaselineCost,
+			BaselineCarbon:      b.BaselineCarbon,
 			CumulativeCarbon:    b.CumulativeCarbon,
 			Jobs:                b.Jobs,
 		}
 		OutdoorTemperature:  outdoorTemp,
 		PrevHVACLevel:       0.5,
 		BaselineCost:        0.0,
+		BaselineCarbon:      0.0,
 		SetpointTemperature: DefaultSetpoint,
 		MaxHVACPower:        MaxHVACPowerKW,
 		MaxStorageCapacity:  MaxStorageKWh,
 	jobs := make([]BatchJob, numJobs)
 	for i := range jobs {
+		// Deadline spread across episode (leave slack at end for duration)
+		span := EpisodeSteps - 12
+		if span < 8 {
+			span = 8
+		}
+		deadline := 4 + e.rng.Intn(span)
 		jobs[i] = BatchJob{
 			ID:           i + 1,
 			DeadlineSlot: deadline,
 	// Baseline (always-on at 70% HVAC, no storage/shedding)
 	baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand
 	baselineEnergy := baselineKW * StepDurationHrs
+		b.BaselineCost += baselineEnergy * b.CurrentPrice
+		b.BaselineCarbon += baselineEnergy * b.CarbonIntensity
+		// ----- Reward computation -----
 	rc := ComputeReward(ComputeRewardInput{
 		B:               b,
 		Act:             act,
 		}
 	}
+	// Per-building step index matches global timestep for this transition (0 .. EpisodeSteps-1)
+	b.Step = s
 	// Record history
 	if idx < len(e.TempHistory) {
 		e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature)

env/models.go CHANGED Viewed

@@ -27,7 +27,7 @@ type BuildingState struct {
 	// Temporal
 	HourOfDay            int     `json:"hour_of_day"`            // 0–23
-	Step                 int     `json:"step"`                   // 0–95 within episode
 	// Batch job queue: pending deadlines (raw slots)
 	BatchQueue           []int   `json:"batch_queue"`            // deadline slots of pending jobs
@@ -41,6 +41,7 @@ type BuildingState struct {
 	OutdoorTemperature   float64    `json:"-"` // °C for weather perturbation
 	PrevHVACLevel        float64    `json:"-"` // for stability penalty
 	BaselineCost         float64    `json:"-"` // always-on policy running cost
 	SetpointTemperature  float64    `json:"-"` // target indoor temp (°C)
 	MaxHVACPower         float64    `json:"-"` // kW
 	MaxStorageCapacity   float64    `json:"-"` // kWh
@@ -139,6 +140,7 @@ type BuildingStatePublic struct {
 	OutdoorTemperature  float64    `json:"outdoor_temperature"`
 	SetpointTemperature float64    `json:"setpoint_temperature"`
 	BaselineCost        float64    `json:"baseline_cost"`
 	CumulativeCarbon    float64    `json:"cumulative_carbon"`
 	Jobs                []BatchJob `json:"jobs"`
 	// History for chart rendering

 	// Temporal
 	HourOfDay            int     `json:"hour_of_day"`            // 0–23
+	Step                 int     `json:"step"`                   // 0–95 within a 96-step (24h) episode
 	// Batch job queue: pending deadlines (raw slots)
 	BatchQueue           []int   `json:"batch_queue"`            // deadline slots of pending jobs
 	OutdoorTemperature   float64    `json:"-"` // °C for weather perturbation
 	PrevHVACLevel        float64    `json:"-"` // for stability penalty
 	BaselineCost         float64    `json:"-"` // always-on policy running cost
+	BaselineCarbon       float64    `json:"-"` // baseline policy gCO2 (for grading)
 	SetpointTemperature  float64    `json:"-"` // target indoor temp (°C)
 	MaxHVACPower         float64    `json:"-"` // kW
 	MaxStorageCapacity   float64    `json:"-"` // kWh
 	OutdoorTemperature  float64    `json:"outdoor_temperature"`
 	SetpointTemperature float64    `json:"setpoint_temperature"`
 	BaselineCost        float64    `json:"baseline_cost"`
+	BaselineCarbon      float64    `json:"baseline_carbon"`
 	CumulativeCarbon    float64    `json:"cumulative_carbon"`
 	Jobs                []BatchJob `json:"jobs"`
 	// History for chart rendering

env/rewards.go CHANGED Viewed

@@ -50,7 +50,8 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
 	}
 	// ── 4. Deadline Penalty ──────────────────────────────────────────────────
-	if inp.BatchMissed > 0 {
 		rc.DeadlinePenalty = -float64(inp.BatchMissed) * 1.5
 	}
@@ -115,29 +116,26 @@ func computeGridResponse(stress, shedFraction float64) float64 {
 	return -shedFraction * (0.7 - stress) * 0.3
 }
-// computeArbitrageBonus rewards charging storage during cheap periods and
-// discharging during expensive periods.
 func computeArbitrageBonus(chargeRate, currentPrice float64, curve []float64, step int) float64 {
-	// Compute rolling average of future prices (next 8 steps = 2 hours)
-	lookAhead := 8
-	futureSum := 0.0
 	count := 0
-	for i := step + 1; i <= step+lookAhead && i < len(curve); i++ {
-		futureSum += curve[i]
 		count++
 	}
 	if count == 0 {
 		return 0.0
 	}
-	futureAvg := futureSum / float64(count)
-	// If current price is lower than future avg → charging is smart → reward
-	if chargeRate > 0 && currentPrice < futureAvg {
-		return chargeRate * (futureAvg - currentPrice) * 2.0
 	}
-	// If current price is higher than future avg → discharging is smart → reward
-	if chargeRate < 0 && currentPrice > futureAvg {
-		return math.Abs(chargeRate) * (currentPrice - futureAvg) * 2.0
 	}
 	return 0.0
 }

 	}
 	// ── 4. Deadline Penalty ──────────────────────────────────────────────────
+	// Task 1 is cost-only; batch jobs are not part of the objective.
+	if inp.BatchMissed > 0 && inp.TaskID >= 2 {
 		rc.DeadlinePenalty = -float64(inp.BatchMissed) * 1.5
 	}
 	return -shedFraction * (0.7 - stress) * 0.3
 }
+// computeArbitrageBonus rewards storage arbitrage relative to recent prices:
+// charging (chargeRate > 0) while currentPrice is below the trailing average,
+// or discharging (chargeRate < 0) while it is above. The average covers up to
+// lookBack entries of curve immediately before index step and never reads
+// curve[step] or later, so the bonus uses past prices only. It returns 0 when
+// there is no usable history (step <= 0 or an empty curve) and when the action
+// does not match the price signal.
 func computeArbitrageBonus(chargeRate, currentPrice float64, curve []float64, step int) float64 {
+	const lookBack = 8
+	// Clamp the window to valid indices. Starting the loop at a negative index
+	// guarded by an `i >= 0` condition exits before the first iteration, which
+	// would drop the bonus for every step smaller than lookBack.
+	start := step - lookBack
+	if start < 0 {
+		start = 0
+	}
+	// Never index past the end of the curve, even if step exceeds its length.
+	end := step
+	if end > len(curve) {
+		end = len(curve)
+	}
+	pastSum := 0.0
 	count := 0
+	for i := start; i < end; i++ {
+		pastSum += curve[i]
 		count++
 	}
 	if count == 0 {
 		return 0.0
 	}
+	pastAvg := pastSum / float64(count)
+	if chargeRate > 0 && currentPrice < pastAvg {
+		return chargeRate * (pastAvg - currentPrice) * 2.0
 	}
+	if chargeRate < 0 && currentPrice > pastAvg {
+		return math.Abs(chargeRate) * (currentPrice - pastAvg) * 2.0
 	}
 	return 0.0
 }

env/tasks.go CHANGED Viewed

@@ -34,7 +34,7 @@ func AllTasks() []TaskConfig {
 			Name:        "Full Demand-Response with Batch Scheduling",
 			Description: "Minimize cost, maintain temperature, respond to grid stress events, schedule all batch jobs before their deadlines, and minimize carbon emissions.",
 			Difficulty:  "hard",
-			Weights:     map[string]float64{"cost": 0.35, "temperature": 0.25, "grid_response": 0.25, "batch_deadline": 0.15},
 		},
 	}
 }
@@ -187,17 +187,31 @@ func gradeTask3(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
 		batchScore = float64(completedOnTime) / float64(totalJobs)
 	}
 	grade.SubScores["cost"] = costScore
 	grade.SubScores["temperature"] = tempScore
 	grade.SubScores["grid_response"] = gridScore
 	grade.SubScores["batch_deadline"] = batchScore
-	// Weighted composite score
-	grade.Score = costScore*0.35 + tempScore*0.25 + gridScore*0.25 + batchScore*0.15
 	grade.Details["grid_stress_steps"] = gridStressSteps
 	grade.Details["grid_response_steps"] = gridResponseSteps
 	grade.Details["total_jobs"] = totalJobs
 	grade.Details["completed_on_time"] = completedOnTime
 	return grade
 }

 			Name:        "Full Demand-Response with Batch Scheduling",
 			Description: "Minimize cost, maintain temperature, respond to grid stress events, schedule all batch jobs before their deadlines, and minimize carbon emissions.",
 			Difficulty:  "hard",
+			Weights:     map[string]float64{"cost": 0.28, "temperature": 0.20, "grid_response": 0.20, "batch_deadline": 0.12, "carbon": 0.20},
 		},
 	}
 }
 		batchScore = float64(completedOnTime) / float64(totalJobs)
 	}
+	// Carbon sub-score vs baseline always-on policy (same spirit as cost)
+	agentCarbon := 0.0
+	baselineCarbon := 0.0
+	for _, b := range inp.Buildings {
+		agentCarbon += b.CumulativeCarbon
+		baselineCarbon += b.BaselineCarbon
+	}
+	carbonScore := 0.0
+	if baselineCarbon > 0 {
+		carbonScore = math.Max(0, 1.0-agentCarbon/baselineCarbon)
+	}
 	grade.SubScores["cost"] = costScore
 	grade.SubScores["temperature"] = tempScore
 	grade.SubScores["grid_response"] = gridScore
 	grade.SubScores["batch_deadline"] = batchScore
+	grade.SubScores["carbon"] = math.Min(1.0, carbonScore)
+	grade.Score = costScore*0.28 + tempScore*0.20 + gridScore*0.20 + batchScore*0.12 + carbonScore*0.20
 	grade.Details["grid_stress_steps"] = gridStressSteps
 	grade.Details["grid_response_steps"] = gridResponseSteps
 	grade.Details["total_jobs"] = totalJobs
 	grade.Details["completed_on_time"] = completedOnTime
+	grade.Details["agent_carbon"] = agentCarbon
+	grade.Details["baseline_carbon"] = baselineCarbon
 	return grade
 }

inference.py ADDED Viewed

	@@ -0,0 +1,11 @@

+"""
+Hackathon entrypoint: run from repo root with:
+  python inference.py
+Delegates to python/inference.py (single source of truth).
+"""
+import runpy
+from pathlib import Path
+if __name__ == "__main__":
+    # Locate the real script relative to this file (not the working directory),
+    # then execute it under the name "__main__" so its own entry-point guard runs.
+    repo_root = Path(__file__).resolve().parent
+    target_script = repo_root / "python" / "inference.py"
+    runpy.run_path(str(target_script), run_name="__main__")

main.go CHANGED Viewed

@@ -135,6 +135,7 @@ func newServer() *Server {
 func (s *Server) routes() *http.ServeMux {
 	mux := http.NewServeMux()
 	mux.HandleFunc("/health", s.handleHealth)
 	mux.HandleFunc("/reset", s.handleReset)
 	mux.HandleFunc("/step", s.handleStep)
 	mux.HandleFunc("/state", s.handleState)
@@ -153,6 +154,12 @@ func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
 	json.NewEncoder(w).Encode(map[string]string{"status": "ok", "version": "1.0.0"})
 }
 // ── /reset ───────────────────────────────────────────────────────────────────
 func (s *Server) handleReset(w http.ResponseWriter, r *http.Request) {
@@ -280,9 +287,14 @@ func (s *Server) handleGrade(w http.ResponseWriter, r *http.Request) {
 	// Build building states from public state
 	buildings := make([]*env.BuildingState, len(state.Buildings))
 	for i, pub := range state.Buildings {
 		buildings[i] = &env.BuildingState{
-			CumulativeCost: pub.CumulativeCost,
-			BaselineCost:   pub.BaselineCost,
 		}
 	}
@@ -342,7 +354,7 @@ func main() {
 	srv.envMgr.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
 	log.Printf("GridMind-RL environment server starting on :%s", port)
-	log.Printf("Endpoints: GET /health /state /replay /grade /tasks /metrics | POST /reset /step")
 	mux := withCORS(withLogging(srv.routes()))
 	if err := http.ListenAndServe(":"+port, mux); err != nil {

 func (s *Server) routes() *http.ServeMux {
 	mux := http.NewServeMux()
 	mux.HandleFunc("/health", s.handleHealth)
+	mux.HandleFunc("/ping", s.handlePing)
 	mux.HandleFunc("/reset", s.handleReset)
 	mux.HandleFunc("/step", s.handleStep)
 	mux.HandleFunc("/state", s.handleState)
 	json.NewEncoder(w).Encode(map[string]string{"status": "ok", "version": "1.0.0"})
 }
+// handlePing is a minimal liveness probe: it answers with HTTP 200 and the
+// JSON body {"status":"ok"} without inspecting the request.
+func (s *Server) handlePing(w http.ResponseWriter, r *http.Request) {
+	payload := map[string]string{"status": "ok"}
+	headers := w.Header()
+	headers.Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	// Best-effort write: an encode failure is not reported to the client.
+	_ = json.NewEncoder(w).Encode(payload)
+}
 // ── /reset ───────────────────────────────────────────────────────────────────
 func (s *Server) handleReset(w http.ResponseWriter, r *http.Request) {
 	// Build building states from public state
 	buildings := make([]*env.BuildingState, len(state.Buildings))
 	for i, pub := range state.Buildings {
+		jobsCopy := make([]env.BatchJob, len(pub.Jobs))
+		copy(jobsCopy, pub.Jobs)
 		buildings[i] = &env.BuildingState{
+			CumulativeCost:   pub.CumulativeCost,
+			BaselineCost:     pub.BaselineCost,
+			CumulativeCarbon: pub.CumulativeCarbon,
+			BaselineCarbon:   pub.BaselineCarbon,
+			Jobs:             jobsCopy,
 		}
 	}
 	srv.envMgr.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
 	log.Printf("GridMind-RL environment server starting on :%s", port)
+	log.Printf("Endpoints: GET /health /ping /state /replay /grade /tasks /metrics | POST /reset /step")
 	mux := withCORS(withLogging(srv.routes()))
 	if err := http.ListenAndServe(":"+port, mux); err != nil {

openenv.yaml CHANGED Viewed

@@ -62,7 +62,7 @@ schemas:
         type: integer
         minimum: 0
         maximum: 95
-        description: Current episode step (15-min intervals)
       building_id:
         type: integer
         description: Building identifier for multi-building federation
@@ -103,7 +103,7 @@ schemas:
   reward:
     type: number
-    description: Dense multi-component reward combining cost savings, temperature constraints, grid response, and deadlines.
 tasks:
   - id: 1
@@ -123,6 +123,9 @@ endpoints:
   health:
     path: /health
     method: GET
   reset:
     path: /reset
     method: POST

         type: integer
         minimum: 0
         maximum: 95
+        description: Current episode step index (0–95); 96 steps = 24 hours at 15-min resolution
       building_id:
         type: integer
         description: Building identifier for multi-building federation
   reward:
     type: number
+    description: Dense multi-component reward (cost, optional temperature/grid/carbon/deadlines) task-gated to match objectives.
 tasks:
   - id: 1
   health:
     path: /health
     method: GET
+  ping:
+    path: /ping
+    method: GET
   reset:
     path: /reset
     method: POST

python/inference.py CHANGED Viewed

@@ -8,14 +8,15 @@ Usage:
     export API_BASE_URL=https://router.huggingface.co/v1
     export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
     export HF_TOKEN=hf_xxxx
-    python python/inference.py [--episodes 3] [--env-url http://localhost:7860]
 """
 import argparse
 import json
 import os
-import random
-import re
 import sys
 import time
 from typing import Any
@@ -29,9 +30,12 @@ ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
 HF_TOKEN = os.getenv("HF_TOKEN", "")
-DEFAULT_EPISODES = 3
-DEFAULT_SEED_BASE = 1000  # episodes use seed BASE+episode_idx for reproducibility
 MAX_RETRIES = 3
 SYSPROMPT = """You are GridMind, an expert industrial energy management controller.
 You control a building's HVAC, thermal storage, batch job scheduling, and load shedding.
@@ -39,9 +43,9 @@ Your goal is to minimize electricity costs while maintaining comfort and meeting
 Always respond with a single valid JSON object matching the action schema. No explanation needed."""
 TASK_DESCRIPTIONS = {
-    1: "Task 1 (Easy - Cost Minimization): Minimize total energy cost over 24 hours. No temperature constraints. Use cheap off-peak periods and thermal storage arbitrage.",
     2: "Task 2 (Medium - Temperature Management): Minimize cost AND keep indoor temperature within 19-23°C at all times. Balance comfort vs cost.",
-    3: "Task 3 (Hard - Full Demand Response): Minimize cost, maintain temperature, respond to grid stress events by shedding load when grid_stress_signal > 0.7, AND schedule all batch jobs before their deadlines.",
 }
 ACTION_SCHEMA_STR = """{
@@ -53,8 +57,29 @@ ACTION_SCHEMA_STR = """{
 }"""
 # ── Environment client ───────────────────────────────────────────────────────
 class GridMindEnvClient:
     """Simple HTTP client for the GridMind-RL Go environment server."""
@@ -93,6 +118,7 @@ class GridMindEnvClient:
 # ── LLM agent ───────────────────────────────────────────────────────────────
 class LLMAgent:
     """OpenAI-compatible LLM agent that chooses actions given observations."""
@@ -119,7 +145,7 @@ Current observation:
 - Hour of day: {obs.get('hour_of_day', 12)} (0=midnight, peak prices 8-12 and 17-21)
 - Pending batch job deadlines: {obs.get('batch_queue', [])}
 - Cumulative cost so far: ${obs.get('cumulative_cost', 0):.4f}
-- Episode step: {obs.get('step', 0)}/95
 Strategy hints:
 - Charge thermal storage when price < $0.08/kWh, discharge when price > $0.15/kWh
@@ -139,37 +165,20 @@ Respond with ONLY a JSON action:
                         {"role": "user", "content": prompt},
                     ],
                     max_tokens=128,
-                    temperature=0.1,
                 )
                 content = completion.choices[0].message.content.strip()
-                return self._parse_action(content)
             except Exception as e:
                 print(f"  [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {e}")
                 time.sleep(1)
-        # Fallback: rule-based heuristic
         return self._heuristic_action(obs)
-    def _parse_action(self, content: str) -> dict:
-        """Extract and validate JSON action from LLM response."""
-        # Try direct JSON parse
-        try:
-            action = json.loads(content)
-            return self._clamp_action(action)
-        except json.JSONDecodeError:
-            pass
-        # Try to extract JSON block from text
-        match = re.search(r"\{[^}]+\}", content, re.DOTALL)
-        if match:
-            try:
-                action = json.loads(match.group())
-                return self._clamp_action(action)
-            except json.JSONDecodeError:
-                pass
-        # Fallback
-        print(f"  [WARN] could not parse LLM response: {content[:100]}")
-        return self._default_action()
     def _clamp_action(self, action: dict) -> dict:
         return {
             "hvac_power_level": max(0.0, min(1.0, float(action.get("hvac_power_level", 0.5)))),
@@ -180,38 +189,33 @@ Respond with ONLY a JSON action:
         }
     def _heuristic_action(self, obs: dict) -> dict:
-        """Simple rule-based heuristic when LLM is unavailable."""
         price = obs.get("current_price", 0.10)
         stress = obs.get("grid_stress_signal", 0.0)
         temp = obs.get("indoor_temperature", 21.0)
         storage = obs.get("thermal_storage_level", 0.5)
         queue = obs.get("batch_queue", [])
-        # HVAC: reduce during peak
         hvac = 0.7 if price < 0.08 else (0.3 if price > 0.15 else 0.5)
-        # Adjust for temperature
         if temp > 23.0:
             hvac = max(hvac, 0.8)
         elif temp < 19.0:
             hvac = min(hvac, 0.2)
-        # Storage arbitrage
         charge = 0.0
         if price < 0.07 and storage < 0.8:
             charge = 0.5
         elif price > 0.15 and storage > 0.3:
             charge = -0.5
-        # Load shedding
         shed = 0.0
         if stress > 0.7:
             shed = 0.4
         elif stress > 0.5:
             shed = 0.2
-        # Batch jobs: schedule soon if deadline approaching
         slot = 2
-        if queue and min(queue) < 10:
             slot = 0
         return {
@@ -223,48 +227,84 @@ Respond with ONLY a JSON action:
         }
     def _default_action(self) -> dict:
-        return {"hvac_power_level": 0.5, "thermal_charge_rate": 0.0,
-                "batch_job_slot": 0, "load_shed_fraction": 0.0, "building_id": 0}
 # ── Episode runner ───────────────────────────────────────────────────────────
-def run_episode(env_client: GridMindEnvClient, agent: LLMAgent,
-                task_id: int, seed: int, verbose: bool = False) -> dict[str, Any]:
-    """Run a single episode and return grade + metadata."""
     reset_resp = env_client.reset(task_id=task_id, seed=seed)
     obs = reset_resp["observations"][0]
     total_reward = 0.0
     total_steps = 0
     start_time = time.time()
-    step_resp = {}
-    _step = 0
     while not step_resp.get("done", False):
-        action = agent.choose_action(obs, task_id)
-        step_resp = env_client.step(action)
         if step_resp is None or "observation" not in step_resp:
-            print(f"  [WARN] step {_step}: server returned invalid response, skipping step")
-            _step += 1
             break
         obs = step_resp["observation"]
-        total_reward += step_resp["reward"]
         total_steps += 1
-        if verbose and _step % 16 == 0:
-            print(f"    step={_step:02d} price=${obs['current_price']:.3f} "
-                  f"temp={obs['indoor_temperature']:.1f}°C "
-                  f"stress={obs['grid_stress_signal']:.2f} "
-                  f"cost=${obs['cumulative_cost']:.2f} "
-                  f"reward={step_resp['reward']:.3f}")
-        _step += 1
     elapsed = time.time() - start_time
     grade = env_client.grade()
     return {
         "task_id": task_id,
         "seed": seed,
@@ -279,12 +319,32 @@ def run_episode(env_client: GridMindEnvClient, agent: LLMAgent,
 # ── Main ─────────────────────────────────────────────────────────────────────
-def main():
     parser = argparse.ArgumentParser(description="GridMind-RL baseline inference")
     parser.add_argument("--episodes", type=int, default=DEFAULT_EPISODES)
     parser.add_argument("--env-url", type=str, default=ENV_URL)
     parser.add_argument("--verbose", action="store_true")
     parser.add_argument("--output", type=str, default="baseline_scores.json")
     args = parser.parse_args()
     print("=" * 60)
@@ -293,48 +353,59 @@ def main():
     print(f"  API:   {API_BASE_URL}")
     print(f"  Env:   {args.env_url}")
     print(f"  Episodes per task: {args.episodes}")
     print("=" * 60)
     env_client = GridMindEnvClient(base_url=args.env_url)
-    # Wait for env server to be healthy
     print("\nWaiting for environment server...")
     for attempt in range(30):
         if env_client.health():
-            print("  ✓ Environment server is healthy")
             break
         time.sleep(2)
         if attempt == 29:
-            print("  ✗ Environment server not reachable. Exiting.")
             sys.exit(1)
     agent = LLMAgent()
-    all_results = []
     for task_id in [1, 2, 3]:
-        print(f"\n── Task {task_id}: {TASK_DESCRIPTIONS[task_id][:60]}...")
-        task_scores = []
         for ep in range(args.episodes):
             seed = DEFAULT_SEED_BASE + task_id * 100 + ep
             print(f"  Episode {ep+1}/{args.episodes} (seed={seed})")
-            result = run_episode(env_client, agent, task_id=task_id, seed=seed, verbose=args.verbose)
-            task_scores.append(result["score"])
             all_results.append(result)
-            print(f"    → score={result['score']:.4f} | reward={result['total_reward']:.3f} | {result['elapsed_sec']:.1f}s")
         avg_score = sum(task_scores) / len(task_scores)
         print(f"  Task {task_id} average score: {avg_score:.4f}")
-    # Score summary table
     print("\n" + "=" * 60)
     print("BASELINE SCORES SUMMARY")
     print("=" * 60)
     print(f"{'Task':<10} {'Model':<30} {'Score':<10} {'Episodes':<10}")
     print("-" * 60)
-    task_avgs = {}
     for task_id in [1, 2, 3]:
-        scores = [r["score"] for r in all_results if r["task_id"] == task_id]
         avg = sum(scores) / len(scores) if scores else 0.0
         task_avgs[task_id] = avg
         print(f"Task {task_id:<6} {MODEL_NAME:<30} {avg:<10.4f} {len(scores)}")
@@ -343,19 +414,21 @@ def main():
     overall = sum(task_avgs.values()) / len(task_avgs)
     print(f"{'Overall':<10} {'':<30} {overall:<10.4f}")
-    # Save results
     output = {
         "model": MODEL_NAME,
         "api_base": API_BASE_URL,
         "episodes_per_task": args.episodes,
         "seed_base": DEFAULT_SEED_BASE,
         "task_averages": {str(k): v for k, v in task_avgs.items()},
         "overall_average": overall,
         "all_results": all_results,
     }
-    with open(args.output, "w") as f:
         json.dump(output, f, indent=2)
-    print(f"\n✓ Results saved to {args.output}")
 if __name__ == "__main__":

     export API_BASE_URL=https://router.huggingface.co/v1
     export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
     export HF_TOKEN=hf_xxxx
+    python inference.py
+    # or: python python/inference.py [--episodes 1] [--llm-every 4] [--fast-mode]
 """
+from __future__ import annotations
 import argparse
 import json
 import os
 import sys
 import time
 from typing import Any
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
 HF_TOKEN = os.getenv("HF_TOKEN", "")
+DEFAULT_EPISODES = 1
+DEFAULT_SEED_BASE = 1000
 MAX_RETRIES = 3
+# 96 steps × 15 min = 24 h (must match env.EpisodeSteps)
+EPISODE_STEPS = 96
+LAST_STEP_INDEX = EPISODE_STEPS - 1
 SYSPROMPT = """You are GridMind, an expert industrial energy management controller.
 You control a building's HVAC, thermal storage, batch job scheduling, and load shedding.
 Always respond with a single valid JSON object matching the action schema. No explanation needed."""
 TASK_DESCRIPTIONS = {
+    1: "Task 1 (Easy - Cost Minimization): Minimize total energy cost over 24 hours. No temperature or batch constraints. Use cheap off-peak periods and thermal storage.",
     2: "Task 2 (Medium - Temperature Management): Minimize cost AND keep indoor temperature within 19-23°C at all times. Balance comfort vs cost.",
+    3: "Task 3 (Hard - Full Demand Response): Minimize cost, maintain temperature, respond to grid stress (shed when grid_stress_signal > 0.7), schedule batch jobs, minimize carbon.",
 }
 ACTION_SCHEMA_STR = """{
 }"""
+def extract_json_object(text: str) -> dict[str, Any] | None:
+    """Parse first balanced {...} JSON object from text (handles nested braces)."""
+    start = text.find("{")
+    if start < 0:
+        return None
+    depth = 0
+    for i in range(start, len(text)):
+        c = text[i]
+        if c == "{":
+            depth += 1
+        elif c == "}":
+            depth -= 1
+            if depth == 0:
+                try:
+                    return json.loads(text[start : i + 1])
+                except json.JSONDecodeError:
+                    return None
+    return None
 # ── Environment client ───────────────────────────────────────────────────────
 class GridMindEnvClient:
     """Simple HTTP client for the GridMind-RL Go environment server."""
 # ── LLM agent ───────────────────────────────────────────────────────────────
 class LLMAgent:
     """OpenAI-compatible LLM agent that chooses actions given observations."""
 - Hour of day: {obs.get('hour_of_day', 12)} (0=midnight, peak prices 8-12 and 17-21)
 - Pending batch job deadlines: {obs.get('batch_queue', [])}
 - Cumulative cost so far: ${obs.get('cumulative_cost', 0):.4f}
+- Episode step: {obs.get('step', 0)}/{LAST_STEP_INDEX}
 Strategy hints:
 - Charge thermal storage when price < $0.08/kWh, discharge when price > $0.15/kWh
                         {"role": "user", "content": prompt},
                     ],
                     max_tokens=128,
+                    temperature=0.0,
                 )
                 content = completion.choices[0].message.content.strip()
+                parsed = extract_json_object(content)
+                if parsed is not None:
+                    return self._clamp_action(parsed)
+                action = json.loads(content)
+                return self._clamp_action(action)
             except Exception as e:
                 print(f"  [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {e}")
                 time.sleep(1)
         return self._heuristic_action(obs)
     def _clamp_action(self, action: dict) -> dict:
         return {
             "hvac_power_level": max(0.0, min(1.0, float(action.get("hvac_power_level", 0.5)))),
         }
     def _heuristic_action(self, obs: dict) -> dict:
+        """Rule-based policy (deterministic given obs)."""
         price = obs.get("current_price", 0.10)
         stress = obs.get("grid_stress_signal", 0.0)
         temp = obs.get("indoor_temperature", 21.0)
         storage = obs.get("thermal_storage_level", 0.5)
         queue = obs.get("batch_queue", [])
         hvac = 0.7 if price < 0.08 else (0.3 if price > 0.15 else 0.5)
         if temp > 23.0:
             hvac = max(hvac, 0.8)
         elif temp < 19.0:
             hvac = min(hvac, 0.2)
         charge = 0.0
         if price < 0.07 and storage < 0.8:
             charge = 0.5
         elif price > 0.15 and storage > 0.3:
             charge = -0.5
         shed = 0.0
         if stress > 0.7:
             shed = 0.4
         elif stress > 0.5:
             shed = 0.2
         slot = 2
+        if queue and min(queue) < 8:
             slot = 0
         return {
         }
     def _default_action(self) -> dict:
+        return {
+            "hvac_power_level": 0.5,
+            "thermal_charge_rate": 0.0,
+            "batch_job_slot": 0,
+            "load_shed_fraction": 0.0,
+            "building_id": 0,
+        }
 # ── Episode runner ───────────────────────────────────────────────────────────
+def run_episode(
+    env_client: GridMindEnvClient,
+    agent: LLMAgent,
+    task_id: int,
+    seed: int,
+    *,
+    fast_mode: bool,
+    llm_every: int,
+    max_steps: int | None,
+    verbose: bool = False,
+) -> dict[str, Any]:
+    """Run a single episode and return grade + metadata. Prints [START], [STEPn], [END]."""
     reset_resp = env_client.reset(task_id=task_id, seed=seed)
     obs = reset_resp["observations"][0]
+    print("[START]", flush=True)
     total_reward = 0.0
     total_steps = 0
     start_time = time.time()
+    step_resp: dict[str, Any] = {}
+    step_limit = EPISODE_STEPS if max_steps is None else min(max_steps, EPISODE_STEPS)
+    llm_reuse_remaining = 0
+    cached_action = agent._default_action()
     while not step_resp.get("done", False):
+        if total_steps >= step_limit:
+            break
+        if fast_mode:
+            action = agent._heuristic_action(obs)
+        else:
+            if llm_reuse_remaining <= 0:
+                cached_action = agent.choose_action(obs, task_id)
+                llm_reuse_remaining = max(1, llm_every)
+            action = cached_action
+        step_resp = env_client.step(action)
         if step_resp is None or "observation" not in step_resp:
+            print(f"  [WARN] step {total_steps}: invalid step response", flush=True)
             break
+        if not fast_mode:
+            llm_reuse_remaining -= 1
         obs = step_resp["observation"]
+        total_reward += float(step_resp["reward"])
         total_steps += 1
+        print(f"[STEP{total_steps}]", flush=True)
+        if verbose and total_steps % 16 == 0:
+            print(
+                f"    step={total_steps:02d} price=${obs['current_price']:.3f} "
+                f"temp={obs['indoor_temperature']:.1f}°C "
+                f"stress={obs['grid_stress_signal']:.2f} "
+                f"cost=${obs['cumulative_cost']:.2f} "
+                f"reward={step_resp['reward']:.3f}",
+                flush=True,
+            )
     elapsed = time.time() - start_time
     grade = env_client.grade()
+    print("[END]", flush=True)
     return {
         "task_id": task_id,
         "seed": seed,
 # ── Main ─────────────────────────────────────────────────────────────────────
+def main() -> None:
     parser = argparse.ArgumentParser(description="GridMind-RL baseline inference")
     parser.add_argument("--episodes", type=int, default=DEFAULT_EPISODES)
     parser.add_argument("--env-url", type=str, default=ENV_URL)
     parser.add_argument("--verbose", action="store_true")
     parser.add_argument("--output", type=str, default="baseline_scores.json")
+    parser.add_argument(
+        "--fast-mode",
+        action="store_true",
+        help="Heuristic policy only (no LLM calls; fastest, fully reproducible).",
+    )
+    parser.add_argument(
+        "--llm-every",
+        type=int,
+        default=4,
+        metavar="N",
+        help="Reuse the same LLM action for N consecutive steps (default: 4).",
+    )
+    parser.add_argument(
+        "--max-steps",
+        type=int,
+        default=None,
+        metavar="N",
+        help="Stop after N steps (default: full episode). Grade uses partial episode.",
+    )
     args = parser.parse_args()
     print("=" * 60)
     print(f"  API:   {API_BASE_URL}")
     print(f"  Env:   {args.env_url}")
     print(f"  Episodes per task: {args.episodes}")
+    print(f"  Fast mode: {args.fast_mode} | LLM every: {args.llm_every} steps")
     print("=" * 60)
     env_client = GridMindEnvClient(base_url=args.env_url)
     print("\nWaiting for environment server...")
     for attempt in range(30):
         if env_client.health():
+            print("  [OK] Environment server is healthy")
             break
         time.sleep(2)
         if attempt == 29:
+            print("  [FAIL] Environment server not reachable. Exiting.")
             sys.exit(1)
     agent = LLMAgent()
+    all_results: list[dict[str, Any]] = []
     for task_id in [1, 2, 3]:
+        print(f"\n-- Task {task_id}: {TASK_DESCRIPTIONS[task_id][:60]}...")
+        task_scores: list[float] = []
         for ep in range(args.episodes):
             seed = DEFAULT_SEED_BASE + task_id * 100 + ep
             print(f"  Episode {ep+1}/{args.episodes} (seed={seed})")
+            result = run_episode(
+                env_client,
+                agent,
+                task_id=task_id,
+                seed=seed,
+                fast_mode=args.fast_mode,
+                llm_every=args.llm_every,
+                max_steps=args.max_steps,
+                verbose=args.verbose,
+            )
+            task_scores.append(float(result["score"]))
             all_results.append(result)
+            print(
+                f"    → score={result['score']:.4f} | reward={result['total_reward']:.3f} | "
+                f"{result['elapsed_sec']:.1f}s | steps={result['total_steps']}"
+            )
         avg_score = sum(task_scores) / len(task_scores)
         print(f"  Task {task_id} average score: {avg_score:.4f}")
     print("\n" + "=" * 60)
     print("BASELINE SCORES SUMMARY")
     print("=" * 60)
     print(f"{'Task':<10} {'Model':<30} {'Score':<10} {'Episodes':<10}")
     print("-" * 60)
+    task_avgs: dict[int, float] = {}
     for task_id in [1, 2, 3]:
+        scores = [float(r["score"]) for r in all_results if r["task_id"] == task_id]
         avg = sum(scores) / len(scores) if scores else 0.0
         task_avgs[task_id] = avg
         print(f"Task {task_id:<6} {MODEL_NAME:<30} {avg:<10.4f} {len(scores)}")
     overall = sum(task_avgs.values()) / len(task_avgs)
     print(f"{'Overall':<10} {'':<30} {overall:<10.4f}")
     output = {
         "model": MODEL_NAME,
         "api_base": API_BASE_URL,
         "episodes_per_task": args.episodes,
         "seed_base": DEFAULT_SEED_BASE,
+        "fast_mode": args.fast_mode,
+        "llm_every": args.llm_every,
+        "max_steps": args.max_steps,
         "task_averages": {str(k): v for k, v in task_avgs.items()},
         "overall_average": overall,
         "all_results": all_results,
     }
+    with open(args.output, "w", encoding="utf-8") as f:
         json.dump(output, f, indent=2)
+    print(f"\n[OK] Results saved to {args.output}")
 if __name__ == "__main__":

python/models.py CHANGED Viewed

@@ -29,7 +29,7 @@ class ObservationModel(BaseModel):
     hour_of_day: int = Field(..., ge=0, le=23, description="Current hour of day (0–23)")
     batch_queue: List[int] = Field(default_factory=list, description="Deadline slots of pending batch jobs")
     cumulative_cost: float = Field(..., ge=0.0, description="Running energy cost this episode ($)")
-    step: int = Field(..., ge=0, description="Current timestep (0–287)")
     building_id: int = Field(default=0, description="Building index in federation")

     hour_of_day: int = Field(..., ge=0, le=23, description="Current hour of day (0–23)")
     batch_queue: List[int] = Field(default_factory=list, description="Deadline slots of pending batch jobs")
     cumulative_cost: float = Field(..., ge=0.0, description="Running energy cost this episode ($)")
+    step: int = Field(..., ge=0, le=95, description="Current timestep (0–95); 96 steps = 24h")
     building_id: int = Field(default=0, description="Building index in federation")

python/validate.py CHANGED Viewed

@@ -16,16 +16,16 @@ import requests
 ENV_URL = "http://localhost:7860"
-PASS = "✓"
-FAIL = "✗"
-WARN = "⚠"
 def check(label: str, condition: bool, detail: str = "") -> bool:
     icon = PASS if condition else FAIL
     line = f"  {icon} {label}"
     if detail:
-        line += f" — {detail}"
     print(line)
     return condition
@@ -42,20 +42,22 @@ def validate(env_url: str) -> bool:
     base = env_url.rstrip("/")
     results = []
-    print("\n══════════════════════════════════════════")
     print("  GridMind-RL OpenEnv Validation Report")
-    print("══════════════════════════════════════════\n")
-    # ── 1. Health endpoint ──────────────────────────────────────────────────
-    print("1. Health Endpoint")
     try:
         r = get(f"{base}/health")
         results.append(check("GET /health returns 200", r.status_code == 200, f"got {r.status_code}"))
         data = r.json()
         results.append(check("Response has 'status' field", "status" in data))
     except Exception as e:
         results.append(check("GET /health reachable", False, str(e)))
-        print(f"\n  ✗ Cannot reach server at {base}. Is it running?\n")
         return False
     # ── 2. Reset endpoint ───────────────────────────────────────────────────
@@ -122,7 +124,8 @@ def validate(env_url: str) -> bool:
         rc = info.get("reward_components", {})
         rc_fields = ["cost_savings", "temp_constraint", "grid_response",
-                     "deadline_penalty", "efficiency_bonus", "stability_penalty", "total"]
         for f in rc_fields:
             results.append(check(f"reward_components has '{f}'", f in rc))
@@ -139,14 +142,15 @@ def validate(env_url: str) -> bool:
         r = get(f"{base}/state")
         results.append(check("GET /state returns 200", r.status_code == 200))
         state = r.json()
-        state_fields = ["buildings", "price_curve_24h", "carbon_curve_24h",
                         "episode", "step", "task_id", "done", "seed"]
         for f in state_fields:
             results.append(check(f"state has '{f}'", f in state))
-        results.append(check("price_curve_24h has 24 entries",
-                             len(state.get("price_curve_24h", [])) == 24))
-        results.append(check("carbon_curve_24h has 24 entries",
-                             len(state.get("carbon_curve_24h", [])) == 24))
     except Exception as e:
         results.append(check("GET /state succeeds", False, str(e)))
@@ -240,13 +244,13 @@ def validate(env_url: str) -> bool:
     total = len(results)
     pct = 100 * passed // total if total > 0 else 0
-    print(f"\n══════════════════════════════════════════")
     print(f"  Result: {passed}/{total} checks passed ({pct}%)")
     if passed == total:
-        print("  🎉 ALL CHECKS PASSED — Ready for submission!")
     else:
-        print(f"  ⚠  {total - passed} checks failed. Fix errors above.")
-    print("══════════════════════════════════════════\n")
     return passed == total

 ENV_URL = "http://localhost:7860"
+PASS = "[OK]"
+FAIL = "[FAIL]"
+WARN = "[WARN]"
 def check(label: str, condition: bool, detail: str = "") -> bool:
     icon = PASS if condition else FAIL
     line = f"  {icon} {label}"
     if detail:
+        line += f" - {detail}"
     print(line)
     return condition
     base = env_url.rstrip("/")
     results = []
+    print("\n" + "=" * 50)
     print("  GridMind-RL OpenEnv Validation Report")
+    print("=" * 50 + "\n")
+    # ── 1. Health & ping ─────────────────────────────────────────────────────
+    print("1. Health & Ping")
     try:
         r = get(f"{base}/health")
         results.append(check("GET /health returns 200", r.status_code == 200, f"got {r.status_code}"))
         data = r.json()
         results.append(check("Response has 'status' field", "status" in data))
+        rp = get(f"{base}/ping")
+        results.append(check("GET /ping returns 200", rp.status_code == 200, f"got {rp.status_code}"))
     except Exception as e:
         results.append(check("GET /health reachable", False, str(e)))
+        print(f"\n  [FAIL] Cannot reach server at {base}. Is it running?\n")
         return False
     # ── 2. Reset endpoint ───────────────────────────────────────────────────
         rc = info.get("reward_components", {})
         rc_fields = ["cost_savings", "temp_constraint", "grid_response",
+                     "deadline_penalty", "efficiency_bonus", "stability_penalty",
+                     "carbon_reward", "total"]
         for f in rc_fields:
             results.append(check(f"reward_components has '{f}'", f in rc))
         r = get(f"{base}/state")
         results.append(check("GET /state returns 200", r.status_code == 200))
         state = r.json()
+        state_fields = ["buildings", "price_curve_episode", "carbon_curve_episode",
                         "episode", "step", "task_id", "done", "seed"]
         for f in state_fields:
             results.append(check(f"state has '{f}'", f in state))
+        curve_n = 24  # EpisodeSteps/4 (96/4) downsamples to hourly points
+        results.append(check("price_curve_episode has 24 entries",
+                             len(state.get("price_curve_episode", [])) == curve_n))
+        results.append(check("carbon_curve_episode has 24 entries",
+                             len(state.get("carbon_curve_episode", [])) == curve_n))
     except Exception as e:
         results.append(check("GET /state succeeds", False, str(e)))
     total = len(results)
     pct = 100 * passed // total if total > 0 else 0
+    print(f"\n" + "=" * 50)
     print(f"  Result: {passed}/{total} checks passed ({pct}%)")
     if passed == total:
+        print("  ALL CHECKS PASSED - Ready for submission!")
     else:
+        print(f"  {total - passed} checks failed. Fix errors above.")
+    print("=" * 50 + "\n")
     return passed == total

tests/environment_test.go CHANGED Viewed

@@ -55,21 +55,24 @@ func TestStepAdvancesState(t *testing.T) {
 	if state.Step != 1 {
 		t.Errorf("expected step=1 after one step, got %d", state.Step)
 	}
 }
-// TestEpisodeLengthIs288 verifies the episode terminates at step 288.
-func TestEpisodeLengthIs288(t *testing.T) {
 	e := env.NewEnvironment()
 	var seed int64 = 99
 	e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
 	action := []env.ActionModel{{HVACPowerLevel: 0.5}}
 	var lastDone bool
-	for i := 0; i < 288; i++ {
 		_, lastDone = e.Step(action)
 	}
 	if !lastDone {
-		t.Errorf("episode should be done after 288 steps")
 	}
 }
@@ -162,7 +165,7 @@ func TestGraderTask1ScoreRange(t *testing.T) {
 	e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})
 	action := []env.ActionModel{{HVACPowerLevel: 0.3}}
-	for i := 0; i < 288; i++ {
 		e.Step(action)
 	}
@@ -171,9 +174,14 @@ func TestGraderTask1ScoreRange(t *testing.T) {
 	buildings := make([]*env.BuildingState, len(state.Buildings))
 	for i, pub := range state.Buildings {
 		buildings[i] = &env.BuildingState{
-			CumulativeCost: pub.CumulativeCost,
-			BaselineCost:   pub.BaselineCost,
 		}
 	}

 	if state.Step != 1 {
 		t.Errorf("expected step=1 after one step, got %d", state.Step)
 	}
+	if resps[0].Observation.Step != 0 {
+		t.Errorf("expected observation.step=0 after first transition, got %d", resps[0].Observation.Step)
+	}
 }
+// TestEpisodeLengthIs96 verifies the episode terminates after 96 steps (24h).
+func TestEpisodeLengthIs96(t *testing.T) {
 	e := env.NewEnvironment()
 	var seed int64 = 99
 	e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
 	action := []env.ActionModel{{HVACPowerLevel: 0.5}}
 	var lastDone bool
+	for i := 0; i < env.EpisodeSteps; i++ {
 		_, lastDone = e.Step(action)
 	}
 	if !lastDone {
+		t.Errorf("episode should be done after %d steps", env.EpisodeSteps)
 	}
 }
 	e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})
 	action := []env.ActionModel{{HVACPowerLevel: 0.3}}
+	for i := 0; i < env.EpisodeSteps; i++ {
 		e.Step(action)
 	}
 	buildings := make([]*env.BuildingState, len(state.Buildings))
 	for i, pub := range state.Buildings {
+		jobsCopy := make([]env.BatchJob, len(pub.Jobs))
+		copy(jobsCopy, pub.Jobs)
 		buildings[i] = &env.BuildingState{
+			CumulativeCost:   pub.CumulativeCost,
+			BaselineCost:     pub.BaselineCost,
+			CumulativeCarbon: pub.CumulativeCarbon,
+			BaselineCarbon:   pub.BaselineCarbon,
+			Jobs:             jobsCopy,
 		}
 	}

tests/test_graders.py CHANGED Viewed

@@ -135,7 +135,7 @@ class TestTask3:
     def test_has_all_sub_scores(self):
         g = run_full_episode(task_id=3, seed=31)
-        for key in ["cost", "temperature", "grid_response", "batch_deadline"]:
             assert key in g["sub_scores"], f"Missing sub-score: {key}"
     def test_all_sub_scores_in_range(self):
@@ -146,7 +146,13 @@ class TestTask3:
     def test_weights_sum_correct(self):
         g = run_full_episode(task_id=3, seed=33)
         ss = g["sub_scores"]
-        expected = ss["cost"]*0.35 + ss["temperature"]*0.25 + ss["grid_response"]*0.25 + ss["batch_deadline"]*0.15
         assert abs(g["score"] - expected) < 0.01 or g["exploit_detected"]
     def test_grid_response_sub_score(self):

     def test_has_all_sub_scores(self):
         g = run_full_episode(task_id=3, seed=31)
+        for key in ["cost", "temperature", "grid_response", "batch_deadline", "carbon"]:
             assert key in g["sub_scores"], f"Missing sub-score: {key}"
     def test_all_sub_scores_in_range(self):
     def test_weights_sum_correct(self):
         g = run_full_episode(task_id=3, seed=33)
         ss = g["sub_scores"]
+        expected = (
+            ss["cost"] * 0.28
+            + ss["temperature"] * 0.20
+            + ss["grid_response"] * 0.20
+            + ss["batch_deadline"] * 0.12
+            + ss["carbon"] * 0.20
+        )
         assert abs(g["score"] - expected) < 0.01 or g["exploit_detected"]
     def test_grid_response_sub_score(self):