Spaces:

Prajwal782007
/

Gridmind

Running

adityss committed on Apr 25

Commit

b81683f

1 Parent(s): 0af208b

feat: implement multi-component dense reward function and environmental logic for GridMind-RL

Files changed (2) hide show

env/environment.go CHANGED Viewed

@@ -84,7 +84,7 @@ func (e *Environment) Reset(req ResetRequest) ResetResponse {
 	// Apply task and difficulty
 	e.taskID = req.TaskID
-	if e.taskID < 1 || e.taskID > 3 {
 		e.taskID = 1
 	}
 	e.difficulty = req.Difficulty
@@ -94,7 +94,7 @@ func (e *Environment) Reset(req ResetRequest) ResetResponse {
 			e.difficulty = "easy"
 		case 2:
 			e.difficulty = "medium"
-		case 3:
 			e.difficulty = "hard"
 		}
 	}

 	// Apply task and difficulty
 	e.taskID = req.TaskID
+	if e.taskID < 1 || e.taskID > 4 {
 		e.taskID = 1
 	}
 	e.difficulty = req.Difficulty
 			e.difficulty = "easy"
 		case 2:
 			e.difficulty = "medium"
+		case 3, 4:
 			e.difficulty = "hard"
 		}
 	}

env/rewards.go CHANGED Viewed

@@ -116,11 +116,11 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
 	}
 	// ── Aggregate ────────────────────────────────────────────────────────────
-	// Total includes all 9 components with fault_mitigation weighted at 0.05
-	// Reduce StabilityPenalty weight by 0.05 to keep sum = 1.0
 	rc.Total = rc.CostSavings + rc.TempConstraint + rc.GridResponse +
 		rc.DeadlinePenalty + rc.EfficiencyBonus + rc.StabilityPenalty + rc.CarbonReward +
-		rc.InstructionReward + rc.FaultMitigation*0.05 + rc.FaultMitigation*0.95
 	return rc
 }

 	}
 	// ── Aggregate ────────────────────────────────────────────────────────────
+	// Total is the sum of all 9 reward components. Each component is computed
+	// independently above and contributes directly to the total signal.
 	rc.Total = rc.CostSavings + rc.TempConstraint + rc.GridResponse +
 		rc.DeadlinePenalty + rc.EfficiencyBonus + rc.StabilityPenalty + rc.CarbonReward +
+		rc.InstructionReward + rc.FaultMitigation
 	return rc
 }