Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- Dockerfile +30 -0
- README.md +242 -10
- __init__.py +0 -0
- client.py +68 -0
- inference.py +330 -0
- models.py +39 -0
- openenv.yaml +6 -0
- pyproject.toml +24 -0
- scripts/validate-submission.sh +172 -0
- server/__init__.py +0 -0
- server/app.py +92 -0
- server/constants.py +186 -0
- server/grader.py +124 -0
- server/inventory_env.py +264 -0
- uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM ghcr.io/meta-pytorch/openenv-base:latest AS builder
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
RUN apt-get update && apt-get install -y git curl && \
|
| 5 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
| 6 |
+
ENV PATH="/root/.local/bin:$PATH"
|
| 7 |
+
|
| 8 |
+
WORKDIR /app
|
| 9 |
+
COPY pyproject.toml uv.lock* ./
|
| 10 |
+
RUN uv sync --no-install-project --frozen || uv sync --no-install-project
|
| 11 |
+
COPY . .
|
| 12 |
+
RUN uv sync
|
| 13 |
+
|
| 14 |
+
FROM ghcr.io/meta-pytorch/openenv-base:latest
|
| 15 |
+
|
| 16 |
+
WORKDIR /app
|
| 17 |
+
COPY --from=builder /app/.venv /app/.venv
|
| 18 |
+
COPY --from=builder /app /app
|
| 19 |
+
|
| 20 |
+
ENV PATH="/app/.venv/bin:$PATH"
|
| 21 |
+
ENV PYTHONUNBUFFERED=1
|
| 22 |
+
ENV PYTHONPATH="/app:$PYTHONPATH"
|
| 23 |
+
|
| 24 |
+
EXPOSE 8000
|
| 25 |
+
|
| 26 |
+
HEALTHCHECK --interval=30s --timeout=3s \
|
| 27 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 28 |
+
|
| 29 |
+
ENV ENABLE_WEB_INTERFACE=true
|
| 30 |
+
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
CHANGED
|
@@ -1,10 +1,242 @@
|
|
| 1 |
-
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: docker
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Inventory Optimization Environment
|
| 3 |
+
emoji: 📦
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 8000
|
| 8 |
+
tags:
|
| 9 |
+
- openenv
|
| 10 |
+
base_path: /web
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# Retail Inventory Optimization Environment
|
| 14 |
+
|
| 15 |
+
An OpenEnv reinforcement learning environment that simulates day-by-day retail inventory management across 5 product categories. An AI agent must balance purchasing, pricing, shipping, and liquidation decisions to maximize profit over a 30-day episode.
|
| 16 |
+
|
| 17 |
+
## Why Inventory Management?
|
| 18 |
+
|
| 19 |
+
Retail inventory optimization is a real-world task performed daily by store managers, warehouse operators, and supply chain planners. The agent faces the same challenges as a human manager: uncertain demand, perishable goods, shipping delays, seasonal events, and limited cash flow. Poor decisions lead to stockouts (lost sales), waste (expired goods), or cash tied up in unsold inventory.
|
| 20 |
+
|
| 21 |
+
## Environment Description
|
| 22 |
+
|
| 23 |
+
You manage a retail store selling 5 products with different characteristics:
|
| 24 |
+
|
| 25 |
+
| Product | Sell Price | Cost Price | Profit Margin | Shelf Life |
|
| 26 |
+
|---------|-----------|------------|---------------|------------|
|
| 27 |
+
| Electronics | $150 | $100 | $50 | No expiry |
|
| 28 |
+
| Clothing | $40 | $25 | $15 | No expiry |
|
| 29 |
+
| Groceries | $10 | $5 | $5 | 5 days |
|
| 30 |
+
| Furniture | $200 | $130 | $70 | No expiry |
|
| 31 |
+
| Toys | $25 | $12 | $13 | No expiry |
|
| 32 |
+
|
| 33 |
+
Each day the agent receives the current store state (cash, inventory with batch expiry, pending deliveries, upcoming events) and must decide:
|
| 34 |
+
- **What to buy** and how much of each product
|
| 35 |
+
- **How to ship** — slow (cheap but unreliable), medium, or fast (expensive but guaranteed)
|
| 36 |
+
- **What to liquidate** — dispose of expiring or excess stock
|
| 37 |
+
- **How to price** — set per-product price multipliers that affect demand via elasticity
|
| 38 |
+
|
| 39 |
+
Customer demand is generated each day based on base ranges, weekend boosts (1.2x when `day % 7` is 5 or 6), and seasonal event multipliers (up to 3x during Black Friday, Christmas, etc.). The agent cannot see future demand — only yesterday's demand as feedback.
|
| 40 |
+
|
| 41 |
+
The episode runs for 30 days. The goal is to maximize total profit.
|
| 42 |
+
|
| 43 |
+
## Environment Design Highlights
|
| 44 |
+
|
| 45 |
+
### Batch-Tracked Inventory with FIFO
|
| 46 |
+
Inventory is tracked per batch with individual expiry dates. Groceries expire after 5 days. Selling and liquidation follow FIFO (First In, First Out) — oldest batches are consumed first, mimicking real warehouse operations.
|
| 47 |
+
|
| 48 |
+
```json
|
| 49 |
+
{"groceries": [[20, 3], [15, 5], [10, 1]]}
|
| 50 |
+
```
|
| 51 |
+
Three batches: 20 units (3 days left), 15 units (5 days left), 10 units (1 day left — liquidate or lose them).
|
| 52 |
+
|
| 53 |
+
### Dynamic Pricing with Price Elasticity
|
| 54 |
+
The agent can set per-product price multipliers (0.5x to 1.5x) each day. Demand responds to pricing via realistic elasticity values — groceries are inelastic (people buy regardless), while clothing and toys are highly elastic (price-sensitive customers).
|
| 55 |
+
|
| 56 |
+
| Product | Elasticity | Effect of 1.3x price |
|
| 57 |
+
|---------|-----------|----------------------|
|
| 58 |
+
| Electronics | 1.2 | Demand drops ~24% |
|
| 59 |
+
| Clothing | 1.5 | Demand drops ~38% |
|
| 60 |
+
| Groceries | 0.4 | Demand drops only ~11% |
|
| 61 |
+
| Furniture | 0.8 | Demand drops ~22% |
|
| 62 |
+
| Toys | 1.3 | Demand drops ~33% |
|
| 63 |
+
|
| 64 |
+
### Delivery Jitter
|
| 65 |
+
Shipping isn't perfectly reliable. Slow delivery has +/-2 day variance, medium has +/-1 day. Only fast delivery (at 5x the cost) is guaranteed next-day. The agent must account for uncertainty when planning restocks before events.
|
| 66 |
+
|
| 67 |
+
### Seasonal Events with Demand Spikes
|
| 68 |
+
Five events are spread across the 30-day episode. Each event triggers a 2-day demand multiplier — Black Friday triples electronics demand, Christmas triples toys, etc. A "new competitor" event actually reduces demand. The agent sees countdowns and must stock up in advance.
|
| 69 |
+
|
| 70 |
+
### Decomposed Per-Step Reward
|
| 71 |
+
The reward function provides granular feedback every step, not just end-of-episode:
|
| 72 |
+
|
| 73 |
+
| Signal | Formula | Purpose |
|
| 74 |
+
|--------|---------|---------|
|
| 75 |
+
| Successful sales | `+sold * sell_price * 0.001` | Reward revenue proportional to product value |
|
| 76 |
+
| Missed sales | `-missed * sell_price * 0.001` | Penalize stockouts, weighted by product value |
|
| 77 |
+
| Expired groceries | `-0.05 * expired_count` | Penalize waste from overbuying perishables |
|
| 78 |
+
| Failed purchases | `-0.5 per rejected order` | Penalize ordering beyond cash budget |
|
| 79 |
+
| Liquidation loss | `-disposed_value * 0.001` | Penalize disposal proportional to cost |
|
| 80 |
+
|
| 81 |
+
### Conversation History for LLM Agents
|
| 82 |
+
The inference script maintains a rolling 7-day conversation history. The LLM sees its past observations and decisions, enabling it to spot demand trends, learn from mistakes, and adjust strategy across the episode.
|
| 83 |
+
|
| 84 |
+
## Action Space
|
| 85 |
+
|
| 86 |
+
```python
|
| 87 |
+
class InventoryAction(Action):
|
| 88 |
+
buy_quantities: Dict[str, int] = {}
|
| 89 |
+
delivery_method: Literal["slow", "medium", "fast"] = "slow"
|
| 90 |
+
liquidate: Dict[str, int] = {}
|
| 91 |
+
price_multipliers: Dict[str, float] = {}
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
| Field | Description |
|
| 95 |
+
|-------|-------------|
|
| 96 |
+
| `buy_quantities` | Products and amounts to order. Empty `{}` to skip buying. |
|
| 97 |
+
| `delivery_method` | `"slow"` ($2/unit, 3-7 days), `"medium"` ($5/unit, 2-4 days), `"fast"` ($10/unit, 1 day guaranteed) |
|
| 98 |
+
| `liquidate` | Products and amounts to dispose of (no revenue). Use for expiring groceries or freeing warehouse space. |
|
| 99 |
+
| `price_multipliers` | Per-product selling price multiplier (0.5-1.5). Affects demand via elasticity. Default 1.0 if omitted. |
|
| 100 |
+
|
| 101 |
+
## Observation Space
|
| 102 |
+
|
| 103 |
+
```python
|
| 104 |
+
class InventoryObservation(Observation):
|
| 105 |
+
current_day: int
|
| 106 |
+
total_cash: float
|
| 107 |
+
day_profit: float
|
| 108 |
+
total_profit: float
|
| 109 |
+
demand_today: Dict[str, int] # yesterday's demand (feedback)
|
| 110 |
+
updated_inventory: Dict[str, List] # [[qty, days_left], ...] per batch
|
| 111 |
+
remaining_capacity: Dict[str, int] # warehouse space left per product
|
| 112 |
+
updated_events: Dict[str, int] # event countdowns (0 or negative = active or ended)
|
| 113 |
+
updated_deliveries: List[Dict] # in-transit shipments
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
## Tasks (Easy / Medium / Hard)
|
| 117 |
+
|
| 118 |
+
### Easy — "Steady State"
|
| 119 |
+
- Low starting stock, low steady demand, no events
|
| 120 |
+
- Starting cash: $1,000 | Full warehouse capacity
|
| 121 |
+
- Agent needs to restock regularly but demand is predictable
|
| 122 |
+
- No events, no demand spikes — pure supply chain management
|
| 123 |
+
|
| 124 |
+
### Medium — "Seasonal Rush"
|
| 125 |
+
- Default stock/cash, all 5 events spread across 30 days
|
| 126 |
+
- Events: Black Friday (day 6), Christmas (day 12), Back to School (day 18), Summer Clearance (day 24), New Competitor (day 28)
|
| 127 |
+
- Agent must anticipate demand spikes and restock before events hit
|
| 128 |
+
|
| 129 |
+
### Hard — "Chaos Mode"
|
| 130 |
+
- Half starting cash ($500), low stock, events packed close together (days 4, 8, 12, 16, 20)
|
| 131 |
+
- Higher base demand, smaller warehouse capacity
|
| 132 |
+
- Agent must balance tight budget, overlapping event spikes, perishable goods, and limited storage
|
| 133 |
+
|
| 134 |
+
## Grading (0.0 - 1.0)
|
| 135 |
+
|
| 136 |
+
Each task is scored by comparing agent profit against two deterministic baselines:
|
| 137 |
+
- **Floor**: Passive agent that never buys (sells initial stock until depleted)
|
| 138 |
+
- **Ceiling**: Theoretical max profit assuming perfect demand knowledge and cheapest shipping
|
| 139 |
+
|
| 140 |
+
```
|
| 141 |
+
score = clamp((agent_profit - floor) / (ceiling - floor), 0.0, 1.0)
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
Both baselines are deterministic (seeded RNG) and computed fresh each run to ensure reproducibility.
|
| 145 |
+
|
| 146 |
+
## Setup
|
| 147 |
+
|
| 148 |
+
```bash
|
| 149 |
+
# Install dependencies
|
| 150 |
+
pip install openenv-core[core] fastapi uvicorn pydantic openai numpy python-dotenv
|
| 151 |
+
|
| 152 |
+
# Run grader baselines
|
| 153 |
+
python -c "from server.grader import compute_baselines; [print(f'{t}: floor={f:.2f}, ceiling={c:.2f}') for t in ['easy','medium','hard'] for f,c in [compute_baselines(t)]]"
|
| 154 |
+
|
| 155 |
+
# Start server locally
|
| 156 |
+
uvicorn server.app:app --host 0.0.0.0 --port 8000
|
| 157 |
+
|
| 158 |
+
# Test endpoints
|
| 159 |
+
curl http://localhost:8000/health
|
| 160 |
+
curl -X POST http://localhost:8000/reset
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
## Running Inference
|
| 164 |
+
|
| 165 |
+
```bash
|
| 166 |
+
# Using HuggingFace Router
|
| 167 |
+
export API_BASE_URL="https://router.huggingface.co/v1"
|
| 168 |
+
export MODEL_NAME="Qwen/Qwen3-32B"
|
| 169 |
+
export HF_TOKEN="your-token"
|
| 170 |
+
python inference.py
|
| 171 |
+
|
| 172 |
+
# Using OpenAI
|
| 173 |
+
export API_BASE_URL="https://api.openai.com/v1"
|
| 174 |
+
export MODEL_NAME="gpt-4o"
|
| 175 |
+
export API_KEY="sk-your-key"
|
| 176 |
+
python inference.py
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
## Docker
|
| 180 |
+
|
| 181 |
+
```bash
|
| 182 |
+
docker build -t inventory-env .
|
| 183 |
+
docker run -p 8000:8000 inventory-env
|
| 184 |
+
```
|
| 185 |
+
|
| 186 |
+
## API Endpoints
|
| 187 |
+
|
| 188 |
+
| Endpoint | Method | Description |
|
| 189 |
+
|----------|--------|-------------|
|
| 190 |
+
| `/health` | GET | Health check — returns 200 if server is running |
|
| 191 |
+
| `/reset` | POST | Reset environment, returns initial observation |
|
| 192 |
+
| `/step` | POST | Submit an action (JSON body), returns next observation with reward |
|
| 193 |
+
| `/state` | GET | Get current episode state (day, cash, inventory) |
|
| 194 |
+
| `/tasks` | GET | List all 3 tasks with full config (stock, capacity, demand ranges, events) |
|
| 195 |
+
| `/grader` | POST | Score an episode given task name and agent profit |
|
| 196 |
+
| `/baseline` | GET | Run LLM inference on a task and return the score |
|
| 197 |
+
|
| 198 |
+
### Example Queries
|
| 199 |
+
|
| 200 |
+
```bash
|
| 201 |
+
# List all tasks with full schemas
|
| 202 |
+
curl http://localhost:8000/tasks
|
| 203 |
+
|
| 204 |
+
# Grade a specific profit
|
| 205 |
+
curl -X POST "http://localhost:8000/grader?task_name=easy&agent_profit=5000"
|
| 206 |
+
# → {"task_name":"easy","agent_profit":5000.0,"floor":2200.0,"ceiling":10011.0,"score":0.358}
|
| 207 |
+
|
| 208 |
+
# Run baseline inference (requires API keys in container env)
|
| 209 |
+
curl "http://localhost:8000/baseline"
|
| 210 |
+
curl "http://localhost:8000/baseline?task_name=hard"
|
| 211 |
+
# → {"task_name":"easy","score":0.822}
|
| 212 |
+
```
|
| 213 |
+
|
| 214 |
+
## Step Execution Order
|
| 215 |
+
|
| 216 |
+
Each `step()` call processes in this order:
|
| 217 |
+
1. Tick event countdowns (into negatives to track active duration)
|
| 218 |
+
2. Remove expired groceries (shelf life = 0)
|
| 219 |
+
3. Receive arriving deliveries (add to inventory with fresh shelf life)
|
| 220 |
+
4. Process purchase orders (deduct cash, schedule deliveries with jitter)
|
| 221 |
+
5. Generate demand (base + weekend boost + event multipliers + price elasticity)
|
| 222 |
+
6. Sell products FIFO (oldest batches first, track missed sales)
|
| 223 |
+
7. Liquidate requested stock FIFO (no revenue)
|
| 224 |
+
8. Compute profit, reward, update state, return observation
|
| 225 |
+
|
| 226 |
+
## Project Structure
|
| 227 |
+
|
| 228 |
+
```
|
| 229 |
+
├── models.py # InventoryAction, InventoryObservation, InventoryState (Pydantic)
|
| 230 |
+
├── client.py # EnvClient for remote WebSocket connections
|
| 231 |
+
├── inference.py # LLM inference script with conversation history (runs all 3 tasks)
|
| 232 |
+
├── openenv.yaml # OpenEnv spec manifest
|
| 233 |
+
├── pyproject.toml # Python dependencies
|
| 234 |
+
├── Dockerfile # Multi-stage container build from openenv-base
|
| 235 |
+
├── server/
|
| 236 |
+
│ ├── app.py # FastAPI server (create_app + uvicorn entry point)
|
| 237 |
+
│ ├── inventory_env.py # Environment (reset, step, state, demand generation)
|
| 238 |
+
│ ├── constants.py # All configs: prices, stock, events, tasks, elasticity
|
| 239 |
+
│ └── grader.py # Floor/ceiling baselines and 0.0-1.0 scoring
|
| 240 |
+
└── scripts/
|
| 241 |
+
└── validate-submission.sh # Pre-submission validator
|
| 242 |
+
```
|
__init__.py
ADDED
|
File without changes
|
client.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Any, Dict
|
| 4 |
+
|
| 5 |
+
from openenv.core.client_types import StepResult
|
| 6 |
+
from openenv.core.env_client import EnvClient
|
| 7 |
+
|
| 8 |
+
from models import InventoryAction, InventoryObservation, InventoryState
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class InventoryEnv(EnvClient[InventoryAction, InventoryObservation, InventoryState]):
    """Client for the inventory environment server.

    Converts ``InventoryAction`` objects into request payloads and turns the
    server's JSON responses back into ``InventoryObservation`` and
    ``InventoryState`` instances.
    """

    def _step_payload(self, action: InventoryAction) -> Dict[str, Any]:
        """Build the request body for one step.

        Args:
            action: The action to submit.

        Returns:
            A dict holding every action field whose value is not ``None``.
        """
        field_names = (
            "buy_quantities",
            "delivery_method",
            "liquidate",
            "price_multipliers",
        )
        payload: Dict[str, Any] = {}
        for field_name in field_names:
            value = getattr(action, field_name)
            if value is not None:
                payload[field_name] = value
        return payload

    def _parse_result(self, payload: Dict) -> StepResult[InventoryObservation]:
        """Convert a step/reset response into a ``StepResult``.

        Args:
            payload: Decoded JSON response from the server.

        Returns:
            A ``StepResult`` wrapping the rebuilt observation, with ``reward``
            and ``done`` mirrored from that observation.
        """
        obs_data = payload.get("observation", {})

        # NOTE(review): OpenEnv's server serialization is understood to report
        # ``reward`` and ``done`` at the top level of the response rather than
        # inside ``observation`` -- confirm against the installed openenv-core
        # version. Prefer the top-level values and fall back to the nested
        # ones so both layouts are handled; reading only the nested values
        # would leave ``done`` permanently False under the top-level layout.
        reward = payload.get("reward")
        if reward is None:
            reward = obs_data.get("reward", 0.0)
        done = payload.get("done")
        if done is None:
            done = obs_data.get("done", False)

        observation = InventoryObservation(
            current_day=obs_data.get("current_day", 0),
            total_cash=obs_data.get("total_cash", 0),
            day_profit=obs_data.get("day_profit", 0),
            total_profit=obs_data.get("total_profit", 0),
            demand_today=obs_data.get("demand_today", {}),
            updated_inventory=obs_data.get("updated_inventory", {}),
            remaining_capacity=obs_data.get("remaining_capacity", {}),
            updated_events=obs_data.get("updated_events", {}),
            updated_deliveries=obs_data.get("updated_deliveries", []),
            done=done,
            reward=reward,
            metadata=obs_data.get("metadata", {}),
        )

        return StepResult(
            observation=observation,
            reward=observation.reward,
            done=observation.done,
        )

    def _parse_state(self, payload: Dict[str, Any]) -> InventoryState:
        """Convert a state response into an ``InventoryState``.

        Args:
            payload: Decoded JSON response from the server.

        Returns:
            The rebuilt state; missing keys fall back to empty/zero defaults.
        """
        return InventoryState(
            episode_id=payload.get("episode_id", ""),
            current_day=payload.get("current_day", 0),
            cash=payload.get("cash", 0.0),
            inventory=payload.get("inventory", {}),
        )
|
inference.py
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Inference Script - Inventory Optimization Environment
|
| 3 |
+
=====================================================
|
| 4 |
+
Required env vars:
|
| 5 |
+
API_BASE_URL The API endpoint for the LLM.
|
| 6 |
+
MODEL_NAME The model identifier to use for inference.
|
| 7 |
+
HF_TOKEN Hugging Face token (preferred for HF Router).
|
| 8 |
+
|
| 9 |
+
Supported key env vars (first non-empty wins): API_KEY, HF_TOKEN, OPENAI_API_KEY.
|
| 10 |
+
For non-OpenAI endpoints, a dummy key is used when no key is provided because
|
| 11 |
+
the OpenAI Python SDK requires a non-empty api_key argument.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import os
|
| 15 |
+
import json
|
| 16 |
+
import textwrap
|
| 17 |
+
|
| 18 |
+
from dotenv import load_dotenv
|
| 19 |
+
load_dotenv()
|
| 20 |
+
|
| 21 |
+
from openai import OpenAI
|
| 22 |
+
|
| 23 |
+
from server.inventory_env import InventoryEnvironment
|
| 24 |
+
from server.constants import EXTRA_INVENTORY_COST, EVENT_DURATION, TASKS, COST_PRICES, SHIPPING_COST, BASE_PRICES
|
| 25 |
+
from models import InventoryAction
|
| 26 |
+
|
| 27 |
+
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
|
| 28 |
+
API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY")
|
| 29 |
+
MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen3-32B"
|
| 30 |
+
TASK_NAME = os.getenv("TASK_NAME") or "easy"
|
| 31 |
+
MAX_DAYS = 30
|
| 32 |
+
|
| 33 |
+
SYSTEM_PROMPT = textwrap.dedent("""
|
| 34 |
+
You are an inventory management AI agent. Each day you receive the current state
|
| 35 |
+
of a retail store with 5 products: electronics, clothing, groceries, furniture, toys.
|
| 36 |
+
|
| 37 |
+
You will be shown your decision history from recent days so you can learn from
|
| 38 |
+
past outcomes. Use this history to spot demand trends, identify what worked vs.
|
| 39 |
+
what didn't, and adjust your strategy accordingly.
|
| 40 |
+
|
| 41 |
+
Groceries are perishable (5-day shelf life). Other products don't expire.
|
| 42 |
+
|
| 43 |
+
Product selling prices: electronics=$150, clothing=$40, groceries=$10, furniture=$200, toys=$25
|
| 44 |
+
Product cost prices: electronics=$100, clothing=$25, groceries=$5, furniture=$130, toys=$12
|
| 45 |
+
Profit margins: electronics=$50, clothing=$15, groceries=$5, furniture=$70, toys=$13
|
| 46 |
+
Shipping costs per unit: slow=$2 (3-7 days), medium=$5 (2-4 days), fast=$10 (1 day, always reliable)
|
| 47 |
+
Warehouse capacity: electronics=100, clothing=200, groceries=500, furniture=50, toys=300
|
| 48 |
+
|
| 49 |
+
Events (like black_friday, christmas) boost demand when their countdown hits 0 and last for 2 days.
|
| 50 |
+
Weekends (day%7 == 5 or 6) have 1.2x demand.
|
| 51 |
+
|
| 52 |
+
CRITICAL STRATEGY:
|
| 53 |
+
- Review your history: if reward was negative, identify why and change approach.
|
| 54 |
+
- Track demand trends across days.
|
| 55 |
+
- You MUST restock products when inventory is low. Missed sales = lost revenue = negative reward.
|
| 56 |
+
- Do NOT overbuy when demand is low — unsold stock ties up cash and perishables expire.
|
| 57 |
+
- Stock up BEFORE events hit (check event countdowns — order 3-5 days ahead).
|
| 58 |
+
- When no events are approaching, slow shipping is often sufficient and saves significant cost.
|
| 59 |
+
- Near end of episode (last 2 days), stop buying — focus on selling remaining stock.
|
| 60 |
+
|
| 61 |
+
DYNAMIC PRICING:
|
| 62 |
+
You can set a price multiplier (0.5 to 1.5) per product each day. Default is 1.0.
|
| 63 |
+
- Lower price (e.g. 0.7) = more demand but less revenue per unit. Good for clearing excess stock.
|
| 64 |
+
- Higher price (e.g. 1.3) = less demand but more revenue per unit. Good when stock is low.
|
| 65 |
+
- Price elasticity varies across different products.
|
| 66 |
+
- Elasticity values: electronics=1.2, clothing=1.5, groceries=0.4, furniture=0.8, toys=1.3
|
| 67 |
+
|
| 68 |
+
Each day you must respond with a JSON action:
|
| 69 |
+
{
|
| 70 |
+
"buy_quantities": {"product_name": quantity, ...},
|
| 71 |
+
"delivery_method": "slow" | "medium" | "fast",
|
| 72 |
+
"liquidate": {"product_name": quantity, ...},
|
| 73 |
+
"price_multipliers": {"product_name": multiplier, ...}
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
- buy_quantities: products and amounts to order.
|
| 77 |
+
- delivery_method: shipping speed for this order
|
| 78 |
+
- liquidate: products and amounts to dispose of (no revenue, empty {} to skip)
|
| 79 |
+
Use liquidate to free up warehouse space before a restock.
|
| 80 |
+
- price_multipliers: set selling price multiplier per product (0.5-1.5, default 1.0 if omitted)
|
| 81 |
+
|
| 82 |
+
LEARNING FROM HISTORY:
|
| 83 |
+
- Compare your past buy quantities to the demand that followed — were you over or under?
|
| 84 |
+
- If you see repeated stockouts for a product, increase orders for it.
|
| 85 |
+
- If groceries expired, you overbought — reduce grocery orders or use faster shipping.
|
| 86 |
+
- A negative reward means your last action was bad — adjust immediately.
|
| 87 |
+
|
| 88 |
+
Before responding with JSON, briefly reason (2-3 lines max):
|
| 89 |
+
1. What did I learn from recent history? What went wrong/right?
|
| 90 |
+
2. What products need restocking vs. are overstocked?
|
| 91 |
+
3. Are any events approaching?
|
| 92 |
+
|
| 93 |
+
Then output ONLY the final JSON action on the last line.
|
| 94 |
+
""").strip()
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def format_observation(obs):
    """Render an observation as the plain-text user prompt for the LLM.

    The prompt lists the day counter, cash and profit figures, the last
    reward, per-product inventory (with batch detail and remaining space),
    the most recent demand figures, event status, and pending deliveries.
    """

    def describe_batch(batch):
        # A batch is indexed as [quantity, days_left]; days_left may be None.
        label = f"{batch[0]} units"
        if batch[1] is not None:
            label += f" ({batch[1]}d left)"
        return label

    def describe_event(name, countdown):
        # Positive countdown: still upcoming. Within the event duration at or
        # below zero: currently active. Anything lower: already over.
        if countdown > 0:
            return f" {name}: in {countdown} days"
        if -EVENT_DURATION < countdown <= 0:
            return f" {name}: ACTIVE NOW"
        return f" {name}: ended"

    # Inventory section: one line per product.
    inventory_rows = []
    for name, batches in obs.updated_inventory.items():
        units_on_hand = sum(batch[0] for batch in batches)
        space_left = obs.remaining_capacity.get(name, 0)
        overflow_cost = EXTRA_INVENTORY_COST.get(name, 0)
        batches_desc = ", ".join(describe_batch(batch) for batch in batches)
        inventory_rows.append(
            f" {name}: {units_on_hand} total [{batches_desc}] | space left: {space_left} (extra space: ${overflow_cost}/unit)"
        )
    inv_text = "\n".join(inventory_rows)

    # Events section.
    event_rows = [
        describe_event(name, countdown)
        for name, countdown in obs.updated_events.items()
    ]
    events_text = "\n".join(event_rows) if event_rows else " None"

    # Deliveries section: each entry maps a product to (quantity, arrival day).
    delivery_rows = []
    for shipment_map in obs.updated_deliveries:
        for name, shipment in shipment_map.items():
            quantity, arrival_day = shipment
            wait = arrival_day - obs.current_day
            delivery_rows.append(f" {name}: {quantity} units arriving in {wait} days")
    deliveries_text = "\n".join(delivery_rows) if delivery_rows else " None"

    # Demand section (feedback on the previous day, not a forecast).
    demand_rows = [
        f" {name}: {units} units" for name, units in obs.demand_today.items()
    ]
    demand_text = "\n".join(demand_rows) if demand_rows else " No demand data yet"

    sections = [
        f"Day: {obs.current_day}/{MAX_DAYS}",
        f"Cash: ${obs.total_cash:.2f}",
        f"Day Profit: ${obs.day_profit:.2f}",
        f"Total Profit: ${obs.total_profit:.2f}",
        f"Last Step Reward: {obs.reward:.3f}",
        "",
        "Inventory:",
        inv_text,
        "",
        "Yesterday's Demand:",
        demand_text,
        "",
        "Upcoming Events:",
        events_text,
        "",
        "Pending Deliveries:",
        deliveries_text,
        "",
        "Respond with your action as JSON.",
    ]
    return "\n".join(sections)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def parse_action(response_text):
    """Parse an LLM response into an ``InventoryAction``.

    The model is asked to reason briefly and then emit the JSON action, so
    the response may contain prose, markdown code fences, or stray braces
    around the real payload. Every ``{`` is tried as the start of a JSON
    value; the last decodable object that carries at least one action field
    is used (falling back to the last decodable object of any kind). This
    also covers fenced JSON, since the fence markers are simply skipped.

    Args:
        response_text: Raw text returned by the LLM.

    Returns:
        The parsed action. If no usable JSON object is found, or the action
        cannot be constructed from it, a do-nothing action (no purchases,
        slow delivery, no liquidation, default prices) is returned and the
        failure is printed for debugging.
    """
    action_fields = (
        "buy_quantities",
        "delivery_method",
        "liquidate",
        "price_multipliers",
    )
    try:
        text = response_text.strip()
        decoder = json.JSONDecoder()

        with_action_fields = None  # last object containing an action field
        any_object = None  # last decodable object of any shape
        pos = text.find("{")
        while pos != -1:
            try:
                candidate, stop = decoder.raw_decode(text, pos)
            except ValueError:
                # Not valid JSON from this brace (e.g. braces in the
                # reasoning text); try the next opening brace.
                pos = text.find("{", pos + 1)
                continue
            if isinstance(candidate, dict):
                any_object = candidate
                if any(field in candidate for field in action_fields):
                    with_action_fields = candidate
            # Resume after the decoded value so nested objects are not
            # mistaken for separate top-level candidates.
            pos = text.find("{", stop)

        data = with_action_fields if with_action_fields is not None else any_object
        if data is None:
            raise ValueError("no JSON object found in response")

        # Only forward the fields the action model knows about.
        clean = {field: data[field] for field in action_fields if field in data}
        return InventoryAction(**clean)
    except Exception as e:
        # Deliberate best-effort: a malformed reply must not abort the
        # episode, so log it and fall back to a no-op action.
        print(f" [DEBUG] Parse FAILED: {e}")
        print(f" [DEBUG] Raw LLM response: {response_text[:500]}")
        return InventoryAction(
            buy_quantities={},
            delivery_method="slow",
            liquidate={},
            price_multipliers={},
        )
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
HISTORY_WINDOW = 7 # rolling window of past days to include in context
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def run_task(client, task_name):
    """Run one episode of ``task_name`` with the LLM agent and return total profit.

    Each day the current observation is formatted into a prompt and sent to
    the chat-completions API together with up to HISTORY_WINDOW previous
    (prompt, reply) pairs.  The reply is parsed into an action and applied to
    the environment.  Progress is reported on stdout as [START], [STEP] and
    [END] lines; the [END] line is printed even when the episode aborts.

    Args:
        client: OpenAI-compatible client exposing ``chat.completions.create``.
        task_name: Task identifier passed to InventoryEnvironment and grade().

    Returns:
        ``total_profit`` of the last observation.
    """
    env = InventoryEnvironment(task_name)
    obs = env.reset()

    rewards = []
    steps_taken = 0
    success = False
    # Must be bound before the try block: the finally clause formats it even
    # when the episode raises before grading, and an unbound name there would
    # replace the real exception with an UnboundLocalError.
    score = 0.0

    print(f"[START] task={task_name} env=inventory_env model={MODEL_NAME}", flush=True)

    # Rolling history of (user_observation, assistant_response) pairs
    history = []

    try:
        for day in range(1, env.max_days + 1):
            if obs.done:
                break

            user_prompt = format_observation(obs)

            # Build messages: system + history context + current observation
            messages = [{"role": "system", "content": SYSTEM_PROMPT}]

            recent = history[-HISTORY_WINDOW:]
            if recent:
                messages.append({
                    "role": "user",
                    "content": f"Here is your decision history from the last {len(recent)} day(s). "
                               "Use this to identify demand trends, adjust restocking, and avoid repeating mistakes.",
                })
                messages.append({
                    "role": "assistant",
                    "content": "Understood. I'll review my past decisions and their outcomes to make better choices today.",
                })
                for past_user, past_assistant in recent:
                    messages.append({"role": "user", "content": past_user})
                    messages.append({"role": "assistant", "content": past_assistant})

            messages.append({"role": "user", "content": user_prompt})

            error = None
            try:
                completion = client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=messages,
                    temperature=0.0,
                    max_completion_tokens=500,
                    stream=False,
                )
                response_text = completion.choices[0].message.content or ""
            except Exception as exc:
                # A failed API call degrades to an empty action for this day
                # instead of ending the episode; the error is reported in [STEP].
                error = str(exc)
                response_text = "{}"

            # Save this turn to rolling history
            history.append((user_prompt, response_text))

            action = parse_action(response_text)
            action_str = json.dumps({"buy": action.buy_quantities, "deliver": action.delivery_method, "liquidate": action.liquidate, "prices": action.price_multipliers})

            obs = env.step(action)

            reward = obs.reward
            done = obs.done
            rewards.append(reward)
            steps_taken = day

            print(f"[STEP] step={day} action={action_str} reward={reward:.2f} done={str(done).lower()} error={error if error else 'null'}", flush=True)

            if done:
                break

        # compute score
        from server.grader import grade
        score = grade(task_name, obs.total_profit)
        success = score >= 0.1

    finally:
        rewards_str = ",".join(f"{r:.2f}" for r in rewards)
        print(f"[END] success={str(success).lower()} steps={steps_taken} score={score:.3f} rewards={rewards_str}", flush=True)

    return obs.total_profit
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def main():
    """Print the baselines, run the agent on every task, then print final scores.

    Raises:
        RuntimeError: if MODEL_NAME is empty or unset.
    """
    from server.grader import grade, compute_baselines

    if not MODEL_NAME:
        raise RuntimeError("MODEL_NAME is not set. Please export MODEL_NAME before running inference.")

    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    tasks = ["easy", "medium", "hard"]

    def banner(title):
        # Section header: blank line, rule, title, rule.
        print(f"\n{'=' * 50}")
        print(title)
        print(f"{'=' * 50}")

    banner("BASELINES")
    for task_name in tasks:
        floor, ceiling = compute_baselines(task_name)
        print(f" {task_name}: floor=${floor:.2f} (passive) | ceiling=${ceiling:.2f} (heuristic)")

    # Run the episodes in task order and remember each total profit.
    results = {task_name: run_task(client, task_name) for task_name in tasks}

    banner("FINAL SCORES")
    for task_name in tasks:
        floor, ceiling = compute_baselines(task_name)
        score = grade(task_name, results[task_name])
        print(f" {task_name}: {score:.3f} (profit: ${results[task_name]:.2f} | floor: ${floor:.2f} | ceiling: ${ceiling:.2f})")
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
if __name__ == "__main__":
|
| 330 |
+
main()
|
models.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from openenv.core.env_server import Action, Observation, State
|
| 5 |
+
from typing import Literal, Dict, List, Optional
|
| 6 |
+
from pydantic import field_validator
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class InventoryAction(Action):
    # Action payload accepted by the inventory environment.
    # Every field has a default, so an empty payload is a valid action.
    buy_quantities: Dict[str, int] = {}
    delivery_method: Literal["slow", "medium", "fast"] = "slow"
    liquidate: Dict[str, int] = {}
    # product -> 0.5 to 1.5 (default 1.0)
    price_multipliers: Dict[str, float] = {}

    @field_validator("buy_quantities", "liquidate", "price_multipliers", mode="before")
    @classmethod
    def parse_dict_strings(cls, v):
        # Runs before type validation: a dict field supplied as a JSON string
        # is decoded first; any other value is passed through untouched.
        return json.loads(v) if isinstance(v, str) else v
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class InventoryObservation(Observation):
    # Observation payload returned by the inventory environment.
    current_day: int
    total_cash: float
    day_profit: float
    total_profit: float
    # product -> units demanded today
    demand_today: Dict[str, int]
    # product -> [[qty, days_left], ...] per batch
    updated_inventory: Dict[str, List[List[Optional[int]]]]
    # product -> remaining warehouse space
    remaining_capacity: Dict[str, int]
    updated_events: Dict[str, int]
    # product name, (quantity of product, days to arrival)
    updated_deliveries: List[Dict[str, List[int]]]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class InventoryState(State):
    # State model for the inventory environment: episode id, day, cash and
    # per-product inventory counts.
    episode_id: str
    current_day: int
    cash: float
    inventory: Dict[str, int]
|
openenv.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: inventory_env
|
| 3 |
+
type: space
|
| 4 |
+
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
+
port: 8000
|
pyproject.toml
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "inventory-env"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Retail Inventory Optimization RL Environment for OpenEnv"
|
| 5 |
+
requires-python = ">=3.10"
|
| 6 |
+
dependencies = [
|
| 7 |
+
"openenv-core[core]>=0.2.0",
|
| 8 |
+
"fastapi>=0.115.0",
|
| 9 |
+
"uvicorn>=0.24.0",
|
| 10 |
+
"pydantic>=2.0.0",
|
| 11 |
+
"numpy>=1.24.0",
|
| 12 |
+
"openai>=1.0.0",
|
| 13 |
+
"python-dotenv>=1.0.0",
|
| 14 |
+
]
|
| 15 |
+
|
| 16 |
+
[build-system]
|
| 17 |
+
requires = ["setuptools>=61.0"]
|
| 18 |
+
build-backend = "setuptools.build_meta"
|
| 19 |
+
|
| 20 |
+
[tool.setuptools.packages.find]
|
| 21 |
+
where = ["server"]
|
| 22 |
+
|
| 23 |
+
[project.scripts]
|
| 24 |
+
server = "server.app:main"
|
scripts/validate-submission.sh
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
#
|
| 3 |
+
# validate-submission.sh — OpenEnv Submission Validator
|
| 4 |
+
#
|
| 5 |
+
# Checks that your HF Space is live, Docker image builds, and openenv validate passes.
|
| 6 |
+
#
|
| 7 |
+
# Run:
|
| 8 |
+
# ./scripts/validate-submission.sh <ping_url> [repo_dir]
|
| 9 |
+
#
|
| 10 |
+
# Arguments:
|
| 11 |
+
# ping_url Your HuggingFace Space URL (e.g. https://your-space.hf.space)
|
| 12 |
+
# repo_dir Path to your repo (default: current directory)
|
| 13 |
+
#
|
| 14 |
+
|
| 15 |
+
set -uo pipefail
|
| 16 |
+
|
| 17 |
+
DOCKER_BUILD_TIMEOUT=600
|
| 18 |
+
if [ -t 1 ]; then
|
| 19 |
+
RED='\033[0;31m'
|
| 20 |
+
GREEN='\033[0;32m'
|
| 21 |
+
YELLOW='\033[1;33m'
|
| 22 |
+
BOLD='\033[1m'
|
| 23 |
+
NC='\033[0m'
|
| 24 |
+
else
|
| 25 |
+
RED='' GREEN='' YELLOW='' BOLD='' NC=''
|
| 26 |
+
fi
|
| 27 |
+
|
| 28 |
+
# run_with_timeout SECS CMD [ARGS...]
# Run CMD with a time limit of SECS seconds and return its exit status.
# Uses `timeout` or `gtimeout` when one is on PATH; otherwise runs CMD in the
# background next to a watcher subshell that kills it once SECS have elapsed.
run_with_timeout() {
  local limit="$1"
  shift

  local tool
  for tool in timeout gtimeout; do
    if command -v "$tool" &>/dev/null; then
      "$tool" "$limit" "$@"
      return
    fi
  done

  # Fallback without a timeout binary.
  "$@" &
  local cmd_pid=$!
  ( sleep "$limit" && kill "$cmd_pid" 2>/dev/null ) &
  local watcher_pid=$!

  local exit_code
  wait "$cmd_pid" 2>/dev/null
  exit_code=$?

  # The command is done (or was killed): stop and reap the watcher.
  kill "$watcher_pid" 2>/dev/null
  wait "$watcher_pid" 2>/dev/null
  return "$exit_code"
}
|
| 46 |
+
|
| 47 |
+
# portable_mktemp [PREFIX]
# Create a temp file named PREFIX-XXXXXX (PREFIX defaults to "validate") under
# $TMPDIR, or /tmp when TMPDIR is unset, and print its path.  If that
# invocation fails, retry with a bare `mktemp`.
portable_mktemp() {
  local stem="${1:-validate}"
  local template="${TMPDIR:-/tmp}/${stem}-XXXXXX"

  if ! mktemp "$template" 2>/dev/null; then
    mktemp
  fi
}
|
| 51 |
+
|
| 52 |
+
CLEANUP_FILES=()
|
| 53 |
+
cleanup() { rm -f "${CLEANUP_FILES[@]+"${CLEANUP_FILES[@]}"}"; }
|
| 54 |
+
trap cleanup EXIT
|
| 55 |
+
|
| 56 |
+
PING_URL="${1:-}"
|
| 57 |
+
REPO_DIR="${2:-.}"
|
| 58 |
+
|
| 59 |
+
if [ -z "$PING_URL" ]; then
|
| 60 |
+
printf "Usage: %s <ping_url> [repo_dir]\n" "$0"
|
| 61 |
+
printf "\n"
|
| 62 |
+
printf " ping_url Your HuggingFace Space URL (e.g. https://your-space.hf.space)\n"
|
| 63 |
+
printf " repo_dir Path to your repo (default: current directory)\n"
|
| 64 |
+
exit 1
|
| 65 |
+
fi
|
| 66 |
+
|
| 67 |
+
if ! REPO_DIR="$(cd "$REPO_DIR" 2>/dev/null && pwd)"; then
|
| 68 |
+
printf "Error: directory '%s' not found\n" "${2:-.}"
|
| 69 |
+
exit 1
|
| 70 |
+
fi
|
| 71 |
+
PING_URL="${PING_URL%/}"
|
| 72 |
+
export PING_URL
|
| 73 |
+
PASS=0
|
| 74 |
+
|
| 75 |
+
# log MSG...: print MSG prefixed with the current UTC time; %b expands
# backslash escapes (e.g. the colour codes) contained in MSG.
log() {
  local stamp
  stamp="$(date -u +%H:%M:%S)"
  printf "[%s] %b\n" "$stamp" "$*"
}

# pass MSG: log a PASSED line and bump the PASS counter.
pass() {
  log "${GREEN}PASSED${NC} -- $1"
  PASS=$((PASS + 1))
}

# fail MSG: log a FAILED line.
fail() {
  log "${RED}FAILED${NC} -- $1"
}

# hint MSG: print a "Hint:" line below a failure.
hint() {
  printf " ${YELLOW}Hint:${NC} %b\n" "$1"
}
|
| 79 |
+
# stop_at STEP: report the step at which validation stopped, then exit 1.
stop_at() {
  printf "\n"
  local step="$1"
  printf "${RED}${BOLD}Validation stopped at %s.${NC} Fix the above before continuing.\n" "$step"
  exit 1
}
|
| 84 |
+
|
| 85 |
+
printf "\n"
|
| 86 |
+
printf "${BOLD}========================================${NC}\n"
|
| 87 |
+
printf "${BOLD} OpenEnv Submission Validator${NC}\n"
|
| 88 |
+
printf "${BOLD}========================================${NC}\n"
|
| 89 |
+
log "Repo: $REPO_DIR"
|
| 90 |
+
log "Ping URL: $PING_URL"
|
| 91 |
+
printf "\n"
|
| 92 |
+
|
| 93 |
+
log "${BOLD}Step 1/3: Pinging HF Space${NC} ($PING_URL/reset) ..."
|
| 94 |
+
|
| 95 |
+
CURL_OUTPUT=$(portable_mktemp "validate-curl")
CLEANUP_FILES+=("$CURL_OUTPUT")

# POST an empty JSON body to /reset and capture only the HTTP status code.
# curl's -w "%{http_code}" already prints 000 when the request fails, so the
# fallback must REPLACE the captured value; appending a second "000" inside the
# substitution would yield "000000" and skip the "not reachable" branch below.
# stderr is discarded rather than written onto the same file as -o.
HTTP_CODE=$(curl -s -o "$CURL_OUTPUT" -w "%{http_code}" -X POST \
  -H "Content-Type: application/json" -d '{}' \
  "$PING_URL/reset" --max-time 30 2>/dev/null) || HTTP_CODE="000"

if [ "$HTTP_CODE" = "200" ]; then
  pass "HF Space is live and responds to /reset"
elif [ "$HTTP_CODE" = "000" ]; then
  fail "HF Space not reachable (connection failed or timed out)"
  hint "Check your network connection and that the Space is running."
  # hint prints its argument via %b, not as a printf format, so a single % is
  # correct here.
  hint "Try: curl -s -o /dev/null -w '%{http_code}' -X POST $PING_URL/reset"
  stop_at "Step 1"
else
  fail "HF Space /reset returned HTTP $HTTP_CODE (expected 200)"
  hint "Make sure your Space is running and the URL is correct."
  hint "Try opening $PING_URL in your browser first."
  stop_at "Step 1"
fi
|
| 114 |
+
|
| 115 |
+
log "${BOLD}Step 2/3: Running docker build${NC} ..."
|
| 116 |
+
|
| 117 |
+
if ! command -v docker &>/dev/null; then
|
| 118 |
+
fail "docker command not found"
|
| 119 |
+
hint "Install Docker: https://docs.docker.com/get-docker/"
|
| 120 |
+
stop_at "Step 2"
|
| 121 |
+
fi
|
| 122 |
+
|
| 123 |
+
if [ -f "$REPO_DIR/Dockerfile" ]; then
|
| 124 |
+
DOCKER_CONTEXT="$REPO_DIR"
|
| 125 |
+
elif [ -f "$REPO_DIR/server/Dockerfile" ]; then
|
| 126 |
+
DOCKER_CONTEXT="$REPO_DIR/server"
|
| 127 |
+
else
|
| 128 |
+
fail "No Dockerfile found in repo root or server/ directory"
|
| 129 |
+
stop_at "Step 2"
|
| 130 |
+
fi
|
| 131 |
+
|
| 132 |
+
log " Found Dockerfile in $DOCKER_CONTEXT"
|
| 133 |
+
|
| 134 |
+
BUILD_OK=false
|
| 135 |
+
BUILD_OUTPUT=$(run_with_timeout "$DOCKER_BUILD_TIMEOUT" docker build "$DOCKER_CONTEXT" 2>&1) && BUILD_OK=true
|
| 136 |
+
|
| 137 |
+
if [ "$BUILD_OK" = true ]; then
|
| 138 |
+
pass "Docker build succeeded"
|
| 139 |
+
else
|
| 140 |
+
fail "Docker build failed (timeout=${DOCKER_BUILD_TIMEOUT}s)"
|
| 141 |
+
printf "%s\n" "$BUILD_OUTPUT" | tail -20
|
| 142 |
+
stop_at "Step 2"
|
| 143 |
+
fi
|
| 144 |
+
|
| 145 |
+
log "${BOLD}Step 3/3: Running openenv validate${NC} ..."
|
| 146 |
+
|
| 147 |
+
if ! command -v openenv &>/dev/null; then
|
| 148 |
+
fail "openenv command not found"
|
| 149 |
+
hint "Install it: pip install openenv-core"
|
| 150 |
+
stop_at "Step 3"
|
| 151 |
+
fi
|
| 152 |
+
|
| 153 |
+
VALIDATE_OK=false
|
| 154 |
+
VALIDATE_OUTPUT=$(cd "$REPO_DIR" && openenv validate 2>&1) && VALIDATE_OK=true
|
| 155 |
+
|
| 156 |
+
if [ "$VALIDATE_OK" = true ]; then
|
| 157 |
+
pass "openenv validate passed"
|
| 158 |
+
[ -n "$VALIDATE_OUTPUT" ] && log " $VALIDATE_OUTPUT"
|
| 159 |
+
else
|
| 160 |
+
fail "openenv validate failed"
|
| 161 |
+
printf "%s\n" "$VALIDATE_OUTPUT"
|
| 162 |
+
stop_at "Step 3"
|
| 163 |
+
fi
|
| 164 |
+
|
| 165 |
+
printf "\n"
|
| 166 |
+
printf "${BOLD}========================================${NC}\n"
|
| 167 |
+
printf "${GREEN}${BOLD} All 3/3 checks passed!${NC}\n"
|
| 168 |
+
printf "${GREEN}${BOLD} Your submission is ready to submit.${NC}\n"
|
| 169 |
+
printf "${BOLD}========================================${NC}\n"
|
| 170 |
+
printf "\n"
|
| 171 |
+
|
| 172 |
+
exit 0
|
server/__init__.py
ADDED
|
File without changes
|
server/app.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from openenv.core.env_server import create_app
|
| 2 |
+
from server.inventory_env import InventoryEnvironment
|
| 3 |
+
from server.grader import grade, compute_baselines
|
| 4 |
+
from server.constants import TASKS
|
| 5 |
+
from models import InventoryAction, InventoryObservation
|
| 6 |
+
|
| 7 |
+
app = create_app(InventoryEnvironment, InventoryAction, InventoryObservation, env_name="inventory_env")
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@app.get("/tasks")
def list_tasks():
    """Return every configured task and its parameters as {"tasks": [...]}."""

    def describe(name, config):
        # (min, max) demand tuples are converted to lists for the response.
        demand = {product: list(bounds) for product, bounds in config["base_demand"].items()}
        return {
            "task_name": name,
            "seed": config["seed"],
            "max_days": config["max_days"],
            "initial_cash": config["initial_cash"],
            "initial_stock": config["initial_stock"],
            "inventory_capacity": config["inventory_capacity"],
            "base_demand": demand,
            "events": config["events"],
        }

    return {"tasks": [describe(name, config) for name, config in TASKS.items()]}
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@app.post("/grader")
def grader_endpoint(task_name: str, agent_profit: float):
    """Score ``agent_profit`` for ``task_name``.

    Returns the task's floor and ceiling baselines together with the score,
    or an ``{"error": ...}`` dict when the task name is unknown.
    """
    if task_name not in TASKS:
        return {"error": f"Unknown task: {task_name}. Available: {list(TASKS.keys())}"}

    floor, ceiling = compute_baselines(task_name)
    score = grade(task_name, agent_profit)

    response = {"task_name": task_name, "agent_profit": agent_profit}
    response["floor"] = floor
    response["ceiling"] = ceiling
    response["score"] = score
    return response
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
@app.get("/baseline")
def baseline_endpoint(task_name: str = "easy"):
    """Run inference.py in a subprocess and return the score it printed.

    The script's stdout is scanned for the final-score line of ``task_name``
    (``<task>: <score> (profit: ...``); the last match wins.

    Returns:
        ``{"task_name": ..., "score": ...}`` where ``score`` is ``None`` if
        no score line was found although the script exited successfully, or
        ``{"error": ...}`` for an unknown task, a timeout, a failure to
        launch the script, or a non-zero exit without a score.
    """
    import os
    import re
    import subprocess
    import sys
    from pathlib import Path

    if task_name not in TASKS:
        return {"error": f"Unknown task: {task_name}. Available: {list(TASKS.keys())}"}

    env = os.environ.copy()
    env["TASK_NAME"] = task_name

    # Resolve the script relative to this file (inference.py lives one level
    # above the server package) and reuse the interpreter running the server,
    # so the call does not depend on the working directory or on which
    # "python" happens to be first on PATH.
    script = Path(__file__).resolve().parent.parent / "inference.py"

    try:
        result = subprocess.run(
            [sys.executable, str(script)],
            capture_output=True,
            text=True,
            timeout=1200,
            env=env,
        )
    except subprocess.TimeoutExpired:
        return {"error": "Inference timed out (20 min limit)"}
    except Exception as e:
        return {"error": str(e)}

    # parse score from output
    score = None
    score_pattern = re.compile(r"(\d+\.\d+)\s*\(profit")
    for line in result.stdout.splitlines():
        if task_name + ":" in line and "profit" in line:
            score_match = score_pattern.search(line)
            if score_match:
                score = float(score_match.group(1))

    # A crashed run must not be reported as a silent "score: null".
    if score is None and result.returncode != 0:
        return {"error": f"Inference failed with exit code {result.returncode}"}

    return {
        "task_name": task_name,
        "score": score,
    }
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def main():
    """Serve the app with uvicorn on 0.0.0.0:8000."""
    from uvicorn import run

    run(app, host="0.0.0.0", port=8000)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
if __name__ == "__main__":
|
| 92 |
+
main()
|
server/constants.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INITIAL_CASH = 1000.0
|
| 2 |
+
|
| 3 |
+
# Product name -> base price (selling price before multiplier)
|
| 4 |
+
BASE_PRICES = {
|
| 5 |
+
"electronics": 150.0,
|
| 6 |
+
"clothing": 40.0,
|
| 7 |
+
"groceries": 10.0,
|
| 8 |
+
"furniture": 200.0,
|
| 9 |
+
"toys": 25.0,
|
| 10 |
+
}
|
| 11 |
+
|
| 12 |
+
# Product name -> cost price (what you pay to buy stock)
|
| 13 |
+
COST_PRICES = {
|
| 14 |
+
"electronics": 100.0,
|
| 15 |
+
"clothing": 25.0,
|
| 16 |
+
"groceries": 5.0,
|
| 17 |
+
"furniture": 130.0,
|
| 18 |
+
"toys": 12.0,
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
# Product name -> shelf life in days (None = no expiry)
|
| 22 |
+
SHELF_LIFE = {
|
| 23 |
+
"electronics": None,
|
| 24 |
+
"clothing": None,
|
| 25 |
+
"groceries": 5,
|
| 26 |
+
"furniture": None,
|
| 27 |
+
"toys": None,
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
# Product name -> starting stock quantity
|
| 31 |
+
INITIAL_STOCK = {
|
| 32 |
+
"electronics": 10,
|
| 33 |
+
"clothing": 20,
|
| 34 |
+
"groceries": 50,
|
| 35 |
+
"furniture": 5,
|
| 36 |
+
"toys": 30,
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
# Delivery method -> cost per unit
|
| 40 |
+
SHIPPING_COST = {
|
| 41 |
+
"slow": 2.0,
|
| 42 |
+
"medium": 5.0,
|
| 43 |
+
"fast": 10.0,
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
# Delivery method -> days to arrive
|
| 47 |
+
SHIPPING_DAYS = {
|
| 48 |
+
"slow": 5,
|
| 49 |
+
"medium": 3,
|
| 50 |
+
"fast": 1,
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
# Event name -> days until event (spread across 30 days)
|
| 54 |
+
EVENTS = {
|
| 55 |
+
"black_friday": 6,
|
| 56 |
+
"christmas": 12,
|
| 57 |
+
"back_to_school": 18,
|
| 58 |
+
"summer_clearance": 24,
|
| 59 |
+
"new_competitor": 28,
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
# Product name -> max inventory space (units)
|
| 63 |
+
INVENTORY_CAPACITY = {
|
| 64 |
+
"electronics": 100,
|
| 65 |
+
"clothing": 200,
|
| 66 |
+
"groceries": 500,
|
| 67 |
+
"furniture": 50,
|
| 68 |
+
"toys": 300,
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
# Product name -> additional cost per unit for extra inventory beyond capacity
|
| 72 |
+
EXTRA_INVENTORY_COST = {
|
| 73 |
+
"electronics": 20.0,
|
| 74 |
+
"clothing": 5.0,
|
| 75 |
+
"groceries": 2.0,
|
| 76 |
+
"furniture": 30.0,
|
| 77 |
+
"toys": 4.0,
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
# Product name -> (min_demand, max_demand) per day
|
| 81 |
+
BASE_DEMAND = {
|
| 82 |
+
"electronics": (3, 8),
|
| 83 |
+
"clothing": (5, 15),
|
| 84 |
+
"groceries": (20, 40),
|
| 85 |
+
"furniture": (1, 3),
|
| 86 |
+
"toys": (5, 12),
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
WEEKEND_MULTIPLIER = 1.2
|
| 90 |
+
|
| 91 |
+
# Event name -> {product: demand_multiplier} when event triggers
|
| 92 |
+
EVENT_EFFECTS = {
|
| 93 |
+
"black_friday": {"electronics": 3.0, "clothing": 2.5, "toys": 2.0, "furniture": 1.5, "groceries": 1.0},
|
| 94 |
+
"christmas": {"toys": 3.0, "electronics": 2.0, "clothing": 1.5, "furniture": 1.0, "groceries": 1.5},
|
| 95 |
+
"back_to_school": {"clothing": 2.5, "electronics": 1.5, "toys": 1.5, "furniture": 1.0, "groceries": 1.0},
|
| 96 |
+
"summer_clearance": {"clothing": 2.0, "toys": 1.5, "electronics": 1.0, "furniture": 1.5, "groceries": 1.0},
|
| 97 |
+
"new_competitor": {"electronics": 0.6, "clothing": 0.7, "toys": 0.7, "furniture": 0.8, "groceries": 0.9},
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
EVENT_DURATION = 2
|
| 101 |
+
|
| 102 |
+
MAX_DAYS = 30
|
| 103 |
+
|
| 104 |
+
UPGRADE_DELIVERY_COST = 50.0
|
| 105 |
+
|
| 106 |
+
# Task configs for easy/medium/hard
|
| 107 |
+
TASKS = {
|
| 108 |
+
# Easy: Low demand, no events, full warehouse capacity (starting stock is below the default INITIAL_STOCK).
|
| 109 |
+
# Agent just needs to maintain stock and sell. Minimal challenge.
|
| 110 |
+
"easy": {
|
| 111 |
+
"seed": 100,
|
| 112 |
+
"max_days": 30,
|
| 113 |
+
"initial_cash": 1000.0,
|
| 114 |
+
"events": {}, # no events
|
| 115 |
+
"initial_stock": {
|
| 116 |
+
"electronics": 5,
|
| 117 |
+
"clothing": 10,
|
| 118 |
+
"groceries": 20,
|
| 119 |
+
"furniture": 3,
|
| 120 |
+
"toys": 10,
|
| 121 |
+
},
|
| 122 |
+
"inventory_capacity": INVENTORY_CAPACITY,
|
| 123 |
+
"base_demand": {
|
| 124 |
+
"electronics": (2, 5),
|
| 125 |
+
"clothing": (3, 10),
|
| 126 |
+
"groceries": (15, 30),
|
| 127 |
+
"furniture": (1, 2),
|
| 128 |
+
"toys": (3, 8),
|
| 129 |
+
},
|
| 130 |
+
},
|
| 131 |
+
# Medium: Default stock/cash, all 5 events spread across 30 days, normal demand.
|
| 132 |
+
# Agent must anticipate demand spikes from events and restock accordingly.
|
| 133 |
+
"medium": {
|
| 134 |
+
"seed": 200,
|
| 135 |
+
"max_days": 30,
|
| 136 |
+
"initial_cash": 1000.0,
|
| 137 |
+
"events": EVENTS,
|
| 138 |
+
"initial_stock": INITIAL_STOCK,
|
| 139 |
+
"inventory_capacity": INVENTORY_CAPACITY,
|
| 140 |
+
"base_demand": BASE_DEMAND,
|
| 141 |
+
},
|
| 142 |
+
# Hard: Half starting cash ($500), low stock, events packed close together,
|
| 143 |
+
# higher demand, smaller warehouse. Agent must balance tight budget,
|
| 144 |
+
# overlapping event spikes, and fast-expiring groceries.
|
| 145 |
+
"hard": {
|
| 146 |
+
"seed": 300,
|
| 147 |
+
"max_days": 30,
|
| 148 |
+
"initial_cash": 500.0,
|
| 149 |
+
"events": {
|
| 150 |
+
"black_friday": 4,
|
| 151 |
+
"christmas": 8,
|
| 152 |
+
"back_to_school": 12,
|
| 153 |
+
"summer_clearance": 16,
|
| 154 |
+
"new_competitor": 20,
|
| 155 |
+
},
|
| 156 |
+
"initial_stock": {
|
| 157 |
+
"electronics": 5,
|
| 158 |
+
"clothing": 10,
|
| 159 |
+
"groceries": 30,
|
| 160 |
+
"furniture": 3,
|
| 161 |
+
"toys": 15,
|
| 162 |
+
},
|
| 163 |
+
"inventory_capacity": {
|
| 164 |
+
"electronics": 50,
|
| 165 |
+
"clothing": 100,
|
| 166 |
+
"groceries": 250,
|
| 167 |
+
"furniture": 25,
|
| 168 |
+
"toys": 150,
|
| 169 |
+
},
|
| 170 |
+
"base_demand": {
|
| 171 |
+
"electronics": (5, 12),
|
| 172 |
+
"clothing": (8, 20),
|
| 173 |
+
"groceries": (30, 60),
|
| 174 |
+
"furniture": (2, 5),
|
| 175 |
+
"toys": (8, 18),
|
| 176 |
+
},
|
| 177 |
+
},
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
PRICE_ELASTICITY = {
|
| 181 |
+
"electronics": 1.2,
|
| 182 |
+
"clothing": 1.5,
|
| 183 |
+
"groceries": 0.4,
|
| 184 |
+
"furniture": 0.8,
|
| 185 |
+
"toys": 1.3,
|
| 186 |
+
}
|
server/grader.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Grader for inventory optimization tasks.
|
| 3 |
+
Scores agent performance on a 0.0-1.0 scale using floor/ceiling approach.
|
| 4 |
+
- floor: passive agent (no buys, just sells initial stock until empty)
|
| 5 |
+
- ceiling: theoretical max profit with perfect demand knowledge
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from server.inventory_env import InventoryEnvironment
|
| 9 |
+
from models import InventoryAction
|
| 10 |
+
from server.constants import (
|
| 11 |
+
TASKS, BASE_PRICES, COST_PRICES, SHIPPING_COST, EVENT_EFFECTS,
|
| 12 |
+
WEEKEND_MULTIPLIER, EVENT_DURATION,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
import random
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _run_passive(task_name):
    """Floor baseline: step the environment with empty actions until done.

    Nothing is bought or liquidated, so only the stock the environment
    starts with can be sold.  Returns the final ``total_profit``.
    """
    env = InventoryEnvironment(task_name)
    obs = env.reset()

    while not obs.done:
        # A fresh no-op action is built for every step.
        noop = InventoryAction(
            buy_quantities={},
            delivery_method="slow",
            liquidate={},
        )
        obs = env.step(noop)

    return obs.total_profit
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _run_heuristic(task_name):
    """Ceiling baseline: profit if all simulated demand were served.

    Demand is simulated per day and product from the task's seed, demand
    bounds, weekend multiplier and event effects, and summed per product.
    Units covered by the initial stock earn the full base price; every
    further unit earns base price minus cost price minus slow shipping.

    NOTE(review): the demand simulation presumably mirrors the one in
    InventoryEnvironment -- confirm the two stay in sync.
    """
    cfg = TASKS[task_name]
    countdown = dict(cfg["events"])  # private copy: the loop mutates it
    demand_bounds = cfg["base_demand"]
    demand_totals = dict.fromkeys(demand_bounds, 0)

    for day in range(1, cfg["max_days"] + 1):
        # tick events
        for name in countdown:
            countdown[name] -= 1

        # one generator per day, seeded from the task seed and the day number
        rng = random.Random(cfg["seed"] * 1000 + day)
        is_weekend = day % 7 in (5, 6)

        for product, (lo, hi) in demand_bounds.items():
            units = rng.randint(lo, hi)

            if is_weekend:
                units = int(WEEKEND_MULTIPLIER * units)

            # an event applies while its countdown is in (-EVENT_DURATION, 0]
            for name, days_left in countdown.items():
                if name in EVENT_EFFECTS and -EVENT_DURATION < days_left <= 0:
                    units = int(units * EVENT_EFFECTS[name].get(product, 1.0))

            demand_totals[product] += units

    stock = cfg["initial_stock"]
    profit = 0.0

    for product in demand_bounds:
        on_hand = stock.get(product, 0)
        wanted = demand_totals[product]

        # initial stock is sold first, at full base price
        profit += min(on_hand, wanted) * BASE_PRICES[product]

        # cost price and slow shipping apply to everything beyond it
        shortfall = max(0, wanted - on_hand)
        profit += shortfall * (BASE_PRICES[product] - COST_PRICES[product] - SHIPPING_COST["slow"])

    return profit
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def compute_baselines(task_name):
    """Return ``(floor, ceiling)`` for a task.

    ``floor`` is the passive-agent profit and ``ceiling`` the heuristic
    upper bound; both are recomputed on every call.
    """
    return _run_passive(task_name), _run_heuristic(task_name)
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def grade(task_name, agent_profit):
    """
    Grade agent performance by its position between floor and ceiling.

    Args:
        task_name: "easy", "medium", or "hard"
        agent_profit: total profit achieved by the agent

    Returns:
        (agent_profit - floor) / (ceiling - floor), clamped to the range
        0.002 to 0.998.  If the ceiling does not exceed the floor, 1.0 when
        the agent reached the ceiling and 0.0 otherwise.
    """
    floor, ceiling = compute_baselines(task_name)

    # Degenerate baselines: no interval to interpolate on.
    if ceiling <= floor:
        if agent_profit >= ceiling:
            return 1.0
        return 0.0

    raw_score = (agent_profit - floor) / (ceiling - floor)
    capped = min(0.998, raw_score)
    return max(0.002, capped)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def grade_all(results):
    """
    Grade every task in ``results``.

    Args:
        results: dict of {task_name: agent_profit}

    Returns:
        dict of {task_name: score}
    """
    return {task_name: grade(task_name, agent_profit) for task_name, agent_profit in results.items()}
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
if __name__ == "__main__":
|
| 121 |
+
print("Computing baselines for all tasks...")
|
| 122 |
+
for task_name in ["easy", "medium", "hard"]:
|
| 123 |
+
floor, ceiling = compute_baselines(task_name)
|
| 124 |
+
print(f" {task_name}: floor={floor:.2f}, ceiling={ceiling:.2f}")
|
server/inventory_env.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from openenv.core.env_server.interfaces import Environment
|
| 2 |
+
import copy
|
| 3 |
+
import random
|
| 4 |
+
from uuid import uuid4
|
| 5 |
+
|
| 6 |
+
from models import InventoryAction, InventoryObservation, InventoryState
|
| 7 |
+
from .constants import (
|
| 8 |
+
INITIAL_CASH, BASE_PRICES, COST_PRICES, SHELF_LIFE, INITIAL_STOCK,
|
| 9 |
+
EVENTS, SHIPPING_COST, SHIPPING_DAYS, INVENTORY_CAPACITY,
|
| 10 |
+
EXTRA_INVENTORY_COST, BASE_DEMAND, WEEKEND_MULTIPLIER, EVENT_EFFECTS,
|
| 11 |
+
EVENT_DURATION, MAX_DAYS, UPGRADE_DELIVERY_COST, TASKS, PRICE_ELASTICITY
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _build_inventory(stock):
|
| 16 |
+
"""Convert stock dict to batch format: {product: [[qty, days_left], ...]}"""
|
| 17 |
+
inv = {}
|
| 18 |
+
for product, qty in stock.items():
|
| 19 |
+
shelf = SHELF_LIFE[product]
|
| 20 |
+
inv[product] = [[qty, shelf]]
|
| 21 |
+
return inv
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class InventoryEnvironment(Environment):
    """Seeded, day-by-day inventory simulation for one entry of ``TASKS``.

    Stock is kept per product as a list of ``[qty, days_left]`` batches in
    arrival order; sales and liquidation consume batches from the front of
    that list.  ``step`` only ages and expires the "groceries" product.
    Pending orders live in ``self.deliveries`` as single-entry dicts of the
    form ``{product: [qty, arrival_day]}``.
    """

    def __init__(self, task_name="medium"):
        """Load the configuration of *task_name* and start a fresh episode.

        Args:
            task_name: key into ``TASKS`` (default "medium").
        """
        self.task_name = task_name
        self.task = TASKS[task_name]

        # Static per-task configuration; reset() never changes these.
        self.seed = self.task["seed"]
        self.max_days = self.task["max_days"]
        self.inventory_capacity = self.task["inventory_capacity"]
        self.base_demand = self.task["base_demand"]

        # reset() initialises every piece of per-episode state.
        self.reset()

    def reset(self, seed: int = None) -> InventoryObservation:
        """Restart the episode at day 0 and return the initial observation.

        Args:
            seed: RNG seed for demand and delivery jitter; when ``None`` the
                task's own seed is restored.

        Returns:
            Observation of the initial stock, cash and event countdowns with
            zero profit, no demand and no pending deliveries.
        """
        self.seed = seed if seed is not None else self.task["seed"]
        self.cash = self.task["initial_cash"]
        self.inventory = _build_inventory(self.task["initial_stock"])
        self.events = copy.deepcopy(self.task["events"])
        self.deliveries = []
        self.current_day = 0
        self.total_profit = 0.0
        self.reward = 0.0

        self._state = InventoryState(
            episode_id=str(uuid4()),
            current_day=0,
            cash=self.task["initial_cash"],
            inventory=dict(self.task["initial_stock"]),
        )

        return self._make_observation(day_profit=0.0, demand={}, done=False)

    def step(self, action: InventoryAction) -> InventoryObservation:
        """Advance the simulation by one day.

        Phases, in order: tick event countdowns, expire groceries, receive
        due deliveries, place the purchases in *action*, generate and
        price-adjust demand, sell FIFO, liquidate stock, then update cash,
        profit and state.

        Args:
            action: the agent's decision for the day; ``buy_quantities``,
                ``delivery_method``, ``price_multipliers`` and ``liquidate``
                are read.

        Returns:
            Observation after the day has been simulated; ``reward`` is the
            shaped reward accumulated during this step only.
        """
        self.current_day += 1
        self.reward = 0.0  # reward is per-step, not cumulative

        # 1. tick event countdowns (they keep going negative so the number
        #    of days an event has been active can be derived from them)
        for event_name in self.events:
            self.events[event_name] -= 1

        self._expire_groceries()                                # 2
        self._receive_deliveries()                              # 3
        day_cost = self._process_purchases(action)              # 4
        demand, price_mults = self._priced_demand(action)       # 5
        day_revenue = self._sell(demand, price_mults)           # 6
        self._liquidate(action)                                 # 7

        # Purchase costs were already deducted from cash when ordered, so
        # only the revenue is added here.
        day_profit = day_revenue - day_cost
        self.cash += day_revenue
        self.total_profit += day_profit

        done = self.current_day >= self.max_days

        self._state = InventoryState(
            episode_id=self._state.episode_id,
            current_day=self.current_day,
            cash=self.cash,
            inventory={p: sum(b[0] for b in self.inventory[p]) for p in self.inventory},
        )

        return self._make_observation(day_profit=day_profit, demand=demand, done=done)

    def _remaining_capacity(self):
        """Return free storage per product, floored at zero."""
        return {
            product: max(0, self.inventory_capacity[product] - sum(b[0] for b in batches))
            for product, batches in self.inventory.items()
        }

    def _make_observation(self, day_profit, demand, done):
        """Snapshot the current simulation state as an observation."""
        return InventoryObservation(
            current_day=self.current_day,
            total_cash=self.cash,
            day_profit=day_profit,
            total_profit=self.total_profit,
            demand_today=demand,
            updated_inventory=copy.deepcopy(self.inventory),
            remaining_capacity=self._remaining_capacity(),
            updated_events=copy.deepcopy(self.events),
            updated_deliveries=copy.deepcopy(self.deliveries),
            reward=self.reward,
            done=done,
        )

    def _expire_groceries(self):
        """Drop grocery batches with no days left, age the rest by one day."""
        surviving = []
        expired_count = 0
        for qty, days_left in self.inventory["groceries"]:
            if days_left == 0:
                expired_count += qty
            else:
                surviving.append([qty, days_left - 1])
        self.inventory["groceries"] = surviving

        # waste is penalised per expired unit
        self.reward -= 0.05 * expired_count

    def _receive_deliveries(self):
        """Move deliveries whose arrival day has come into inventory."""
        still_pending = []
        for delivery in self.deliveries:
            for product, (qty, arrival_day) in delivery.items():
                if arrival_day <= self.current_day:
                    # arriving stock starts with a full shelf life
                    self.inventory[product].append([qty, SHELF_LIFE[product]])
                else:
                    still_pending.append(delivery)
        self.deliveries = still_pending

    def _process_purchases(self, action):
        """Place the orders in ``action.buy_quantities``; return money spent.

        Orders for unknown products or with a non-positive quantity are
        ignored (mirroring the liquidation guard) so that a negative
        quantity cannot credit cash or enqueue negative stock.  Orders the
        current cash cannot cover are skipped with a reward penalty.
        """
        spent = 0.0
        for product, qty in action.buy_quantities.items():
            if product not in self.inventory or qty <= 0:
                continue

            unit_cost = COST_PRICES[product] + SHIPPING_COST[action.delivery_method]
            total_cost = qty * unit_cost

            # units that would exceed storage capacity incur an extra fee
            # (pending deliveries are not counted, only stock on hand)
            on_hand = sum(b[0] for b in self.inventory[product])
            overage = max(0, (on_hand + qty) - self.inventory_capacity[product])
            total_cost += overage * EXTRA_INVENTORY_COST[product]

            if total_cost > self.cash:
                self.reward -= 0.5  # penalize for ordering what you can't afford
                continue

            self.cash -= total_cost
            spent += total_cost

            arrival_day = self.current_day + SHIPPING_DAYS[action.delivery_method]
            # Jitter: slow +/-2 days, medium +/-1 day, anything else none.
            # The RNG is seeded from a string because hash() of a str is
            # salted per interpreter process, which would make "seeded"
            # episodes differ from run to run.
            jitter_rng = random.Random(f"{self.seed}:{self.current_day}:{product}")
            if action.delivery_method == "slow":
                arrival_day += jitter_rng.randint(-2, 2)
            elif action.delivery_method == "medium":
                arrival_day += jitter_rng.randint(-1, 1)
            # ensure arrival is at least next day
            arrival_day = max(self.current_day + 1, arrival_day)
            self.deliveries.append({product: [qty, arrival_day]})
        return spent

    def _priced_demand(self, action):
        """Return ``(demand, price_mults)`` after applying price elasticity.

        Price multipliers are clamped to ``[0.5, 1.5]`` (default 1.0) and
        demand is scaled by ``multiplier ** -elasticity``.
        """
        demand = self._generate_demand()
        price_mults = {}
        for product in demand:
            pm = max(0.5, min(1.5, action.price_multipliers.get(product, 1.0)))
            price_mults[product] = pm
            elasticity = PRICE_ELASTICITY[product]
            demand[product] = max(0, int(demand[product] * pm ** -elasticity))
        return demand, price_mults

    @staticmethod
    def _consume_fifo(batches, count):
        """Remove *count* units from the front of *batches*.

        Returns a new batch list; untouched batches are carried over as-is
        and a partially consumed batch keeps its ``days_left``.
        """
        kept = []
        outstanding = count
        for batch in batches:
            if outstanding <= 0:
                kept.append(batch)
            elif batch[0] <= outstanding:
                outstanding -= batch[0]
            else:
                kept.append([batch[0] - outstanding, batch[1]])
                outstanding = 0
        return kept

    def _sell(self, demand, price_mults):
        """Serve the day's demand from stock (FIFO); return the revenue.

        Sold value is rewarded and unmet demand penalised, both scaled by
        0.001 of their value at the day's selling price.
        """
        revenue = 0.0
        for product, wanted in demand.items():
            sell_price = BASE_PRICES[product] * price_mults[product]
            available = sum(batch[0] for batch in self.inventory[product])

            if wanted > available:
                # stock-out: everything on hand is sold
                missed = wanted - available
                revenue += available * sell_price
                self.inventory[product] = []
                self.reward -= missed * sell_price * 0.001
                self.reward += available * sell_price * 0.001
            else:
                revenue += wanted * sell_price
                self.reward += wanted * sell_price * 0.001
                self.inventory[product] = self._consume_fifo(self.inventory[product], wanted)
        return revenue

    def _liquidate(self, action):
        """Discard stock listed in ``action.liquidate`` (FIFO, no revenue).

        The cost price of the units actually removed is charged against the
        reward (scaled by 0.001); cash and profit are not affected.
        """
        total_loss = 0.0
        for product, count in action.liquidate.items():
            if product not in self.inventory or count <= 0:
                continue
            on_hand = sum(b[0] for b in self.inventory[product])
            total_loss += min(count, on_hand) * COST_PRICES[product]
            self.inventory[product] = self._consume_fifo(self.inventory[product], count)

        self.reward -= total_loss * 0.001

    def _generate_demand(self):
        """Draw the current day's base demand per product.

        The RNG is derived from the seed and the day number, so a given
        (seed, day) pair always produces the same draw.  Weekend and active
        event multipliers are applied afterwards, truncating to int.
        """
        rng = random.Random(self.seed * 1000 + self.current_day)
        demand = {}

        for product, (lo, hi) in self.base_demand.items():
            demand[product] = rng.randint(lo, hi)

        # weekend boost
        if self.current_day % 7 in (5, 6):
            for product in demand:
                demand[product] = int(demand[product] * WEEKEND_MULTIPLIER)

        # active event multipliers: an event is active once its countdown
        # reaches 0 and stays active for EVENT_DURATION days
        for event_name, days in self.events.items():
            if -EVENT_DURATION < days <= 0 and event_name in EVENT_EFFECTS:
                for product, mult in EVENT_EFFECTS[event_name].items():
                    demand[product] = int(demand[product] * mult)

        return demand

    @property
    def state(self) -> InventoryState:
        """Most recent ``InventoryState`` produced by reset() or step()."""
        return self._state
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|