tazwarrrr committed on
Commit
fcea1da
·
1 Parent(s): e76f404

Fix encoding: remove all non-ASCII chars from app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -59
app.py CHANGED
@@ -1,5 +1,5 @@
1
- """
2
- ROCmPort AI ΓÇö Gradio Space entry point
3
  Calls the deployed FastAPI backend (Render) and streams agent events.
4
  """
5
 
@@ -9,20 +9,20 @@ import json
9
 
10
  BACKEND_URL = "https://rocmport-ai-q2b1.onrender.com"
11
 
12
- AGENT_ICONS = {
13
- "analyzer": "🔍",
14
- "translator": "🔄",
15
- "optimizer": "ΓÜí",
16
- "tester": "🧪",
17
- "coordinator": "🎯",
18
  }
19
 
20
- STATUS_ICONS = {
21
- "waiting": "⏳",
22
- "running": "🔄",
23
- "done": "✅",
24
- "failed": "❌",
25
- "retrying": "🔁",
26
  }
27
 
28
  EXAMPLE_REDUCTION = """\
@@ -58,7 +58,7 @@ __global__ void vectorAdd(const float *A, const float *B, float *C, int n) {
58
  C[i] = A[i] + B[i];
59
  // Warp-size assumption: 32 threads per warp (wrong on AMD wavefront-64)
60
  if (threadIdx.x % 32 == 0) {
61
- printf("Warp leader: %d\\n", threadIdx.x / 32);
62
  }
63
  }
64
  }"""
@@ -85,7 +85,7 @@ __global__ void matmul(float *A, float *B, float *C, int N) {
85
  def port_kernel(cuda_code: str, kernel_name: str, simple_mode: bool):
86
  """Generator: streams agent events and yields (log_markdown, hip_code)."""
87
  if not cuda_code or len(cuda_code.strip()) < 10:
88
- yield "❌ Please provide CUDA kernel code (at least 10 characters).", ""
89
  return
90
 
91
  kernel_name = kernel_name.strip() or "custom"
@@ -98,7 +98,7 @@ def port_kernel(cuda_code: str, kernel_name: str, simple_mode: bool):
98
  "simple_mode": bool(simple_mode),
99
  }
100
 
101
- log_lines.append("🚀 **Connecting to ROCmPort AI backend…**")
102
  yield "\n\n".join(log_lines), hip_code
103
 
104
  try:
@@ -126,12 +126,11 @@ def port_kernel(cuda_code: str, kernel_name: str, simple_mode: bool):
126
  message = event.get("message", "")
127
  detail = event.get("detail") or ""
128
 
129
- icon = AGENT_ICONS.get(agent, "🤖")
130
- s_icon = STATUS_ICONS.get(status, "ΓÇó")
131
 
132
- log_lines.append(f"{icon} **{agent.capitalize()}** {s_icon} ΓÇö {message}")
133
 
134
- # Extract HIP code from coordinator or translator done events
135
  if status == "done" and detail:
136
  try:
137
  detail_json = json.loads(detail)
@@ -150,58 +149,55 @@ def port_kernel(cuda_code: str, kernel_name: str, simple_mode: bool):
150
 
151
  except httpx.ConnectError:
152
  log_lines.append(
153
- "❌ **Could not connect to backend.**\n\n"
154
- "> The server may be in a cold-start state ΓÇö please wait ~30 s and retry."
155
  )
156
  yield "\n\n".join(log_lines), hip_code
157
  return
158
  except httpx.TimeoutException:
159
- log_lines.append("⏱️ **Request timed out.** The pipeline may still be running — try again shortly.")
160
  yield "\n\n".join(log_lines), hip_code
161
  return
162
  except httpx.HTTPStatusError as exc:
163
- log_lines.append(f"❌ **HTTP {exc.response.status_code}**: {exc.response.text[:300]}")
164
  yield "\n\n".join(log_lines), hip_code
165
  return
166
  except Exception as exc: # noqa: BLE001
167
- log_lines.append(f"❌ **Unexpected error**: {exc}")
168
  yield "\n\n".join(log_lines), hip_code
169
  return
170
 
171
  if not hip_code:
172
- log_lines.append("\n⚠️ Pipeline finished but no HIP code was extracted. Check agent logs above.")
173
  else:
174
- log_lines.append("\n✅ **Migration complete.** HIP code is shown on the right →")
175
 
176
  yield "\n\n".join(log_lines), hip_code
177
 
178
 
179
- # ΓöÇΓöÇ UI ΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓö��ΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇ
180
-
181
- CSS = """
182
- .panel-header { font-weight: 600; font-size: 1rem; margin-bottom: 4px; }
183
- footer { display: none !important; }
184
- """
185
 
186
- with gr.Blocks(title="ROCmPort AI CUDA to ROCm Migration") as demo:
187
 
188
  gr.Markdown(
189
- """# ΓÜí ROCmPort AI
190
- ### Agentic CUDA → ROCm/HIP migration with wavefront-64 bug detection
191
 
192
- > **Backend API**: [rocmport-ai-q2b1.onrender.com](https://rocmport-ai-q2b1.onrender.com) &nbsp;|&nbsp;
193
  > **GitHub**: [tazwaryayyyy/ROCmPort-AI](https://github.com/tazwaryayyyy/ROCmPort-AI)
194
 
195
- `hipify-clang` translates CUDA API calls mechanically ΓÇö it **cannot** detect that `if (tid < 32)` in a
196
- warp-level reduction silently skips lanes 32ΓÇô63 on AMD wavefront-64.
197
  The code compiles, the output is wrong, no errors. **ROCmPort AI catches this before execution.**
198
  """
199
  )
200
 
201
  with gr.Row():
202
- # ΓöÇΓöÇ Left: input ΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇ
203
  with gr.Column(scale=1):
204
- gr.Markdown("### 📥 Input", elem_classes="panel-header")
205
  cuda_input = gr.Code(
206
  label="CUDA Kernel Code",
207
  language="cpp",
@@ -216,13 +212,13 @@ The code compiles, the output is wrong, no errors. **ROCmPort AI catches this be
216
  scale=2,
217
  )
218
  simple_mode = gr.Checkbox(
219
- label="Explain Like I'm 5",
220
  value=False,
221
  scale=1,
222
  )
223
  with gr.Row():
224
- port_btn = gr.Button("ΓÜí Port to ROCm", variant="primary", scale=3)
225
- clear_btn = gr.Button("🗑 Clear", scale=1)
226
 
227
  gr.Examples(
228
  examples=[
@@ -234,15 +230,14 @@ The code compiles, the output is wrong, no errors. **ROCmPort AI catches this be
234
  label="Demo Kernels (pre-loaded with intentional AMD bugs)",
235
  )
236
 
237
- # ΓöÇΓöÇ Right: output ΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇ
238
  with gr.Column(scale=1):
239
- gr.Markdown("### 📤 Output", elem_classes="panel-header")
240
  log_output = gr.Markdown(
241
  value="*Agent steps will appear here once you click **Port to ROCm**.*",
242
  label="Agent Pipeline Log",
243
  )
244
  hip_output = gr.Code(
245
- label="Translated & Optimized HIP Code",
246
  language="cpp",
247
  lines=18,
248
  )
@@ -254,16 +249,16 @@ The code compiles, the output is wrong, no errors. **ROCmPort AI catches this be
254
 
255
  | Agent | Role |
256
  |-------|------|
257
- | 🔍 **Analyzer** | Scans CUDA for AMD-specific risks: wavefront size, ballot/shuffle idioms, shared-memory layout |
258
- | 🔄 **Translator** | Runs `hipify` then applies LLM-guided fixes for bugs `hipify` cannot detect |
259
- | 🧪 **Tester** | Verifies compilation with `hipcc` and checks output correctness |
260
- | ΓÜí **Optimizer** | Proposes MI300X-specific optimisations; re-tested against baseline |
261
- | 🎯 **Coordinator** | Orchestrates the loop; retries up to 3× if the optimised output regresses |
262
 
263
  ### The key bug: warp-size assumption
264
 
265
  ```c
266
- // NVIDIA (warpSize = 32) ΓÇö silently WRONG on AMD
267
  if (tid < 32) { vsmem[tid] += vsmem[tid + 32]; ... }
268
 
269
  // AMD-correct (wavefront = 64)
@@ -277,16 +272,15 @@ if (tid < 64) {
277
 
278
  | Kernel | Result |
279
  |--------|--------|
280
- | matrix_multiply 512×512 | 2.91× speedup over baseline HIP |
281
- | vector_add 32 M elements | ~3 918 GB/s (~74 % of MI300X peak) |
282
- | reduction 16 M elements | correctness PASS after wavefront-64 fix |
283
 
284
- > Source: `docs/benchmark_runs/` ΓÇö real `rocprof` CSV output, May 2026.
285
  > Results vary with kernel complexity; these figures are not guaranteed on every input.
286
  """
287
  )
288
 
289
- # ΓöÇΓöÇ Event wiring ΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇΓöÇ
290
  port_btn.click(
291
  fn=port_kernel,
292
  inputs=[cuda_input, kernel_name, simple_mode],
 
1
+ """
2
+ ROCmPort AI - Gradio Space entry point
3
  Calls the deployed FastAPI backend (Render) and streams agent events.
4
  """
5
 
 
9
 
10
  BACKEND_URL = "https://rocmport-ai-q2b1.onrender.com"
11
 
12
+ AGENT_LABELS = {
13
+ "analyzer": "Analyzer",
14
+ "translator": "Translator",
15
+ "optimizer": "Optimizer",
16
+ "tester": "Tester",
17
+ "coordinator": "Coordinator",
18
  }
19
 
20
+ STATUS_LABELS = {
21
+ "waiting": "[waiting]",
22
+ "running": "[running]",
23
+ "done": "[done]",
24
+ "failed": "[FAILED]",
25
+ "retrying": "[retrying]",
26
  }
27
 
28
  EXAMPLE_REDUCTION = """\
 
58
  C[i] = A[i] + B[i];
59
  // Warp-size assumption: 32 threads per warp (wrong on AMD wavefront-64)
60
  if (threadIdx.x % 32 == 0) {
61
+ printf("Warp leader: %d\n", threadIdx.x / 32);
62
  }
63
  }
64
  }"""
 
85
  def port_kernel(cuda_code: str, kernel_name: str, simple_mode: bool):
86
  """Generator: streams agent events and yields (log_markdown, hip_code)."""
87
  if not cuda_code or len(cuda_code.strip()) < 10:
88
+ yield "Please provide CUDA kernel code (at least 10 characters).", ""
89
  return
90
 
91
  kernel_name = kernel_name.strip() or "custom"
 
98
  "simple_mode": bool(simple_mode),
99
  }
100
 
101
+ log_lines.append("**Connecting to ROCmPort AI backend...**")
102
  yield "\n\n".join(log_lines), hip_code
103
 
104
  try:
 
126
  message = event.get("message", "")
127
  detail = event.get("detail") or ""
128
 
129
+ label = AGENT_LABELS.get(agent, agent.capitalize())
130
+ s_label = STATUS_LABELS.get(status, status)
131
 
132
+ log_lines.append(f"**{label}** {s_label} -- {message}")
133
 
 
134
  if status == "done" and detail:
135
  try:
136
  detail_json = json.loads(detail)
 
149
 
150
  except httpx.ConnectError:
151
  log_lines.append(
152
+ "**Could not connect to backend.**\n\n"
153
+ "> The server may be cold-starting -- please wait ~30 s and retry."
154
  )
155
  yield "\n\n".join(log_lines), hip_code
156
  return
157
  except httpx.TimeoutException:
158
+ log_lines.append("**Request timed out.** The pipeline may still be running -- try again shortly.")
159
  yield "\n\n".join(log_lines), hip_code
160
  return
161
  except httpx.HTTPStatusError as exc:
162
+ log_lines.append(f"**HTTP {exc.response.status_code}**: {exc.response.text[:300]}")
163
  yield "\n\n".join(log_lines), hip_code
164
  return
165
  except Exception as exc: # noqa: BLE001
166
+ log_lines.append(f"**Unexpected error**: {exc}")
167
  yield "\n\n".join(log_lines), hip_code
168
  return
169
 
170
  if not hip_code:
171
+ log_lines.append("\nPipeline finished but no HIP code was extracted. Check agent logs above.")
172
  else:
173
+ log_lines.append("\n**Migration complete.** HIP code is shown on the right.")
174
 
175
  yield "\n\n".join(log_lines), hip_code
176
 
177
 
178
+ CSS = (
179
+ ".panel-header { font-weight: 600; font-size: 1rem; margin-bottom: 4px; } "
180
+ "footer { display: none !important; }"
181
+ )
 
 
182
 
183
+ with gr.Blocks(title="ROCmPort AI -- CUDA to ROCm Migration") as demo:
184
 
185
  gr.Markdown(
186
+ """# ROCmPort AI
187
+ ### Agentic CUDA to ROCm/HIP migration with wavefront-64 bug detection
188
 
189
+ > **Backend API**: [rocmport-ai-q2b1.onrender.com](https://rocmport-ai-q2b1.onrender.com) |
190
  > **GitHub**: [tazwaryayyyy/ROCmPort-AI](https://github.com/tazwaryayyyy/ROCmPort-AI)
191
 
192
+ `hipify-clang` translates CUDA API calls mechanically -- it **cannot** detect that `if (tid < 32)` in a
193
+ warp-level reduction silently skips lanes 32-63 on AMD wavefront-64.
194
  The code compiles, the output is wrong, no errors. **ROCmPort AI catches this before execution.**
195
  """
196
  )
197
 
198
  with gr.Row():
 
199
  with gr.Column(scale=1):
200
+ gr.Markdown("### Input", elem_classes="panel-header")
201
  cuda_input = gr.Code(
202
  label="CUDA Kernel Code",
203
  language="cpp",
 
212
  scale=2,
213
  )
214
  simple_mode = gr.Checkbox(
215
+ label="Explain Like I am 5",
216
  value=False,
217
  scale=1,
218
  )
219
  with gr.Row():
220
+ port_btn = gr.Button("Port to ROCm", variant="primary", scale=3)
221
+ clear_btn = gr.Button("Clear", scale=1)
222
 
223
  gr.Examples(
224
  examples=[
 
230
  label="Demo Kernels (pre-loaded with intentional AMD bugs)",
231
  )
232
 
 
233
  with gr.Column(scale=1):
234
+ gr.Markdown("### Output", elem_classes="panel-header")
235
  log_output = gr.Markdown(
236
  value="*Agent steps will appear here once you click **Port to ROCm**.*",
237
  label="Agent Pipeline Log",
238
  )
239
  hip_output = gr.Code(
240
+ label="Translated and Optimized HIP Code",
241
  language="cpp",
242
  lines=18,
243
  )
 
249
 
250
  | Agent | Role |
251
  |-------|------|
252
+ | **Analyzer** | Scans CUDA for AMD-specific risks: wavefront size, ballot/shuffle idioms, shared-memory layout |
253
+ | **Translator** | Runs `hipify` then applies LLM-guided fixes for bugs `hipify` cannot detect |
254
+ | **Tester** | Verifies compilation with `hipcc` and checks output correctness |
255
+ | **Optimizer** | Proposes MI300X-specific optimisations; re-tested against baseline |
256
+ | **Coordinator** | Orchestrates the loop; retries up to 3x if the optimised output regresses |
257
 
258
  ### The key bug: warp-size assumption
259
 
260
  ```c
261
+ // NVIDIA (warpSize = 32) -- silently WRONG on AMD
262
  if (tid < 32) { vsmem[tid] += vsmem[tid + 32]; ... }
263
 
264
  // AMD-correct (wavefront = 64)
 
272
 
273
  | Kernel | Result |
274
  |--------|--------|
275
+ | matrix_multiply 512x512 | 2.91x speedup over baseline HIP |
276
+ | vector_add 32M elements | ~3918 GB/s (~74% of MI300X peak) |
277
+ | reduction 16M elements | correctness PASS after wavefront-64 fix |
278
 
279
+ > Source: `docs/benchmark_runs/` -- real `rocprof` CSV output, May 2026.
280
  > Results vary with kernel complexity; these figures are not guaranteed on every input.
281
  """
282
  )
283
 
 
284
  port_btn.click(
285
  fn=port_kernel,
286
  inputs=[cuda_input, kernel_name, simple_mode],