"""
Tzefa - Complete Pipeline Demo Space

Image -> Binarization -> Line Segmentation -> Word Segmentation
      -> OCR -> Error Correction -> Compilation -> Execution
"""
import os
import gc
import sys
import subprocess
import importlib
import traceback
import cv2
import torch
import numpy as np
from PIL import Image
import gradio as gr
from huggingface_hub import hf_hub_download
import segmentation_models_pytorch as smp
import torch.nn as nn
import torch.nn.functional as F
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from ultralytics import YOLO

# Directory containing this file; put on sys.path so the local `language`
# package imports below resolve regardless of the current working directory.
SPACE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, SPACE_DIR)

from language.dialects import THREE_WORD, FOUR_WORD, CAPS_ONLY, MIXED_CASE
from language.ErrorCorrection import TzefaParser
from language import topy

# ══════════════════════════════════════════════════════════════
# CONFIG
# ══════════════════════════════════════════════════════════════

# Hugging Face access token taken from the environment (None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN")
# Torch device used for the binarization and OCR models.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Hub repositories / filenames of the two binarization checkpoints.
BIN_B3_REPO = "WARAJA/b3_model"
BIN_B3_FILE = "b3_model.pth"
BIN_B5_REPO = "WARAJA/b5_model"
BIN_B5_FILE = "b5_model.pth"

# Hub repository / filename of the YOLO line-segmentation weights.
YOLO_REPO = "WARAJA/Tzefa-Line-Segmentation-YOLO"
YOLO_FILE = "best.pt"

# Fine-tuned TrOCR weights, and the repo its processor is loaded from.
TROCR_REPO = "WARAJA/Tzefa-Word-OCR-TrOCR"
TROCR_BASE_PROC = "microsoft/trocr-small-stage1"

TILE_SIZE = 640          # edge length (px) of the square tiles fed to the binarizer
YOLO_IMGSZ = 640         # `imgsz` argument passed to YOLO.predict
MAX_DILATE_ITERS = 200   # upper bound on dilation passes in word segmentation

# Hub repository downloaded for the Eynollah text-line model.
EYNOLLAH_REPO = "SBB/eynollah-textline"

# UI dropdown label -> language configuration object.
_DIALECT_MAP = {"4-word (verbose)": FOUR_WORD, "3-word (classic)": THREE_WORD}
_CASING_MAP = {"CAPS only": CAPS_ONLY, "Mixed case": MIXED_CASE}
# ══════════════════════════════════════════════════════════════
# 1. BINARIZATION
# ══════════════════════════════════════════════════════════════

class HighResMAnet(nn.Module):
    """MAnet segmentation network fused with a full-resolution convolutional stem.

    The input is run through both the MAnet encoder/decoder and a small
    two-layer stem; the decoder output (16 channels) and the stem output
    (32 channels) are concatenated and reduced to ``classes`` logit maps.

    Attribute names are part of the checkpoint's state-dict keys and must not
    be renamed.
    """

    def __init__(self, encoder_name="mit_b5", classes=1):
        super().__init__()
        self.base_model = smp.MAnet(
            encoder_name=encoder_name,
            encoder_weights=None,
            in_channels=3,
            classes=classes,
            encoder_depth=5,
            decoder_channels=(256, 128, 64, 32, 16),
        )
        self.high_res_stem = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(True),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(True),
        )
        # 48 input channels = 16 (last decoder stage) + 32 (stem output).
        self.final_fusion = nn.Sequential(
            nn.Conv2d(48, 16, 3, padding=1),
            nn.ReLU(True),
            nn.Conv2d(16, classes, 1),
        )

    def forward(self, x):
        """Return per-pixel logits for the batch ``x``."""
        hr = self.high_res_stem(x)
        feat = self.base_model.encoder(x)
        dec = self.base_model.decoder(feat)
        return self.final_fusion(torch.cat([dec, hr], dim=1))


def _load_bin_weights(model, repo_id, filename):
    """Download a checkpoint from the Hub, load it into ``model``, return it in eval mode on DEVICE."""
    path = hf_hub_download(repo_id=repo_id, filename=filename, token=HF_TOKEN, repo_type="model")
    ckpt = torch.load(path, map_location=DEVICE)
    # Checkpoints may be either a bare state dict or wrapped under "model_state_dict".
    model.load_state_dict(ckpt.get("model_state_dict", ckpt))
    return model.to(DEVICE).eval()


def _load_bin_models():
    """Load both binarization models.

    Returns:
        dict mapping ``"mit_b3"`` and ``"mit_b5"`` to the corresponding model,
        each moved to DEVICE and switched to eval mode.
    """
    models = {}

    print(f"Downloading b3_model from: {BIN_B3_REPO}")
    models["mit_b3"] = _load_bin_weights(
        smp.Unet(encoder_name="mit_b3", encoder_weights=None, in_channels=3, classes=1),
        BIN_B3_REPO,
        BIN_B3_FILE,
    )

    print(f"Downloading b5_model from: {BIN_B5_REPO}")
    models["mit_b5"] = _load_bin_weights(
        HighResMAnet(encoder_name="mit_b5"),
        BIN_B5_REPO,
        BIN_B5_FILE,
    )
    return models


def _preprocess_tile(pil_img):
    """Convert an RGB tile to a normalised channel-first float32 tensor.

    Pixel values are scaled to [0, 1] and then standardised per channel with
    the mean/std constants below. The constants are kept in float32 so the
    result is not silently promoted to float64.
    """
    arr = np.array(pil_img).astype(np.float32) / 255.0
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    return torch.from_numpy(((arr - mean) / std).transpose(2, 0, 1))


def binarize(pil_img, model):
    """Binarize an RGB PIL image tile by tile.

    The image is padded with white to a multiple of TILE_SIZE, every tile is
    run through ``model``, and the thresholded predictions are stitched back
    together. Pixels whose sigmoid score exceeds 0.5 become 0 in the output,
    all others 255.

    Returns:
        Mode "L" PIL image with the same size as ``pil_img``.
    """
    orig_w, orig_h = pil_img.size
    pad_w = (TILE_SIZE - orig_w % TILE_SIZE) % TILE_SIZE
    pad_h = (TILE_SIZE - orig_h % TILE_SIZE) % TILE_SIZE

    padded = Image.new("RGB", (orig_w + pad_w, orig_h + pad_h), (255, 255, 255))
    padded.paste(pil_img, (0, 0))
    nw, nh = padded.size
    canvas = Image.new("L", (nw, nh), 255)

    for y in range(0, nh, TILE_SIZE):
        for x in range(0, nw, TILE_SIZE):
            tile = padded.crop((x, y, x + TILE_SIZE, y + TILE_SIZE))
            t = _preprocess_tile(tile).unsqueeze(0).to(DEVICE).float()
            with torch.no_grad():
                logits = model(t)
            # Some models emit a different spatial size; bring it back to the tile size.
            if logits.shape[-2:] != (TILE_SIZE, TILE_SIZE):
                logits = F.interpolate(
                    logits, (TILE_SIZE, TILE_SIZE), mode="bilinear", align_corners=False
                )
            mask = (torch.sigmoid(logits) > 0.5).float().cpu().numpy()[0, 0]
            canvas.paste(Image.fromarray(((1.0 - mask) * 255).astype(np.uint8)), (x, y))

    return canvas.crop((0, 0, orig_w, orig_h))


# ══════════════════════════════════════════════════════════════
# 2. LINE SEGMENTATION
# ══════════════════════════════════════════════════════════════

def _load_yolo():
    """Download the YOLO line-segmentation weights and return the loaded model."""
    path = hf_hub_download(YOLO_REPO, YOLO_FILE, token=HF_TOKEN, repo_type="model")
    return YOLO(path)


def segment_lines_yolo(bin_arr, yolo_model):
    """Detect text lines with YOLO and return one padded box per line.

    Args:
        bin_arr: image array; 2-D input is converted to RGB first.
        yolo_model: object exposing ``predict`` as used below.

    Returns:
        List of ``(x, y, w, h)`` integer tuples sorted top-to-bottom. Empty
        when nothing is detected.
    """
    img_rgb = cv2.cvtColor(bin_arr, cv2.COLOR_GRAY2RGB) if bin_arr.ndim == 2 else bin_arr
    orig_h, orig_w = img_rgb.shape[:2]

    results = yolo_model.predict(img_rgb, imgsz=YOLO_IMGSZ, conf=0.1, iou=0.2, verbose=False)
    if len(results) == 0:
        return []
    result = results[0]

    # 1. Collect raw detections, keeping centre-Y and height for grouping and
    #    the four corner points for the final extent.
    extracted_boxes = []
    if result.obb is not None and len(result.obb) > 0:
        xywhr = result.obb.xywhr.cpu().numpy()        # [x_center, y_center, width, height, rotation]
        corners = result.obb.xyxyxyxy.cpu().numpy()   # four corner points per box
        for row, pts in zip(xywhr, corners):
            extracted_boxes.append({"yc": row[1], "h": row[3], "pts": pts})
    elif result.boxes is not None and len(result.boxes) > 0:
        # Fallback for a model that yields axis-aligned boxes only.
        xywh = result.boxes.xywh.cpu().numpy()
        xyxy = result.boxes.xyxy.cpu().numpy()
        for row, (bx1, by1, bx2, by2) in zip(xywh, xyxy):
            extracted_boxes.append({
                "yc": row[1],
                "h": row[3],
                "pts": np.array([[bx1, by1], [bx2, by1], [bx2, by2], [bx1, by2]]),
            })

    if not extracted_boxes:
        return []

    # 2. Process boxes top-to-bottom by centre Y.
    extracted_boxes.sort(key=lambda b: b["yc"])

    # 3. Group boxes into lines: a box joins the first group whose mean centre
    #    lies within 60% of that group's mean height.
    merged_groups = []
    for box in extracted_boxes:
        for group in merged_groups:
            group_yc = np.mean([b["yc"] for b in group])
            avg_h = np.mean([b["h"] for b in group])
            if abs(box["yc"] - group_yc) < (avg_h * 0.6):
                group.append(box)
                break
        else:
            merged_groups.append([box])

    # 4. One axis-aligned box per group, padded 12% horizontally and 10%
    #    vertically, clipped to the image.
    truelines = []
    for group in merged_groups:
        all_pts = np.concatenate([b["pts"] for b in group], axis=0)
        rx0, rx1 = np.min(all_pts[:, 0]), np.max(all_pts[:, 0])
        ry0, ry1 = np.min(all_pts[:, 1]), np.max(all_pts[:, 1])

        pad_x = (rx1 - rx0) * 0.12
        pad_y = (ry1 - ry0) * 0.10

        x0 = int(np.clip(rx0 - pad_x, 0, orig_w))
        x1 = int(np.clip(rx1 + pad_x, 0, orig_w))
        y0 = int(np.clip(ry0 - pad_y, 0, orig_h))
        y1 = int(np.clip(ry1 + pad_y, 0, orig_h))

        if x1 - x0 > 0 and y1 - y0 > 0:
            truelines.append((x0, y0, x1 - x0, y1 - y0))

    # 5. Final order: top-to-bottom.
    truelines.sort(key=lambda b: b[1])
    return truelines


# Lazily-populated cache for the Eynollah model and its download directory.
_eynollah_model = None
_eynollah_model_dir = None


def _load_eynollah_model():
    """Return the Eynollah text-line model, downloading and loading it on first use."""
    global _eynollah_model, _eynollah_model_dir
    if _eynollah_model is not None:
        return _eynollah_model

    import tensorflow as tf
    from huggingface_hub import snapshot_download

    # Ask TensorFlow to grow GPU memory on demand instead of reserving it all,
    # since PyTorch models share the same device.
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(f"[Eynollah] Warning: Could not set TF memory growth: {e}")

    print("[Eynollah] Downloading SBB/eynollah-textline from HF...")
    _eynollah_model_dir = snapshot_download(repo_id=EYNOLLAH_REPO, repo_type="model")
    print(f"[Eynollah] Model directory: {_eynollah_model_dir}")
    _eynollah_model = tf.saved_model.load(_eynollah_model_dir)
    print("[Eynollah] Model loaded.")
    return _eynollah_model


def segment_lines_eynollah(img_input):
    """Detect text lines with the Eynollah model.

    Args:
        img_input: numpy array (2-D or 3-D) or PIL image.

    Returns:
        List of ``(x, y, w, h)`` integer tuples sorted top-to-bottom. Every
        line shares the same horizontal extent (the padded union of all
        detected lines). Empty when no region passes the area filter.
    """
    import tensorflow as tf

    print("\n[Eynollah] Starting Line Segmentation...")

    if isinstance(img_input, np.ndarray):
        if img_input.ndim == 2:
            img_rgb = cv2.cvtColor(img_input, cv2.COLOR_GRAY2RGB)
        else:
            img_rgb = img_input
        pil_img = Image.fromarray(img_rgb).convert("RGB")
    else:
        pil_img = img_input.convert("RGB")

    orig_w, orig_h = pil_img.size
    print(f"[Eynollah] Input size: {orig_w}x{orig_h}")

    model = _load_eynollah_model()

    # ── Resize to the fixed input size used for this model ──
    MODEL_H, MODEL_W = 448, 672
    resized = pil_img.resize((MODEL_W, MODEL_H), Image.LANCZOS)
    arr = np.array(resized).astype(np.float32) / 255.0
    inp = tf.constant(arr[np.newaxis, ...])

    # ── Inference ──
    infer = model.signatures.get("serving_default") or model
    raw_out = infer(input_1=inp)

    # A signature call returns a dict of outputs; take the first entry.
    if isinstance(raw_out, dict):
        prob = next(iter(raw_out.values())).numpy()[0]
    else:
        prob = raw_out.numpy()[0]

    # Use channel 1 when the output has at least two channels, else channel 0.
    if prob.ndim == 3:
        prob = prob[..., 1] if prob.shape[-1] >= 2 else prob[..., 0]

    mask = (prob > 0.5).astype(np.uint8) * 255
    mask_full = cv2.resize(mask, (orig_w, orig_h), interpolation=cv2.INTER_NEAREST)

    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask_full, connectivity=8)

    # Discard components smaller than 0.05% of the image area.
    MIN_AREA = orig_w * orig_h * 0.0005
    raw_boxes = []
    for lbl in range(1, num_labels):  # label 0 is the background
        if stats[lbl, cv2.CC_STAT_AREA] < MIN_AREA:
            continue
        x = stats[lbl, cv2.CC_STAT_LEFT]
        y = stats[lbl, cv2.CC_STAT_TOP]
        w = stats[lbl, cv2.CC_STAT_WIDTH]
        h = stats[lbl, cv2.CC_STAT_HEIGHT]
        raw_boxes.append([float(x), float(y), float(x + w), float(y + h)])

    if not raw_boxes:
        print("[Eynollah] No line regions detected.")
        return []

    print(f"[Eynollah] Raw components: {len(raw_boxes)}")

    # Merge components whose vertical ranges overlap into a single line box.
    raw_boxes.sort(key=lambda b: (b[1] + b[3]) / 2)
    merged = []
    for box in raw_boxes:
        for m in merged:
            if max(box[1], m[1]) < min(box[3], m[3]):
                m[0] = min(m[0], box[0])
                m[1] = min(m[1], box[1])
                m[2] = max(m[2], box[2])
                m[3] = max(m[3], box[3])
                break
        else:
            merged.append(list(box))

    merged.sort(key=lambda b: b[1])
    print(f"[Eynollah] Found {len(merged)} lines.")

    # Give every line the same horizontal span (union of all lines plus 5%
    # padding) so short lines are not clipped tightly. `merged` is never empty
    # here because `raw_boxes` was non-empty.
    global_min_x = min(b[0] for b in merged)
    global_max_x = max(b[2] for b in merged)
    pad_x = (global_max_x - global_min_x) * 0.05
    x0_global = int(np.clip(global_min_x - pad_x, 0, orig_w))
    x1_global = int(np.clip(global_max_x + pad_x, 0, orig_w))

    final_lines = []
    for b in merged:
        # 10% vertical padding per line.
        pad_y = (b[3] - b[1]) * 0.10
        y0 = int(np.clip(b[1] - pad_y, 0, orig_h))
        y1 = int(np.clip(b[3] + pad_y, 0, orig_h))
        final_lines.append((x0_global, y0, x1_global - x0_global, y1 - y0))

    return final_lines


# ══════════════════════════════════════════════════════════════
# 3. WORD SEGMENTATION
# ══════════════════════════════════════════════════════════════

def _get_word_boxes(dilated, min_w, min_h):
    """Return bounding rects of external contours at least ``min_w`` x ``min_h``, sorted left-to-right."""
    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    rects = (cv2.boundingRect(c) for c in contours)
    return sorted(
        (b for b in rects if b[2] >= min_w and b[3] >= min_h),
        key=lambda b: b[0],
    )


def segment_words(bin_arr, lines, target_words):
    """Split each line into exactly ``target_words`` horizontal word spans.

    Each line crop is inverted and dilated repeatedly until the number of
    blobs equals ``target_words``. If the count drops below the target (or the
    iteration budget runs out) while the last usable result had too many
    blobs, the closest neighbours are merged until the target is reached.
    Lines for which no such split exists are omitted.

    Args:
        bin_arr: binarized image array.
        lines: iterable of ``(x, y, w, h)`` line boxes.
        target_words: required number of words per line.

    Returns:
        ``{line_number: {word_number: (x_start, x_end)}}`` with 1-based keys;
        x coordinates are relative to the line's left edge.
    """
    words_dict = {}
    # Nothing to do; also avoids min() on an empty gap list when target is <= 0.
    if target_words <= 0 or not lines:
        return words_dict

    ih, iw = bin_arr.shape[:2]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))

    for i, (lx, ly, lw, lh) in enumerate(lines):
        # Clamp the line box to the image.
        ly, lx = max(0, ly), max(0, lx)
        lh, lw = min(lh, ih - ly), min(lw, iw - lx)
        if lw <= 0 or lh <= 0:
            continue

        crop = bin_arr[ly:ly + lh, lx:lx + lw]
        inv = cv2.bitwise_not(crop)

        # Ignore blobs narrower than 2% of the line or shorter than 25% of it.
        min_ww = max(5, int(lw * 0.02))
        min_wh = max(5, int(lh * 0.25))

        dilated, prev, found = inv.copy(), None, False
        for _ in range(MAX_DILATE_ITERS):
            dilated = cv2.dilate(dilated, kernel, iterations=1)
            boxes = _get_word_boxes(dilated, min_ww, min_wh)
            if len(boxes) == target_words:
                prev, found = boxes, True
                break
            if len(boxes) < target_words:
                # Overshot: keep the last over-segmented result (if any).
                break
            prev = boxes

        if not found and prev and len(prev) > target_words:
            prev = list(prev)
            while len(prev) > target_words:
                # Merge the pair of neighbours with the smallest horizontal gap.
                gaps = [
                    (prev[j + 1][0] - (prev[j][0] + prev[j][2]), j)
                    for j in range(len(prev) - 1)
                ]
                _, mi = min(gaps)
                b1, b2 = prev[mi], prev[mi + 1]
                left = min(b1[0], b2[0])
                top = min(b1[1], b2[1])
                prev[mi] = (
                    left,
                    top,
                    max(b1[0] + b1[2], b2[0] + b2[2]) - left,
                    max(b1[1] + b1[3], b2[1] + b2[3]) - top,
                )
                prev.pop(mi + 1)
            found = True

        if not found or not prev or len(prev) != target_words:
            continue

        words_dict[i + 1] = {
            wi + 1: (wx, wx + ww) for wi, (wx, wy, ww, wh) in enumerate(prev)
        }

    return words_dict


# ══════════════════════════════════════════════════════════════
# 4. OCR
# ══════════════════════════════════════════════════════════════

def _load_trocr():
    """Return ``(processor, model)`` for TrOCR, with the model on DEVICE in eval mode."""
    proc = TrOCRProcessor.from_pretrained(TROCR_BASE_PROC, use_fast=False)
    model = VisionEncoderDecoderModel.from_pretrained(TROCR_REPO, token=HF_TOKEN).to(DEVICE).eval()
    return proc, model


def _pad_aspect(img, max_ratio=4.0):
    """Pad ``img`` with white above and below so that width / height <= ``max_ratio``.

    Images already within the ratio are returned unchanged.
    """
    w, h = img.size
    if w <= max_ratio * h:
        return img
    th = int(w / max_ratio)
    pad = th - h
    from PIL import ImageOps
    return ImageOps.expand(img, (0, pad // 2, 0, pad - pad // 2), fill=(255, 255, 255))


def ocr_word(img_pil, proc, model):
    """Recognise a single word image.

    Returns:
        The longest whitespace-separated token of the decoded text, or the raw
        decoded text when it contains no tokens.
    """
    if img_pil.mode != "RGB":
        img_pil = img_pil.convert("RGB")
    img_pil = _pad_aspect(img_pil)
    pv = proc(img_pil, return_tensors="pt").pixel_values.to(DEVICE)
    with torch.no_grad():
        ids = model.generate(pv)
    txt = proc.batch_decode(ids, skip_special_tokens=True)[0]
    parts = txt.split()
    return max(parts, key=len) if parts else txt
# ══════════════════════════════════════════════════════════════
# 5. VISUALISATION
# ══════════════════════════════════════════════════════════════

def draw_line_bboxes(img_arr, bboxes):
    """Return a copy of ``img_arr`` with each ``(x, y, w, h)`` box outlined and numbered from 1."""
    vis = cv2.cvtColor(img_arr, cv2.COLOR_GRAY2RGB) if len(img_arr.shape) == 2 else img_arr.copy()
    for i, (x, y, w, h) in enumerate(bboxes):
        cv2.rectangle(vis, (x, y), (x + w, y + h), (255, 50, 50), 2)
        cv2.putText(vis, str(i + 1), (x, max(y - 5, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (50, 50, 255), 2)
    return vis


def draw_word_bboxes(img_arr, word_tuples):
    """Return a copy of ``img_arr`` with every recognised word outlined and labelled.

    Args:
        img_arr: image array (2-D input is converted to RGB).
        word_tuples: list of lines, each a list of
            ``(text, (x1, y1, x2, y2))`` entries.
    """
    vis = cv2.cvtColor(img_arr, cv2.COLOR_GRAY2RGB) if len(img_arr.shape) == 2 else img_arr.copy()
    # Colour cycles by word position within the line.
    colors = [(50, 220, 50), (50, 180, 255), (255, 180, 50), (220, 50, 220)]
    for lt in word_tuples:
        for wi, (text, (x1, y1, x2, y2)) in enumerate(lt):
            c = colors[wi % len(colors)]
            cv2.rectangle(vis, (x1, y1), (x2, y2), c, 2)
            cv2.putText(vis, text, (x1, max(y1 - 4, 0)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, c, 1)
    return vis


# ══════════════════════════════════════════════════════════════
# 6. UTILITIES
# ══════════════════════════════════════════════════════════════

def clear_vram():
    """Run the garbage collector and, when CUDA is available, empty torch's CUDA cache."""
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


def execute_code(compiled_code, timeout=15):
    """Run generated Python source in a subprocess and return its textual output.

    Args:
        compiled_code: Python source produced by the compiler stage.
        timeout: seconds before the subprocess is killed.

    Returns:
        stdout, followed by stderr and a non-zero exit code notice when
        present; ``"(no output)"`` if that is blank; or a bracketed error
        message when the code is empty, times out, or cannot be launched.
    """
    if not compiled_code:
        return "[Execution error: Compiled code is empty.]"

    python_exe = sys.executable if sys.executable else "python"
    working_dir = SPACE_DIR if SPACE_DIR is not None else os.getcwd()

    # The generated source refers to modules by names that do not exist in
    # this workspace; rewrite them to the local `language.createdpython`.
    patched_code = compiled_code.replace("Tzefa_Language.createdpython", "language.createdpython")
    patched_code = patched_code.replace("from vm import *", "from language.createdpython import *")

    # Prepend the workspace to sys.path so `language.createdpython` resolves.
    path_injection = f"""import sys\nimport os\nsys.path.insert(0, {working_dir!r})\n"""
    full_code = path_injection + patched_code

    try:
        result = subprocess.run(
            [python_exe, "-c", full_code],
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd=working_dir,
        )
    except subprocess.TimeoutExpired:
        return f"[Execution timed out after {timeout} seconds]"
    except Exception as e:
        return f"[Execution error: {e}]"

    output = result.stdout
    if result.stderr:
        output += "\n--- STDERR ---\n" + result.stderr
    if result.returncode != 0:
        output += f"\n[Process exited with code {result.returncode}]"
    return output.strip() or "(no output)"


# ══════════════════════════════════════════════════════════════
# 7. FULL PIPELINE
# ══════════════════════════════════════════════════════════════

def _choice_label(value, options):
    """Map an integer index into ``options`` to its label; pass any other value through."""
    return options[value] if isinstance(value, int) else value


def run_full_pipeline(input_image, bin_model_choice, dialect_choice, casing_choice, seg_method):
    """Run all six stages on one image.

    Args:
        input_image: PIL image or numpy array (``None`` aborts immediately).
        bin_model_choice: ``"mit_b3"`` / ``"mit_b5"`` or its index.
        dialect_choice: dialect dropdown label or its index.
        casing_choice: casing dropdown label or its index.
        seg_method: ``"YOLO"`` / ``"Eynollah"`` or its index.

    Returns:
        8-tuple ``(binarized, line_vis, word_vis, raw_text, corrected_text,
        compiled_python, execution_output, status)``. When a stage fails, the
        outputs of later stages are ``None`` / ``""`` and ``status`` holds the
        error message.
    """
    if input_image is None:
        return None, None, None, "", "", "", "", "No image provided."

    # Choices may arrive as integer indices into the dropdown lists.
    bin_model_choice = _choice_label(bin_model_choice, ["mit_b3", "mit_b5"])
    dialect_choice = _choice_label(dialect_choice, ["4-word (verbose)", "3-word (classic)"])
    casing_choice = _choice_label(casing_choice, ["CAPS only", "Mixed case"])
    seg_method = _choice_label(seg_method, ["YOLO", "Eynollah"])

    if isinstance(input_image, np.ndarray):
        pil_img = Image.fromarray(input_image).convert("RGB")
    else:
        pil_img = input_image.convert("RGB")

    dialect = _DIALECT_MAP.get(dialect_choice, FOUR_WORD)
    casing = _CASING_MAP.get(casing_choice, CAPS_ONLY)
    status = []

    importlib.reload(topy)
    parser = TzefaParser(dialect=dialect, casing=casing)
    target_words = parser.expected_words_per_line

    # ── Stage 1: Binarization ──
    try:
        status.append("[1/6] Binarization...")
        bin_models = _load_bin_models()
        try:
            bin_pil = binarize(pil_img, bin_models[bin_model_choice])
        finally:
            # Release the models even if inference fails.
            del bin_models
            clear_vram()
        bin_arr = np.array(bin_pil)
        status.append(" OK")
    except Exception as e:
        return None, None, None, "", "", "", "", f"Binarization failed: {e}"

    # ── Stage 2: Line Segmentation ──
    try:
        status.append(f"[2/6] Line Segmentation ({seg_method})...")
        if seg_method == "Eynollah":
            truelines = segment_lines_eynollah(bin_arr)
        else:
            yolo_model = _load_yolo()
            try:
                truelines = segment_lines_yolo(bin_arr, yolo_model)
            finally:
                del yolo_model
                clear_vram()
        status.append(f" OK {len(truelines)} lines detected")
        line_vis = draw_line_bboxes(bin_arr, truelines)
    except Exception as e:
        return bin_arr, None, None, "", "", "", "", f"Line Seg failed: {e}\n{traceback.format_exc()}"

    # ── Stage 3: Word Seg + OCR ──
    try:
        status.append("[3/6] Word Segmentation + OCR...")
        words = segment_words(bin_arr, truelines, target_words)
        proc, trocr_model = _load_trocr()
        all_line_tuples, raw_lines = [], []
        try:
            for ln in sorted(words.keys()):
                if ln - 1 >= len(truelines):
                    continue
                lx, ly, lw, lh = truelines[ln - 1]
                # Crop 20 px beyond the line vertically, clamped to the image.
                ay1 = max(0, ly - 20)
                ay2 = min(bin_arr.shape[0], ly + lh + 20)
                line_tuples = []
                for wn in sorted(words[ln].keys()):
                    wx1, wx2 = words[ln][wn]
                    ax1 = max(0, int(lx + wx1))
                    ax2 = min(bin_arr.shape[1], int(lx + wx2))
                    crop = bin_arr[ay1:ay2, ax1:ax2]
                    # An empty crop cannot be turned into an image; treat it
                    # as an empty token instead of failing the whole stage.
                    text = ocr_word(Image.fromarray(crop), proc, trocr_model) if crop.size else ""
                    line_tuples.append((text, (ax1, ay1, ax2, ay2)))
                raw_lines.append(" ".join(t[0] for t in line_tuples))
                all_line_tuples.append(line_tuples)
        finally:
            del proc, trocr_model
            clear_vram()
        word_vis = draw_word_bboxes(bin_arr, all_line_tuples)
        raw_text = "\n".join(raw_lines)
        status.append(f" OK {len(raw_lines)} lines recognised")
    except Exception as e:
        return bin_arr, line_vis, None, "", "", "", "", f"OCR failed: {e}\n{traceback.format_exc()}"

    # ── Stage 4: Error Correction ──
    try:
        status.append("[4/6] Error Correction...")
        parser.init_indent_table(len(truelines))
        corrected_lines, bytecode_list = [], []
        for line_entries in all_line_tuples:
            if not line_entries:
                corrected_lines.append("")
                bytecode_list.append(["MAKE", "INTEGER", "TEMPORARY", "0"])
                continue
            # Force exactly `target_words` tokens: truncate, then pad with "".
            raw_tokens = [t[0] for t in line_entries][:target_words]
            raw_tokens += [""] * (target_words - len(raw_tokens))
            normalised = parser.normalize_source_line(raw_tokens)
            bytecode = parser.parse_line(normalised)
            bytecode_list.append(bytecode)
            corrected_lines.append(" ".join(bytecode))
        corrected_text = "\n".join(corrected_lines)
        status.append(" OK")
    except Exception as e:
        return bin_arr, line_vis, word_vis, raw_text, "", "", "", f"Error Correction failed: {e}\n{traceback.format_exc()}"

    # ── Stage 5: Compilation ──
    try:
        status.append("[5/6] Compilation...")
        compiled_python = topy.make_py_file(bytecode_list)
        status.append(" OK")
    except Exception as e:
        return bin_arr, line_vis, word_vis, raw_text, corrected_text, "", "", f"Compilation failed: {e}\n{traceback.format_exc()}"

    # ── Stage 6: Execution ──
    try:
        status.append("[6/6] Execution...")
        execution_output = execute_code(compiled_python)
        status.append(" OK")
    except Exception as e:
        return bin_arr, line_vis, word_vis, raw_text, corrected_text, compiled_python, "", f"Execution failed: {e}\n{traceback.format_exc()}"

    status.append("\nPipeline Finished Successfully!")
    return bin_arr, line_vis, word_vis, raw_text, corrected_text, compiled_python, execution_output, "\n".join(status)


# ══════════════════════════════════════════════════════════════
# 8. GRADIO INTERFACE
# ══════════════════════════════════════════════════════════════

with gr.Blocks(title="Tzefa Pipeline", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Tzefa - Complete Pipeline Demo Space")
    gr.Markdown("Upload handwritten code to process through Binarization, Segmentation, OCR, Error Correction, and Execution.")

    with gr.Row():
        # Left column: inputs and options.
        with gr.Column(scale=1):
            input_image = gr.Image(
                value="demo.png",
                type="pil",
                label="Upload Handwritten Code"
            )
            bin_model = gr.Dropdown(
                choices=["mit_b3", "mit_b5"],
                value="mit_b5",
                label="Binarization Model"
            )
            dialect = gr.Dropdown(
                choices=["4-word (verbose)", "3-word (classic)"],
                value="3-word (classic)",
                label="Dialect"
            )
            casing = gr.Dropdown(
                choices=["CAPS only", "Mixed case"],
                value="CAPS only",
                label="Casing"
            )
            seg_method = gr.Dropdown(
                choices=["YOLO", "Eynollah"],
                value="Eynollah",
                label="Line Segmentation Method"
            )
            run_btn = gr.Button("Run Pipeline", variant="primary")

        # Right column: results, grouped into tabs.
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("Visual Output"):
                    out_bin = gr.Image(label="1. Binarization Result")
                    out_line = gr.Image(label="2. Line Segmentation")
                    out_word = gr.Image(label="3. Word Segmentation")
                with gr.TabItem("Text & Code"):
                    out_raw = gr.Textbox(label="4. Raw OCR Text", lines=5)
                    out_corrected = gr.Textbox(label="5. Corrected Bytecode", lines=5)
                    out_bytecode = gr.Textbox(label="6. Compiled Python Output", lines=5)
                with gr.TabItem("Execution & Logs"):
                    out_execution = gr.Textbox(label="7. Terminal Execution Output", lines=5)
                    out_status = gr.Textbox(label="Pipeline Status / Errors", lines=8)

    # Output order must match the 8-tuple returned by run_full_pipeline.
    run_btn.click(
        fn=run_full_pipeline,
        inputs=[input_image, bin_model, dialect, casing, seg_method],
        outputs=[out_bin, out_line, out_word, out_raw, out_corrected, out_bytecode, out_execution, out_status],
        api_name=False
    )

if __name__ == "__main__":
    demo.launch()